# Load ExampleDb from the alakazam package, then keep only the IGHA/IGHG
# rows of sample "+7d" that belong to clones 3100, 3141 or 3184
data(ExampleDb, package = "alakazam")
db <- subset(
  ExampleDb,
  c_call %in% c("IGHA", "IGHG") &
    sample_id == "+7d" &
    clone_id %in% c("3100", "3141", "3184")
)
# Consensus via the "thresholdedFreq" method with a 0.6 minimum frequency;
# ties are broken deterministically and ambiguous characters are not used
clones <- collapseClones(
  db,
  cloneColumn = "clone_id",
  sequenceColumn = "sequence_alignment",
  germlineColumn = "germline_alignment_d_mask",
  method = "thresholdedFreq",
  minimumFrequency = 0.6,
  includeAmbiguous = FALSE,
  breakTiesStochastic = FALSE
)
# Consensus via the "mostCommon" method; ties are resolved deterministically
# by emitting ambiguous characters
clones <- collapseClones(
  db,
  cloneColumn = "clone_id",
  sequenceColumn = "sequence_alignment",
  germlineColumn = "germline_alignment_d_mask",
  method = "mostCommon",
  includeAmbiguous = TRUE,
  breakTiesStochastic = FALSE
)
# Build db2: a copy of db augmented with a mutation frequency column,
# which the "mostMutated" examples read through muFreqColumn
db2 <- observedMutations(db, frequency = TRUE, combine = TRUE)

# "mostMutated" method with stochastic tie-breaking (no tie-break columns)
clones <- collapseClones(
  db2,
  cloneColumn = "clone_id",
  sequenceColumn = "sequence_alignment",
  germlineColumn = "germline_alignment_d_mask",
  method = "mostMutated",
  muFreqColumn = "mu_freq",
  breakTiesStochastic = TRUE,
  breakTiesByColumns = NULL
)
# "mostMutated" method with deterministic tie-breaking driven by an extra
# column: duplicate_count paired with the max function
clones <- collapseClones(
  db2,
  cloneColumn = "clone_id",
  sequenceColumn = "sequence_alignment",
  germlineColumn = "germline_alignment_d_mask",
  method = "mostMutated",
  muFreqColumn = "mu_freq",
  breakTiesStochastic = FALSE,
  breakTiesByColumns = list(c("duplicate_count"), c(max))
)
# Restrict the consensus to the V segment (regionDefinition = IMGT_V) and
# use the "catchAll" method so every nucleotide variation is represented
# with ambiguous characters
clones <- collapseClones(
  db,
  cloneColumn = "clone_id",
  sequenceColumn = "sequence_alignment",
  germlineColumn = "germline_alignment_d_mask",
  method = "catchAll",
  regionDefinition = IMGT_V
)
# With expandedDb = TRUE the result has as many rows as the input db
clones <- collapseClones(
  db,
  cloneColumn = "clone_id",
  sequenceColumn = "sequence_alignment",
  germlineColumn = "germline_alignment_d_mask",
  method = "mostCommon",
  expandedDb = TRUE
)
# Run the code above in your browser using DataLab