# For the following example files, there are two ways to merge denoised directions.
# Because the read sequences are in order, `mergePairs()` works.
# `mergePairsByID` always works,
# because it uses the read IDs to match denoised pairs.
exFileF = system.file("extdata", "sam1F.fastq.gz", package="dada2")
exFileR = system.file("extdata", "sam1R.fastq.gz", package="dada2")
srF = ShortRead::readFastq(exFileF)
srR = ShortRead::readFastq(exFileR)
derepF = derepFastq(exFileF)
derepR = derepFastq(exFileR)
dadaF <- dada(derepF, err=tperr1, errorEstimationFunction=loessErrfun, selfConsist=TRUE)
dadaR <- dada(derepR, err=tperr1, errorEstimationFunction=loessErrfun, selfConsist=TRUE)
# Run and compare
ex1time = system.time({
ex1 <- mergePairs(dadaF, derepF, dadaR, derepR, verbose = TRUE)
ex1 <- data.table::data.table(ex1)
})
ex1time
# The new function, based on read IDs.
ex2time = system.time({
ex2 = dada2:::mergePairsByID(dadaF = dadaF, derepF = derepF, srF = srF,
dadaR = dadaR, derepR = derepR, srR = srR, verbose = TRUE)
})
ex2time
# Compare results (should be identical)
ex2[(accept)]
data.table::setkey(ex2, sequence)
ex2[(accept), list(abundance = sum(abundance)), by = sequence]
# Same sequence set (exactly)
setequal(x = ex1$sequence,
y = ex2[(accept)]$sequence)
# Test concatenation functionality
ex1cattime = system.time({
ex1cat <- mergePairs(dadaF, derepF, dadaR, derepR, justConcatenate = TRUE, verbose = TRUE)
sapply(ex1cat, class)
# need to convert to a character
ex1cat$sequence <- unlist(ex1cat$sequence)
ex1cat <- data.table::data.table(ex1cat)
})
ex1cattime
ex2cattime = system.time({
ex2cat <- dada2:::mergePairsByID(dadaF = dadaF, derepF = derepF, srF = srF,
dadaR = dadaR, derepR = derepR, srR = srR,
justConcatenate = TRUE, verbose = TRUE)
})
ex2cattime
ex2cat[(accept)]
# Compare results (should be identical)
data.table::setkey(ex1cat, sequence)
ex1cat[(accept), list(abundance = sum(abundance)), by = sequence]
data.table::setkey(ex2cat, sequence)
ex2cat[(accept), list(abundance = sum(abundance)), by = sequence]
# Same sequence set (exactly)
setequal(x = ex1cat$sequence,
y = ex2cat$sequence)
intersect(x = ex1cat$sequence,
y = ex2cat$sequence)
ex1cat[, nchar(sequence)]
ex2cat[, nchar(sequence)]
Run the code above in your browser using DataLab