if (all(require(RNAseqData.HNRNPC.bam.chr14) &&
require(GenomicAlignments))) {
## -----------------------------------------------------------------------
## Nucleotide frequency of mapped reads
## -----------------------------------------------------------------------
## In this example nucleotide frequency of mapped reads is computed
## for a single file. The MAP step is run in parallel and REDUCE
## is iterative.
## Create a BamFile and set a 'yieldSize'.
fl <- system.file(package="Rsamtools", "extdata", "ex1.bam")
bf <- BamFile(fl, yieldSize=500)
## Define 'YIELD', 'MAP' and 'REDUCE' functions.
YIELD <- function(X, ...) {
flag = scanBamFlag(isUnmappedQuery=FALSE)
param = ScanBamParam(flag=flag, what="seq")
scanBam(X, param=param, ...)[[1]][['seq']]
}
MAP <- function(value, ...) {
requireNamespace("Biostrings", quietly=TRUE) ## for alphabetFrequency()
Biostrings::alphabetFrequency(value, collapse=TRUE)
}
REDUCE <- `+` # add successive alphabetFrequency matrices
## 'parallel=TRUE' runs the MAP step in parallel and is currently
## implemented for Unix/Mac only.
register(MulticoreParam(3))
reduceByYield(bf, YIELD, MAP, REDUCE, parallel=TRUE)
## -----------------------------------------------------------------------
## Coverage
## -----------------------------------------------------------------------
## If sufficient resources are available coverage can be computed
## across several large BAM files by combining reduceByYield() with
## bplapply().
## Create a BamFileList with a few sample files and a Snow cluster
## with the same number of workers as files.
bfl <- BamFileList(RNAseqData.HNRNPC.bam.chr14_BAMFILES[1:3])
bpparam <- SnowParam(length(bfl))
## 'FUN' is run on each worker. Because these are Snow workers each
## variable used in 'FUN' must be explicitly passed. (This is not the case
## when using Multicore.)
FUN <- function(bf, YIELD, MAP, REDUCE, parallel, ...) {
requireNamespace("GenomicFiles", quietly=TRUE) ## for reduceByYield()
GenomicFiles::reduceByYield(bf, YIELD, MAP, REDUCE, parallel=parallel)
}
## Passing parallel=FALSE to reduceByYield() runs the MAP step in serial on
## each worker. In this example, parallel dispatch is at the file-level
## only (bplapply()).
YIELD <- `readGAlignments`
MAP <- function(value, ...) {
requireNamespace("GenomicAlignments", quietly=TRUE)
GenomicAlignments::coverage(value)[["chr14"]]
}
bplapply(bfl, FUN, YIELD=YIELD, MAP=MAP, REDUCE=`+`,
parallel=FALSE, BPPARAM = bpparam)
## -----------------------------------------------------------------------
## Sample records from a Bam file
## -----------------------------------------------------------------------
fl <- system.file(package="Rsamtools", "extdata", "ex1.bam")
bf <- BamFile(fl, yieldSize=1000)
yield <- function(x)
readGAlignments(x, param=ScanBamParam(what=c( "qwidth", "mapq" )))
map <- identity
## Samples records from successive chunks of aligned reads.
reduceByYield(bf, yield, map, REDUCEsampler(1000, TRUE))
}
Run the code above in your browser using DataLab