if (all(require(RNAseqData.HNRNPC.bam.chr14) &&
require(GenomicAlignments))) {
## -----------------------------------------------------------------------
## Nucleotide frequency of mapped reads
## -----------------------------------------------------------------------
## In this example nucleotide frequency of mapped reads is computed
## for a single file. The MAP step is run in parallel and REDUCE
## is iterative.
fl <- system.file(package="Rsamtools", "extdata", "ex1.bam")
bf <- BamFile(fl, yieldSize=500) ## typically, yieldSize=1e6
param <- ScanBamParam(
flag=scanBamFlag(isUnmappedQuery=FALSE),
what="seq")
YIELD <- function(X, ...) scanBam(X, param, ...)[[1]][['seq']]
MAP <- function(value, ...)
alphabetFrequency(value, collapse=TRUE)
REDUCE <- `+` # add successive alphabetFrequency matrices
reduceByYield(bf, YIELD, MAP, REDUCE, param=param, parallel=TRUE)
## -----------------------------------------------------------------------
## Coverage
## -----------------------------------------------------------------------
## reduceByYield() can be applied to multiple files by combining it
## with bplapply().
## FUN will be run on each worker; it contains the necessary arguments
## to reduceByYield() as well as a call to the function itself.
## reduceByYield() could also be run in parallel (parallel=TRUE)
## but in this example it is not.
FUN <- function(bf) {
library(GenomicAlignments)
library(GenomicFiles)
YIELD <- `readGAlignments`
MAP <- function(value, ...) coverage(value)[["chr14"]]
REDUCE <- `+`
reduceByYield(bf, YIELD, MAP, REDUCE)
}
## BAM files are distributed across Snow workers and each worker applies
## reduceByYield().
bfl <- BamFileList(RNAseqData.HNRNPC.bam.chr14_BAMFILES[1:3])
bplapply(bfl, FUN, BPPARAM = SnowParam(3))
}
Run the code above in your browser using DataLab