if (all(require(RNAseqData.HNRNPC.bam.chr14) &&
require(GenomicAlignments))) {
fls <- RNAseqData.HNRNPC.bam.chr14_BAMFILES ## 8 bam files
## -----------------------------------------------------------------------
## Basics of reduceByFile() and reduceByRange():
## -----------------------------------------------------------------------
## In this first example we provide a MAP only (no REDUCE).
## Ranges of interest.
gr <- GRanges("chr14", IRanges(c(19100000, 106000000), width=1e7))
## The MAP counts the number of junctions in each range
## (i.e., 'N' operations in the CIGAR).
MAP <- function(range, file, ...) {
library(GenomicAlignments)
param = ScanBamParam(which=range)
gal = readGAlignments(file, param=param)
table(njunc(gal))
}
## Length of the output corresponds to the number of files and
## the elementLengths to the number of ranges.
rbf <- reduceByFile(gr, fls, MAP)
length(rbf) ## 8 files
elementLengths(rbf) ## 2 ranges
## Each list element contains a table of counts, one for each range.
rbf[[1]]
## In contrast, reduceByRange() extracts data across files.
rbr <- reduceByRange(gr, fls, MAP)
## Output length corresponds to the number of ranges.
length(rbr) ## 2 ranges
elementLengths(rbr) ## 8 files
## Each list element contains a table of counts, one for each file.
do.call(rbind, rbr[[1]])
## Output a SummarizedExperiment instead of list:
se <- reduceByRange(gr, fls, MAP, summarize=TRUE)
assays(se)
## -----------------------------------------------------------------------
## Computing coverage across files:
## -----------------------------------------------------------------------
## Use reduceByRange() to compute coverage for a group of ranges
## across files.
## Regions of interest.
gr <- GRanges("chr14", IRanges(c(62262735, 63121531, 63980327),
width=214700))
## The MAP computes the pileups ...
MAP <- function(range, file, ...) {
library(GenomicRanges)
param = ScanBamParam(which=range)
coverage(file, param=param)[range]
}
## and the REDUCE adds the last and current results.
REDUCE <- function(mapped, ...)
Reduce("+", mapped)
## Each call to coverage() produces an RleList which accumulate
## on the workers. When the REDUCE is applied iteratively the
## 'current' result is collapsed with the 'last' resulting in a
## maximum of 2 RleLists on a worker at a time.
cov1 <- reduceByRange(gr, fls, MAP, REDUCE, iterate=TRUE)
cov1[[1]]
## If memory use is not a concern (or if MAP output
## is not large) the REDUCE can be applied non-iteratively.
cov2 <- reduceByRange(gr, fls, MAP, REDUCE, iterate=FALSE)
## Results match those obtained with the iterative REDUCE.
cov2[[1]]
## -----------------------------------------------------------------------
## Organizing runs with the GenomicFiles class:
## -----------------------------------------------------------------------
## The GenomicFiles class is a light-weight form of SummarizedExperiment
## that does not have an 'assays' slot.
colData <- DataFrame(method=rep("RNASeq", length(fls)),
format=rep("bam", length(fls)))
gf <- GenomicFiles(files=fls, rowData=gr, colData=colData)
gf
## The object can be subset on ranges or files for different
## experimental runs.
dim(gf)
gf_sub <- gf[2, 3:4]
dim(gf_sub)
## When summarize = TRUE and no REDUCE is provided the reduceBy*
## functions output a SummarizedExperiment object.
se <- reduceByFile(gf, MAP=MAP, summarize=TRUE)
se
## Data from the rowData, colData and exptData slots in the
## GenomicFiles are transferred to the SummarizedExperiment.
colData(se)
## Results are in the assays slot.
assays(se)
}
Run the code above in your browser using DataLab