if (requireNamespace("RNAseqData.HNRNPC.bam.chr14", quietly=TRUE)) {
## -----------------------------------------------------------------------
## Count junction reads in BAM files
## -----------------------------------------------------------------------
fls <- ## 8 bam files
RNAseqData.HNRNPC.bam.chr14::RNAseqData.HNRNPC.bam.chr14_BAMFILES
## Ranges of interest.
gr <- GRanges("chr14", IRanges(c(19100000, 106000000), width=1e7))
## MAP outputs a table of junction counts per range.
MAP <- function(range, file, ...) {
## for readGAlignments(), Rsamtools::ScanBamParam()
requireNamespace("GenomicAlignments", quietly=TRUE)
param = Rsamtools::ScanBamParam(which=range)
gal = GenomicAlignments::readGAlignments(file, param=param)
table(GenomicAlignments::njunc(gal))
}
## -----------------------------------------------------------------------
## reduceByFile:
## With no REDUCE, counts are computed for each range / file combination.
counts1 <- reduceByFile(gr, fls, MAP)
length(counts1) ## 8 files
elementNROWS(counts1) ## 2 ranges each
## Tables of counts for each range:
counts1[[1]]
## With a REDUCE, results are combined on the fly. This reducer sums the
## number of records in each range with exactly 1 junction.
REDUCE <- function(mapped, ...)
sum(sapply(mapped, "[", "1"))
reduceByFile(gr, fls, MAP, REDUCE)
## -----------------------------------------------------------------------
## reduceFiles:
## All ranges are treated as a single group:
counts2 <- reduceFiles(gr, fls, MAP)
## Counts are for all ranges grouped:
counts2[[1]]
## Contrast the above with that from reduceByFile() where counts
## are for each range separately:
counts1[[1]]
## -----------------------------------------------------------------------
## Methods for the GenomicFiles class:
## Both reduceByFiles() and reduceFiles() can operate on a GenomicFiles
## object.
colData <- DataFrame(method=rep("RNASeq", length(fls)),
format=rep("bam", length(fls)))
gf <- GenomicFiles(files=fls, rowRanges=gr, colData=colData)
gf
## Subset on ranges or files for different experimental runs.
dim(gf)
gf_sub <- gf[2, 3:4]
dim(gf_sub)
## When summarize = TRUE and no REDUCE is given, the output is a
## SummarizedExperiment object.
se <- reduceByFile(gf, MAP=MAP, summarize=TRUE)
se
## Data from the rowRanges, colData and metadata slots in the
## GenomicFiles are transferred to the SummarizedExperiment.
colData(se)
## Results are in the assays slot named 'data'.
assays(se)
}
Run the code above in your browser using DataLab