## -----------------------------------------------------------------------
## Isoform expression :
## -----------------------------------------------------------------------
## findSpliceOverlaps() can assist in quantifying isoform expression
## by identifying reads that map compatibly and uniquely to a
## transcript isoform.
library(TxDb.Dmelanogaster.UCSC.dm3.ensGene)
library(pasillaBamSubset)
se <- untreated1_chr4() ## single-end reads
txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene
exbytx <- exonsBy(txdb, "tx")
cdsbytx <- cdsBy(txdb, "tx")
param <- ScanBamParam(which=GRanges("chr4", IRanges(1e5,3e5)))
sehits <- findSpliceOverlaps(se, exbytx, cds=cdsbytx, param=param)
## Tally the reads by category to get an idea of read distribution.
lst <- lapply(mcols(sehits), table)
nms <- names(lst)
tbl <- do.call(rbind, lst[nms])
tbl
## Reads compatible with one or more transcript isoforms.
rnms <- rownames(tbl)
tbl[rnms == "compatible","TRUE"]/sum(tbl[rnms == "compatible",])
## Reads compatible with a single isoform.
tbl[rnms == "unique","TRUE"]/sum(tbl[rnms == "unique",])
## All reads fall in a coding region as defined by
## the txdb annotation.
lst[["coding"]]
## Check : Total number of reads should be the same across categories.
lapply(lst, sum)
## -----------------------------------------------------------------------
## Paired-end reads :
## -----------------------------------------------------------------------
## 'singleEnd' is set to FALSE for a BAM file with paired-end reads.
pe <- untreated3_chr4()
hits2 <- findSpliceOverlaps(pe, exbytx, singleEnd=FALSE, param=param)
## In addition to BAM files, paired-end reads can be supplied in a
## GAlignmentPairs object.
genes <- GRangesList(
GRanges("chr1", IRanges(c(5, 20), c(10, 25)), "+"),
GRanges("chr1", IRanges(c(5, 22), c(15, 25)), "+"))
galp <- GAlignmentPairs(
GAlignments("chr1", 5L, "11M4N6M", strand("+")),
GAlignments("chr1", 50L, "6M", strand("-")))
findSpliceOverlaps(galp, genes)
Run the code above in your browser using DataLab