## Input
## - A BAM file:
bamfile <- BamFile(system.file("extdata", "ex1.bam", package="Rsamtools"))
seqinfo(bamfile) # to see the seqlevels and seqlengths
stackStringsFromBam(bamfile, param="seq1:1-21") # a quick look at
# the reads
## - A GRanges object containing Individual Genomic Positions Of
## Interest:
my_IGPOI <- GRanges(Rle(c("seq1", "seq2"), c(7, 2)),
IRanges(c(1:5, 21, 1575, 1513:1514), width=1))
## Some preliminary massage on 'my_IGPOI'
seqinfo(my_IGPOI) <- merge(seqinfo(my_IGPOI), seqinfo(bamfile))
seqlevels(my_IGPOI) <- seqlevelsInUse(my_IGPOI)
## Load the BAM file in a GAlignments object. We load only the reads
## aligned to the sequences in 'seqlevels(my_IGPOI)' and we filter out
## reads not passing quality controls (flag bit 0x200) and PCR or
## optical duplicates (flag bit 0x400). See ?ScanBamParam and the SAM
## Spec for more information.
which <- as(seqinfo(my_IGPOI), "GRanges")
flag <- scanBamFlag(isNotPassingQualityControls=FALSE,
isDuplicate=FALSE)
what <- c("seq", "qual")
param <- ScanBamParam(flag=flag, what=c("seq", "qual"), which=which)
gal <- readGAlignments(bamfile, param=param)
seqlevels(gal) <- seqlevels(my_IGPOI)
## Extract the read sequences (a.k.a. query sequences) and quality
## strings. Both are reported with respect to the + strand.
qseq <- mcols(gal)$seq
qual <- mcols(gal)$qual
nucl_piles <- pileLettersAt(qseq, seqnames(gal), start(gal), cigar(gal),
my_IGPOI)
qual_piles <- pileLettersAt(qual, seqnames(gal), start(gal), cigar(gal),
my_IGPOI)
mcols(my_IGPOI)$nucl_piles <- nucl_piles
mcols(my_IGPOI)$qual_piles <- qual_piles
my_IGPOI
## Finally, to summarize A/C/G/T frequencies at each position:
alphabetFrequency(nucl_piles, baseOnly=TRUE)
Run the code above in your browser using DataLab