if (all(require(Rsamtools) && 
        require(RNAseqData.HNRNPC.bam.chr14) &&
        require(GenomicAlignments) &&
        require(ShortRead) &&
        .Platform$OS.type != "windows")) {
  ## ----------------------------------------------------------------------
  ## Iterating through a BAM file
  ## ----------------------------------------------------------------------
 
  ## Select a single file and set 'yieldSize' in the BamFile object.
  fl <- RNAseqData.HNRNPC.bam.chr14_BAMFILES[[1]]
  bf <- BamFile(fl, yieldSize = 300000) 
 
  ## bamIterator() is initalized with a BAM file and returns a function.
  ## The return function requires no arguments and iterates through the
  ## file returning data chunks the size of yieldSize.
  bamIterator <- function(bf) {
      done <- FALSE
      if (!isOpen( bf))
          open(bf)
 
      function() {
          if (done)
              return(NULL)
          yld <- readGAlignments(bf) 
          if (length(yld) == 0L) {
              close(bf)
              done <<- TRUE
              NULL
          } else yld
      }
  }
 
  ## Initalize the iterator.
  ITER <- bamIterator(bf)
 
  ## Create a FUN that counts reads in a region of interest.
  roi <- GRanges("chr14", IRanges(seq(19e6, 107e6, by = 10e6), width = 10e6))
  counter <- function(reads, roi, ...) {
      countOverlaps(query = roi, subject = reads)
  }
  ## Create a MulticoreParam and call bpiterate().
  bpparam <- MulticoreParam(workers = 2) 
  res <- bpiterate(ITER, counter, BPPARAM = bpparam, roi = roi)
 
  ## The result length is the same as the number of data chunks.
  length(res)
  colSums(do.call(rbind, res))
 
  ## ----------------------------------------------------------------------
  ## Iterating through a FASTA file
  ## ----------------------------------------------------------------------
 
  ## Set data chunk size with 'n' in the FastqStreamer object.
  sp <- SolexaPath(system.file('extdata', package = 'ShortRead'))
  fl <- file.path(analysisPath(sp), "s_1_sequence.txt")
  fqs <- FastqStreamer(fl, n = 100)
 
  ## Create an iterator that returns data chunks the size of 'n'.
  fastqIterator <- function(fqs) {
      done <- FALSE
      if (!isOpen(fqs))
          open(fqs)
 
      function() {
          if (done)
              return(NULL)
          yld <- yield(fqs) 
          if (length(yld) == 0L) {
              close(fqs)
              done <<- TRUE
              NULL
          } else yld
      }
  }
 
  ## Initialize the iterator.
  ITER <- fastqIterator(fqs)
 
  ## The processor summarizes the number of times each sequence occurs.
  summary <- function(reads, ...) {
       tables(reads, n = 0)$distribution
  }
 
  bpparam <- MulticoreParam(workers = 2) 
  bpiterate(ITER, summary, BPPARAM = bpparam)
 
  ## Results from the workers are combined on the fly when a 
  ## REDUCE function is provided. Collapsing the data in this 
  ## way can substantially reduce memory requirements.
  fqs <- FastqStreamer(fl, n = 100)
  ITER <- fastqIterator(fqs)
  bpiterate(ITER, summary, BPPARAM = bpparam, REDUCE = merge, all = TRUE)
 
  ## ----------------------------------------------------------------------
  ## Multiple files
  ## ----------------------------------------------------------------------
  ## Currently bpiterate() is only implemented for the multi-core
  ## environment (i.e., MulticoreParam). A single machine may not
  ## have enough memory to process multiple files. In this case the
  ## files can be distributed over a snow cluster with bplapply() 
  ## then processed with bpiterate() on each cluster node.
 
  ## Select a subset of files, create a BamFileList.
  fls <- RNAseqData.HNRNPC.bam.chr14_BAMFILES[1:3]
  bfl <- BamFileList(fls, yieldSize = 200000) 
 
  ## Cluster size is defined by the number of files.
  snowp <- SnowParam(workers = length(bfl))
 
  ## Currently bpiterate() is only supported in the multi-core environment
  ## and must use MulticoreParam.
  myFUN <- function(file, bamIterator, counter, ...) {
    library(BiocParallel)  ## for bpiterate
    library(Rsamtools)     ## for Bam file manipulation
    library(GenomicAlignments) ## for readGAlignments
    ITER <- bamIterator(file)
    bpiterate(ITER, counter, BPPARAM = MulticoreParam(workers=2), roi = roi)
  }
 
  ## Distribute the files across the snow cluster workers with bplapply().
  ## Each cluster node acts as a master and spawns 2 children. 
  res <- bplapply(bfl, myFUN, BPPARM = snowp, bamIterator = bamIterator,
                  counter = counter, roi = roi) 
  ## The result is of length 3 (# of files).
  ## > length(res)
  ## [1] 3
 
  ## Each list element is of length 5 (# of iterations of size 'yieldSize').
  ## >  elementLengths(res)
  ## ERR127306 ERR127307 ERR127308 
  ##         5         5         5 
}
Run the code above in your browser using DataLab