# This first (don't run) part of the example presents all the
# available parameters and some variations; see the bottom of
# this section for a running example.
# Define an experiment description list with a classic epigenetic
# mark and one MNase experiment that will be seen as nucleosome
# 'density' or nucleosome 'positioning' (see the midPoint parameter)
# Experiment description list, keyed by experiment name. Every entry
# carries the same seven fields: where the alignment file lives, its
# format, how chromosome names are decorated, and the two flags
# 'pairedEnds' and 'midPoint'. The last two entries describe the same
# file and differ only by their 'midPoint' flag.
myExps <- list(
  mES_H3K4me3 = list(
    folderName = "/home/exp",
    fileName = "SRR432543.BAM",
    fileType = "BAM",
    chrPrefix = "chr",
    chrSuffix = "",
    pairedEnds = FALSE,
    midPoint = FALSE
  ),
  mES_MNase = list(
    folderName = "/home/exp",
    fileName = "SPT543426.BAM",
    fileType = "BAM",
    chrPrefix = "chr",
    chrSuffix = "",
    pairedEnds = TRUE,
    midPoint = FALSE
  ),
  mES_MNase_MIDPOINT = list(
    folderName = "/home/exp",
    fileName = "SPT543426.BAM",
    fileType = "BAM",
    chrPrefix = "chr",
    chrSuffix = "",
    pairedEnds = TRUE,
    midPoint = TRUE
  )
)
# Call the pipeline for the three experiments with basic parameters.
# This call is illustrative only (it belongs to the "don't run" part of
# the example): the files described in 'myExps' point to "/home/exp",
# so the call is wrapped in `if (FALSE)` to prevent it from being
# evaluated when the script is sourced. Remove the guard (and adapt the
# paths) to actually run it.
if (FALSE) {
  processPipeline(
    #### I/O GENERAL PARAMETERS
    # Experiments description list
    INPUTFilesList = myExps,
    # name of the folder that will contain results
    resultSubFolder = "Results",
    # name of the folder that will contain logs and figures
    reportFilesSubFolder = "ReportFiles",
    # generate results as WIG fixed steps
    WIGfs = TRUE,
    # generate results as WIG variable steps
    WIGvs = TRUE,
    # generate results as GFF files
    GFF = FALSE,
    #### COMPLEX PARAMETERS (SINGLE OR VECTORS OR LIST OF IT)
    # The threshold to detect artefactual piles will be incremented
    # by one every 10Million reads aligned for each experiment
    incrArtefactThrEvery = 10000000,
    # Along the genome one score every 50 basepairs will be computed
    binSize = 50,
    # The reads will be extended according to the in-silico estimation
    # algorithm or based on the pairs alignments (insert size)
    elongationSize = NA,
    # No subgroups selection for specific inserts or reads size
    rangeSelection = IRanges(0,-1),
    # no GFF files given, the module plotting statistics on reads and
    # annotations will not be loaded
    annotationFilesGFF = NA,
    # path to file or "mm9", "hg19"... This argument is needed only if
    # gff files are specified in 'annotationFilesGFF' argument
    annotationGenomeFiles = NA,
    #### SINGLE PARAMETERS
    # For single-end experiments, the fragment size will be estimated
    # between 50 and 400 with a resolution of 10bp
    elongationEstimationRange = c(mini=50, maxi=400, by=10),
    # The pipeline will try to save half-pairs from alignment and the
    # ones broken during 'artefact' removal
    rehabilitationStep = c("orphans","orphansFromArtefacts"),
    # Remove chromosomes with names containing "random" or "un"
    removeChrNamesContaining = "random|un",
    # For paired-ends ignore inserts > 500bp according to alignment
    ignoreInsertsOver = 500,
    # Use 1 cpu (recommended as first try to estimate the memory usage)
    nbCPUs = 1,
    # Do not erase pileups and result files from subcategories prior to
    # merging (orphans etc...)
    keepTemp = TRUE,
    # make a copy of the log for all experiments
    logTofile = "./log.txt",
    # In case the same computation is restarted, do not warn the user
    # and erase previous results
    eraseLog = TRUE,
    #### LIST PARAMETERS (one element per expName)
    # An eventual list of multiread repartition results
    multiLocFilesList = ""
  )
}
########
# The four "complex parameters" could have been declared like this
# to generate more results
# as a vector of values, each value will be used sequentially for
# all experiments
# incrArtefactThrEvery <- c(10000000,NA, -10)
# as a list for specifying arguments for individual experiments
# binSize <- list("mES_H3K4me3"=200,
# "mES_MNase"=50,
# "mES_MNase_MIDPOINT"=50)
# mixed, some experiment have one value, others have several
# elongationSize <- list("mES_H3K4me3"=c(NA,0),
# "mES_MNase"=c(146,NA),
# "mES_MNase_MIDPOINT"=NA)
# Compute without elongating reads (0), a fixed numeric value (not
# recommended), or estimate in-silico (or based on pairs) the
# optimal elongation (NA)
# rangeSelection <- list("mES_H3K4me3" =IRanges(0,-1),
# "mES_MNase"=c(IRanges(0,-1),
# IRanges(0,100),
# IRanges(100,1000)),
# "mES_MNase_MIDPOINT"=c(IRanges(0,-1),
# IRanges(0,100),
# IRanges(100,1000)))
#############################################
#### Actual runnable example on BAM file ####
#############################################
# Working directory for the runnable example: the session temporary folder
exampleFolder <- tempdir()

# Names of the example alignment file and of its index, as stored in the
# "extdata" folder of the Pasha package
testFileBAM_fileName <- "embedDataTest.bam"
testFileBAI_fileName <- "embedDataTest.bam.bai"

# Resolve where the installed package keeps each of these two files
testFileBAM_fullPath <- system.file(
  "extdata", testFileBAM_fileName,
  package = "Pasha"
)
testFileBAI_fullPath <- system.file(
  "extdata", testFileBAI_fileName,
  package = "Pasha"
)

# Copy the BAM file first, then its index, into the working directory
file.copy(from = testFileBAM_fullPath, to = exampleFolder)
file.copy(from = testFileBAI_fullPath, to = exampleFolder)
# Create the data structure containing information on the experiments:
# a list with one entry per experiment name, each entry describing the
# input file (folder, name, type), the chromosome name prefix/suffix and
# the 'pairedEnds' / 'midPoint' flags.
INPUTFilesList <- list()
INPUTFilesList[["testBAM"]] <- list(folderName=exampleFolder,
                                    fileName=testFileBAM_fileName,
                                    fileType="BAM",
                                    chrPrefix="chr",
                                    chrSuffix="",
                                    pairedEnds=TRUE,
                                    midPoint=FALSE)
# Start the pipeline using default parameters (only the experiments
# description list is supplied)
processPipeline(INPUTFilesList)