## Alignment with BWA (sequentially on single machine)
param <- system.file("extdata", "bwa.param", package="systemPipeR")
targets <- system.file("extdata", "targets.txt", package="systemPipeR")
args <- systemArgs(sysma=param, mytargets=targets)
sysargs(args)[1]
## Not run:
# system("bwa index -a bwtsw ./data/tair10.fasta")
# bampaths <- runCommandline(args=args)
#
# ## Alignment with BWA (parallelized on compute cluster)
# resources <- list(walltime="20:00:00", nodes=paste0("1:ppn=", cores(args)), memory="10gb")
# reg <- clusterRun(args, conffile=".BatchJobs.R", template="torque.tmpl", Njobs=18, runid="01",
# resourceList=resources)
#
# ## Variant calling with GATK
# ## The following creates in the inital step a new targets file
# ## (targets_bam.txt). The first column of this file gives the paths to
# ## the BAM files created in the alignment step. The new targets file and the
# ## parameter file gatk.param are used to create a new SYSargs
# ## instance for running GATK. Since GATK involves many processing steps, it is
# ## executed by a bash script gatk_run.sh where the user can specify the
# ## detailed run parameters. All three files are expected to be located in the
# ## current working directory. Samples files for gatk.param and
# ## gatk_run.sh are available in the subdirectory ./inst/extdata/ of the
# ## source file of the systemPipeR package.
# writeTargetsout(x=args, file="targets_bam.txt")
# system("java -jar CreateSequenceDictionary.jar R=./data/tair10.fasta O=./data/tair10.dict")
# # system("java -jar /opt/picard/1.81/CreateSequenceDictionary.jar R=./data/tair10.fasta O=./data/tair10.dict")
# args <- systemArgs(sysma="gatk.param", mytargets="targets_bam.txt")
# resources <- list(walltime="20:00:00", nodes=paste0("1:ppn=", 1), memory="10gb")
# reg <- clusterRun(args, conffile=".BatchJobs.R", template="torque.tmpl", Njobs=18, runid="01",
# resourceList=resources)
# writeTargetsout(x=args, file="targets_gatk.txt")
#
# ## Variant calling with BCFtools
# ## The following runs the variant calling with BCFtools. This step requires in
# ## the current working directory the parameter file sambcf.param and the
# ## bash script sambcf_run.sh.
# args <- systemArgs(sysma="sambcf.param", mytargets="targets_bam.txt")
# resources <- list(walltime="20:00:00", nodes=paste0("1:ppn=", 1), memory="10gb")
# reg <- clusterRun(args, conffile=".BatchJobs.R", template="torque.tmpl", Njobs=18, runid="01",
# resourceList=resources)
# writeTargetsout(x=args, file="targets_sambcf.txt")
#
# ## Filtering of VCF files generated by GATK
# args <- systemArgs(sysma="filter_gatk.param", mytargets="targets_gatk.txt")
# filter <- "totalDepth(vr) >= 2 & (altDepth(vr) / totalDepth(vr) >= 0.8) & rowSums(softFilterMatrix(vr))==4"
# # filter <- "totalDepth(vr) >= 20 & (altDepth(vr) / totalDepth(vr) >= 0.8) & rowSums(softFilterMatrix(vr))==6"
# filterVars(args, filter, varcaller="gatk", organism="A. thaliana")
# writeTargetsout(x=args, file="targets_gatk_filtered.txt")
#
# ## Filtering of VCF files generated by BCFtools
# args <- systemArgs(sysma="filter_sambcf.param", mytargets="targets_sambcf.txt")
# filter <- "rowSums(vr) >= 2 & (rowSums(vr[,3:4])/rowSums(vr[,1:4]) >= 0.8)"
# # filter <- "rowSums(vr) >= 20 & (rowSums(vr[,3:4])/rowSums(vr[,1:4]) >= 0.8)"
# filterVars(args, filter, varcaller="bcftools", organism="A. thaliana")
# writeTargetsout(x=args, file="targets_sambcf_filtered.txt")
#
# ## Annotate filtered variants from GATK
# args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt")
# txdb <- loadDb("./data/tair10.sqlite")
# fa <- FaFile(systemPipeR::reference(args))
# variantReport(args=args, txdb=txdb, fa=fa, organism="A. thaliana")
#
# ## Annotate filtered variants from BCFtools
# args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_sambcf_filtered.txt")
# txdb <- loadDb("./data/tair10.sqlite")
# fa <- FaFile(systemPipeR::reference(args))
# variantReport(args=args, txdb=txdb, fa=fa, organism="A. thaliana")
#
# ## Combine results from GATK
# args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt")
# combineDF <- combineVarReports(args, filtercol=c(Consequence="nonsynonymous"))
# write.table(combineDF, "./results/combineDF_nonsyn_gatk.xls", quote=FALSE, row.names=FALSE, sep="\t")
#
# ## Combine results from BCFtools
# args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_sambcf_filtered.txt")
# combineDF <- combineVarReports(args, filtercol=c(Consequence="nonsynonymous"))
# write.table(combineDF, "./results/combineDF_nonsyn_sambcf.xls", quote=FALSE, row.names=FALSE, sep="\t")
#
# ## Summary for GATK
# args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt")
# write.table(varSummary(args), "./results/variantStats_gatk.xls", quote=FALSE, col.names = NA, sep="\t")
#
# ## Summary for BCFtools
# args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_sambcf_filtered.txt")
# write.table(varSummary(args), "./results/variantStats_sambcf.xls", quote=FALSE, col.names = NA, sep="\t")
#
# ## Venn diagram of variants
# args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_gatk_filtered.txt")
# varlist <- sapply(names(outpaths(args))[1:4], function(x) as.character(read.delim(outpaths(args)[x])$VARID))
# vennset_gatk <- overLapper(varlist, type="vennsets")
# args <- systemArgs(sysma="annotate_vars.param", mytargets="targets_sambcf_filtered.txt")
# varlist <- sapply(names(outpaths(args))[1:4], function(x) as.character(read.delim(outpaths(args)[x])$VARID))
# vennset_bcf <- overLapper(varlist, type="vennsets")
# vennPlot(list(vennset_gatk, vennset_bcf), mymain="", mysub="GATK: red; BCFtools: blue", colmode=2, ccol=c("blue", "red"))
# ## End(Not run)
Run the code above in your browser using DataLab