## Collapse the coverage information
collapsedFull <- collapseFullCoverage(list(genomeData$coverage),
verbose = TRUE)
## Calculate library size adjustments
sampleDepths <- sampleDepth(collapsedFull, probs=c(0.5), verbose = TRUE)
## Build the models
group <- genomeInfo$pop
adjustvars <- data.frame(genomeInfo$gender)
models <- makeModels(sampleDepths, testvars = group, adjustvars = adjustvars)
## Preprocess the data
## Automatic chunksize used to then compare 1 vs 4 cores in the 'do not run'
## section
prep <- preprocessCoverage(genomeData, groupInfo = group, cutoff = 0,
scalefac = 32, chunksize = NULL, colsubset = NULL, mc.cores = 4)
## Get the F statistics
fstats <- genomeFstats
## We recommend determining the cutoff to use based on the F-distribution
## although you could also based it on the observed F-statistics.
## In this example we use a low cutoff used for illustrative purposes
cutoff <- 1
## Calculate the p-values and define the regions of interest.
regsWithP <- calculatePvalues(prep, models, fstats, nPermute=1, seeds=1,
chr = 'chr21', cutoff = cutoff, mc.cores = 1, method = 'regular')
regsWithP
## Calculate again, but with 10 permutations instead of just 1
regsWithP <- calculatePvalues(prep, models, fstats, nPermute=10, seeds=1:10,
chr='chr21', cutoff=cutoff, mc.cores=2, method='regular')
## Check that they are the same as the previously calculated regions
library(testthat)
expect_that(regsWithP, equals(genomeRegions))
## Histogram of the theoretical p-values by region
hist(pf(regsWithP$regions$value, df1-df0, n-df1), main='Distribution
original p-values by region', freq=FALSE)
## Histogram of the permutted p-values by region
hist(regsWithP$regions$pvalues, main='Distribution permutted p-values by
region', freq=FALSE)
## MA style plot
library('ggplot2')
ma <- data.frame(mean=regsWithP$regions$meanCoverage,
log2FoldChange=regsWithP$regions$log2FoldChangeYRIvsCEU)
ggplot(ma, aes(x=log2(mean), y=log2FoldChange)) + geom_point() +
ylab('Fold Change (log2)') + xlab('Mean coverage (log2)') +
labs(title='MA style plot')
## Annotate the results
library('bumphunter')
genes <- annotateTranscripts(TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene)
annotation <- matchGenes(regsWithP$regions, genes)
head(annotation)
Run the code above in your browser using DataLab