data(H3K27Ac, package = "MAnorm2")
attr(H3K27Ac, "metaInfo")
## Fit a mean-variance curve treating each gender as a biological condition,
## and each individual (i.e., cell line) a replicate.
# \donttest{
# First perform the MA normalization and construct bioConds to represent
# individuals.
norm <- normalize(H3K27Ac, 4, 9)
norm <- normalize(norm, 5:6, 10:11)
norm <- normalize(norm, 7:8, 12:13)
conds <- list(GM12890 = bioCond(norm[4], norm[9], name = "GM12890"),
GM12891 = bioCond(norm[5:6], norm[10:11], name = "GM12891"),
GM12892 = bioCond(norm[7:8], norm[12:13], name = "GM12892"))
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
conds <- normBioCond(conds, common.peak.regions = autosome)
# Group individuals into bioConds based on their genders.
female <- cmbBioCond(conds[c(1, 3)], name = "female")
male <- cmbBioCond(conds[2], name = "male")
# The dependence of variance of ChIP-seq signal intensity across individuals
# on the mean signal intensity is typically not as regular as could be well
# modeled by an explicit parametric form. Better use the local regression to
# adaptively capture the mean-variance trend.
genders <- list(female = female, male = male)
genders1 <- fitMeanVarCurve(genders, method = "local", occupy.only = TRUE)
genders2 <- fitMeanVarCurve(genders, method = "local", occupy.only = FALSE)
# Suppose the local regression is performed using only the occupied genomic
# intervals as input. Good chances are that the extrapolation algorithm
# implemented in the regression method will produce over-estimated variances
# for the non-occupied intervals.
plotMeanVarCurve(genders1, subset = "all")
plotMeanVarCurve(genders2, subset = "all")
plotMeanVarCurve(genders1, subset = "non-occupied")
plotMeanVarCurve(genders2, subset = "non-occupied")
# On the other hand, applying the local regression on all genomic intervals
# may considerably reduce the estimated number of prior degrees of freedom
# associated with the fitted mean-variance curve, as ChIP-seq signals in the
# non-occupied intervals are generally of less data regularity compared with
# those in the occupied intervals.
summary(genders1$female)
summary(genders2$female)
# To split the difference, fit the mean-variance curve on all genomic
# intervals and re-estimate the number of prior degrees of freedom using
# only the occupied intervals, which is also the most recommended strategy
# in practice.
genders3 <- estimatePriorDf(genders2, occupy.only = TRUE)
plotMeanVarCurve(genders3, subset = "all")
plotMeanVarCurve(genders3, subset = "non-occupied")
summary(genders3$female)
# }
Run the code above in your browser using DataLab