# Background: Similar expression (activity) patterns of genes indicate that
# those genes share a common function. It is thus possible to infer the function
# of genes where it is currently unknown, or to learn about pathways involving
# multiple genes working in concert. Hopefully this knowledge can then be used
# to develop better crops, new treatments to disease, and so on.
# In this example we use the DDP test to find pairs of genes with associated
# expression patterns in the data of Hughes et al. (2000). Specifically, we will
# use the 3x3 DDP test variant based on summation of the likelihood ratio.
data(hughes)
nr.genes = nrow(hughes)
nr.samples = ncol(hughes)
# We want to test each pair of genes for association.
nr.tests = choose(nr.genes, 2)
# Estimate the critical test statistic value by Monte-Carlo simulation of the
# null distribution. We use enough null replicates in order to ensure accurate
# estimation.
bonferroni.level = 0.05 / nr.tests
nr.null.replicates = 100 / bonferroni.level
if (0) {
# This takes a while to compute but only needs to be done once (and is easy to
# parallelize over many CPUs).
ddp.nullsim = rep(Inf, nr.null.replicates)
x = 1:nr.samples # it is enough to permute the y vector
for (i in 2:nr.null.replicates) {
y = sample(nr.samples)
ddp.nullsim[i] = xdp.test(x, y, variant = 'DDP', K = 3)$sum.lr
}
ddp.critical.value = quantile(ddp.nullsim, probs = 1 - bonferroni.level)
} else {
# In the interest of saving time, this is what we get
# NOTE: Big Monte-Carlo simulated null distributions are available (see documentation)
ddp.critical.value = 0.01569162
}
# Now we compute the observed test statistics for all pairs
compute.xdp.for.pair = function(pair.idxs) {
x = as.numeric(hughes[pair.idxs[1], ])
y = as.numeric(hughes[pair.idxs[2], ])
return (xdp.test(x, y, variant = 'DDP', K = 3)$sum.lr)
}
ddp.obs = combn(nr.genes, 2, compute.xdp.for.pair, simplify = TRUE)
# And identify any significantly associated pairs (at the conservative
# Bonferroni level maintaining the FWER at 0.05)
ddp.significant = (ddp.obs > ddp.critical.value)
summary(ddp.significant)
# Result: There are 26 pairs out of the 45 performed here, where there is
# evidence of associated patterns! Had we analyzed this data with Pearson or
# Spearman correlation based testing, we would find only a small fraction of
# these associated pairs (see Kaufman et al., 2013).
Run the code above in your browser using DataLab