# Example 1 (get p-value of small matrix with independent features using exact test)
suppressWarnings(require(doParallel))
# registerDoParallel(cores = 2)
X1 <- matrix(nrow = 5, ncol = 10, rbinom(50, 1, 0.5)) # binary matrix, small
getPValue(X1) # perform exact test with 5000 permutations
# should be larger than 0.05
# Example 2 (get p-value of high-dim matrix with independent features using asymptotic test)
X2 <- matrix(nrow = 10, ncol = 1000, rnorm(1e4)) # real matrix, large enough
getPValue(X2, p = 2, largeP = TRUE) # very fast
# should be larger than 0.05
# getPValue(X2, p = 2) # slower, do not run (Output: 0.5764)
# Example 3 (get p-value of high-dim matrix with partitionable features using exact test)
X3 <- matrix(nrow = 10, ncol = 1000, rbinom(1e4, 1, 0.5))
getPValue(X3, block_labels = rep(c(1,2,3,4,5), 200))
# Warning message: # there are features that have zero variation (i.e., all 0s or 1s)
# In getPValue(X3, block_labels = rep(c(1, 2, 3, 4, 5), 200)) :
# There exist columns with all ones or all zeros for binary X.
# Example 4 (get p-value of high-dim matrix with partitionable features using asymptotic test)
## This elaborate example generates binarized versions of time series data.
# Helper function to binarize a marker
# by converting z-scores to {0,1} based on
# standard normal quantiles
binarizeMarker <- function(x, freq, ploidy) {
if (ploidy == 1) {
return((x > qnorm(1-freq)) + 0)
} else if (ploidy == 2) {
if (x <= qnorm((1-freq)^2)) {
return(0)
} else if (x <= qnorm(1-freq^2)) {
return(1)
} else return(2)
} else {
cat("Specify valid ploidy number, 1 or 2")
}
}
getAutoRegArray <- function(B, N, maf_l = 0.38, maf_u = 0.5, rho = 0.5, ploid = 1) {
# get minor allele frequencies by sampling from uniform
mafs <- runif(B, min = maf_l, max = maf_u)
# get AR array
ar_array <- t(replicate(N, arima.sim(n = B, list(ar=rho))))
# theoretical column variance
column_var <- 1/(1-rho^2)
# rescale so that variance per marker is 1
ar_array <- ar_array / sqrt(column_var)
# rescale each column of AR array
for (b in 1:B) {
ar_array[,b] <- sapply(ar_array[,b],
binarizeMarker,
freq = mafs[b],
ploidy = ploid)
}
return(ar_array)
}
## Function to generate the data array with desired number of samples
getExHaplotypes <- function(N) {
array <- do.call("cbind",
lapply(1:50, function(x) {getAutoRegArray(N, B = 20)}))
return(array)
}
## Generate data and run test
X4 <- getExHaplotypes(10)
getPValue(X4, block_boundaries = seq(from = 1, to = 1000, by = 25), largeP = TRUE)
# stopImplicitCluster()
Run the code above in your browser using DataLab