## Generate simulated data with surrogate variables
## In practice, you would obtain mat from a real dataset, not simulate it.
set.seed(1)
n <- 10
p <- 1000
Z <- matrix(abs(rnorm(n, sd = 4)))
alpha <- matrix(abs(rnorm(p, sd = 1)))
mat <- round(2^(alpha %*% t(Z) + abs(matrix(rnorm(n * p, sd = 5),
nrow = p,
ncol = n))))
## Choose simulation parameters
design_perm <- cbind(rep(c(0, 1), length.out = n), runif(n))
coef_perm <- matrix(rnorm(p * ncol(design_perm), sd = 6), nrow = p)
## Specify one surrogate variable (number of columns in taget_cor),
## highly correlated with first observed covariate and uncorrelated
## with second observed covariate
target_cor <- matrix(c(0.9, 0))
## Thin
thout <- thin_diff(mat = mat,
design_perm = design_perm,
coef_perm = coef_perm,
target_cor = target_cor)
## target_cor approximates correlation between estimated surrogate variable
## and matching variable.
cor(thout$matching_var, thout$sv)
## Estimated surrogate variable is associated with true surrogate variable
## (because the signal is strong in this case)
plot(Z, thout$sv, xlab = "True SV", ylab = "Estimated SV")
## So target_cor approximates correlation between surrogate variable and
## matching variables
cor(thout$matching_var, Z)
## Correlation between permuted covariates and surrogate variables are less
## close to target_cor
cor(thout$designmat, Z)
## Estimated signal is correlated to true single. First variable is slightly
## biased because the surrogate variable is not included.
Ynew <- log2(t(thout$mat) + 0.5)
X <- thout$designmat
coef_est <- t(coef(lm(Ynew ~ X))[2:3, ])
plot(thout$coefmat[, 1], coef_est[, 1],
main = "First Variable",
xlab = "Coefficient",
ylab = "Estimated Coefficient")
abline(0, 1, col = 2, lwd = 2)
plot(thout$coefmat[, 2], coef_est[, 2],
main = "Second Variable",
xlab = "Coefficient",
ylab = "Estimated Coefficient")
abline(0, 1, col = 2, lwd = 2)
## But estimated coefficient of the first variable is slightly closer when
## the surrogate variable is included.
Ynew <- log2(t(thout$mat) + 0.5)
X <- cbind(thout$designmat, thout$sv)
coef_est <- t(coef(lm(Ynew ~ X))[2:3, ])
plot(thout$coefmat[, 1], coef_est[, 1],
main = "First Variable",
xlab = "Coefficient",
ylab = "Estimated Coefficient")
abline(0, 1, col = 2, lwd = 2)
plot(thout$coefmat[, 2], coef_est[, 2],
main = "Second Variable",
xlab = "Coefficient",
ylab = "Estimated Coefficient")
abline(0, 1, col = 2, lwd = 2)
Run the code above in your browser using DataLab