# generate the data
# generate X
p <- 2
n <- 100
x <- data.frame(replicate(p, stats::runif(n, -1, 1)))
# apply the function to the x's
f <- function(x) 0.5 + 0.3*x[1] + 0.2*x[2]
smooth <- apply(x, 1, function(z) f(z))
# generate Y ~ Bernoulli (smooth)
y <- matrix(stats::rbinom(n, size = 1, prob = smooth))
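# optional sanity check (base R only, not part of the original example): the
# conditional mean f(x) lies in [0, 1], so it is a valid Bernoulli success
# probability, and the outcome is not too unbalanced
range(smooth)
mean(y)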
# load vimp (which provides vim()) and set up a library for SuperLearner;
# note: a simple library is used for speed
library("vimp")
library("SuperLearner")
learners <- c("SL.glm")
# estimate the importance of X2 (indx = 2) using Y and X directly;
# stratified = TRUE gives class-balanced sample-splitting folds
est_1 <- vim(y, x, indx = 2, type = "accuracy",
             alpha = 0.05, run_regression = TRUE,
             SL.library = learners, cvControl = list(V = 2),
             stratified = TRUE)
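# optional: inspect the first estimate; printing the returned object shows a
# summary (in recent vimp releases this includes the point estimate,
# confidence interval, and p-value)
print(est_1)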
# alternatively, use pre-computed fitted values (run_regression = FALSE);
# split the data according to est_1's sample-splitting folds so that the full
# and reduced regressions are fit on separate halves
set.seed(4747)
V <- 2
y_1 <- y[est_1$sample_splitting_folds == 1]
y_2 <- y[est_1$sample_splitting_folds == 2]
x_1 <- subset(x, est_1$sample_splitting_folds == 1)
x_2 <- subset(x, est_1$sample_splitting_folds == 2)
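# fit the full regression (using both X1 and X2) on the first half of the
# data; its fitted values are passed to vim() below as f1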
full_fit <- SuperLearner::SuperLearner(Y = y_1, X = x_1,
                                       SL.library = learners,
                                       cvControl = list(V = V))
full_fitted <- SuperLearner::predict.SuperLearner(full_fit)$pred
# for the reduced regression, first refit the full regression on the second
# half of the data, then regress its fitted values on X1 only (dropping X2)
full_fit_2 <- SuperLearner::SuperLearner(Y = y_2, X = x_2,
                                         SL.library = learners,
                                         cvControl = list(V = V))
full_fitted_2 <- SuperLearner::predict.SuperLearner(full_fit_2)$pred
reduced_fit <- SuperLearner::SuperLearner(Y = full_fitted_2,
                                          X = x_2[, -2, drop = FALSE],
                                          SL.library = learners,
                                          cvControl = list(V = V))
reduced_fitted <- SuperLearner::predict.SuperLearner(reduced_fit)$pred
est_2 <- vim(Y = y, f1 = full_fitted, f2 = reduced_fitted,
             indx = 2, run_regression = FALSE, alpha = 0.05,
             stratified = TRUE, type = "accuracy",
             sample_splitting_folds = est_1$sample_splitting_folds)
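# optional: inspect the second estimate and compare it with the first; the two
# should be similar since est_2 reuses est_1's sample-splitting folds
print(est_2)
# point estimates from the two approaches (the $est element is assumed here,
# as listed in vim()'s return value; print() alone also works)
est_1$est
est_2$est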