# Required libraries
library(mvnfast)
library(cellWise)
library(robustbase)
# Simulation parameters
n <- 50
p <- 100
rho.within <- 0.8
rho.between <- 0.2
p.active <- 20
group.size <- 5
snr <- 3
contamination.prop <- 0.1
# Setting the seed
set.seed(0)
# Block correlation structure
sigma.mat <- matrix(0, p, p)
sigma.mat[1:p.active, 1:p.active] <- rho.between
for(group in 0:(p.active/group.size - 1))
sigma.mat[(group*group.size+1):(group*group.size+group.size),
(group*group.size+1):(group*group.size+group.size)] <- rho.within
diag(sigma.mat) <- 1
# Simulation of beta vector
true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active))
# Setting the SD of the variance
sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr))
# Simulation of uncontaminated data
x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat)
y <- x %*% true.beta + rnorm(n, 0, sigma)
# Cellwise contamination
contamination_indices <- sample(1:(n * p), round(n * p * contamination.prop))
x_train <- x
x_train[contamination_indices] <- runif(length(contamination_indices), -10, 10)
# FSCRE Ensemble model
ensemble_fit <- srlars(x_train, y,
n_models = 5,
tolerance = 0.01,
robust = TRUE,
compute_coef = TRUE)
# Simulation of test data
m <- 50
x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat)
y_test <- x_test %*% true.beta + rnorm(m, 0, sigma)
# Prediction of test samples
# Default: Averaged prediction from all models
# Dynamic imputation is used by default if the model is robust
ensemble_preds <- predict(ensemble_fit, newx = x_test)
# Evaluate MSPE
mspe <- mean((y_test - ensemble_preds)^2) / sigma^2
print(paste("MSPE:", mspe))
Run the code above in your browser using DataLab