#####################################################################################
##### simulate data
#####################################################################################
set.seed(1) # fix the seed to get a reproducible result
K <- 4 # number of datasets
p <- 100 # covariate dimension
s <- 5 # support size
q <- 7 # size of subset of covariates that can be non-zero for any task
n_k <- 50 # task sample size
N <- n_k * p # full dataset samplesize
X <- matrix( rnorm(N * p), nrow = N, ncol=p) # full design matrix
B <- matrix(1 + rnorm(K * (p+1) ), nrow = p + 1, ncol = K) # betas before making sparse
Z <- matrix(0, nrow = p, ncol = K) # matrix of supports
y <- vector(length = N) # outcome vector
# randomly sample support to make betas sparse
for(j in 1:K) Z[1:q, j] <- sample( c( rep(1,s), rep(0, q - s) ), q, replace = FALSE )
B[-1,] <- B[-1,] * Z # make betas sparse and ensure all models have an intercept
task <- rep(1:K, each = n_k) # vector of task labels (indices)
# iterate through and make each task specific dataset
for(j in 1:K){
indx <- which(task == j) # indices of task
e <- rnorm(n_k)
y[indx] <- B[1, j] + X[indx,] %*% B[-1,j] + e
}
colnames(B) <- paste0("beta_", 1:K)
rownames(B) <- paste0("X_", 1:(p+1))
print("Betas")
print(round(B[1:8,],2))
###########################
# custom tuning grid
###########################
grid <- data.frame(s = c(4, 4, 5, 5),
lambda_1 = c(0.01, 0.1, 0.01, 0.1),
lambda_2 = rep(0, 4),
lambda_z = c(0.01, 0.1, 0.01, 0.1))
#################################################
# cross validation with custom tuning grid
##################################################
if (FALSE) {
if (identical(Sys.getenv("AUTO_JULIA_INSTALL"), "true")) { ## The examples are quite time consuming
## Do initiation for and automatic installation if necessary
tn <- cv.smtl(y = y,
X = X,
study = task,
commonSupp = FALSE,
grid = grid,
nfolds = 5,
multiTask = FALSE)
# model fitting
mod <- sMTL::smtl(y = y,
X = X,
study = task,
s = tn$best.1se$s,
commonSupp = TRUE,
lambda_1 = tn$best.1se$lambda_1,
lambda_z = tn$best.1se$lambda_z)
######################################################
# cross validation with automatically generated grid
#######################################################
tn <- cv.smtl(y = y,
X = X,
study = task,
commonSupp = FALSE,
lambda_1 = TRUE,
lambda_w = FALSE,
lambda_z = TRUE,
nfolds = 5,
multiTask = FALSE)
# model fitting
mod <- sMTL::smtl(y = y,
X = X,
study = task,
s = tn$best.1se$s,
commonSupp = TRUE,
lambda_1 = tn$best.1se$lambda_1,
lambda_z = tn$best.1se$lambda_z)
print(round(mod$beta[1:8,],2))
}
}
Run the code above in your browser using DataLab