# NOT RUN {
# Simulate data ----
set.seed(1)

# First half of the training set: 1000 rows, 2 standard-normal covariates.
X1 <- matrix(rnorm(2000), nrow = 1000, ncol = 2)
B1 <- c(5, 10, 15) # true coefficients: intercept, V1, V2
y1 <- cbind(1, X1) %*% B1 # noise-free linear outcome

# Second half of the training set: covariates drawn from N(mean = 1, sd = 2)
# and a different set of true coefficients.
X2 <- matrix(rnorm(2000, mean = 1, sd = 2), nrow = 1000, ncol = 2)
B2 <- c(10, 5, 0) # true coefficients: intercept, V1, V2
y2 <- cbind(1, X2) %*% B2

# Stack both halves and assign every row at random to one of 10 study labels.
X <- rbind(X1, X2)
y <- c(y1, y2)
study <- sample.int(n = 10, size = 2000, replace = TRUE)
data <- data.frame(Study = study, Y = y, V1 = X[, 1], V2 = X[, 2])

# Design matrix of the target study, used for covariate profile similarity
# weighting and for the accept/reject algorithm (covariate-matched study
# strap). Column names match the covariate columns of `data`.
target <- matrix(
  rnorm(1000, mean = 3, sd = 5),
  ncol = 2,
  dimnames = list(NULL, c("V1", "V2"))
)
# Model fitting ----

# Fit a model with one Single-Study Learner (SSL): PCA regression ("pcr"),
# tuned with a fixed grid of ncomp = 2. The bag size is set to the number
# of distinct study labels in `data`.
arMod1 <- cmss(
  formula = Y ~ .,
  data = data,
  target.study = target,
  converge.lim = 10,
  bag.size = length(unique(data[["Study"]])),
  max.straps = 50,
  paths = 2,
  ssl.method = list("pcr"),
  ssl.tuneGrid = list(data.frame(ncomp = 2))
)
# }
# NOT RUN {
# Fit a model with two SSLs: linear regression ("lm") and PCA regression
# ("pcr"). The tuning-grid list is positional: NA for "lm", ncomp = 2 for
# "pcr".
arMod2 <- cmss(
  formula = Y ~ .,
  data = data,
  target.study = target,
  converge.lim = 20,
  bag.size = length(unique(data[["Study"]])),
  max.straps = 50,
  paths = 2,
  ssl.method = list("lm", "pcr"),
  ssl.tuneGrid = list(NA, data.frame(ncomp = 2))
)
# }
# NOT RUN {
# Fit model with a custom similarity function for the
# accept/reject step and 2 custom functions for Covariate
# Profile Similarity (CPS) weights
# custom functions for CPS
# Similarity between two design matrices: absolute Pearson correlation
# between their vectors of column means.
# NOTE: when both inputs have exactly two columns the mean vectors have
# length 2, so the correlation is -1 or +1 whenever it is defined and this
# function then returns 1.
fn1 <- function(x1, x2) {
  mu1 <- colMeans(x1)
  mu2 <- colMeans(x2)
  abs(cor(mu1, mu2))
}
# Dissimilarity between two design matrices: squared Euclidean distance
# between their vectors of column means.
fn2 <- function(x1, x2) {
  mean_gap <- colMeans(x1) - colMeans(x2)
  sum(mean_gap^2)
}
# }
# NOT RUN {
# Fit a model with two SSLs ("lm" and "pcr"), passing fn1 as the similarity
# function (`sim.fn`) and both fn1 and fn2 as custom functions (`customFNs`).
arMod3 <- cmss(
  formula = Y ~ .,
  data = data,
  target.study = target,
  sim.fn = fn1,
  customFNs = list(fn1, fn2),
  converge.lim = 50,
  bag.size = length(unique(data[["Study"]])),
  max.straps = 50,
  paths = 2,
  ssl.method = list("lm", "pcr"),
  ssl.tuneGrid = list(NA, data.frame(ncomp = 2))
)
# }
# NOT RUN {
# Predictions ----

# Predict on the target design matrix with the first fitted model (arMod1).
preds <- studyStrap.predict(ss.obj = arMod1, X = target)
# }
# Run the code above in your browser using DataLab