##---------------------------
## Survival analysis
##---------------------------
#############################
# Fit a Cox proportional hazards model by CoxBoost by considering
# interactions after screening interactions by random forest
# system.time:
# user system elapsed
# 370.97 2.32 374.31
# For a faster run set repetitions down!
#############################
# Create survival data with interactions:
simulation <- simul.int(287578,n = 200, p = 500,
beta.int = 1.0,
beta.main = 0.9,
censparam = 1/20,
lambda = 1/20)
data <- simulation$data
# Showing True Effects:
simulation$info
# Perform the sprinter approach:
set.seed(123)
testcb <- sprinter( x=data[,1:500],
time = data$obs.time,
status= data$obs.status,
repetitions = 10,
mandatory = c("ID1","ID2"),
n.inter.candidates = 1000,
screen.main = fit.CoxBoost,
fit.final = fit.CoxBoost,
args.screen.main = list(seed=123,stepno = 10, K = 10,
criterion ='pscore', nu = 0.05),
parallel = FALSE)
summary(testcb)
##########
# Fit a Cox proportional hazards model by considering
# interactions after screening interactions by random forest
# and selecting relevant effects by univariate Cox regression:
# system.time:
# user system elapsed
# 374.50 1.53 376.68
# For a faster run set repetitions down!
##########
# Create survival data with interactions:
data <- simul.int(287578,n = 200, p = 500,
beta.int = 1.0,
beta.main = 0.9,
censparam = 1/20,
lambda = 1/20)[[1]]
# Perform the sprinter approach:
set.seed(123)
testunicox <- sprinter( x=data[,1:500],
time = data$obs.time,
status= data$obs.status,
repetitions = 10,
mandatory = c("ID1","ID2"),
n.inter.candidates = 1000,
screen.main = fit.uniCox,
fit.final = fit.uniCox,
parallel = FALSE)
summary(testunicox)
# true coefficients:
# ID1 ID2 ID5:ID6 ID7:ID8
# 0.9 -0.9 1 -1
##---------------------------
## Continuous outcome
##---------------------------
# selection of main effects by univariate generalized
# linear models and pre-selections of interactions
# by random forest:
sprinter.glm.rf.con <- sprinter( x=data[,1:500],
time = data$obs.time,
repetitions = 10,
mandatory = c("ID1","ID2"),
n.inter.candidates = 1000,
screen.main = fit.uniGlm,
fit.final = fit.uniGlm,
parallel = FALSE)
# selection of main effects by univariate generalized
# linear models and pre-selections of interactions
# by logic regression:
sprinter.glm.logicR.con <- sprinter( x=data[,1:500],
time = data$obs.time,
repetitions = 10,
mandatory = c("ID1","ID2"),
n.inter.candidates = 1000,
screen.main = fit.uniGlm,
screen.inter = fit.logicReg,
fit.final = fit.uniGlm,
args.screen.inter = list(type = 2),
parallel = FALSE)
# selection of main effects by GAMBoost
# and pre-selections of interactions
# by random forest:
sprinter.gamboost.rf.con <- sprinter( x=data[,1:500],
time = data$obs.time,
repetitions = 10,
mandatory = c("ID1","ID2"),
n.inter.candidates = 1000,
screen.main = fit.GAMBoost,
args.screen.main = list(stepno = 10),
fit.final = fit.GAMBoost,
parallel = FALSE)
##---------------------------
## Binary outcome
##---------------------------
x <- matrix(runif(200*500,min=-1,max=1),200,500)
colnames(x) <- paste('ID', 1:500, sep = '')
eta <- -0.5 + 2*x[,1] - 2*x[,3] + 2 * x[,3]*x[,4]
y <- rbinom(200,1,binomial()$linkinv(eta))
# selection of main effects by univariate generalized
# linear models and pre-selections of interactions
# by random forest:
sprinter.glm.rf.bin <- sprinter( x=x[,1:500],
time = y,
repetitions = 10,
mandatory = c("ID1","ID2"),
n.inter.candidates = 1000,
screen.main = fit.uniGlm,
fit.final = fit.uniGlm,
args.screen.main = list(family = binomial()),
parallel = FALSE)
# selection of main effects by univariate generalized
# linear models and pre-selections of interactions
# by logic regression:
sprinter.glm.logicR.bin <- sprinter( x=x[,1:500],
time = y,
repetitions = 10,
mandatory = c("ID1","ID2"),
n.inter.candidates = 1000,
screen.main = fit.uniGlm,
screen.inter = fit.logicReg,
fit.final = fit.uniGlm,
args.screen.inter = list(type = 3),
parallel = FALSE)
# selection of main effects by GAMBoost and pre-selection of
# interactions by random forest:
sprinter.GAMBoost.rf.bin <- sprinter( x=x,
time = y,
repetitions = 10,
mandatory = c("ID1","ID2"),
n.inter.candidates = 1000,
screen.main = fit.GAMBoost,
fit.final = fit.GAMBoost,
args.screen.main = list(family = binomial()),
parallel = FALSE)
Run the code above in your browser using DataLab