# \donttest{
## Linear regression
# Simulate a Gaussian data set (seed fixed so the example is reproducible)
set.seed(1)
simul <- SimulateRegression(n = 500, pk = 50, family = "gaussian")

# Training/test split: the first set is used for fitting, the second for
# evaluating the predictions
ids <- Split(data = simul$ydata, tau = c(0.8, 0.2))
x_train <- simul$xdata[ids[[1]], ]
y_train <- simul$ydata[ids[[1]], ]
x_test <- simul$xdata[ids[[2]], ]
y_test <- simul$ydata[ids[[2]], ]

# Stability selection on the training set only
stab <- VariableSelection(xdata = x_train, ydata = y_train)

# Predictions from post stability selection estimation (method = "refit")
yhat <- predict(
  stab,
  xdata = x_train,
  ydata = y_train,
  newdata = x_test,
  method = "refit"
)
cor(y_test, yhat)^2 # Q-squared

# Predictions from the ensemble model (method = "ensemble")
yhat <- predict(
  stab,
  xdata = x_train,
  ydata = y_train,
  newdata = x_test,
  method = "ensemble"
)
cor(y_test, yhat)^2 # Q-squared
## Logistic regression
# Simulate a binary-outcome data set (seed fixed so the example is reproducible)
set.seed(1)
simul <- SimulateRegression(
  n = 500, pk = 20, family = "binomial", ev_xy = 0.9
)

# Training/test split: the first set is used for fitting, the second for
# evaluating the predictions
ids <- Split(data = simul$ydata, family = "binomial", tau = c(0.8, 0.2))
x_train <- simul$xdata[ids[[1]], ]
y_train <- simul$ydata[ids[[1]], ]
x_test <- simul$xdata[ids[[2]], ]
y_test <- simul$ydata[ids[[2]], ]

# Stability selection on the training set only
stab <- VariableSelection(
  xdata = x_train,
  ydata = y_train,
  family = "binomial"
)

# Predictions from post stability selection estimation (method = "refit"),
# requested with type = "response"
yhat <- predict(
  stab,
  xdata = x_train,
  ydata = y_train,
  newdata = x_test,
  method = "refit",
  type = "response"
)
plot(ROC(predicted = yhat, observed = y_test))

# Predictions from the ensemble model (method = "ensemble"); its ROC curve is
# overlaid in blue on the existing plot
yhat <- predict(
  stab,
  xdata = x_train,
  ydata = y_train,
  newdata = x_test,
  method = "ensemble",
  type = "response"
)
plot(
  ROC(predicted = yhat, observed = y_test),
  add = TRUE,
  col = "blue"
)
# }
# Run the code above in your browser using DataLab