# \donttest{
# Data simulation
set.seed(1)
simul <- SimulateRegression(
n = 1000, pk = 20,
family = "binomial", ev_xy = 0.8
)
# Data split: selection, training and test set
ids <- Split(
data = simul$ydata,
family = "binomial",
tau = c(0.4, 0.3, 0.3)
)
xselect <- simul$xdata[ids[[1]], ]
yselect <- simul$ydata[ids[[1]], ]
xtrain <- simul$xdata[ids[[2]], ]
ytrain <- simul$ydata[ids[[2]], ]
xtest <- simul$xdata[ids[[3]], ]
ytest <- simul$ydata[ids[[3]], ]
# Stability selection
stab <- VariableSelection(
xdata = xselect,
ydata = yselect,
family = "binomial"
)
# Performances in test set of model refitted in training set
roc <- ExplanatoryPerformance(
xdata = xtrain, ydata = ytrain,
new_xdata = xtest, new_ydata = ytest,
stability = stab
)
plot(roc)
roc$AUC
# Alternative with multiple training/test splits
roc <- ExplanatoryPerformance(
xdata = rbind(xtrain, xtest),
ydata = c(ytrain, ytest),
stability = stab, K = 100
)
plot(roc)
boxplot(roc$AUC)
# Partial Least Squares Discriminant Analysis
if (requireNamespace("sgPLS", quietly = TRUE)) {
stab <- VariableSelection(
xdata = xselect,
ydata = yselect,
implementation = SparsePLS,
family = "binomial"
)
# Defining wrapping functions for predictions from PLS-DA
PLSDA <- function(xdata, ydata, family = "binomial") {
model <- mixOmics::plsda(X = xdata, Y = as.factor(ydata), ncomp = 1)
return(model)
}
PredictPLSDA <- function(xdata, model) {
xdata <- xdata[, rownames(model$loadings$X), drop = FALSE]
predicted <- predict(object = model, newdata = xdata)$predict[, 2, 1]
return(predicted)
}
# Performances with custom models
roc <- ExplanatoryPerformance(
xdata = rbind(xtrain, xtest),
ydata = c(ytrain, ytest),
stability = stab, K = 100,
implementation = PLSDA, prediction = PredictPLSDA
)
plot(roc)
}
# }
Run the code above in your browser using DataLab