# \donttest{
# Save current graphical parameters so they can be restored at the end
oldpar <- par(no.readonly = TRUE)
par(mar = rep(7, 4))
# Linear regression
set.seed(1)
# Simulate a Gaussian regression dataset: n = 100 observations, 50 predictors
simul <- SimulateRegression(n = 100, pk = 50, family = "gaussian")
# Stability selection with default (penalised regression) settings
stab <- VariableSelection(
xdata = simul$xdata, ydata = simul$ydata,
family = "gaussian"
)
# Calibration plot
CalibrationPlot(stab)
# Extracting the results
summary(stab)
Stable(stab)
SelectionProportions(stab)
plot(stab)
# Using randomised LASSO
stab <- VariableSelection(
xdata = simul$xdata, ydata = simul$ydata,
family = "gaussian", penalisation = "randomised"
)
plot(stab)
# Using adaptive LASSO
stab <- VariableSelection(
xdata = simul$xdata, ydata = simul$ydata,
family = "gaussian", penalisation = "adaptive"
)
plot(stab)
# Using additional arguments from glmnet (e.g. penalty.factor)
# A penalty factor of 0 leaves the last 5 predictors unpenalised
# (glmnet semantics), effectively forcing them into the models
stab <- VariableSelection(
xdata = simul$xdata, ydata = simul$ydata, family = "gaussian",
penalty.factor = c(rep(1, 45), rep(0, 5))
)
head(coef(stab))
# Using CART
# (fix: removed the trailing comma after family = "gaussian" in the
# original, which left an empty argument in the call and caused an
# "argument is empty" error at evaluation)
if (requireNamespace("rpart", quietly = TRUE)) {
  stab <- VariableSelection(
    xdata = simul$xdata, ydata = simul$ydata,
    implementation = CART,
    family = "gaussian"
  )
  plot(stab)
}
# Regression with multivariate outcomes
set.seed(1)
# q = 3 simulates three outcome variables
simul <- SimulateRegression(n = 100, pk = 20, q = 3, family = "gaussian")
# family = "mgaussian" fits the multivariate Gaussian outcome jointly
stab <- VariableSelection(
xdata = simul$xdata, ydata = simul$ydata,
family = "mgaussian"
)
summary(stab)
# Logistic regression
set.seed(1)
# ev_xy presumably controls the expected proportion of explained
# variance in the simulated outcome -- see SimulateRegression docs
simul <- SimulateRegression(n = 200, pk = 10, family = "binomial", ev_xy = 0.8)
stab <- VariableSelection(
xdata = simul$xdata, ydata = simul$ydata,
family = "binomial"
)
summary(stab)
# Sparse PCA (1 component, see BiSelection for more components)
if (requireNamespace("elasticnet", quietly = TRUE)) {
set.seed(1)
# pk gives the sizes of the simulated variable blocks
simul <- SimulateComponents(pk = c(5, 3, 4))
# Unsupervised selection: no ydata; Lambda spans 1 to p - 1
# (presumably the number of selected variables -- see SparsePCA docs)
stab <- VariableSelection(
xdata = simul$data,
Lambda = seq_len(ncol(simul$data) - 1),
implementation = SparsePCA
)
CalibrationPlot(stab, xlab = "")
summary(stab)
}
# Sparse PLS (1 outcome, 1 component, see BiSelection for more options)
if (requireNamespace("sgPLS", quietly = TRUE)) {
set.seed(1)
simul <- SimulateRegression(n = 100, pk = 50, family = "gaussian")
# Lambda again spans 1 to p - 1 candidate sparsity levels
stab <- VariableSelection(
xdata = simul$xdata, ydata = simul$ydata,
Lambda = seq_len(ncol(simul$xdata) - 1),
implementation = SparsePLS, family = "gaussian"
)
CalibrationPlot(stab, xlab = "")
SelectedVariables(stab)
}
# Group PLS (1 outcome, 1 component, see BiSelection for more options)
# Note: reuses the simul object from the Sparse PLS example above
if (requireNamespace("sgPLS", quietly = TRUE)) {
# group_x defines 5 groups of predictors (sizes 5, 5, 10, 20, 10);
# with group_penalisation = TRUE, selection operates on whole groups
stab <- VariableSelection(
xdata = simul$xdata, ydata = simul$ydata,
Lambda = seq_len(5),
group_x = c(5, 5, 10, 20, 10),
group_penalisation = TRUE,
implementation = GroupPLS, family = "gaussian"
)
CalibrationPlot(stab, xlab = "")
SelectedVariables(stab)
}
# Example with more hyper-parameters: elastic net
set.seed(1)
# Simulate data used for calibrating the elastic net mixing parameter
simul <- SimulateRegression(n = 100, pk = 50, family = "gaussian")
# Objective function for calibrating the elastic net mixing parameter:
# runs stability selection at a given alpha and returns the largest
# value stored in the fitted object's S slot (its stability score,
# NAs ignored), so that optimise() can maximise it over alpha.
TuneElasticNet <- function(xdata, ydata, family, alpha) {
  fit <- VariableSelection(
    xdata = xdata, ydata = ydata,
    family = family, alpha = alpha, verbose = FALSE
  )
  max(fit$S, na.rm = TRUE)
}
# One-dimensional optimisation of alpha over [0.1, 1], maximising the
# objective defined in TuneElasticNet(); extra arguments are forwarded
myopt <- optimise(TuneElasticNet,
lower = 0.1, upper = 1, maximum = TRUE,
xdata = simul$xdata, ydata = simul$ydata,
family = "gaussian"
)
# Refit stability selection at the calibrated alpha
stab <- VariableSelection(
xdata = simul$xdata, ydata = simul$ydata,
family = "gaussian", alpha = myopt$maximum
)
summary(stab)
enet <- SelectedVariables(stab)
# Comparison with LASSO
stab <- VariableSelection(xdata = simul$xdata, ydata = simul$ydata, family = "gaussian")
summary(stab)
lasso <- SelectedVariables(stab)
# Cross-tabulation of selection status under LASSO vs elastic net
table(lasso, enet)
# Example using an external function: group-LASSO with gglasso
if (requireNamespace("gglasso", quietly = TRUE)) {
set.seed(1)
# 20 predictors, later arranged into 4 groups of 5 (see group_x below)
simul <- SimulateRegression(n = 200, pk = 20, family = "binomial")
# Wrapper used by LambdaGridRegression() to fit a group LASSO with
# gglasso over gglasso's internally-computed lambda path.
#
# Arguments:
#   xdata, ydata : predictor matrix and outcome vector
#   family       : "binomial" for logistic loss, otherwise least squares
#   group_x      : vector of group sizes (predictors grouped in order)
#   ...          : forwarded to gglasso::gglasso()
# Returns the fitted gglasso model.
ManualGridGroupLasso <- function(xdata, ydata, family, group_x, ...) {
  # Expand group sizes into one group index per predictor,
  # e.g. c(2, 3) -> c(1, 1, 2, 2, 2)
  group <- rep(seq_along(group_x), times = group_x)
  if (family == "binomial") {
    # gglasso's logistic loss expects outcomes coded as -1/+1
    ytmp <- ydata
    ytmp[ytmp == min(ytmp)] <- -1
    ytmp[ytmp == max(ytmp)] <- 1
    gglasso::gglasso(xdata, ytmp, loss = "logit", group = group, ...)
  } else {
    # Fix: the original passed `lambda = lambda`, where `lambda` is
    # undefined in this scope, so this branch always errored. Let
    # gglasso compute its own lambda path, as in the branch above.
    gglasso::gglasso(xdata, ydata, loss = "ls", group = group, ...)
  }
}
# Computing a 20-point lambda grid for the group LASSO, using the
# wrapper defined above to fit the models
Lambda <- LambdaGridRegression(
xdata = simul$xdata, ydata = simul$ydata,
family = "binomial", Lambda_cardinal = 20,
implementation = ManualGridGroupLasso,
group_x = rep(5, 4)
)
# Group LASSO wrapper in the format expected by VariableSelection():
# fits gglasso at the supplied penalty values and reports, for each
# penalty/variable pair, the selection status and beta coefficient.
#
# Arguments:
#   xdata, ydata : predictor matrix and outcome vector
#   Lambda       : vector of penalty values to fit
#   family       : "binomial" (logistic loss) or "gaussian" (least squares)
#   group_x      : vector of group sizes (predictors grouped in order)
#   ...          : forwarded to gglasso::gglasso()
# Returns a list with:
#   selected  : binary matrix (penalties x variables) of selection status
#   beta_full : matrix (penalties x variables) of coefficients
GroupLasso <- function(xdata, ydata, Lambda, family, group_x, ...) {
  # Expand group sizes into one group index per predictor
  group <- rep(seq_along(group_x), times = group_x)
  # Running the regression
  if (family == "binomial") {
    # gglasso's logistic loss expects outcomes coded as -1/+1
    ytmp <- ydata
    ytmp[ytmp == min(ytmp)] <- -1
    ytmp[ytmp == max(ytmp)] <- 1
    mymodel <- gglasso::gglasso(xdata, ytmp, lambda = Lambda, loss = "logit", group = group, ...)
  } else if (family == "gaussian") {
    mymodel <- gglasso::gglasso(xdata, ydata, lambda = Lambda, loss = "ls", group = group, ...)
  } else {
    # Fail fast instead of erroring later on an undefined `mymodel`
    stop("Unsupported family: ", family, call. = FALSE)
  }
  # Extracting and formatting the beta coefficients: transpose to
  # penalties x variables and align columns with xdata. drop = FALSE
  # keeps the matrix structure when Lambda has length 1 (the original
  # silently collapsed to a vector in that case).
  beta_full <- t(as.matrix(mymodel$beta))
  beta_full <- beta_full[, colnames(xdata), drop = FALSE]
  selected <- ifelse(beta_full != 0, yes = 1, no = 0)
  list(selected = selected, beta_full = beta_full)
}
# Stability selection using the manually-defined group LASSO wrapper
# and the lambda grid computed above; group_penalisation = TRUE makes
# selection operate on whole groups of 5 predictors
stab <- VariableSelection(
xdata = simul$xdata, ydata = simul$ydata,
implementation = GroupLasso, family = "binomial", Lambda = Lambda,
group_x = rep(5, 4),
group_penalisation = TRUE
)
summary(stab)
}
# Restore the graphical parameters saved at the top of the example
par(oldpar)
# }
# Run the code above in your browser using DataLab