# NOT RUN {
# Attach the required packages and the example data set.
library(subsemble)
library(cvAUC) # >= version 1.0.1
data(admissions)

# Training set: rows 1-400. Every column except Y is a predictor;
# the Y column is used as the outcome.
x <- subset(admissions, select = -Y)[1:400, ]
y <- admissions[1:400, "Y"]

# Test set: rows 401-500, split the same way.
newx <- subset(admissions, select = -Y)[401:500, ]
newy <- admissions[401:500, "Y"]
# Configure the Subsemble: two base learners, a GLM metalearner,
# and two subsets of the training data.
subsets <- 2
metalearner <- "SL.glm"
learner <- c("SL.randomForest", "SL.glm")

# Fit on the training data and score the test data in the same call.
# learnControl$multiType is left at its default ("crossprod"), so the
# ensemble contains 4 models (2 subsets x 2 learners).
fit <- subsemble(
  x = x,
  y = y,
  newx = newx,
  family = binomial(),
  learner = learner,
  metalearner = metalearner,
  subsets = subsets
)

# Evaluate the model: AUC of the predictions on the test set.
auc <- AUC(
  predictions = fit$pred,
  labels = newy
)
print(auc) # Test set AUC is: 0.937

# Predictions on new data can also be generated after fitting
# through the predict method.
pred <- predict(fit, newx)
auc <- AUC(
  predictions = pred$pred,
  labels = newy
)
print(auc) # Test set AUC is: 0.937
# Change the learnControl argument, then train and evaluate a new Subsemble.
# Under learnControl$multiType = "divisor" the ensemble contains
# only 2 models (one per subset).
fit <- subsemble(
  x = x,
  y = y,
  newx = newx,
  family = binomial(),
  learner = learner,
  metalearner = metalearner,
  subsets = subsets,
  learnControl = list(multiType = "divisor")
)
auc <- AUC(
  predictions = fit$pred,
  labels = newy
)
print(auc) # Test set AUC is: 0.922
# Single-learner example.
# With 3 subsets and 1 learner the ensemble contains 3 models in total.
subsets <- 3
metalearner <- "SL.glmnet"
learner <- "SL.randomForest"
fit <- subsemble(
  x = x,
  y = y,
  newx = newx,
  family = binomial(),
  learner = learner,
  metalearner = metalearner,
  subsets = subsets
)
auc <- AUC(
  predictions = fit$pred,
  labels = newy
)
print(auc) # Test set AUC is: 0.925
# Full-data example (subsets = 1).
# The ensemble contains 2 models, one per learner, which makes this
# equivalent to the Super Learner algorithm.
subsets <- 1
metalearner <- "SL.glm"
learner <- c("SL.randomForest", "SL.glm")
fit <- subsemble(
  x = x,
  y = y,
  newx = newx,
  family = binomial(),
  learner = learner,
  metalearner = metalearner,
  subsets = subsets
)
auc <- AUC(
  predictions = fit$pred,
  labels = newy
)
print(auc) # Test set AUC is: 0.935
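# Multicore subsemble via the "parallel" package.
# To perform the cross-validation and training steps using all available cores,
# pass the parallel = "multicore" option to subsemble(), for example:
#   fit <- subsemble(x = x, y = y, newx = newx, family = binomial(),
#                    learner = learner, metalearner = metalearner,
#                    subsets = subsets, parallel = "multicore")
# More examples and information at: https://github.com/ledell/subsemble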
# }
# Run the code above in your browser using DataLab