data("bodyfat", package = "mboost")
### fit linear model to data
model <- glmboost(DEXfat ~ ., data = bodyfat, center = TRUE)
### AIC-based selection of number of boosting iterations
maic <- AIC(model)
maic
### inspect coefficient path and AIC-based stopping criterion
par(mai = par("mai") * c(1, 1, 1, 1.8))
plot(model)
abline(v = mstop(maic), col = "lightgray")
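### as a small illustration, the coefficients at the AIC-based stopping
### iteration can be inspected by setting the model to mstop(maic) via the
### subset operator (note: this changes 'model' in place, so we reset it to
### the initial 100 iterations afterwards; off2int adds the offset to the intercept)
coef(model[mstop(maic)], off2int = TRUE)
model[100]   # restore the original number of boosting iterations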
### 10-fold cross-validation
cv10f <- cv(model.weights(model), type = "kfold")
cvm <- cvrisk(model, folds = cv10f, papply = lapply)  ## papply = lapply: sequential evaluation
print(cvm)
mstop(cvm)
plot(cvm)
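### compare the AIC-based and the cross-validated stopping iterations
c(AIC = mstop(maic), CV = mstop(cvm))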
### 25 bootstrap iterations (manually)
set.seed(290875)
n <- nrow(bodyfat)
bs25 <- rmultinom(25, n, rep(1, n)/n)  ## 25 sets of n-out-of-n bootstrap weights
cvm <- cvrisk(model, folds = bs25, papply = lapply)
print(cvm)
mstop(cvm)
plot(cvm)
### the same 25 bootstrap samples are used by cvrisk() by default
set.seed(290875)
cvrisk(model, papply = lapply)
### 25 bootstrap iterations (using cv)
set.seed(290875)
bs25_2 <- cv(model.weights(model), type = "bootstrap")  ## B = 25 bootstrap samples by default
all(bs25 == bs25_2)
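### besides "bootstrap" and "kfold", cv() also supports random subsampling;
### a small illustration using the defaults (B = 25 subsamples, each
### containing 50% of the observations):
subs <- cv(model.weights(model), type = "subsampling")
cvrisk(model, folds = subs, papply = lapply)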
### tree-based gradient boosting via blackboost
blackbox <- blackboost(DEXfat ~ ., data = bodyfat)
cvtree <- cvrisk(blackbox, papply = lapply)
plot(cvtree)
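### as an illustration, the tree-based model can be set to the
### cross-validated optimum and its fitted values inspected
blackbox[mstop(cvtree)]
round(head(predict(blackbox)), 2)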
### cvrisk in parallel modes:
## by default cvrisk uses mclapply from package 'parallel' (the successor of
## the retired 'multicore' package), which only runs in parallel on
## unix-alike systems
library("parallel")
cvrisk(model)
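### the number of cores can be controlled by wrapping mclapply, e.g.
### (assuming two cores are available on a unix-alike system; illustration only)
cvrisk(model, papply = function(X, FUN, ...) mclapply(X, FUN, ..., mc.cores = 2))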
## infrastructure needs to be set up in advance, e.g., a socket cluster
## with 25 worker nodes (using package 'parallel' instead of 'snow'/PVM)
cl <- makeCluster(25) # e.g. to run cvrisk on 25 nodes
myApply <- function(X, FUN, cl, ...) {
    clusterEvalQ(cl, library("mboost"))  # load mboost on all worker nodes
    ## further set-up steps as required
    clusterApplyLB(cl = cl, X, FUN, ...)
}
cvrisk(model, papply = myApply, cl = cl)
stopCluster(cl)