### glmboost() and the loss families used below come from mboost
library("mboost")

### a simple two-dimensional example: cars data
cars.gb <- glmboost(dist ~ speed, data = cars,
                    control = boost_control(mstop = 5000),
                    center = FALSE)
cars.gb

### coefficients should coincide
### (the offset is added to the first coefficient by hand)
coef(cars.gb) + c(cars.gb$offset, 0)
coef(lm(dist ~ speed, data = cars))

### now we center the design matrix for
### much quicker "convergence"
cars.gb_centered <- glmboost(dist ~ speed, data = cars,
                             control = boost_control(mstop = 2000),
                             center = TRUE)

### alternative loss function: absolute loss
cars.gbl <- glmboost(dist ~ speed, data = cars,
                     control = boost_control(mstop = 5000),
                     family = Laplace())
cars.gbl
coef(cars.gbl) + c(cars.gbl$offset, 0)

### Huber loss with adaptive choice of delta
cars.gbh <- glmboost(dist ~ speed, data = cars,
                     control = boost_control(mstop = 5000),
                     family = Huber())

### compare the coefficient paths with and without centering, side by side;
### par() returns the previous settings so they can be restored afterwards
op_paths <- par(mfrow = c(1, 2))
plot(cars.gb, main = "without centering")
plot(cars.gb_centered, main = "with centering")
par(op_paths)

### plot fit (left panel) and coefficient path (right panel)
### lines() and legend() add to the most recently drawn plot, so the
### scatterplot has to be the active panel while the fits are overlaid
op_fit <- par(mfrow = c(1, 2))
plot(dist ~ speed, data = cars)
lines(cars$speed, predict(cars.gb), col = "red")
lines(cars$speed, predict(cars.gbl), col = "green")
lines(cars$speed, predict(cars.gbh), col = "blue")
legend("topleft", col = c("red", "green", "blue"), lty = 1,
       legend = c("Gaussian", "Laplace", "Huber"), bty = "n")

### plot coefficient path of glmboost
### (right margin is widened by a factor of 2.5 for this panel)
op_mai <- par(mai = par("mai") * c(1, 1, 1, 2.5))
plot(cars.gb)

### restore the graphics settings changed above
par(op_mai)
par(op_fit)
### sparse high-dimensional example
library("Matrix")

### problem size: n observations, p predictors, ptrue non-zero coefficients
n <- 100
p <- 10000
ptrue <- 10

### start from an all-zero n x p Matrix and fill a randomly chosen
### twentieth of its cells with uniform random numbers
nnz <- floor(n * p / 20)
X <- Matrix(0, nrow = n, ncol = p)
X[sample.int(n * p, nnz)] <- runif(nnz)

### coefficient vector: zero everywhere except ptrue random positions
beta <- numeric(p)
beta[sample.int(p, ptrue)] <- 10

### response: linear predictor plus Gaussian noise (sd = 0.1)
y <- drop(X %*% beta + rnorm(n, sd = 0.1))

### mstop needs tuning
mod <- glmboost(x = X, y = y, center = TRUE)

### fitted coefficients at the positions of the non-zero true coefficients
coef(mod, which = which(beta > 0))
### Run the code above in your browser using DataLab