# NOT RUN {
require(rbooster)
## n number of cases, p number of variables, k number of classes.
cv_sampler <- function(y, train_proportion) {
unlist(lapply(unique(y), function(m) sample(which(y==m), round(sum(y==m))*train_proportion)))
}
data_simulation <- function(n, p, k, train_proportion){
means <- seq(0, k*2.5, length.out = k)
x <- do.call(rbind, lapply(means,
function(m) matrix(data = rnorm(n = round(n/k)*p,
mean = m,
sd = 2),
nrow = round(n/k))))
y <- factor(rep(letters[1:k], each = round(n/k)))
train_i <- cv_sampler(y, train_proportion)
data <- data.frame(x, y = y)
data_train <- data[train_i,]
data_test <- data[-train_i,]
return(list(data = data,
data_train = data_train,
data_test = data_test))
}
### binary classification
dat <- data_simulation(n = 500, p = 2, k = 2, train_proportion = 0.8)
mm <- booster(x_train = dat$data_train[,1:2],
y_train = dat$data_train[,3],
classifier = "rpart",
method = "discrete",
x_test = dat$data_test[,1:2],
y_test = dat$data_test[,3],
weighted_bootstrap = FALSE,
max_iter = 100,
lambda = 1,
print_detail = TRUE,
print_plot = TRUE,
bag_frac = 1,
p_weak = 2)
## test prediction
mm$test_prediction
## or
pp <- predict(object = mm, newdata = dat$data_test[,1:2], type = "pred")
## test error
tail(mm$err_test, 1)
sum(dat$data_test[,3] != pp)/nrow(dat$data_test)
### multiclass classification
dat <- data_simulation(n = 800, p = 5, k = 3, train_proportion = 0.8)
mm <- booster(x_train = dat$data_train[,1:5],
y_train = dat$data_train[,6],
classifier = "rpart",
method = "real",
x_test = dat$data_test[,1:5],
y_test = dat$data_test[,6],
weighted_bootstrap = FALSE,
max_iter = 100,
lambda = 1,
print_detail = TRUE,
print_plot = TRUE,
bag_frac = 1,
p_weak = 2)
## test prediction
mm$test_prediction
## or
pp <- predict(object = mm, newdata = dat$data_test[,1:5], type = "pred", print_detail = TRUE)
## test error
tail(mm$err_test, 1)
sum(dat$data_test[,6] != pp)/nrow(dat$data_test)
### binary classification, custom classifier
dat <- data_simulation(n = 500, p = 10, k = 2, train_proportion = 0.8)
x <- dat$data[,1:10]
y <- dat$data[,11]
x_train <- dat$data_train[,1:10]
y_train <- dat$data_train[,11]
x_test <- dat$data_test[,1:10]
y_test <- dat$data_test[,11]
## a custom regression classifier function
classifier_lm <- function(x_train, y_train, weights, ...){
y_train_code <- c(-1,1)
y_train_coded <- sapply(levels(y_train), function(m) y_train_code[(y_train == m) + 1])
y_train_coded <- y_train_coded[,1]
model <- lm.wfit(x = as.matrix(cbind(1,x_train)), y = y_train_coded, w = weights)
return(list(coefficients = model$coefficients,
levels = levels(y_train)))
}
## predictor function
predictor_lm <- function(model, x_new, type = "pred", ...) {
coef <- model$coefficients
levels <- model$levels
fit <- as.matrix(cbind(1, x_new))%*%coef
probs <- 1/(1 + exp(-fit))
probs <- data.frame(probs, 1 - probs)
colnames(probs) <- levels
if (type == "pred") {
preds <- factor(levels[apply(probs, 1, which.max)], levels = levels, labels = levels)
return(preds)
}
if (type == "prob") {
return(probs)
}
}
## real AdaBoost
mm <- booster(x_train = x_train,
y_train = y_train,
classifier = classifier_lm,
predictor = predictor_lm,
method = "real",
x_test = x_test,
y_test = y_test,
weighted_bootstrap = FALSE,
max_iter = 50,
lambda = 1,
print_detail = TRUE,
print_plot = TRUE,
bag_frac = 0.5,
p_weak = 2)
## test prediction
mm$test_prediction
pp <- predict(object = mm, newdata = x_test, type = "pred", print_detail = TRUE)
## test error
tail(mm$err_test, 1)
sum(y_test != pp)/nrow(x_test)
## discrete AdaBoost
mm <- booster(x_train = x_train,
y_train = y_train,
classifier = classifier_lm,
predictor = predictor_lm,
method = "discrete",
x_test = x_test,
y_test = y_test,
weighted_bootstrap = FALSE,
max_iter = 50,
lambda = 1,
print_detail = TRUE,
print_plot = TRUE,
bag_frac = 0.5,
p_weak = 2)
## test prediction
mm$test_prediction
pp <- predict(object = mm, newdata = x_test, type = "pred", print_detail = TRUE)
## test error
tail(mm$err_test, 1)
sum(y_test != pp)/nrow(x_test)
# plot function can be used to plot errors
plot(mm)
# more examples are in vignette("booster", package = "rbooster")
# }
Run the code above in your browser using DataLab