# See https://github.com/fabsig/GPBoost/tree/master/R-package for more examples
# \donttest{
library(gpboost)
data(GPBoost_data, package = "gpboost")

# Random effects model with a Gaussian likelihood, grouped by the first
# column of 'group_data', and the boosting dataset built from 'X' and 'y'
gp_model <- GPModel(group_data = group_data[, 1], likelihood = "gaussian")
dataset <- gpb.Dataset(X, label = y)

# Candidate values for every tuning parameter that the grid search varies
param_grid <- list(
  learning_rate = c(1, 0.1, 0.01),
  min_data_in_leaf = c(10, 100, 1000),
  max_depth = c(1, 2, 3, 5, 10),
  lambda_l2 = c(0, 1, 10)
)

# Parameters passed alongside the grid and not varied by it
other_params <- list(num_leaves = 2^10)
# Note: here we try different values for 'max_depth' and thus set 'num_leaves' to a large value.
# An alternative strategy is to impose no limit on 'max_depth',
# and try different values for 'num_leaves' as follows:
# param_grid = list("learning_rate" = c(1,0.1,0.01),
# "min_data_in_leaf" = c(10,100,1000),
# "num_leaves" = 2^(1:10),
# "lambda_l2" = c(0,1,10))
# other_params <- list(max_depth = -1)
# Seed the random number generator before running the search
set.seed(1)

# Full grid search (num_try_random = NULL) scored with 4-fold cross-validation
opt_params <- gpb.grid.search.tune.parameters(
  param_grid = param_grid,
  params = other_params,
  num_try_random = NULL,
  nfold = 4,
  data = dataset,
  gp_model = gp_model,
  use_gp_model_for_validation = TRUE,
  verbose_eval = 1,
  nrounds = 1000,
  early_stopping_rounds = 10
)

# Report the selected parameters as "name: value" pairs joined by commas
print(paste0(
  "Best parameters: ",
  paste0(
    unlist(Map(
      function(param_name, param_value) paste0(param_name, ": ", param_value),
      names(opt_params$best_params),
      opt_params$best_params
    )),
    collapse = ", "
  )
))
print(paste0("Best number of iterations: ", opt_params$best_iter))
print(paste0("Best score: ", round(opt_params$best_score, digits = 3)))
# Note: other scoring / evaluation metrics can be chosen using the
# 'metric' argument, e.g., metric = "l1"
# Tune against manually defined validation data instead of cross-validation:
# sample roughly 20% of the observation indices (count truncated to an
# integer) and supply them as the single entry of 'folds'
valid_tune_idx <- sample.int(length(y), size = as.integer(0.2 * length(y)))
folds <- list(valid_tune_idx)
opt_params <- gpb.grid.search.tune.parameters(
  param_grid = param_grid,
  params = other_params,
  num_try_random = NULL,
  folds = folds,
  data = dataset,
  gp_model = gp_model,
  use_gp_model_for_validation = TRUE,
  verbose_eval = 1,
  nrounds = 1000,
  early_stopping_rounds = 10
)
# }
# Run the code above in your browser using DataLab