# See https://github.com/fabsig/GPBoost/tree/master/R-package for more examples
# \donttest{
library(gpboost)
data(GPBoost_data, package = "gpboost")
n <- length(y)
param_grid <- list("learning_rate" = c(0.001, 0.01, 0.1, 1, 10),
                   "min_data_in_leaf" = c(1, 10, 100, 1000),
                   "max_depth" = c(-1),
                   "num_leaves" = 2^(1:10),
                   "lambda_l2" = c(0, 1, 10, 100),
                   "max_bin" = c(250, 500, 1000, min(n, 10000)),
                   "line_search_step_length" = c(TRUE, FALSE))
# Note: "max_depth" = c(-1) means no depth limit as we tune 'num_leaves'.
# Can also additionally tune 'max_depth', e.g., "max_depth" = c(-1, 1, 2, 3, 5, 10)
metric = "mse" # Define metric
# Note: can also use metric = "test_neg_log_likelihood".
# See https://github.com/fabsig/GPBoost/blob/master/docs/Parameters.rst#metric-parameters
gp_model <- GPModel(group_data = group_data[,1], likelihood="gaussian")
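# A sketch of an alternative model specification: a Gaussian process on the spatial
# coordinates 'coords' also included in GPBoost_data, instead of grouped random effects
# (commented out so that the grouped random effects model above is used below)
# gp_model <- GPModel(gp_coords = coords, cov_function = "exponential", likelihood = "gaussian")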
data_train <- gpb.Dataset(data = X, label = y)
set.seed(1)
opt_params <- gpb.grid.search.tune.parameters(param_grid = param_grid,
                                              data = data_train, gp_model = gp_model,
                                              num_try_random = 100, nfold = 5,
                                              nrounds = 1000, early_stopping_rounds = 20,
                                              verbose_eval = 1, metric = metric, cv_seed = 4)
print(paste0("Best parameters: ",
paste0(unlist(lapply(seq_along(opt_params$best_params),
function(y, n, i) { paste0(n[[i]],": ", y[[i]]) },
y=opt_params$best_params,
n=names(opt_params$best_params))), collapse=", ")))
print(paste0("Best number of iterations: ", opt_params$best_iter))
print(paste0("Best score: ", round(opt_params$best_score, digits=3)))
# Alternatively and faster: use manually defined validation data instead of cross-validation.
# Here, 20% of the data is used as validation data
valid_tune_idx <- sample.int(length(y), as.integer(0.2*length(y)))
folds <- list(valid_tune_idx)
opt_params <- gpb.grid.search.tune.parameters(param_grid = param_grid,
                                              data = data_train, gp_model = gp_model,
                                              num_try_random = 100, folds = folds,
                                              nrounds = 1000, early_stopping_rounds = 20,
                                              verbose_eval = 1, metric = metric, cv_seed = 4)
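# A sketch of how the tuned parameters can be used afterwards: train a final model with
# 'opt_params$best_params' and 'opt_params$best_iter' and predict on the test data shipped
# with GPBoost_data ('X_test', 'group_data_test'). This is an illustration of typical usage,
# not part of the tuning function itself
bst <- gpb.train(data = data_train, gp_model = gp_model,
                 nrounds = opt_params$best_iter,
                 params = opt_params$best_params, verbose = 0)
pred <- predict(bst, data = X_test, group_data_pred = group_data_test[,1],
                predict_var = TRUE)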
# }