# Simulate a 500 x 7 numeric matrix: six columns of rnorm() draws named
# col1..col6, followed by a 0/1 column named "target".
dataset <- local({
  feature_names <- paste0("col", 1:6)
  # One rnorm() draw of 500 values per feature; vapply() binds the draws
  # column-wise and labels each column with its feature name.
  features <- vapply(
    feature_names,
    function(feature) rnorm(n = 500),
    FUN.VALUE = numeric(500)
  )
  # The target is sampled only after all feature columns have been drawn.
  target <- sample(0:1, 500, TRUE)
  cbind(features, target = target)
})
# Create k = 3 folds of type "stratified" from the target column (column 7
# of `dataset`), passing a fixed seed to create_folds().
fold_list <- splitTools::create_folds(
  y = dataset[, "target"],
  k = 3,
  type = "stratified",
  seed = 123
)
# GLM
# Cross-validation experiment for a GLM learner on the pre-computed folds.
# The learner and metric helpers are namespace-qualified, like
# MLCrossValidation, so the script does not depend on mlexperiments being
# attached via library().
glm_optimization <- mlexperiments::MLCrossValidation$new(
  learner = mlexperiments::LearnerGlm$new(),
  fold_list = fold_list,
  seed = 123
)
# Binomial family with logit link, i.e. logistic regression.
glm_optimization$learner_args <- list(family = binomial(link = "logit"))
glm_optimization$predict_args <- list(type = "response")
# AUC is used as the performance metric, with "1" declared as the positive
# class.
glm_optimization$performance_metric_args <- list(positive = "1")
glm_optimization$performance_metric <- mlexperiments::metric("auc")
glm_optimization$return_models <- TRUE
# set data
# Features are all columns except the 7th; the 7th column is the target.
glm_optimization$set_data(
  x = data.matrix(dataset[, -7]),
  y = dataset[, 7]
)
glm_cv_results <- glm_optimization$execute()
# KNN
# Cross-validation experiment for a KNN learner on the same `fold_list` as
# the GLM experiment. The learner and metric helpers are namespace-qualified,
# like MLCrossValidation, so the script does not depend on mlexperiments
# being attached via library().
knn_optimization <- mlexperiments::MLCrossValidation$new(
  learner = mlexperiments::LearnerKnn$new(),
  fold_list = fold_list,
  seed = 123
)
knn_optimization$learner_args <- list(
  k = 3,
  l = 0,
  # Passed as an unevaluated expression, not a value.
  # NOTE(review): presumably the learner evaluates this against each fold's
  # held-out features -- confirm in the LearnerKnn documentation.
  test = parse(text = "fold_test$x")
)
knn_optimization$predict_args <- list(type = "prob")
# AUC is used as the performance metric, with "1" declared as the positive
# class.
knn_optimization$performance_metric_args <- list(positive = "1")
knn_optimization$performance_metric <- mlexperiments::metric("auc")
# set data
# Features are all columns except the 7th; the 7th column is the target.
knn_optimization$set_data(
  x = data.matrix(dataset[, -7]),
  y = dataset[, 7]
)
cv_results_knn <- knn_optimization$execute()
# validate folds
# Pass both experiments to validate_fold_equality(). The call is
# namespace-qualified so it resolves without mlexperiments being attached.
# NOTE(review): presumably this checks that the experiments used identical
# folds -- confirm in the package documentation.
mlexperiments::validate_fold_equality(
  list(glm_optimization, knn_optimization)
)
# Run the code above in your browser using DataLab