# Automatic Feature Selection
# \donttest{
# Partition the penguins task into a training part (80%) and an external
# test part (the remaining rows)
task = tsk("penguins")
row_split = partition(task, ratio = 0.8)

# Build the auto fselector: random search over feature subsets of an rpart
# classifier, each subset scored by classif.ce on a holdout split, stopping
# after 4 evaluations
afs = auto_fselector(
  fselector = fs("random_search"),
  learner = lrn("classif.rpart"),
  resampling = rsmp("holdout"),
  measure = msr("classif.ce"),
  term_evals = 4
)

# Run the feature selection on the training rows and fit the final model
afs$train(task, row_ids = row_split$train)

# Use the final model to predict the held-out test rows
afs$predict(task, row_ids = row_split$test)

# Result of the feature selection
afs$fselect_result

# The model slot holds both the trained learner and the fselect instance
afs$model

# Direct access to the trained learner
afs$learner

# Direct access to the fselect instance
afs$fselect_instance
# Nested Resampling

# Inner loop: an auto fselector that runs a random search (4 evaluations)
# with a holdout resampling and classif.ce as the measure
afs = auto_fselector(
  fselector = fs("random_search"),
  learner = lrn("classif.rpart"),
  resampling = rsmp("holdout"),
  measure = msr("classif.ce"),
  term_evals = 4
)

# Outer loop: 3-fold cross-validation around the auto fselector; models are
# stored so the inner results can be extracted afterwards
outer_cv = rsmp("cv", folds = 3)
rr = resample(
  task = task,
  learner = afs,
  resampling = outer_cv,
  store_models = TRUE
)

# Feature selection results of the inner loops
extract_inner_fselect_results(rr)

# Performance scores of the individual outer resampling iterations
rr$score()

# Aggregated score: unbiased performance estimate of the final model
# trained on the full data set
rr$aggregate()
# }
# Run the code above in your browser using DataLab