# \donttest{
# Example: run extrinsic variable selection on each multiply-imputed copy of
# the `biomarkers` data, then pool the per-imputation selections.
# NOTE(review): `biomarkers`, `extrinsic_selection()` and
# `pool_selected_sets()` are not attached by this script; it assumes the
# package providing them is already loaded -- confirm.
data("biomarkers")

# Candidate feature names: every column except the two outcome columns.
x <- biomarkers[, !(names(biomarkers) %in% c("mucinous", "high_malignancy"))]
feature_nms <- names(x)

library("dplyr")
library("SuperLearner")
# do multiple imputation (with a small number for illustration only)
library("mice")
n_imp <- 2
set.seed(20231129)
mi_biomarkers <- mice::mice(data = biomarkers, m = n_imp, printFlag = FALSE)
# Stack the completed datasets in long format; rename the mice bookkeeping
# columns `.imp` / `.id` to `imp` / `id`.
imputed_biomarkers <- mice::complete(mi_biomarkers, action = "long") %>%
  rename(imp = .imp, id = .id)

# set up a list to collect selected sets: one slot per imputed dataset
# (sized from n_imp so it always matches the loop below)
all_selected_vars <- vector("list", length = n_imp)
for (i in seq_len(n_imp)) {
  # fit a Super Learner using simple library for illustration only
  these_data <- imputed_biomarkers %>%
    filter(imp == i)
  this_y <- these_data$mucinous
  # NOTE(review): the design matrix keeps only the `lab*` and `cea*` columns,
  # while `feature_nms` is built from all non-outcome columns of `biomarkers`;
  # verify that the two agree.
  this_x_df <- these_data %>%
    select(starts_with("lab"), starts_with("cea")) %>%
    as.data.frame()
  fit <- SuperLearner::SuperLearner(
    Y = this_y, X = this_x_df,
    SL.library = "SL.glm",
    cvControl = list(V = 2),
    family = "binomial"
  )
  # do extrinsic selection and keep the `selected` component for this
  # imputed dataset
  all_selected_vars[[i]] <- extrinsic_selection(
    fit = fit, feature_names = feature_nms, threshold = 5, import_type = "all"
  )$selected
}

# pool the selected sets across the imputed datasets
# (threshold of 1 / n_imp: presumably keeps a feature selected in at least
# one imputed dataset -- confirm against the pool_selected_sets() docs)
selected_vars <- pool_selected_sets(
  sets = all_selected_vars, threshold = 1 / n_imp
)
feature_nms[selected_vars]
# }
# Run the code above in your browser using DataLab