# Quick example
# Create random x with missing values
x <- matrix(rnorm(300), ncol = 3)
# Add scaled, column-shuffled copies of x to itself so the predictors are
# correlated — the imputation model needs that shared signal to work with
x <- x + rnorm(1) * x[, sample(1:3)] + rnorm(1) * x[, sample(1:3)]
# Knock out 30 of the 300 entries (10%) at random positions
x[sample(1:300, 30)] <- NA
# Impute missing values: fit the imputation model, then apply it to x
m_impute <- impute.glmnet.matrix_fit(x, ncores = 2)
x_imputed <- impute.glmnet.matrix(m_impute, x)
# Complete example (it might take some time even if the example is simple...)
# \donttest{
# Create random x (predictors) and y (binary)
x <- matrix(rnorm(4000), ncol = 20)
# Mix in scaled, column-shuffled copies of x to induce correlation between
# the 20 predictors (gives the imputation model signal to exploit)
x <- x + rnorm(1) * x[, sample(1:20)] + rnorm(1) * x[, sample(1:20)]
# Binary outcome driven by the first two predictors plus small noise
y <- 1 * (plogis(x[, 1] - x[, 2] + rnorm(200, 0, 0.1)) > 0.5)
# Make some x missing values: 400 of the 4000 entries (10%) at random
x[sample(1:4000, 400)] <- NA
# Predict y via cross-validation, including imputations
# Fit one imputation model plus one lasso model per imputed dataset,
# using only the training fold (so no information leaks from the test fold).
# Returns a list with components `impute` (the fitted imputation model)
# and `lasso` (a list of fitted binomial glmnet models, one per imputation).
fit_fun <- function(x_training, y_training) {
  m <- list(
    impute = impute.glmnet.matrix_fit(x_training, ncores = 1),
    lasso = list()
  )
  x_imputed <- impute.glmnet.matrix(m$impute, x_training)
  # seq_along() is safe when x_imputed is empty (1:length() would yield 1:0)
  for (imp in seq_along(x_imputed)) {
    m$lasso[[imp]] <- glmnet_fit(x_imputed[[imp]], y_training, family = "binomial")
  }
  m
}
# Predict on the test fold: impute x_test with the training-fold imputation
# model, predict with each per-imputation lasso model, and average the
# predictions across imputations. Returns one predicted probability per row.
predict_fun <- function(m, x_test) {
  x_imputed <- impute.glmnet.matrix(m$impute, x_test)
  y_pred <- NULL
  # seq_along() is safe when x_imputed is empty (1:length() would yield 1:0)
  for (imp in seq_along(x_imputed)) {
    y_pred <- cbind(y_pred, glmnet_predict(m$lasso[[imp]], x_imputed[[imp]]))
  }
  # rowMeans() is the vectorized equivalent of apply(y_pred, 1, mean)
  rowMeans(y_pred)
}
# Only 2 folds to ensure the example runs quickly
res <- cv(x, y, family = "binomial", fit_fun = fit_fun, predict_fun = predict_fun, nfolds = 2)
# Show accuracy
# Sensitivity: fraction of true positives predicted above the 0.5 cutoff
se <- mean(res$predictions$y.pred[res$predictions$y == 1] > 0.5)
# Specificity: fraction of true negatives predicted below the 0.5 cutoff
sp <- mean(res$predictions$y.pred[res$predictions$y == 0] < 0.5)
# Balanced accuracy: mean of sensitivity and specificity
bac <- (se + sp) / 2
cat("Sensitivity:", round(se, 2), "\n")
cat("Specificity:", round(sp, 2), "\n")
cat("Balanced accuracy:", round(bac, 2), "\n")
# }
# Run the code above in your browser using DataLab