if (FALSE) {
  # Example workflow: tune a grid of GBMs with HMDA, compute weighted mean
  # SHAP (WMSHAP) values on a test frame, and then combine those values
  # over user-defined feature domains. The whole example is guarded by
  # `if (FALSE)` so it is never executed when this file is sourced.
  library(HMDA)
  library(h2o)
  hmda.init()

  # Import a sample binary outcome dataset (train and test files) into H2O
  train_url <-
    "https://s3.amazonaws.com/h2o-public-test-data/smalldata/higgs/higgs_train_10k.csv"
  test_url <-
    "https://s3.amazonaws.com/h2o-public-test-data/smalldata/higgs/higgs_test_5k.csv"
  train <- h2o.importFile(train_url)
  test <- h2o.importFile(test_url)

  # The response is the "response" column; every other column is a predictor
  y <- "response"
  x <- setdiff(names(train), y)

  # Binary classification expects the response to be a factor in both frames
  train[, y] <- as.factor(train[, y])
  test[, y] <- as.factor(test[, y])

  # Hyperparameter values that define the grid
  params <- list(
    learn_rate = c(0.01, 0.1),
    max_depth = c(3, 5, 9),
    sample_rate = c(0.8, 1.0)
  )

  # Train and validate a cartesian grid of GBMs over `params`
  hmda_grid1 <- hmda.grid(
    algorithm = "gbm",
    x = x,
    y = y,
    grid_id = "hmda_grid1",
    training_frame = train,
    nfolds = 10,
    ntrees = 100,
    seed = 1,
    hyper_params = params
  )

  # Compute weighted mean SHAP values for the grid's models on the test frame
  wmshap <- hmda.wmshap(
    models = hmda_grid1,
    newdata = test,
    performance_metric = "aucpr",
    standardize_performance_metric = FALSE,
    performance_type = "xval",
    minimum_performance = 0,
    method = "mean",
    cutoff = 0.01,
    plot = TRUE
  )

  # Define domains to combine their WMSHAP values ----
  #
  # There are different ways to specify a cluster of features, or even a
  # group of factors that touch on a broader domain. HMDA includes an
  # exploratory factor analysis procedure to help with this process
  # (see ?hmda.efa). Here we simply *assume* there are good reasons to
  # combine some of the features under the following clusters:
  domains <- list(
    Group1 = c("x22", "x18", "x14", "x1", "x10", "x4"),
    Group2 = c("x25", "x23", "x6", "x27"),
    Group3 = c("x28", "x26")
  )

  # Aggregate the WMSHAP values per domain, draw a bar plot, and print
  hmda.domain(
    wmshap = wmshap,
    plot = "bar",
    domains = domains,
    print = TRUE
  )
}
# Run the code above in your browser using DataLab