plot.feature_importance_explainer: Plots Variable Importance

Description

Function plot.feature_importance_explainer plots variable importance calculated as changes in the loss function after variable drops. It uses output from the feature_importance function, which corresponds to a permutation-based measure of variable importance. Variables are sorted in the same order in all panels. The order depends on the average drop-out loss. In individual panels the variable contributions may not appear sorted if variable importance differs between models.

Usage

# S3 method for feature_importance_explainer
plot(x, ..., max_vars = NULL,
  bar_width = 10)

Arguments

x

a variable dropout explainer produced with the 'feature_importance' function

...

other explainers that shall be plotted together

max_vars

maximum number of variables that shall be presented for each model. By default NULL, which means all variables

bar_width

width of bars. By default 10

Value

a ggplot2 object

Details

Find more details in the Feature Importance Chapter.

References

Predictive Models: Visual Exploration, Explanation and Debugging https://pbiecek.github.io/PM_VEE

Examples

Run this code

# NOT RUN {
library("DALEX")

# Toy examples, because CRAN angels ask for them
# Keep only rows without missing values before fitting.
titanic <- na.omit(titanic)

# Logistic regression for survival based on gender, age and fare.
model_titanic_glm <- glm(
  survived == "yes" ~ gender + age + fare,
  data = titanic,
  family = "binomial"
)

# Wrap the model in an explainer. Column 9 is excluded from `data`
# (presumably the `survived` target -- verify against the dataset).
explain_titanic_glm <- explain(
  model_titanic_glm,
  data = titanic[, -9],
  y = titanic$survived == "yes"
)

# Feature importance for the GLM explainer, followed by the plot method
# documented on this page.
vd_glm <- feature_importance(explain_titanic_glm)
plot(vd_glm)

# }
# NOT RUN {
# This section continues the previous one: it uses `titanic` and `explain()`
# without defining or loading them again.
library("randomForest")

# Random forest on the titanic data.
model_titanic_rf <- randomForest(
  survived == "yes" ~ gender + age + class + embarked + fare + sibsp + parch,
  data = titanic
)
# Column 9 is excluded from `data` (presumably the `survived` target --
# verify against the dataset).
explain_titanic_rf <- explain(
  model_titanic_rf,
  data = titanic[, -9],
  y = titanic$survived == "yes"
)

vd_rf <- feature_importance(explain_titanic_rf)
plot(vd_rf)

# Random forest on the HR data, scored with the cross-entropy loss.
HR_rf_model <- randomForest(status ~ ., data = HR, ntree = 100)
explainer_rf <- explain(HR_rf_model, data = HR, y = HR$status)
vd_rf <- feature_importance(
  explainer_rf,
  type = "raw",
  loss_function = loss_cross_entropy
)
head(vd_rf)
plot(vd_rf)

# Logistic regression on the HR data ("fired" vs. the rest), scored with the
# root-mean-square loss.
HR_glm_model <- glm(status == "fired" ~ ., data = HR, family = "binomial")
explainer_glm <- explain(HR_glm_model, data = HR, y = HR$status == "fired")
vd_glm <- feature_importance(
  explainer_glm,
  type = "raw",
  loss_function = loss_root_mean_square
)
head(vd_glm)
plot(vd_glm)

# xgboost model on the same binary target; predictors are passed as a model
# matrix built without an intercept column.
library("xgboost")
model_matrix_train <- model.matrix(status == "fired" ~ . - 1, HR)
data_train <- xgb.DMatrix(model_matrix_train, label = HR$status == "fired")
# NOTE(review): `silent` appears to be deprecated in newer xgboost releases in
# favour of `verbosity` -- confirm against the installed version.
param <- list(
  max_depth = 2, eta = 1, silent = 1, nthread = 2,
  objective = "binary:logistic", eval_metric = "auc"
)
HR_xgb_model <- xgb.train(param, data_train, nrounds = 50)
explainer_xgb <- explain(
  HR_xgb_model,
  data = model_matrix_train,
  y = HR$status == "fired",
  label = "xgboost"
)
vd_xgb <- feature_importance(explainer_xgb, type = "raw")
head(vd_xgb)

# Several importance objects can be drawn together; `bar_width` overrides the
# default of 10.
plot(vd_glm, vd_xgb, bar_width = 5)

# }
# NOT RUN {
# }

Run the code above in your browser using DataLab