ingredients (version 0.3.1)

feature_importance: Feature Importance Plots

Description

This function calculates variable importance based on the drop in the loss function after single-variable perturbations (permutations). For this reason it is also called a Variable Dropout Plot.
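
The idea can be sketched in a few lines of base R. This is a simplified illustration of the permutation scheme, not the ingredients implementation; the helper name and the single-permutation-per-variable design are assumptions made for clarity:

    # Sketch: permute one variable at a time and measure the increase in loss
    permutation_importance <- function(model, data, y,
                                       predict_function, loss_function) {
      # loss of the full, unperturbed model
      loss_full <- loss_function(y, predict_function(model, data))
      sapply(names(data), function(var) {
        perturbed <- data
        # shuffling breaks the relation between this variable and y
        perturbed[[var]] <- sample(perturbed[[var]])
        loss_function(y, predict_function(model, perturbed)) - loss_full
      })
    }

The real function additionally supports sampling via `n_sample` and the `type` transformations described in the Arguments section.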

Usage

feature_importance(x, ...)

# S3 method for explainer
feature_importance(x, loss_function = loss_root_mean_square,
  ..., type = "raw", n_sample = NULL)

# S3 method for default
feature_importance(x, data, y, predict_function,
  loss_function = loss_root_mean_square, ...,
  label = class(x)[1], type = "raw", n_sample = NULL)

Arguments

x

a model to be explained, or an explainer created with function `DALEX::explain()`.

...

other parameters

loss_function

a function that will be used to assess variable importance

type

character, type of transformation that should be applied to the dropout loss. 'raw' returns raw drop losses, 'ratio' returns drop_loss/drop_loss_full_model, while 'difference' returns drop_loss - drop_loss_full_model

n_sample

number of observations that should be sampled for the calculation of variable importance. If NULL then variable importance will be calculated on the whole dataset (no sampling).

data

validation dataset, will be extracted from `x` if it's an explainer

y

true labels for `data`, will be extracted from `x` if it's an explainer

predict_function

predict function, will be extracted from `x` if it's an explainer

label

name of the model. By default it's extracted from the 'class' attribute of the model

Value

An object of the class 'feature_importance'. It's a data frame with the calculated dropout loss for each variable.

Details

Find more details in the Feature Importance Chapter.

References

Predictive Models: Visual Exploration, Explanation and Debugging https://pbiecek.github.io/PM_VEE

Examples

library("DALEX")
titanic <- na.omit(titanic)
model_titanic_glm <- glm(survived == "yes" ~ gender + age + fare,
                         data = titanic, family = "binomial")

explain_titanic_glm <- explain(model_titanic_glm,
                               data = titanic[,-9],
                               y = titanic$survived == "yes")

vd_rf <- feature_importance(explain_titanic_glm)
plot(vd_rf)

 
library("randomForest")

titanic <- na.omit(titanic)
model_titanic_rf <- randomForest(survived == "yes" ~ gender + age + class + embarked +
                                   fare + sibsp + parch, data = titanic)
explain_titanic_rf <- explain(model_titanic_rf,
                              data = titanic[,-9],
                              y = titanic$survived == "yes")

vd_rf <- feature_importance(explain_titanic_rf)
plot(vd_rf)

HR_rf_model <- randomForest(status ~ ., data = HR, ntree = 100)
explainer_rf  <- explain(HR_rf_model, data = HR, y = HR$status)
vd_rf <- feature_importance(explainer_rf, type = "raw",
                            loss_function = loss_cross_entropy)
head(vd_rf)
plot(vd_rf)

HR_glm_model <- glm(status == "fired" ~ ., data = HR, family = "binomial")
explainer_glm <- explain(HR_glm_model, data = HR, y = HR$status == "fired")
vd_glm <- feature_importance(explainer_glm, type = "raw",
                             loss_function = loss_root_mean_square)
head(vd_glm)
plot(vd_glm)

library("xgboost")
model_matrix_train <- model.matrix(status == "fired" ~ . - 1, HR)
data_train <- xgb.DMatrix(model_matrix_train, label = HR$status == "fired")
param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
              objective = "binary:logistic", eval_metric = "auc")
HR_xgb_model <- xgb.train(param, data_train, nrounds = 50)
explainer_xgb <- explain(HR_xgb_model, data = model_matrix_train,
                         y = HR$status == "fired", label = "xgboost")
vd_xgb <- feature_importance(explainer_xgb, type = "raw")
head(vd_xgb)
plot(vd_xgb, vd_glm)
 