# NOT RUN {
# We train a tree on the Boston dataset:
if(require("rpart")){
data("Boston", package = "MASS")
mod = rpart(medv ~ ., data = Boston)
# Extract the features X and the target y
X = Boston[-which(names(Boston) == 'medv')]
y = Boston$medv
# Compute feature importances as the performance drop in mean absolute error
imp = feature.imp(mod, X, y, loss = 'mae')
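# The loss can also be supplied as a custom function instead of a shortcut string.
# This is only a sketch: it assumes that your version of feature.imp() accepts a
# function of the form function(actual, predicted) for the loss argument.
imp.rmse = feature.imp(mod, X, y,
  loss = function(actual, predicted) sqrt(mean((actual - predicted)^2)))
plot(imp.rmse)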
# Plot the results directly
plot(imp)
# Since the result is a ggplot object, you can extend it:
library("ggplot2")
plot(imp) + theme_bw()
# If you want to do your own thing, just extract the data:
imp.dat = imp$data()
head(imp.dat)
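# For example, list the features ordered by their importance values
# (plain base R; 'importance' is the column also used for plotting below):
imp.dat[order(imp.dat$importance, decreasing = TRUE), ]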
ggplot(imp.dat, aes(x = ..feature, y = importance)) + geom_point() +
  theme_bw()
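# A bar chart with the features sorted by importance is another common view.
# reorder() and coord_flip() are standard base R / ggplot2 helpers, not part of iml:
ggplot(imp.dat, aes(x = reorder(..feature, importance), y = importance)) +
  geom_col() + coord_flip() + theme_bw()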
# feature.imp() also works with multiclass classification.
# In this case, the importance measure takes all classes into account.
mod = rpart(Species ~ ., data = iris)
X = iris[-which(names(iris) == 'Species')]
y = iris$Species
# For some models we have to specify additional arguments for the predict function
imp = feature.imp(mod, X, y, loss = 'ce', predict.args = list(type = 'prob'))
plot(imp)
# Here we encounter the special case that the model predicts the training data perfectly.
# With zero model error, the measured importance becomes infinite:
imp$data()
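# Non-finite importance values can be spotted directly in the extracted data
# (this assumes the same 'importance' column as in the regression example above):
imp.dat.iris = imp$data()
imp.dat.iris[!is.finite(imp.dat.iris$importance), ]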
# For multiclass classification models, you can choose to compute the performance for a single class only.
# Make sure to adapt y accordingly:
imp = feature.imp(mod, X, y == 'virginica', class = 3, loss = 'ce',
predict.args = list(type = 'prob'))
plot(imp)
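# The same pattern works for the other classes, e.g. the first class ('setosa').
# This assumes the class index follows the column order of the predicted probabilities:
imp.setosa = feature.imp(mod, X, y == 'setosa', class = 1, loss = 'ce',
  predict.args = list(type = 'prob'))
plot(imp.setosa)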
}
# }