## not run
## NOTES: remove the leading '#' from the code lines to run them; they are commented out
## because the importance and plot methods draw many graphics.
## The option 'threads = 1' (disabling parallel processing) is used to speed up computing,
## since parallel processing is slower for small samples.
## 1 - Importance for Classification and Regression with formula
#### Classification
# data(iris)
# iris.ruf <- randomUniformForest(Species ~ ., data = iris, threads = 1)
## global importance: giving 2 ways to explain importance (table and visualization)
# summary(iris.ruf)
## much more about importance: 4 further ways to explain importance,
## complementary to global importance
# iris.ruf.importance <- importance(iris.ruf, Xtest = iris, threads = 1)
## get importance summary
# iris.ruf.importance
## visualizing all in one
# plot(iris.ruf.importance, Xtest = iris)
## get details about observations (link between observations and features) :
## note that class and features are replaced by their internal numeric values.
## Retrieve model of observations importance
# iris.observationsImportance <-
# data.frame(iris.ruf.importance$localVariableImportance$obsVariableImportance)
## first impute true labels
# iris.observationsImportance$class <- as.factor(iris.observationsImportance$class)
# levels(iris.observationsImportance$class) <- iris.ruf$classes
## draw a sample of 10 observations to see which features most affect each observation,
## along with the associated label (either from the train or the test sample)
# iris.observationsImportance[sample(nrow(iris), 10), ]
#### Regression
# data(airquality)
# airquality.data = airquality
## impute NA
# airquality.NAimputed <- fillNA2.randomUniformForest(airquality.data)
## compute model
# ozone.ruf <- randomUniformForest(Ozone ~ ., data = airquality.NAimputed, threads = 1)
# ozone.ruf
## print and plot (global) variable importance
# summary(ozone.ruf)
## much more about importance
# ozone.ruf.importance <- importance(ozone.ruf, Xtest = airquality.NAimputed, threads = 1)
## visualizing all in one : in case of formula, 'formulaInput' is needed for the 'plot' method
# plot(ozone.ruf.importance, Xtest = airquality.NAimputed, formulaInput = ozone.ruf$formula)
## 2 - Importance for Classification and Regression without formula (more usual and recommended
## for random Uniform Forests)
#### Classification: "car evaluation" data (http://archive.ics.uci.edu/ml/datasets/Car+Evaluation)
# data(carEvaluation)
# car.data <- carEvaluation
# n <- nrow(car.data)
# p <- ncol(car.data)
# trainTestIdx <- cut(sample(1:n, n), 2, labels= FALSE)
## train examples
# car.data.train <- car.data[trainTestIdx == 1, -p]
# car.class.train <- as.factor(car.data[trainTestIdx == 1, p])
## test data
# car.data.test <- car.data[trainTestIdx == 2, -p]
# car.class.test <- as.factor(car.data[trainTestIdx == 2, p])
## compute model : train then test in the same function
# car.ruf <- randomUniformForest(car.data.train, car.class.train,
# xtest = car.data.test, ytest = car.class.test, threads = 1)
# car.ruf
## global importance: note that 'safety' does not appear to be an important feature in the barplot
## but in the table, it is, by far, the most important feature of unacceptable (unacc) cars.
# summary(car.ruf)
## interactions and local importance tell most of the story...
## (not run)
# car.ruf.importance <- importance(car.ruf, Xtest = car.data.train, threads = 1)
## ...that can be used to explain train data
# plot(car.ruf.importance, Xtest = car.data.train)
## or explain test data
# car.ruf.importance.test <- importance(car.ruf, Xtest = car.data.test, threads = 1)
# plot(car.ruf.importance.test, Xtest = car.data.test)
#### Regression : "Concrete Compressive Strength" data
## (http://archive.ics.uci.edu/ml/datasets/Concrete+Compressive+Strength)
# data(ConcreteCompressiveStrength)
# ConcreteCompressiveStrength.data = ConcreteCompressiveStrength
# n <- nrow(ConcreteCompressiveStrength.data)
# p <- ncol(ConcreteCompressiveStrength.data)
# trainTestIdx <- cut(sample(1:n, n), 2, labels= FALSE)
## train examples
# Concrete.data.train <- ConcreteCompressiveStrength.data[trainTestIdx == 1, -p]
# Concrete.responses.train <- ConcreteCompressiveStrength.data[trainTestIdx == 1, p]
## test data
# Concrete.data.test <- ConcreteCompressiveStrength.data[trainTestIdx == 2, -p]
# Concrete.responses.test <- ConcreteCompressiveStrength.data[trainTestIdx == 2, p]
## model
# Concrete.ruf <- randomUniformForest(Concrete.data.train, Concrete.responses.train,
# featureselectionrule = "L1", threads = 1)
# Concrete.ruf
## predictions : option ' type = "all" ' is needed to manually assess importance of a test set
# Concrete.ruf.pred <- predict(Concrete.ruf, Concrete.data.test, type = "all")
## more options about importance : more interactions
# Concrete.ruf.importance <- importance(Concrete.ruf, Xtest = Concrete.data.test,
# maxInteractions = 6, predObject = Concrete.ruf.pred, threads = 1)
## and more features
# plot(Concrete.ruf.importance, nLocalFeatures = 7, Xtest = Concrete.data.test)