## not run
## NOTE: please remove the comments to run
#### Classification: "car evaluation" data (http://archive.ics.uci.edu/ml/datasets/Car+Evaluation)
# data(carEvaluation)
# car.data <- carEvaluation
# n <- nrow(car.data)
# p <- ncol(car.data)
# trainTestIdx <- cut(sample(1:n, n), 2, labels = FALSE)
## train examples
# car.data.train <- car.data[trainTestIdx == 1, -p]
# car.class.train <- as.factor(car.data[trainTestIdx == 1, p])
## test data
# car.data.test <- car.data[trainTestIdx == 2, -p]
# car.class.test <- as.factor(car.data[trainTestIdx == 2, p])
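## optional sketch (not part of the original example): inspect the class balance of the
## training labels; the imbalance motivates the note on OOB error below
# table(car.class.train)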
## compute the model: train and test in the same call.
## Note that the OOB error is hardly reliable here due to the imbalanced classes.
## Handle it using Breiman's second upper bound...
# car.ruf <- randomUniformForest(car.data.train, car.class.train,
# xtest = car.data.test, ytest = car.class.test)
# car.ruf
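## optional sketch (not part of the original example): score the test set explicitly,
## assuming the package's predict() method returns class labels by default
# car.pred.test <- predict(car.ruf, car.data.test)
# confusion.test <- table(car.pred.test, car.class.test)
# confusion.test
# 1 - sum(diag(confusion.test))/length(car.class.test)  ## test error rate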
## get, for example, the two most important features from the table of features;
## here we choose "buying" and "safety"
# summary(car.ruf)
## compute the importance object (gathering almost all objects which could lead to a better
## explanation), with the deepest level of interactions to get enough points
# car.ruf.importance <- importance.randomUniformForest(car.ruf,
# Xtest = car.data.train, maxInteractions = 8)
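## optional sketch (not part of the original example): visualize the importance object;
## assumes the package's plot method for importance objects accepts Xtest, as in the
## package examples
# plot(car.ruf.importance, Xtest = car.data.train)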
## compute and plot the partial dependence between "buying" and "safety" on the train data:
## the heatmap reveals clusters, the dependence plot tells what happens there, and the class
## distributions tell how it happens; one has to connect these with the 'importanceObject' figures
## trick: enlarge then reduce the window to see the figures
# pDbetweenPredictors.car.buyingAndSafety <- partialDependenceBetweenPredictors(car.data.train,
# car.ruf.importance, c("buying", "safety"), whichOrder = "all")
#### Regression: "Concrete Compressive Strength" data
## (http://archive.ics.uci.edu/ml/datasets/Concrete+Compressive+Strength)
# data(ConcreteCompressiveStrength)
# ConcreteCompressiveStrength.data <- ConcreteCompressiveStrength
# n <- nrow(ConcreteCompressiveStrength.data)
# p <- ncol(ConcreteCompressiveStrength.data)
# trainTestIdx <- cut(sample(1:n, n), 2, labels = FALSE)
## train examples
# concrete.data.train <- ConcreteCompressiveStrength.data[trainTestIdx == 1, -p]
# concrete.responses.train <- ConcreteCompressiveStrength.data[trainTestIdx == 1, p]
## test data
# concrete.data.test <- ConcreteCompressiveStrength.data[trainTestIdx == 2, -p]
# concrete.responses.test <- ConcreteCompressiveStrength.data[trainTestIdx == 2, p]
## model
# concrete.ruf <- randomUniformForest(concrete.data.train, concrete.responses.train,
# featureselectionrule = "L1")
# concrete.ruf
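## optional sketch (not part of the original example): assess test error with base R,
## assuming predict() returns numeric predictions for regression
# concrete.pred.test <- predict(concrete.ruf, concrete.data.test)
# sqrt(mean((concrete.pred.test - concrete.responses.test)^2))  ## test RMSE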
## importance at the deepest level of interactions
# concrete.ruf.importance <- importance.randomUniformForest(concrete.ruf,
# Xtest = concrete.data.train, maxInteractions = 8)
## compute and plot the partial dependence between "Age" and "Cement" on the train data,
## without the 3D representation and with a filter on outliers
# pDbetweenPredictors.concrete.ageAndCement <-
# partialDependenceBetweenPredictors(concrete.data.train,
# concrete.ruf.importance, c("Age", "Cement"), whichOrder = "all", outliersFilter = TRUE)
## compute and plot the partial dependence between "Age" and "Cement" on the train data,
## with the 3D representation (slower)
# pDbetweenPredictors.concrete.ageAndCement <-
# partialDependenceBetweenPredictors(concrete.data.train,
# concrete.ruf.importance, c("Age", "Cement"), whichOrder = "all", perspective = TRUE)
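## optional sketch (not part of the original example): partial dependence of the response
## on a single feature, e.g. "Age"; assumes partialDependenceOverResponses() takes the same
## data/importance arguments as partialDependenceBetweenPredictors()
# pDoverResponses.concrete.age <- partialDependenceOverResponses(concrete.data.train,
# concrete.ruf.importance, whichFeature = "Age", whichOrder = "all")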