# Worked example for interactionfor(): fitting interaction forests, inspecting
# EIM values, plotting, and predicting. Everything is wrapped in `if (FALSE)`,
# so none of it is evaluated when this file is sourced; run the statements
# interactively instead.
if (FALSE) {
  ## Load package:
  library("diversityForest")

  ## Fix the RNG seed so that the results are reproducible:
  set.seed(1234)

  ## Construct interaction forests and calculate EIM values ----

  # Categorical outcome (classification):
  data(zoo)
  model_cat <- interactionfor(
    dependent.variable.name = "type",
    data = zoo,
    num.trees = 20
  )

  # Metric outcome:
  data(stock)
  model_cont <- interactionfor(
    dependent.variable.name = "company10",
    data = stock,
    num.trees = 20
  )

  # Survival outcome:
  # The 'mgus2' data set is contained in the 'survival' package.
  library("survival")
  # Drop the 'id' column:
  mgus2$id <- NULL
  # Categorical variables need to be of factor format - important!!
  mgus2$sex <- factor(mgus2$sex)
  mgus2$pstat <- factor(mgus2$pstat)
  # Remove the second time variable 'ptime':
  mgus2$ptime <- NULL
  # Keep only the rows without missing values:
  mgus2 <- mgus2[complete.cases(mgus2), ]
  # Draw a random subset of 500 rows to make the calculations less
  # computationally expensive for the example (in actual applications, we
  # would of course use the whole data set):
  mgus2_sub <- mgus2[sample(seq_len(nrow(mgus2)), size = 500), ]
  # Apply 'interactionfor':
  model_surv <- interactionfor(
    formula = Surv(futime, death) ~ .,
    data = mgus2_sub,
    num.trees = 20
  )

  # NOTE: num.trees = 20 (in the above) would be much too small for practical
  # purposes. This small number of trees was simply used to keep the
  # runtime of the example short.
  # The default number of trees is num.trees = 20000 if EIM values are
  # calculated and num.trees = 2000 otherwise.

  ## Inspect the rankings of the variables and variable pairs with respect to
  ## the univariable, quantitative, and qualitative EIM values ----

  # Univariable EIM values:
  model_cat$eim.univ.sorted

  # First five entries of the sorted quantitative EIM values (top pairs):
  model_cat$eim.quant.sorted[1:5]

  # First five entries of the sorted qualitative EIM values (top pairs):
  model_cat$eim.qual.sorted[1:5]

  ## Investigate visually the forms of the interaction effects of the variable
  ## pairs with largest quantitative and qualitative EIM values ----

  plot(model_cat)
  plotEffects(model_cat, type = "quant") # type = "quant" is the default.
  plotEffects(model_cat, type = "qual")

  ## Prediction ----

  # Split the 'zoo' data set randomly into training and test data. Roughly two
  # thirds of the rows go into the training set.
  # NOTE(review): the requested sample size is not a whole number unless
  # nrow(zoo) is a multiple of 3; sample() presumably truncates it - confirm.
  data(zoo)
  n_zoo <- nrow(zoo)
  train_idx <- sample(n_zoo, 2 / 3 * n_zoo)
  zoo_train <- zoo[train_idx, ]
  zoo_test <- zoo[-train_idx, ]

  # Construct the interaction forest on the training data.
  # NOTE again: num.trees = 20 is specified too small for practical purposes.
  # Because we are only interested in prediction here, we do not calculate EIM
  # values (by setting importance = "none"), which speeds up calculations.
  model_cat_train <- interactionfor(
    dependent.variable.name = "type",
    data = zoo_train,
    importance = "none",
    num.trees = 20
  )

  # Predict class values of the test data:
  pred_zoo <- predict(model_cat_train, data = zoo_test)

  # Cross-tabulate true (rows) against predicted (columns) class values of the
  # test data:
  table(zoo_test$type, pred_zoo$predictions)
}
Run the code above in your browser using DataLab