## Classification:
## Attach the package explicitly so the script also runs outside the
## package's own example runner.
library(randomForest)
data(iris)
set.seed(71)
iris.rf <- randomForest(Species ~ ., data = iris, importance = TRUE,
                        proximity = TRUE)
print(iris.rf)
## Look at variable importance:
print(round(iris.rf$importance, 2))
## Do MDS on 1 - proximity:
## cmdscale() lives in the always-attached 'stats' package; the former
## 'mva' package was merged into 'stats', so no library() call is needed.
iris.dissim <- 1 - iris.rf$proximity
iris.mds <- cmdscale(iris.dissim)
## Colour each point by species. as.integer() on the factor yields the
## level index (1, 2, 3), replacing the defunct codes().
species.col <- c("red", "green", "blue")[as.integer(iris$Species)]
pairs(cbind(iris[, 1:4], iris.mds), cex = 0.6, gap = 0.2,
      col = species.col,
      main = "Iris Data: Predictors and MDS of Proximity Based on RandomForest")
## Examine the stress of MDS:
## (squared discrepancy between the input dissimilarities and the distances
## in the MDS configuration, relative to the squared dissimilarities)
print(sum((as.dist(iris.dissim) - dist(iris.mds))^2) /
        sum(as.dist(iris.dissim)^2))
## The `unsupervised' case:
## (no response is supplied, so randomForest() runs in unsupervised mode)
set.seed(17)
iris.urf <- randomForest(iris[, -5], proximity = TRUE)
## Look for Outliers:
## The old `outscale' argument / `$out' component no longer exist in current
## randomForest releases; outlier() derives the outlying measure from the
## proximity matrix stored in the fitted object.
plot(outlier(iris.urf), type = "h", ylab = "",
     main = "Measure of Outlyingness for Iris Data")
## Regression:
data(airquality)
set.seed(131)
## airquality contains missing values (Ozone, Solar.R). The formula
## interface defaults to na.action = na.fail, so incomplete rows must be
## dropped explicitly or the fit stops with an error.
ozone.rf <- randomForest(Ozone ~ ., data = airquality, mtry = 3,
                         importance = TRUE, na.action = na.omit)
print(ozone.rf)
## Show "importance" of variables: higher values mean more important:
print(round(ozone.rf$importance, 2))
## Run the code above in your browser using DataLab