# NOT RUN {
# Example: estimating conditional prediction-error quantities for random
# forest predictions with forestError::quantForestError(). Both third-party
# functions are called through their namespaces, so the randomForest and
# forestError packages must be installed but need not be attached.

# fix the RNG seed so the random data split and the fitted forests are
# reproducible from run to run
set.seed(1)

# load data
data(airquality)
# remove observations with any missing values (response or predictors)
airquality <- airquality[complete.cases(airquality), ]
# get number of observations and the response column index
n <- nrow(airquality)
response.col <- 1

# split data into a training set ("A") and two test sets ("B" and "C"),
# assigning each row independently with probabilities 0.8 / 0.1 / 0.1
train.ind <- sample(c("A", "B", "C"), n,
                    replace = TRUE, prob = c(0.8, 0.1, 0.1))
Xtrain <- airquality[train.ind == "A", -response.col]
Ytrain <- airquality[train.ind == "A", response.col]
Xtest1 <- airquality[train.ind == "B", -response.col]
Xtest2 <- airquality[train.ind == "C", -response.col]

# fit regression random forest to the training data
rf <- randomForest::randomForest(Xtrain, Ytrain, nodesize = 5,
                                 ntree = 500,
                                 keep.inbag = TRUE)

# estimate conditional mean squared prediction errors,
# biases, prediction intervals, and error distribution
# functions for the observations in Xtest1. return
# train_nodes to avoid recomputation in the next
# line of code.
output1 <- forestError::quantForestError(rf, Xtrain, Xtest1,
                                         return_train_nodes = TRUE)

# estimate just the conditional mean squared prediction errors
# and prediction intervals for the observations in Xtest2.
# avoid recomputation by providing train_nodes from the
# previous line of code.
output2 <- forestError::quantForestError(rf, Xtrain, Xtest2,
                                         what = c("mspe", "interval"),
                                         train_nodes = output1$train_nodes)

# for illustrative purposes, convert response to categorical
# (this overwrites the numeric Ytrain with a two-level factor)
Ytrain <- as.factor(Ytrain > 31.5)

# fit classification random forest to the training data
# (this overwrites the regression forest stored in rf)
rf <- randomForest::randomForest(Xtrain, Ytrain, nodesize = 3,
                                 ntree = 500,
                                 keep.inbag = TRUE)

# estimate conditional misclassification rate of the
# predictions of Xtest1; train_nodes from the regression
# forest is not passed to this call
output <- forestError::quantForestError(rf, Xtrain, Xtest1)
# }
# Run the code above in your browser using DataLab