## ------------------------------------------------------------
## typical train/testing scenario
## ------------------------------------------------------------
# Load the veteran data set, explicitly from randomForestSRC.
data(veteran, package = "randomForestSRC")

# Randomly pick 80% of the row indices for training; the remaining rows are
# held out for testing. seq_len() is used instead of 1:nrow() so that a
# zero-row data frame yields an empty index set rather than c(1, 0).
# NOTE: no seed is set, so the split (and the results) vary between runs.
train <- sample(seq_len(nrow(veteran)), round(nrow(veteran) * 0.80))

# Grow a 100-tree forest for the Surv(time, status) outcome on the training
# rows. The call is namespace-qualified so it works even when the package
# has not been attached with library(); loading the namespace also registers
# the predict/print methods used below.
veteran.grow <- randomForestSRC::rfsrc(
  Surv(time, status) ~ .,
  veteran[train, ],
  ntree = 100
)

# Predict on the held-out rows (everything not in `train`).
veteran.pred <- predict(veteran.grow, veteran[-train, ])

# Show the summaries of the grow object and the prediction object.
print(veteran.grow)
print(veteran.pred)
## Not run:
# ## ------------------------------------------------------------
# ## predicted probability and predicted class labels are returned
# ## in the predict object for classification analyses
# ## ------------------------------------------------------------
#
# data(breast, package = "randomForestSRC")
# breast.obj <- rfsrc(status ~ ., data = breast[(1:100), ], nsplit = 10)
# breast.pred <- predict(breast.obj, breast[-(1:100), ])
# print(head(breast.pred$predicted))
# print(breast.pred$class)
#
# ## ------------------------------------------------------------
# ## example illustrating restore mode
# ## if predict is called without specifying the test data
# ## the original training data is used and the forest is restored
# ## ------------------------------------------------------------
#
# # first we make the grow call
# airq.obj <- rfsrc(Ozone ~ ., data = airquality)
#
# # now we restore it and compare it to the original call
# # they are identical
# predict(airq.obj)
# print(airq.obj)
#
# # we can retrieve various outputs that were not asked for
# # in the original call
#
# #here we extract the proximity matrix
# prox <- predict(airq.obj, proximity = TRUE)$proximity
# print(prox[1:10,1:10])
#
# #here we extract the number of times a variable was used to grow
# #the grow forest
# var.used <- predict(airq.obj, var.used = "by.tree")$var.used
# print(head(var.used))
#
# ## ------------------------------------------------------------
# ## unique feature of randomForestSRC
# ## cross-validation can be used when factor labels differ over
# ## training and test data
# ## ------------------------------------------------------------
#
# # first we convert all x-variables to factors
# data(veteran, package = "randomForestSRC")
# veteran.factor <- data.frame(lapply(veteran, factor))
# veteran.factor$time <- veteran$time
# veteran.factor$status <- veteran$status
#
# # split the data into unbalanced train/test data (5/95)
# # the train/test data have the same levels, but different labels
# train <- sample(1:nrow(veteran), round(nrow(veteran) * .05))
# summary(veteran.factor[train,])
# summary(veteran.factor[-train,])
#
# # grow the forest on the training data and predict on the test data
# veteran.f.grow <- rfsrc(Surv(time, status) ~ ., veteran.factor[train, ])
# veteran.f.pred <- predict(veteran.f.grow, veteran.factor[-train , ])
# print(veteran.f.grow)
# print(veteran.f.pred)
#
# ## ------------------------------------------------------------
# ## example illustrating the flexibility of outcome = "test"
# ## illustrates restoration of forest via outcome = "test"
# ## ------------------------------------------------------------
#
# # first we make the grow call
# data(pbc, package = "randomForestSRC")
# pbc.grow <- rfsrc(Surv(days, status) ~ ., pbc, nsplit = 10)
#
# # now use predict with outcome = "test"
# pbc.pred <- predict(pbc.grow, pbc, outcome = "test")
#
# # notice that error rates are the same!!
# print(pbc.grow)
# print(pbc.pred)
#
# # note this is equivalent to restoring the forest
# pbc.pred2 <- predict(pbc.grow)
# print(pbc.grow)
# print(pbc.pred)
# print(pbc.pred2)
#
# # similar example, but with na.action = "na.impute"
# airq.obj <- rfsrc(Ozone ~ ., data = airquality, na.action = "na.impute")
# print(airq.obj)
# print(predict(airq.obj))
# # ... also equivalent to outcome="test" but na.action = "na.impute" required
# print(predict(airq.obj, airquality, outcome = "test", na.action = "na.impute"))
#
# # classification example
# iris.obj <- rfsrc(Species ~., data = iris)
# print(iris.obj)
# print(predict.rfsrc(iris.obj, iris, outcome = "test"))
#
# ## ------------------------------------------------------------
# ## another example illustrating outcome = "test"
# ## unique way to check reproducibility of the forest
# ## ------------------------------------------------------------
#
# # primary call
# set.seed(542899)
# data(pbc, package = "randomForestSRC")
# train <- sample(1:nrow(pbc), round(nrow(pbc) * 0.50))
# pbc.out <- rfsrc(Surv(days, status) ~ ., data=pbc[train, ],
# nsplit = 10)
#
# # standard predict call
# pbc.train <- predict(pbc.out, pbc[-train, ], outcome = "train")
# #non-standard predict call: overlays the test data on the grow forest
# pbc.test <- predict(pbc.out, pbc[-train, ], outcome = "test")
#
# # check forest reproducibility by comparing "test" predicted survival
# # curves to "train" predicted survival curves for the first 3 individuals
# Time <- pbc.out$time.interest
# matplot(Time, t(exp(-pbc.train$chf)[1:3,]), ylab = "Survival", col = 1, type = "l")
# matlines(Time, t(exp(-pbc.test$chf)[1:3,]), col = 2)
#
# ## ------------------------------------------------------------
# ## survival analysis using mixed multivariate outcome analysis
# ## compare the predicted value to RSF
# ## ------------------------------------------------------------
#
# # fit the pbc data using RSF
# data(pbc, package = "randomForestSRC")
# rsf.obj <- rfsrc(Surv(days, status) ~ ., pbc, nsplit = 10)
# yvar <- rsf.obj$yvar
#
# # fit a mixed outcome forest using days and status as y-variables
# pbc.mod <- pbc
# pbc.mod$status <- factor(pbc.mod$status)
# mix.obj <- rfsrc(Multivar(days, status) ~., pbc.mod, nsplit = 10)
#
# # compare oob predicted values
# rsf.pred <- rsf.obj$predicted.oob
# mix.pred <- mix.obj$regrOutput$days$predicted.oob
# plot(rsf.pred, mix.pred)
#
# # compare C-index error rate
# rsf.err <- randomForestSRC:::cindex(yvar$days, yvar$status, rsf.pred)
# mix.err <- 1 - randomForestSRC:::cindex(yvar$days, yvar$status, mix.pred)
# cat("RSF :", rsf.err, "\n")
# cat("multivariate forest:", mix.err, "\n")
#
# ## End(Not run)
## Run the code above in your browser using DataLab