# NOT RUN {
## ------------------------------------------------------------
## regression example
## ------------------------------------------------------------
## grow the forest - request VIMP
reg.o <- rfsrc(mpg ~ ., mtcars)
## very small sample size so need largish subratio
reg.smp.o <- subsample(reg.o, B = 25, subratio = .5)
## plot confidence regions
plot.subsample(reg.smp.o)
## summary of results
print(reg.smp.o)
## extract subsampled VIMP
print(extract.subsample(reg.smp.o)$vmpS)
## extract unstandardized subsampled VIMP
print(extract.subsample(reg.smp.o, standardize=FALSE)$vmpS)
## now try the double bootstrap (slow!!)
reg.dbs.o <- subsample(reg.o, B = 25, bootstrap = TRUE)
print(reg.dbs.o)
plot.subsample(reg.dbs.o)
## ------------------------------------------------------------
## classification example
## ------------------------------------------------------------
## 3 non-linear, 15 linear, and 5 noise variables
if (library("caret", logical.return = TRUE)) {
d <- twoClassSim(1000, linearVars = 15, noiseVars = 5)
## VIMP based on (default) misclassification error
cls.o <- rfsrc(Class ~ ., d)
cls.smp.o <- subsample(cls.o, B = 100)
plot.subsample(cls.smp.o, cex.axis = .7)
## same as above, but with VIMP defined using normalized Brier score
cls.o2 <- rfsrc(Class ~ ., d, perf.type = "brier")
cls.smp.o2 <- subsample(cls.o2, B = 100)
plot.subsample(cls.smp.o2, cex.axis = .7)
}
## ------------------------------------------------------------
## class-imbalanced example
## uses q-classifier with G-mean VIMP
## ------------------------------------------------------------
if (library("caret", logical.return = TRUE)) {
## experimental settings
n <- 1000
q <- 20
ir <- 6
f <- as.formula(Class ~ .)
## simulate the data, create minority class data
d <- twoClassSim(n, linearVars = 15, noiseVars = q)
d$Class <- factor(as.numeric(d$Class) - 1)
idx.0 <- which(d$Class == 0)
idx.1 <- sample(which(d$Class == 1), sum(d$Class == 1) / ir , replace = FALSE)
d <- d[c(idx.0,idx.1),, drop = FALSE]
## q-classifier
oq <- imbalanced(Class ~ ., d, splitrule = "auc",
importance = TRUE, block.size = 10)
## subsample the q-classifier
smp.oq <- subsample(oq, B = 100)
plot(smp.oq, cex.axis = .7)
}
## ------------------------------------------------------------
## survival example
## ------------------------------------------------------------
data(pbc, package = "randomForestSRC")
srv.o <- rfsrc(Surv(days, status) ~ ., pbc)
srv.smp.o <- subsample(srv.o, B = 100)
plot(srv.smp.o)
## ------------------------------------------------------------
## competing risk example
## target event is death (event = 2)
## ------------------------------------------------------------
if (library("survival", logical.return = TRUE)) {
data(pbc, package = "survival")
pbc$id <- NULL
cr.o <- rfsrc(Surv(time, status) ~ ., pbc, splitrule = "logrank", cause = 2)
cr.smp.o <- subsample(cr.o, B = 100)
plot.subsample(cr.smp.o, target = 2)
}
## ------------------------------------------------------------
## multivariate family
## ------------------------------------------------------------
if (library("mlbench", logical.return = TRUE)) {
## simulate the data
data(BostonHousing)
bh <- BostonHousing
bh$rm <- factor(round(bh$rm))
o <- rfsrc(cbind(medv, rm) ~ ., bh)
so <- subsample(o)
plot(so)
plot(so, m.target = "rm")
}
## ------------------------------------------------------------
## largish data example - use rfsrc.fast for fast forests
## ------------------------------------------------------------
if (library("caret", logical.return = TRUE)) {
## largish data set
d <- twoClassSim(1000, linearVars = 15, noiseVars = 5)
## use a subsampled forest with Brier score performance
## remember to request forests in rfsrc.fast
o <- rfsrc.fast(Class ~ ., d, ntree = 100,
forest = TRUE, perf.type = "brier")
so <- subsample(o, B = 100)
plot.subsample(so, cex.axis = .7)
}
# }
Run the code above in your browser using DataLab