# \donttest{
## ------------------------------------------------------------
## high dimensional survival example:
## van de Vijver microarray breast cancer data
## ------------------------------------------------------------
data("vdv", package = "randomForestSRC")
## outcome is Surv(Time, Censoring); the formula dot puts all
## other columns of vdv on the right-hand side
o <- cv.varpro(
  Surv(Time, Censoring) ~ .,
  vdv
)
## display the fitted object
print(o)
## ------------------------------------------------------------
## regression example: boston housing, outcome medv
## ------------------------------------------------------------
data("BostonHousing", package = "mlbench")
print(
  cv.varpro(medv ~ ., BostonHousing)
)
## ------------------------------------------------------------
## boston housing with factor features:
## importance for the original vs. the hot-encoded variables
## ------------------------------------------------------------
## fetch the data and work on a copy
data("BostonHousing", package = "mlbench")
Boston <- BostonHousing
## zn and chas are converted to factors as they are
Boston[c("zn", "chas")] <- lapply(Boston[c("zn", "chas")], factor)
## lstat, nox and rm are (re)scaled and rounded before conversion,
## so their factor levels are the rounded values
Boston$lstat <- with(Boston, factor(round(0.2 * lstat)))
Boston$nox <- with(Boston, factor(round(20 * nox)))
Boston$rm <- with(Boston, factor(round(rm)))
## cross-validated call on the modified data
o <- cv.varpro(medv ~ ., Boston)
print(o)
## importance for the original variables (default)
print(get.orgvimp(o, pretty = FALSE))
## importance for the hot-encoded variables
print(get.vimp(o, pretty = FALSE))
## ------------------------------------------------------------
## multivariate regression: boston housing with two outcomes,
## lstat and nox
## vimp is collapsed across the outcomes
## ------------------------------------------------------------
data("BostonHousing", package = "mlbench")
print(
  cv.varpro(cbind(lstat, nox) ~ ., BostonHousing)
)
## ------------------------------------------------------------
## classification example: iris, outcome Species
## ------------------------------------------------------------
print(
  cv.varpro(Species ~ ., iris)
)
## ------------------------------------------------------------
## friedman 1 simulation from mlbench (1000 draws), outcome y
## ------------------------------------------------------------
print(
  cv.varpro(
    y ~ .,
    data.frame(mlbench::mlbench.friedman1(1000))
  )
)
##----------------------------------------------------------------
## class imbalanced problem
##
## - simulation example using the caret R-package
## - creates imbalanced data by randomly sampling the class 1 values
## - skipped when caret is not installed
##
##----------------------------------------------------------------
## caret is optional: test for it without attaching it to the search
## path and without a warning when it is missing
if (requireNamespace("caret", quietly = TRUE)) {
  ## experimental settings
  n <- 5000  ## size of the simulated data
  q <- 20    ## number of noise variables
  ir <- 6    ## class 1 is cut down to 1/ir of its simulated size
  f <- Class ~ .
  ## simulate the data and recode Class as a factor with levels 0/1
  d <- caret::twoClassSim(n, linearVars = 15, noiseVars = q)
  d$Class <- factor(as.numeric(d$Class) - 1)
  ## create the minority class: keep every class 0 row and a random
  ## subset of the class 1 rows; the subset size is floored so that
  ## sample.int() receives a whole number, and indexing through
  ## sample.int() avoids the sample(x) surprise when x has length one
  idx.0 <- which(d$Class == 0)
  idx.1 <- which(d$Class == 1)
  idx.1 <- idx.1[sample.int(length(idx.1), floor(length(idx.1) / ir))]
  d <- d[c(idx.0, idx.1), , drop = FALSE]
  ## shuffle the rows
  d <- d[sample.int(nrow(d)), ]
  ## cv.varpro call
  print(cv.varpro(f, d))
}
## ------------------------------------------------------------
## pbc survival with an rmst vector (500 and 1000)
## as with mv-regression, vimp is collapsed across the
## rmst values
## ------------------------------------------------------------
data("pbc", package = "randomForestSRC")
print(
  cv.varpro(Surv(days, status) ~ ., pbc, rmst = c(500, 1000))
)
## ------------------------------------------------------------
## peak VO2: cutoff selected using the fast option
## performance metric is either
##   (a) the C-index (default), or
##   (b) CRPS
## ------------------------------------------------------------
data("peakVO2", package = "randomForestSRC")
## one survival formula shared by the three calls below
f <- Surv(ttodead, died) ~ .
## (a) Harrell's C-index (default)
print(
  cv.varpro(f, peakVO2, ntree = 100, fast = TRUE)
)
## (a) Harrell's C-index, smaller bootstrap
print(
  cv.varpro(f, peakVO2, ntree = 100, fast = TRUE, sampsize = 100)
)
## (b) CRPS, smaller bootstrap
print(
  cv.varpro(f, peakVO2, crps = TRUE, ntree = 100, fast = TRUE,
            sampsize = 100)
)
## ------------------------------------------------------------
## largish data set: options that speed up the calculations
## ------------------------------------------------------------
## load the data and roughly impute it
## NOTE(review): roughfix() is called unqualified - confirm which
## attached package provides it
data("housing", package = "randomForestSRC")
housing2 <- roughfix(housing)
## bigger nodesize
print(
  cv.varpro(SalePrice ~ ., housing2, fast = TRUE, ntree = 50,
            nodesize = 150)
)
## bigger nodesize together with a smaller bootstrap
print(
  cv.varpro(SalePrice ~ ., housing2, fast = TRUE, ntree = 50,
            nodesize = 150, sampsize = 250)
)
# }
# Run the code above in your browser using DataLab