## Not run:
# ## ------------------------------------------------------------
# ## compare synthetic forests to regular forest (classification)
# ## ------------------------------------------------------------
#
# ## rfsrc and rfsrcSyn calls
# if (library("mlbench", logical.return = TRUE)) {
#
# ## simulate the data
# ring <- data.frame(mlbench.ringnorm(250, 20))
#
# ## classification forests
# ringRF <- rfsrc(classes ~., data = ring)
#
# ## synthetic forests:
# ## 1 = nodesize varied
# ## 2 = nodesize/mtry varied
# ringSyn1 <- rfsrcSyn(classes ~., data = ring)
# ringSyn2 <- rfsrcSyn(classes ~., data = ring, mtrySeq = c(1, 10, 20))
#
# ## test-set performance
# ring.test <- data.frame(mlbench.ringnorm(500, 20))
# pred.ringRF <- predict(ringRF, newdata = ring.test)
# pred.ringSyn1 <- rfsrcSyn(object = ringSyn1, newdata = ring.test)$rfSynPred
# pred.ringSyn2 <- rfsrcSyn(object = ringSyn2, newdata = ring.test)$rfSynPred
#
#
# print(pred.ringRF)
# print(pred.ringSyn1)
# print(pred.ringSyn2)
#
# }
#
# ## ------------------------------------------------------------
# ## compare synthetic forest to regular forest (regression)
# ## ------------------------------------------------------------
#
# ## simulate the data
# n <- 250
# ntest <- 1000
# N <- n + ntest
# d <- 50
# std <- 0.1
# x <- matrix(runif(N * d, -1, 1), ncol = d)
# y <- 1 * (x[,1] + x[,4]^3 + x[,9] + sin(x[,12]*x[,18]) + rnorm(N, sd = std)>.38)
# dat <- data.frame(x = x, y = y)
# test <- (n+1):N
#
# ## regression forests
# regF <- rfsrc(y ~ ., data = dat[-test, ])
# pred.regF <- predict(regF, dat[test, ], importance = "none")
#
# ## synthetic forests
# ## we pass both the training and testing data
# ## but this can be split into separate commands as in the
# ## previous classification example
# synF1 <- rfsrcSyn(y ~ ., data = dat[-test, ],
# newdata = dat[test, ])
# synF2 <- rfsrcSyn(y ~ ., data = dat[-test, ],
# newdata = dat[test, ], mtrySeq = c(1, 10, 20, 30, 40, 50))
#
# ## standardized MSE performance
# mse <- c(tail(pred.regF$err.rate, 1),
# tail(synF1$rfSynPred$err.rate, 1),
# tail(synF2$rfSynPred$err.rate, 1)) / var(y[-test])
# names(mse) <- c("forest", "synthetic1", "synthetic2")
# print(mse)
#
# ## ------------------------------------------------------------
# ## multivariate synthetic forests
# ## ------------------------------------------------------------
#
# mtcars.new <- mtcars
# mtcars.new$cyl <- factor(mtcars.new$cyl)
# mtcars.new$carb <- factor(mtcars.new$carb, ordered = TRUE)
# trn <- sample(1:nrow(mtcars.new), nrow(mtcars.new)/2)
# mvSyn <- rfsrcSyn(cbind(carb, mpg, cyl) ~., data = mtcars.new[trn,])
# mvSyn.pred <- rfsrcSyn(object = mvSyn, newdata = mtcars.new[-trn,])
# ## End(Not run)
# Run the code above in your browser using DataLab