## ------------------------------------------------------------
## compare synthetic forests to regular forest (classification)
## ------------------------------------------------------------
## rfsrc and rfsrcSyn calls
## The whole classification example is skipped when the optional
## mlbench package cannot be attached.
if (library("mlbench", logical.return = TRUE)) {

  ## training sample: 250 draws from the 20-dimensional ringnorm problem
  ring <- data.frame(mlbench.ringnorm(n = 250, d = 20))

  ## baseline: ordinary classification forest
  ringRF <- rfsrc(classes ~ ., data = ring)

  ## synthetic forests
  ##   ringSyn1: default call
  ##   ringSyn2: additionally supplies mtrySeq = c(1, 10, 20)
  ringSyn1 <- rfsrcSyn(classes ~ ., data = ring)
  ringSyn2 <- rfsrcSyn(
    classes ~ .,
    data = ring,
    mtrySeq = c(1, 10, 20)
  )

  ## independent test sample: 500 draws from the same generator
  ring.test <- data.frame(mlbench.ringnorm(n = 500, d = 20))

  ## test-set performance of each of the three fits
  print(predict(ringRF, newdata = ring.test))
  print(rfsrcSyn(object = ringSyn1, newdata = ring.test))
  print(rfsrcSyn(object = ringSyn2, newdata = ring.test))
}
## ------------------------------------------------------------
## compare synthetic forest to regular forest (regression)
## ------------------------------------------------------------
## simulate the data
## sample sizes: the first n rows are used for training and the
## remaining ntest rows are held out for testing
n <- 250
ntest <- 1000
N <- n + ntest
d <- 50
std <- 0.1
x <- matrix(runif(N * d, -1, 1), ncol = d)
## one noise draw per row: the noise vector must have length N (not n),
## otherwise R silently recycles the n training draws across the test rows
y <- 1 * (x[, 1] + x[, 4]^3 + x[, 9] + sin(x[, 12] * x[, 18]) +
            rnorm(N, sd = std) > 0.38)
dat <- data.frame(x = x, y = y)
## row indices of the hold-out (test) observations
test <- (n + 1):N
## regression forests
regF <- rfsrc(y ~ ., data = dat[-test, ])
pred.regF <- predict(regF, dat[test, ])$predicted
## synthetic forests
## we pass both the training and testing data
## but this can be split into separate commands as in the
## previous classification example
synF1 <- rfsrcSyn(y ~ ., data = dat[-test, ],
                  newdata = dat[test, ])
synF2 <- rfsrcSyn(y ~ ., data = dat[-test, ],
                  newdata = dat[test, ],
                  mtrySeq = c(1, 10, 20, 30, 40, 50))
## standardized MSE performance: test-set MSE of each fit divided by the
## variance of the training response
## NOTE(review): assumes the object returned by rfsrcSyn exposes the test
## predictions as a top-level `predicted` element -- confirm against the
## installed randomForestSRC version
mse <- c(mean((y[test] - pred.regF)^2, na.rm = TRUE),
         mean((y[test] - synF1$predicted)^2, na.rm = TRUE),
         mean((y[test] - synF2$predicted)^2, na.rm = TRUE)) / var(y[-test])
names(mse) <- c("forest", "synthetic1", "synthetic2")
print(mse)
## Run the code above in your browser using DataLab