## ------------------------------------------------------------
## compare synthetic forests to regular forest (classification)
## ------------------------------------------------------------
## rfsrc and rfsrcSyn calls
## The classification comparison needs the optional 'mlbench' package.
## With logical.return = TRUE, library() reports availability as TRUE/FALSE
## instead of stopping, so the example is skipped when it is missing.
has_mlbench <- library("mlbench", logical.return = TRUE)
if (has_mlbench) {
  ## simulated data from mlbench.ringnorm(250, 20), as a data frame
  ring <- data.frame(mlbench.ringnorm(250, 20))

  ## regular forest and synthetic forest fitted to the same data
  ringRF <- rfsrc(classes ~ ., data = ring)
  ringSyn <- rfsrcSyn(classes ~ ., data = ring)

  ## OOB performance
  print(ringRF)
  print(ringSyn)
}
## ------------------------------------------------------------
## compare MSE of synthetic forest to regular forest (regression)
## 1) m-fold validation
## 2) oob MSE
## ------------------------------------------------------------
## synthetic data: n observations on d features drawn uniformly on (-1, 1)
n <- 250
d <- 50
std <- 0.1
x <- matrix(runif(n * d, -1, 1), ncol = d)
## nonlinear signal in features 1, 4, 9, 12 and 18; Gaussian noise with
## standard deviation 'std' is added and the sum is thresholded at 0.38.
## Multiplying by 1 stores the indicator as numeric 0/1 so that squared
## errors can be computed on it further down.
signal <- x[, 1] + x[, 4]^3 + x[, 9] + sin(x[, 12] * x[, 18])
y <- 1 * (signal + rnorm(n, sd = std) > 0.38)
dat <- data.frame(x = x, y = y)
## define the cv folds: shuffle the row indices and deal them out to the
## folds in turn, so fold sizes differ by at most one
nfolds <- 10
n <- nrow(dat)
foldId <- rep(seq_len(nfolds), length.out = n)
cvFolds <- split(sample(seq_len(n)), foldId)
## one row per fold, one column per method; filled in by the cv loop
mse <- matrix(NA, nfolds, 2,
              dimnames = list(paste("fold", seq_len(nfolds)),
                              c("forest", "synthetic")))
## cv loop: for each fold, fit both forests on the remaining rows and
## record the squared-error loss on the held-out rows
for (k in seq_len(nfolds)) {
  ## train/test split
  cat("mfold:", k, "\n")
  test <- cvFolds[[k]]
  ytest <- dat$y[test]
  trainDat <- dat[-test, ]
  testDat <- dat[test, ]

  ## regular forest
  regF <- rfsrc(y ~ ., data = trainDat)

  ## synthetic forest; the held-out rows are passed as newdata so the
  ## test-set predictions come back from the same call
  synF <- rfsrcSyn(y ~ ., data = trainDat,
                   newdata = testDat, verbose = FALSE)

  ## extract predicted values
  pred.regF <- predict(regF, testDat)$predicted
  ## rfsrcSyn is documented to return a list whose test-set forest is the
  ## 'rfSynPred' component; fall back to the top-level object in case the
  ## installed version exposes 'predicted' directly.
  ## NOTE(review): confirm the return structure for the installed version.
  synTest <- if (is.null(synF$rfSynPred)) synF else synF$rfSynPred
  pred.synF <- synTest$predicted

  ## a missing prediction vector would otherwise be hidden by na.rm = TRUE
  ## below and show up only as NaN in the results table
  if (length(pred.synF) != length(ytest)) {
    stop("synthetic forest returned ", length(pred.synF),
         " predictions for ", length(ytest), " test rows", call. = FALSE)
  }

  ## MSE performance
  mse[k, 1] <- mean((ytest - pred.regF)^2, na.rm = TRUE)
  mse[k, 2] <- mean((ytest - pred.synF)^2, na.rm = TRUE)
}
## output MSE performance
## The per-fold table gets two summary rows: the column average and its
## standard error. Because na.rm = TRUE drops folds with a missing MSE,
## the standard error divides by the number of folds actually used in each
## column rather than by nfolds (the two agree when nothing is missing).
nUsed <- colSums(!is.na(mse))
mse <- rbind(mse,
             avg = apply(mse, 2, mean, na.rm = TRUE),
             se = sqrt(apply(mse, 2, var, na.rm = TRUE) / nUsed))
print(round(mse, 4))
## compare with OOB MSE
## both forests are refitted on the full data with the same number of trees
ntree <- 5000
regF <- rfsrc(y ~ ., data = dat, ntree = ntree)
synF <- rfsrcSyn(y ~ ., data = dat, ntree = ntree)
## rfsrcSyn is documented to return a list whose grow forest is the 'rfSyn'
## component; fall back to the top-level object in case the installed
## version exposes 'err.rate' directly.
## NOTE(review): confirm the return structure for the installed version.
synGrow <- if (is.null(synF$rfSyn)) synF else synF$rfSyn
## report the err.rate entry for the last tree of each forest
cat("forest MSE:", tail(regF$err.rate, 1), "\n")
cat("synthetic MSE:", tail(synGrow$err.rate, 1), "\n")
## Run the code above in your browser using DataLab