## Not run:
# ## simulate data
# set.seed(23432)
# ## training set
# n <- 500
# p <- 50
# X <- matrix(rnorm(n*p), nrow = n, ncol = p)
# colnames(X) <- paste("X", 1:p, sep="")
# X <- data.frame(X)
# Y <- X[, 1] + sqrt(abs(X[, 2] * X[, 3])) + X[, 2] - X[, 3] + rnorm(n)
#
# ## test set
# m <- 1000
# newX <- matrix(rnorm(m*p), nrow = m, ncol = p)
# colnames(newX) <- paste("X", 1:p, sep="")
# newX <- data.frame(newX)
# newY <- newX[, 1] + sqrt(abs(newX[, 2] * newX[, 3])) + newX[, 2] -
# newX[, 3] + rnorm(m)
#
# # generate Library and run Super Learner
# SL.library <- c("SL.glm", "SL.randomForest", "SL.gam",
# "SL.polymars", "SL.mean")
# test <- SuperLearner(Y = Y, X = X, newX = newX, SL.library = SL.library,
# verbose = TRUE, method = "method.NNLS")
# test
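#
# # optionally, inspect the fit: the estimated ensemble weights and the
# # cross-validated risks are stored on the returned object
# test$coef
# test$cvRisk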
#
# # library with screening
# SL.library <- list(c("SL.glmnet", "All"), c("SL.glm", "screen.randomForest",
# "All", "screen.SIS"), "SL.randomForest", c("SL.polymars", "All"), "SL.mean")
# test <- SuperLearner(Y = Y, X = X, newX = newX, SL.library = SL.library,
# verbose = TRUE, method = "method.NNLS")
# test
#
# # binary outcome
# set.seed(1)
# N <- 200
# X <- matrix(rnorm(N*10), N, 10)
# X <- as.data.frame(X)
# Y <- rbinom(N, 1, plogis(.2*X[, 1] + .1*X[, 2] - .2*X[, 3] +
# .1*X[, 3]*X[, 4] - .2*abs(X[, 4])))
#
# SL.library <- c("SL.glmnet", "SL.glm", "SL.knn", "SL.gam", "SL.mean")
#
# # least squares loss function
# test.NNLS <- SuperLearner(Y = Y, X = X, SL.library = SL.library,
# verbose = TRUE, method = "method.NNLS", family = binomial())
# test.NNLS
#
# # negative log binomial likelihood loss function
# test.NNloglik <- SuperLearner(Y = Y, X = X, SL.library = SL.library,
# verbose = TRUE, method = "method.NNloglik", family = binomial())
# test.NNloglik
#
# # 1 - AUC loss function
# test.AUC <- SuperLearner(Y = Y, X = X, SL.library = SL.library,
# verbose = TRUE, method = "method.AUC", family = binomial())
# test.AUC
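#
# # the three loss functions can be compared through the ensemble weights
# # they produce (an illustrative follow-up, not part of the original example)
# cbind(NNLS = test.NNLS$coef, NNloglik = test.NNloglik$coef, AUC = test.AUC$coef)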
#
# # Example 2: adapted from library(SIS)
# set.seed(1)
# # training
# b <- c(2, 2, 2, -3*sqrt(2))
# n <- 150
# p <- 200
# truerho <- 0.5
# corrmat <- diag(rep(1-truerho, p)) + matrix(truerho, p, p)
# corrmat[, 4] <- sqrt(truerho)
# corrmat[4, ] <- sqrt(truerho)
# corrmat[4, 4] <- 1
# cholmat <- chol(corrmat)
# x <- matrix(rnorm(n*p, mean=0, sd=1), n, p)
# x <- x %*% cholmat
# feta <- x[, 1:4] %*% b
# fprob <- exp(feta) / (1 + exp(feta))
# y <- rbinom(n, 1, fprob)
#
# # test
# m <- 10000
# newx <- matrix(rnorm(m*p, mean=0, sd=1), m, p)
# newx <- newx %*% cholmat
# newfeta <- newx[, 1:4] %*% b
# newfprob <- exp(newfeta) / (1 + exp(newfeta))
# newy <- rbinom(m, 1, newfprob)
#
# DATA2 <- data.frame(Y = y, X = x)
# newDATA2 <- data.frame(Y = newy, X = newx)
#
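# # create.SL.knn builds a family of SL.knn wrappers, one per value of k,
# # by pasting together a function definition and assigning it into the
# # global environment so SuperLearner can find each wrapper by name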
# create.SL.knn <- function(k = c(20, 30)) {
# for(mm in seq(length(k))){
# eval(parse(text = paste('SL.knn.', k[mm], '<- function(..., k = ', k[mm],
# ') SL.knn(..., k = k)', sep = '')), envir = .GlobalEnv)
# }
# invisible(TRUE)
# }
# create.SL.knn(c(20, 30, 40, 50, 60, 70))
#
# # library with screening
# SL.library <- list(c("SL.glmnet", "All"), c("SL.glm", "screen.randomForest"),
# "SL.randomForest", "SL.knn", "SL.knn.20", "SL.knn.30", "SL.knn.40",
# "SL.knn.50", "SL.knn.60", "SL.knn.70",
# c("SL.polymars", "screen.randomForest"))
# test <- SuperLearner(Y = DATA2$Y, X = DATA2[, -1], newX = newDATA2[, -1],
# SL.library = SL.library, verbose = TRUE, family = binomial())
# test
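#
# # optionally, see which variables each screening algorithm retained;
# # whichScreen is a logical matrix stored on the returned object
# test$whichScreen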
#
# ## examples with multicore
# set.seed(23432)
# ## training set
# n <- 500
# p <- 50
# X <- matrix(rnorm(n*p), nrow = n, ncol = p)
# colnames(X) <- paste("X", 1:p, sep="")
# X <- data.frame(X)
# Y <- X[, 1] + sqrt(abs(X[, 2] * X[, 3])) + X[, 2] - X[, 3] + rnorm(n)
#
# ## test set
# m <- 1000
# newX <- matrix(rnorm(m*p), nrow = m, ncol = p)
# colnames(newX) <- paste("X", 1:p, sep="")
# newX <- data.frame(newX)
# newY <- newX[, 1] + sqrt(abs(newX[, 2] * newX[, 3])) + newX[, 2] - newX[, 3] + rnorm(m)
#
# # generate Library and run Super Learner
# SL.library <- c("SL.glm", "SL.randomForest", "SL.gam",
# "SL.polymars", "SL.mean")
#
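# # mcSuperLearner parallelizes the cross-validation fit with forked
# # processes (mclapply); an optional step, not in the original example,
# # is to use a parallel-safe RNG for reproducibility across workers
# set.seed(23432, "L'Ecuyer-CMRG")
#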
# testMC <- mcSuperLearner(Y = Y, X = X, newX = newX, SL.library = SL.library,
# method = "method.NNLS")
# testMC
#
# ## examples with snow
# library(parallel)
# cl <- makeCluster(2, type = "PSOCK") # can use different types here
# clusterSetRNGStream(cl, iseed = 2343)
# testSNOW <- snowSuperLearner(cluster = cl, Y = Y, X = X, newX = newX,
# SL.library = SL.library, method = "method.NNLS")
# testSNOW
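#
# # newY was simulated above but not used; as an illustrative check of
# # held-out performance, compare it with the super learner predictions
# mean((newY - testSNOW$SL.predict)^2)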
# stopCluster(cl)
#
# ## snow example with user-generated wrappers
# # If you write your own wrappers for use with snowSuperLearner(), the new
# # wrappers must be assigned into the SuperLearner namespace and exported to
# # the cluster workers. A simple example is shown here, but any new
# # SuperLearner wrapper can be defined the same way.
# my.SL.wrapper <- function(...) SL.glm(...)
# # assign function into SuperLearner namespace
# environment(my.SL.wrapper) <- asNamespace("SuperLearner")
#
# cl <- makeCluster(2, type = "PSOCK") # can use different types here
# clusterSetRNGStream(cl, iseed = 2343)
# clusterExport(cl, c("my.SL.wrapper")) # copy the function to all clusters
# testSNOW <- snowSuperLearner(cluster = cl, Y = Y, X = X, newX = newX,
# SL.library = c("SL.glm", "SL.mean", "my.SL.wrapper"), method = "method.NNLS")
# testSNOW
# stopCluster(cl)
#
# ## timing
# replicate(5, system.time(SuperLearner(Y = Y, X = X, newX = newX,
# SL.library = SL.library, method = "method.NNLS")))
#
# replicate(5, system.time(mcSuperLearner(Y = Y, X = X, newX = newX,
# SL.library = SL.library, method = "method.NNLS")))
#
# cl <- makeCluster(2, type = "PSOCK")
# replicate(5, system.time(snowSuperLearner(cl, Y = Y, X = X, newX = newX,
# SL.library = SL.library, method = "method.NNLS")))
# stopCluster(cl)
#
## End(Not run)