if (FALSE) {
  # Worked examples of model selection with the bigstep package.
  # Everything sits inside `if (FALSE)`, so the code is parsed but never
  # executed when this file is sourced; run the sections by hand instead.
  library(bigstep)

  ### small data ----
  # Simulate 200 observations of 20 standard-normal predictors. The response
  # depends only on columns 5, 10, 15 and 20 (plus an intercept and noise).
  set.seed(1)
  n <- 200
  p <- 20
  X <- matrix(rnorm(n * p), ncol = p)
  colnames(X) <- paste0("X", seq_len(p))
  y <- 1 + 0.4 * rowSums(X[, c(5, 10, 15, 20)]) + rnorm(n)

  data <- prepare_data(y, X)
  results <- stepwise(data, crit = aic)
  results$model
  summary(results)

  ### bigger data ----
  # 1000 observations of 10000 predictors; the true signal sits in every
  # 1000th column of X and in columns 1 and 3 of the additional matrix Xadd.
  set.seed(1)
  n <- 1e3
  p <- 1e4
  X <- matrix(rnorm(p * n), ncol = p)
  colnames(X) <- paste0("X", seq_len(p))
  Xadd <- matrix(rnorm(5 * n), n, 5) # additional variables
  colnames(Xadd) <- paste0("Xadd", seq_len(5))
  y <- 0.2 * rowSums(X[, 1000 * (1:10)]) + Xadd[, 1] - 0.1 * Xadd[, 3] + rnorm(n)

  data <- prepare_data(y, X, Xadd = Xadd)
  # Left-assignment keeps the result name visible at the start of the pipeline.
  results <- data %>%
    reduce_matrix(minpv = 0.15) %>%
    stepwise(crit = mbic)
  summary(results)

  ### big data ----
  # Expects "X.txt" and "y.txt" in the working directory.
  # read.big.matrix() belongs to bigmemory; it is namespace-qualified because
  # only bigstep is attached above (assumes bigstep does not attach bigmemory
  # itself -- TODO confirm).
  Xbig <- bigmemory::read.big.matrix(
    "X.txt",
    sep = " ", header = TRUE,
    backingfile = "X.bin", descriptorfile = "X.desc"
  )
  # Xbig <- bigmemory::attach.big.matrix("X.desc") # much faster
  # NOTE(review): read.table() returns a data.frame; confirm that
  # prepare_data() accepts this for `y` or convert it to a numeric vector.
  y <- read.table("y.txt")
  # data <- prepare_data(y, Xbig) # slow because of checking for NA
  data <- prepare_data(y, Xbig, na = FALSE) # set only if you know there are no NA

  # Chain the selection steps: pre-screen, forward, backward, then stepwise.
  m <- data %>%
    reduce_matrix(minpv = 0.001) %>%
    fast_forward(crit = bic, maxf = 50) %>%
    multi_backward(crit = mbic) %>%
    stepwise(crit = mbic)
  summary(m)

  # more examples: type browseVignettes("bigstep")
}
# Run the code above in your browser using DataLab