## Not run:
# ## Example: read data from an external big data file, fit a lasso model, and
# ## run cross-validation
#
# # simulated design matrix, 1000 observations, 500,000 variables, ~ 5GB
# # there are 10 true variables, each with a non-zero coefficient of 2.
# xfname <- 'x_e3_5e5.txt'
# yfname <- 'y_e3_5e5.txt' # response vector
# time <- system.time(
# X <- setupX(xfname, sep = '\t')
# )
# print(time) # ~ 8 minutes; this is just a one-time operation
# dim(X)
# y <- as.matrix(read.table(yfname, header = F))
# time.fit <- system.time(
# fit <- biglasso(X, y, family = 'gaussian')
# )
# print(time.fit) # ~ 1 minute for fitting a lasso model
#
# # cross validation in parallel
# seed <- 1234
# time.cvfit <- system.time(
# cvfit <- cv.biglasso(X, y, family = 'gaussian', seed = seed, ncores = 5)
# )
# print(time.cvfit) # ~ 4 minutes for 10-fold cross validation
# plot(cvfit)
# summary(cvfit)
#
# # the big.matrix can be retrieved via its descriptor file
# rm(list = ls())
# # the descriptor file was created by the call to setupX() and is stored on disk
# xdesc <- 'x_e3_5e5.desc'
# X <- attach.big.matrix(xdesc)
# dim(X)
# ## End(Not run)
Run the code above in your browser using DataLab