## Quick example for a multivariate linear model with multivariate response
# Simulate data
n <- 100
p <- 10; q <- 5
set.seed(123456)
x <- matrix(rnorm(n * p, sd = rep(1:p, each = n)), nrow = n, ncol = p)
e <- matrix(rnorm(n * q, sd = rep(q:1, each = n)), nrow = n, ncol = q)
beta <- matrix(((1:p - 1) / p)^2, nrow = p, ncol = q)
y <- x %*% beta + e
# Fit lasso (model with intercept, the default)
cv_glmnet(x = x, y = y, alpha = "lasso", cv_verbose = TRUE)$beta_hat
# \donttest{
## Multivariate linear model with multivariate response
# Simulate data
n <- 100
p <- 10; q <- 5
set.seed(123456)
x <- matrix(rnorm(n * p, sd = rep(1:p, each = n)), nrow = n, ncol = p)
e <- matrix(rnorm(n * q, sd = rep(q:1, each = n)), nrow = n, ncol = q)
beta <- matrix(((1:p - 1) / p)^2, nrow = p, ncol = q)
y <- x %*% beta + e
# Fit ridge
cv0 <- cv_glmnet(x = x, y = y, alpha = "ridge", intercept = FALSE,
                 cv_verbose = TRUE)
cv0$beta_hat
# Same fit for the chosen lambda
cv_glmnet(x = x, y = y, alpha = "ridge", lambda = cv0$lambda,
          intercept = FALSE)$beta_hat
# Fit lasso (model with intercept, the default)
cv1 <- cv_glmnet(x = x, y = y, alpha = "lasso", cv_verbose = TRUE)
cv1$beta_hat
# Use cv_1se = FALSE
cv1_min <- cv_glmnet(x = x, y = y, alpha = "lasso", cv_verbose = TRUE,
                     cv_1se = FALSE)
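# Compare the lambdas selected by the one-standard-error rule (the default)
# and by the CV minimum; the former is always at least as large
c("lambda_1se" = cv1$lambda, "lambda_min" = cv1_min$lambda)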
# Compare the ridge fit with its analytical solution. Observe the factor in
# lambda, necessary since lambda is searched for the standardized data. Note
# also that, unlike in the case q = 1, no standardization with respect to y
# happens
sd_x <- apply(x, 2, function(x) sd(x)) * sqrt((n - 1) / n)
cv_glmnet(x = x, y = y, alpha = "ridge", lambda = cv0$lambda,
          thresh = 1e-20, intercept = FALSE)$beta_hat
solve(crossprod(x) + diag(cv0$lambda * sd_x^2 * n)) %*% t(x) %*% y
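# A quick numerical check (assuming beta_hat is returned as a p x q matrix):
# the maximum absolute difference between both estimates should be negligible
beta_glmnet <- cv_glmnet(x = x, y = y, alpha = "ridge", lambda = cv0$lambda,
                         thresh = 1e-20, intercept = FALSE)$beta_hat
beta_ridge <- solve(crossprod(x) + diag(cv0$lambda * sd_x^2 * n)) %*% t(x) %*% y
max(abs(as.matrix(beta_glmnet) - beta_ridge))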
# If x is standardized, the match between glmnet and the usual ridge
# analytical expression does not require scaling of lambda
x_std <- scale(x, scale = sd_x, center = TRUE)
cv_glmnet(x = x_std, y = y, alpha = "ridge", lambda = cv0$lambda,
          intercept = FALSE, thresh = 1e-20)$beta_hat
solve(crossprod(x_std) + diag(rep(cv0$lambda * n, p))) %*% t(x_std) %*% y
## Simple linear model
# Simulate data
n <- 100
p <- 1; q <- 1
set.seed(123456)
x <- matrix(rnorm(n * p), nrow = n, ncol = p)
e <- matrix(rnorm(n * q), nrow = n, ncol = q)
beta <- 2
y <- x * beta + e
# Fit by ridge (model with intercept, the default)
cv0 <- cv_glmnet(x = x, y = y, alpha = "ridge", cv_verbose = TRUE)
cv0$beta_hat
cv0$intercept
# Comparison with linear model with intercept
lm(y ~ 1 + x)$coefficients
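# With a negligible penalty the ridge fit essentially recovers the least
# squares coefficients (lambda = 1e-10 is an arbitrary small value used only
# for illustration)
cv_tiny <- cv_glmnet(x = x, y = y, alpha = "ridge", lambda = 1e-10,
                     thresh = 1e-20)
c(cv_tiny$intercept, cv_tiny$beta_hat)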
# Fit by ridge (model without intercept)
cv0 <- cv_glmnet(x = x, y = y, alpha = "ridge", cv_verbose = TRUE,
                 intercept = FALSE)
cv0$beta_hat
# Comparison with linear model without intercept
lm(y ~ 0 + x)$coefficients
# Same fit for the chosen lambda (and without intercept)
cv_glmnet(x = x, y = y, alpha = "ridge", lambda = cv0$lambda,
          intercept = FALSE)$beta_hat
# Same for lasso (model with intercept, the default)
cv1 <- cv_glmnet(x = x, y = y, alpha = "lasso")
cv1$beta_hat
## Multivariate linear model (p = 10, q = 1)
# Simulate data
n <- 50
p <- 10; q <- 1
set.seed(123456)
x <- matrix(rnorm(n * p, mean = 1, sd = rep(1:p, each = n)),
            nrow = n, ncol = p)
e <- matrix(rnorm(n * q), nrow = n, ncol = q)
beta <- ((1:p - 1) / p)^2
y <- x %*% beta + e
# Fit ridge (model without intercept)
cv0 <- cv_glmnet(x = x, y = y, alpha = "ridge", intercept = FALSE,
                 cv_verbose = TRUE)
cv0$beta_hat
# Same fit for the chosen lambda
cv_glmnet(x = x, y = y, alpha = "ridge", lambda = cv0$lambda,
          intercept = FALSE)$beta_hat
# Compare the ridge fit with its analytical solution. Observe the factor in
# lambda, necessary since lambda is searched for the standardized data
sd_x <- apply(x, 2, function(x) sd(x)) * sqrt((n - 1) / n)
sd_y <- sd(y) * sqrt((n - 1) / n)
cv_glmnet(x = x, y = y, alpha = "ridge", lambda = cv0$lambda,
          intercept = FALSE, thresh = 1e-20)$beta_hat
solve(crossprod(x) + diag(cv0$lambda * sd_x^2 / sd_y * n)) %*% t(x) %*% y
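# For contrast, without the rescaling of lambda the analytical expression
# does not reproduce the glmnet fit above
solve(crossprod(x) + diag(rep(cv0$lambda, p))) %*% t(x) %*% y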
# If x and y are standardized, the match between glmnet and the usual ridge
# analytical expression does not require scaling of lambda
x_std <- scale(x, scale = sd_x, center = TRUE)
y_std <- scale(y, scale = sd_y, center = TRUE)
cv_glmnet(x = x_std, y = y_std, alpha = "ridge", lambda = cv0$lambda,
          intercept = FALSE, thresh = 1e-20)$beta_hat
solve(crossprod(x_std) + diag(rep(cv0$lambda * n, p))) %*% t(x_std) %*% y_std
# Fit lasso (model with intercept, the default)
cv1 <- cv_glmnet(x = x, y = y, alpha = "lasso", cv_verbose = TRUE)
cv1$beta_hat
# ## Parallelization
#
# # Parallel
# doMC::registerDoMC(cores = 2)
# microbenchmark::microbenchmark(
#   cv_glmnet(x = x, y = y, nlambda = 100, cv_parallel = TRUE),
#   cv_glmnet(x = x, y = y, nlambda = 100, cv_parallel = FALSE),
#   times = 10)
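# # doMC is not available on Windows; registering a doParallel backend should
# # work as well (untested sketch)
# doParallel::registerDoParallel(cores = 2)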
# }