## ------------------------------------------------------------
## Example 1: one binary and one continuous outcome, two-way
## interactions switched on, and must-have variables (one of
## them a factor, which is expanded to dummies)
## ------------------------------------------------------------
set.seed(123)
n <- 150

## Simulated predictors. The draw order (x1, then x2, then group) is part of
## what makes the example reproducible under the seed above.
x1 <- rnorm(n)
x2 <- rnorm(n)
group <- factor(sample(c("A", "B", "C"), size = n, replace = TRUE))

## Binary outcome: logistic model driven by x2, level "C" of group, and the
## x1:x2 interaction.
eta_bin <- -0.5 + 1.2 * x2 - 0.8 * (group == "C") + 0.5 * x1 * x2
p <- 1 / (1 + exp(-eta_bin))
y_bin <- rbinom(n, size = 1, prob = p)

## Continuous outcome: linear in x1, level "B" of group, and the x1:x2
## interaction, plus Gaussian noise.
y_cont <- 1.5 * x1 - 2 * (group == "B") + 0.7 * x1 * x2 +
  rnorm(n, mean = 0, sd = 0.7)

## Unnamed arguments take their column names from the variable names.
df <- data.frame(y_bin, y_cont, x1, x2, group)

res1 <- select_auxiliary_variables_lasso_cv(
  df = df,
  outcome_vars = c("y_bin", "y_cont"),
  auxiliary_vars = c("x1", "x2", "group"),
  # 'group' is a factor, so it expands to its dummies
  must_have_vars = c("x1", "group"),
  check_twoway_int = TRUE,
  nfolds = 3,
  verbose = FALSE,
  standardize = TRUE,
  return_models = FALSE
)

## Look at what was selected, plus the accompanying metadata
res1$selected_variables
res1$by_outcome
res1$selected_lambdas
## Terms with a zero penalty factor are the must-keep terms
## (including factor dummies and interactions)
names(which(res1$penalty_factors == 0))
res1$interaction_metadata$full_formula
## ------------------------------------------------------------
## Example 2: one continuous outcome, interactions switched
## off (main effects only)
## ------------------------------------------------------------
set.seed(456)
n2 <- 120

## Simulated predictors. The draw order (a, then b, then f) is kept fixed so
## the data are reproducible under the seed above.
a <- rnorm(n2)
b <- rnorm(n2)
f <- factor(sample(c("a", "b"), size = n2, replace = TRUE))

## The outcome is built from a and level "b" of f; b does not enter the
## formula.
y <- 2 * a - 1 * (f == "b") + rnorm(n2, mean = 0, sd = 1)

## Unnamed arguments take their column names from the variable names.
toy <- data.frame(y, a, b, f)

res2 <- select_auxiliary_variables_lasso_cv(
  df = toy,
  outcome_vars = "y",
  auxiliary_vars = c("a", "b", "f"),
  check_twoway_int = FALSE, # main effects only
  nfolds = 3,
  verbose = FALSE
)

## Selections, chosen lambdas, and the fit summary for outcome "y"
res2$selected_variables
res2$selected_lambdas
res2$goodness_of_fit$y
## Run the code above in your browser using DataLab