## Linear regression framework
# Data simulation (all arguments of SimulateRegression() left at their defaults)
simul <- SimulateRegression()
# Subsampling (no `resampling` argument given, so the function's default is used)
ids <- Resample(data = simul$ydata, family = "gaussian")
# Number of repeated entries in the returned ids
# (presumably 0 if subsampling draws without replacement -- confirm against Resample())
sum(duplicated(ids))
# Bootstrapping (resampling = "bootstrap")
ids <- Resample(data = simul$ydata, family = "gaussian", resampling = "bootstrap")
# Number of repeated entries in the returned ids
# (presumably > 0 if the bootstrap draws with replacement -- confirm)
sum(duplicated(ids))
## Logistic regression framework
# Data simulation with family = "binomial"
simul <- SimulateRegression(family = "binomial")
# Subsampling
ids <- Resample(data = simul$ydata, family = "binomial")
sum(duplicated(ids))
# Relative frequencies of the outcome categories in the full data ...
prop.table(table(simul$ydata))
# ... and among the resampled observations, for comparison
prop.table(table(simul$ydata[ids]))
# Data simulation for a binary confounder: 1 where a uniform draw exceeds 0.5, else 0
# NOTE(review): n = 100 is hard-coded and is assumed to equal the number of
# observations in simul$ydata -- confirm against the SimulateRegression() default
conf <- ifelse(runif(n = 100) > 0.5, yes = 1, no = 0)
# User-defined resampling function
#
# Draws a fraction `tau` of the observation indices separately within every
# combination of outcome category ("0" / "1") and level of `Z`.
#
# Arguments:
#   data  outcome values; elements are compared against "0" and "1".
#   tau   fraction of each stratum to draw. The requested size
#         tau * (stratum size) is passed to sample.int() as is.
#   Z     stratum labels, compared element-wise with `data`.
#   ...   accepted for compatibility with callers and ignored.
#
# Returns the selected indices, grouped by level of Z in order of first
# appearance (category "0" before category "1"), or NULL if Z is empty.
BalancedResampling <- function(data, tau, Z, ...) {
  # Draw from the positions of `idx` rather than calling sample(idx, ...):
  # sample() treats a single number k as "sample from 1:k", which would pick
  # observations outside the stratum whenever a stratum has exactly one member.
  # For strata with two or more members this consumes the random stream
  # exactly as sample(idx, ...) does.
  draw_fraction <- function(idx) {
    idx[sample.int(length(idx), size = tau * length(idx))]
  }

  s <- NULL
  # One iteration per stratum; the number of strata is small, so appending
  # to `s` is not a performance concern here.
  for (z in unique(Z)) {
    in_stratum <- Z == z
    s <- c(
      s,
      draw_fraction(which((data == "0") & in_stratum)),
      draw_fraction(which((data == "1") & in_stratum))
    )
  }
  s
}
# Resampling keeping proportions by Y and Z
# The user-defined function is passed via `resampling`; Z = conf is supplied as
# an extra argument (presumably forwarded to BalancedResampling -- confirm)
ids <- Resample(data = simul$ydata, family = "binomial", resampling = BalancedResampling, Z = conf)
# Joint relative frequencies of outcome and confounder in the full data ...
prop.table(table(simul$ydata, conf))
# ... and among the resampled observations, for comparison
prop.table(table(simul$ydata[ids], conf[ids]))
# User-defined resampling for stability selection
# Same resampling function and confounder, now passed to VariableSelection()
stab <- VariableSelection(
xdata = simul$xdata, ydata = simul$ydata, family = "binomial",
resampling = BalancedResampling, Z = conf
)
# Run the code above in your browser using DataLab