## Lizards data from brglm2: MDYPL with `alpha = 1` reproduces the
## maximum likelihood (ML) fit exactly.
data("lizards", package = "brglm2")
liz_fm <- cbind(grahami, opalinus) ~ height + diameter + light + time
## Plain ML fit
liz_ml0 <- glm(liz_fm, family = binomial(), data = lizards)
## MDYPL fit with `alpha = 1`; this is the same fit as `liz_ml0`
liz_ml <- glm(liz_fm, family = binomial(), data = lizards,
              method = "mdyplFit", alpha = 1)
summ_liz_ml <- summary(liz_ml)
summ_liz_ml0 <- summary(liz_ml0)
## Coefficient tables agree between the two fits
all.equal(coef(summ_liz_ml), coef(summ_liz_ml0))
## MDYPL fit with default `alpha` (see `?mdyplControl`)
## FIX: pass the model formula `liz_fm` to `glm()`; the original code
## passed the fitted model object `liz_ml` by mistake.
liz_mdypl <- glm(liz_fm, family = binomial(), data = lizards,
                 method = "mdyplFit")
## Comparing outputs from ML and MDYPL, with and without
## high-dimensionality corrections.
summary(liz_mdypl)
summary(liz_mdypl, hd_correction = TRUE)
summ_liz_ml
summary(liz_ml, hd_correction = TRUE)
## Not much difference in fits here as this is a low-dimensional
## problem with dimensionality constant
(liz_ml$rank - 1) / sum(weights(liz_ml))
## The case study in Section 8 of Sterzinger and
## Kosmidis (2024)
data("MultipleFeatures", package = "brglm2")
## Center the fou.* and kar.* features (training rows only)
vars <- grep("fou|kar", names(MultipleFeatures), value = TRUE)
train_id <- which(MultipleFeatures$training)
MultipleFeatures[train_id, vars] <-
  scale(MultipleFeatures[train_id, vars], scale = FALSE)
## Compute the MDYPL fits; `kappa` is the dimensionality constant p / n
kappa <- length(vars) / sum(MultipleFeatures$training)
full_fm <- formula(paste("I(digit == 7) ~", paste(vars, collapse = " + ")))
nest_vars <- grep("fou", vars, value = TRUE)
nest_fm <- formula(paste("I(digit == 7) ~", paste(nest_vars, collapse = " + ")))
## Quote the method name for consistency with the other `glm()` calls
full_m <- glm(full_fm, data = MultipleFeatures, family = binomial(),
              method = "mdyplFit", alpha = 1 / (1 + kappa), subset = training)
nest_m <- update(full_m, nest_fm)
## With a naive penalized likelihood ratio test we get no evidence
## against the hypothesis that the model with only `fou` features
## is as good a description of `7` as the model with both `fou` and
## `kar` features.
plrtest(nest_m, full_m)
## With a high-dimensionality correction there is strong evidence
## against the model with only `fou` features
plrtest(nest_m, full_m, hd_correction = TRUE)
# \donttest{
## A simulated data set as in Rigon & Aliverti (2023, Section 4.3)
set.seed(123)
n <- 1000
p <- 500
gamma <- sqrt(5)
## Standard-normal design matrix (rnorm defaults are mean 0, sd 1)
X <- matrix(rnorm(n * p), nrow = n, ncol = p)
## Coefficients in five equal-sized blocks, rescaled so the signal
## strength (Euclidean norm of the coefficient vector) equals `gamma`
beta_pattern <- rep(c(-1, -1/2, 0, 2, 3), each = p / 5)
betas <- gamma * beta_pattern / sqrt(sum(beta_pattern^2))
y <- rbinom(n, 1, plogis(drop(X %*% betas)))
fit_mdypl <- glm(y ~ -1 + X, family = binomial(), method = "mdyplFit")
## The default value of `alpha` is `n / (n + p)` here
identical(n / (n + p), fit_mdypl$alpha)
## Aggregate bias of the MDYPL and rescaled MDYPL estimators
ag_bias <- function(estimates, beta) mean(estimates - beta)
ag_bias(coef(summary(fit_mdypl))[, "Estimate"], betas)
ag_bias(coef(summary(fit_mdypl, hd_correction = TRUE))[, "Estimate"], betas)
# }
## Run the code above in your browser using DataLab