#----------------------------------------------------------------------------
# The same data set formatted three ways
# Format 1: one row per cell of the UCBAdmissions table, with the cell count
# stored in `Freq` so it can be handed to a model as a case weight (which,
# per ?glm, is an inappropriate use of weights).
ucb_weighted <- as.data.frame(UCBAdmissions)
# Counts come back as doubles; store them as integers.
ucb_weighted[["Freq"]] <- as.integer(ucb_weighted[["Freq"]])
head(ucb_weighted)
nrow(ucb_weighted)
# Format 2: one row per individual yes/no outcome (probably still
# inappropriate). Each row of `ucb_weighted` is repeated `Freq` times.
library(tidyr)
ucb_long <- tidyr::uncount(ucb_weighted, weights = Freq)
head(ucb_long)
nrow(ucb_long)
# Format 3: the outcome expressed as numbers of events. Each Gender/Dept
# combination becomes a single row, with one count column per level of `Admit`;
# combinations missing a level are filled with 0L.
ucb_events <- tidyr::pivot_wider(
  ucb_weighted,
  id_cols = c(Gender, Dept),
  names_from = Admit,
  values_from = Freq,
  values_fill = 0L
)
head(ucb_events)
nrow(ucb_events)
#----------------------------------------------------------------------------
# Different model fits
# Fit on the one-row-per-observation data, treating every row as a separate
# Bernoulli outcome.
glm(
  formula = Admit ~ Gender + Dept,
  family = binomial,
  data = ucb_long
)
# Fit on the cell-level data, supplying the cell counts as case weights.
# Weights produce the same statistics as the row-per-observation fit.
#
# `weights` is evaluated inside `data`, just like the variables in the formula,
# so the column is referenced by name. Writing `ucb_weighted$Freq` instead
# would bypass `data` and silently go out of sync if `data` were ever filtered,
# reordered, or swapped for another data frame.
glm(
  Admit ~ Gender + Dept,
  data = ucb_weighted,
  family = binomial,
  weights = Freq
)
# Fit on the events-format data: the response is a two-column matrix of counts
# ("x events out of n trials"). Note that, to get the same coefficients as the
# fits above, the order of the levels must be reversed, hence `Rejected` is
# listed before `Admitted`.
glm(
  formula = cbind(Rejected, Admitted) ~ Gender + Dept,
  family = binomial,
  data = ucb_events
)
# The new function: it starts from the frequency-weighted data and arrives at
# the correct fit.
# NOTE(review): glm_grouped() is not defined or attached anywhere in the visible
# script -- presumably it comes from a package loaded elsewhere; confirm.
glm_grouped(
  formula = Admit ~ Gender + Dept,
  data = ucb_weighted,
  weights = ucb_weighted[["Freq"]]
)
# Run the code above in your browser using DataLab