library("brglm2")
# Conducting covariate balancing on the `airquality` dataset. Our goal was to
# compare ozone levels by month, but we discovered that ozone levels are
# strongly correlated with wind intensity (measured in mph), and the average
# wind intensity varies across months. Therefore, we need to balance the
# months by wind values to ensure a valid comparison of ozone levels.
# Baseline imbalance: mean wind speed within each month
with(airquality, tapply(Wind, Month, mean))

# Model specification: month is the treatment, wind is the covariate
formula_air <- Month ~ Wind

# NOTE(review): estimate_gps(), csregion() and raincloud() are not defined in
# this file -- presumably they come from the vecmatch package, which has to be
# attached before this script is run; confirm.
#
# Estimate the generalized propensity scores with the "brglm2" method, i.e.
# maximum penalized likelihood with a powers-of-the-Jeffreys-prior penalty
# (type = "MPL_Jeffreys"), passing "5" as the reference level of Month.
gp_scores <- estimate_gps(
  formula_air,
  data = airquality,
  method = "brglm2",
  reference = "5",
  verbose_output = TRUE,
  control = brglmControl(type = "MPL_Jeffreys")
)

# Drop the observations that fall outside the common support region (CSR)
gps_csr <- csregion(gp_scores)

# Recompute the per-month wind means on the retained observations only
filter_which <- attr(gps_csr, "filter_vector")
filtered_air <- airquality[filter_which, ]
with(filtered_air, tapply(Wind, Month, mean))

# The remaining imbalance can also be inspected visually with raincloud()
raincloud(
  filtered_air,
  y = Wind,
  group = Month,
  significance = "t_test"
)
# Run the code above in your browser using DataLab