# Load the example data set used below.
data("nh0506")

# Build the strata: age bands crossed with sex.
# With cut()'s default right-closed intervals the bands are
# (19, 39], (39, 50], and (50, 85]; ages outside them become NA.
age_cat <- cut(
  nh0506$age,
  breaks = c(19, 39, 50, 85),
  labels = c("< 40 years", "40 - 50 years", "> 50 years")
)
strata <- age_cat:nh0506$sex

# Ask for balance on age, race, education, poverty ratio, and bmi,
# both across and within the levels of strata.
constraints <- generate_constraints(
  balance_formulas = list(
    age + race + education + povertyr + bmi ~ 1 + strata
  ),
  z = nh0506$z,
  data = nh0506
)

# Pick one control per treated unit in each stratum (ratio = 1),
# balancing the covariates as described by the constraints.
results <- optimize_controls(
  z = nh0506$z,
  X = constraints$X,
  importances = constraints$importances,
  st = strata,
  ratio = 1
)
# When the desired ratio is not feasible, pass q_s instead: a vector of
# the desired number of controls per stratum. It is typically produced by
# building a distance matrix between strata and calling generate_qs().
# NOTE: this example is wrapped in if (FALSE), so it is never executed.
if (FALSE) {
  # Distances between age bands, labelled on both axes by the age levels.
  age_levels <- levels(age_cat)
  age_dist <- matrix(
    data = c(
      0, 1, 2,
      1, 0, 1,
      2, 1, 0
    ),
    nrow = 3,
    byrow = TRUE,
    dimnames = list(age_levels, age_levels)
  )

  # Distances between sexes, labelled on both axes by the sex levels.
  sex_levels <- levels(nh0506$sex)
  sex_dist <- matrix(
    data = c(0, 1, 1, 0),
    nrow = 2,
    dimnames = list(sex_levels, sex_levels)
  )

  # Combine the two per-factor matrices into one matrix over the strata.
  strata_dist <- create_dist_matrix(age_dist, sex_dist)

  # Desired number of controls per stratum for the requested ratios.
  qs <- generate_qs(
    z = nh0506$z,
    st = strata,
    strata_dist = strata_dist,
    ratio = 2.5,
    max_ratio = 2.6,
    max_extra_s = 0
  )

  # Same optimization as before, but driven by q_s rather than a ratio.
  results <- optimize_controls(
    z = nh0506$z,
    X = constraints$X,
    importances = constraints$importances,
    st = strata,
    q_s = qs
  )
}
# Multiple treatment and control groups, as well as multiple simultaneous
# comparisons, are also supported.
# NOTE: this example is wrapped in if (FALSE), so it is never executed.
if (FALSE) {
  data("nh0506_3groups")

  # Stratify on age band only; sex is balanced through the formula instead.
  strata2 <- cut(
    nh0506_3groups$age,
    breaks = c(19, 39, 50, 85),
    labels = c("< 40 years", "40 - 50 years", "> 50 years")
  )

  constraints2 <- generate_constraints(
    balance_formulas = list(
      age + race + education + povertyr + bmi + sex ~ 1 + strata2
    ),
    z = nh0506_3groups$z,
    data = nh0506_3groups,
    treated = "daily smoker"
  )

  # Group-by-stratum counts, and the per-stratum difference between the
  # "some smoking" and "daily smoker" counts.
  group_counts <- table(nh0506_3groups$z, strata2)
  count_gap <- group_counts["some smoking", ] -
    group_counts["daily smoker", ]

  # 3 x 3 matrix filled by row: the first two rows both hold count_gap and
  # the third row is all zeros. Rows are named by the levels of z, columns
  # by the levels of strata2.
  q_star_s <- matrix(
    c(rep(count_gap, 2), rep(0, 3)),
    nrow = 3,
    byrow = TRUE,
    dimnames = list(levels(nh0506_3groups$z), levels(strata2))
  )

  results <- optimize_controls(
    z = nh0506_3groups$z,
    X = constraints2$X,
    importances = constraints2$importances,
    st = strata2,
    ratio = 1,
    treated = "daily smoker",
    treated_star = "some smoking",
    q_star_s = q_star_s,
    correct_sizes = FALSE
  )
}
# Run the code above in your browser using DataLab