# NOT RUN {
#####################################################################
# example one: predict whether respondent has a university degree
#####################################################################
require(effects)
data(WVS)
logit.model <- glm(degree ~ religion + gender + age, data=WVS, family=binomial(link="logit"))
summary(logit.model)
# compute cluster-adjusted p-values
clust.im.p <- cluster.im.glm(logit.model, WVS, ~ country, report = T)
############################################################################
# example two: linear model of whether respondent has a university degree
# with interaction between gender and age + country FEs
############################################################################
WVS$degree.n <- as.numeric(WVS$degree)
WVS$gender.n <- as.numeric(WVS$gender)
WVS$genderXage <- WVS$gender.n * WVS$age
lin.model <- glm(degree.n ~ gender.n + age + genderXage + religion + as.factor(country), data=WVS)
# compute marginal effect of male gender on probability of obtaining a university degree
# using conventional standard errors
age.vec <- seq(from=18, to=90, by=1)
me.age <- coefficients(lin.model)[2] + coefficients(lin.model)[4]*age.vec
plot(me.age ~ age.vec, type="l", ylim=c(-0.1, 0.1), xlab="age",
ylab="ME of male gender on Pr(university degree)")
se.age <- sqrt( vcov(lin.model)[2,2] + vcov(lin.model)[4,4]*(age.vec)^2 +
2*vcov(lin.model)[2,4]*age.vec)
ci.h <- me.age + qt(0.975, lower.tail=T, df=lin.model$df.residual) * se.age
ci.l <- me.age - qt(0.975, lower.tail=T, df=lin.model$df.residual) * se.age
lines(ci.h ~ age.vec, lty=2)
lines(ci.l ~ age.vec, lty=2)
# cluster on country, compute CIs for marginal effect of gender on degree attainment
# drop the FEs (absorbed into cluster-level coefficients)
lin.model.n <- glm(degree.n ~ gender.n + age + genderXage + religion, data=WVS)
clust.im.result <- cluster.im.glm(lin.model.n, WVS, ~ country, report = T, return.vcv = T)
# compute ME using average of cluster-level estimates (CIs center on this)
me.age.im <- clust.im.result$beta.bar[2] + clust.im.result$beta.bar[4]*age.vec
se.age.im <- sqrt( clust.im.result$vcv[2,2] + clust.im.result$vcv[4,4]*(age.vec)^2 +
2*clust.im.result$vcv[2,4]*age.vec)
# center the CIs on the ME using average of cluster-level estimates
# important: divide by sqrt(G) to convert SE of cluster-level estimates
# into SE of the mean, where G = number of clusters
G <- length(unique(WVS$country))
ci.h.im <- me.age.im + qt(0.975, lower.tail=T, df=(G-1)) * se.age.im/sqrt(G)
ci.l.im <- me.age.im - qt(0.975, lower.tail=T, df=(G-1)) * se.age.im/sqrt(G)
plot(me.age.im ~ age.vec, type="l", ylim=c(-0.2, 0.2), xlab="age",
ylab="ME of male gender on Pr(university degree)")
lines(ci.h.im ~ age.vec, lty=2)
lines(ci.l.im ~ age.vec, lty=2)
# for comparison, here's the ME estimate and CIs from the baseline model
lines(me.age ~ age.vec, lty=1, col="gray")
lines(ci.h ~ age.vec, lty=3, col="gray")
lines(ci.l ~ age.vec, lty=3, col="gray")
# }
Run the code above in your browser using DataLab