# NOT RUN {
#########################################
# example one: predict chicken weight
#########################################
# predict chick weight using diet, do not impose the null hypothesis
# because of factor variable "Diet"
data(ChickWeight)
weight.mod <- glm(formula = weight~Diet,data=ChickWeight)
cluster.wd.w.1 <-cluster.wild.glm(weight.mod, dat = ChickWeight,cluster = ~Chick, boot.reps = 1000)
# impose null
dum <- model.matrix(~ ChickWeight$Diet)
ChickWeight$Diet2 <- as.numeric(dum[,2])
ChickWeight$Diet3 <- as.numeric(dum[,3])
ChickWeight$Diet4 <- as.numeric(dum[,4])
weight.mod2 <- glm(formula = weight~Diet2+Diet3+Diet4,data=ChickWeight)
cluster.wd.w.2 <-cluster.wild.glm(weight.mod2, dat = ChickWeight,cluster = ~Chick, boot.reps = 1000)
############################################################################
# example two: linear model of whether respondent has a university degree
# with interaction between gender and age + country FEs
############################################################################
require(effects)
data(WVS)
WVS$degree.n <- as.numeric(WVS$degree)
WVS$gender.n <- as.numeric(WVS$gender)
WVS$genderXage <- WVS$gender.n * WVS$age
lin.model <- glm(degree.n ~ gender.n + age + genderXage + religion, data=WVS)
# compute marginal effect of male gender on probability of obtaining a university degree
# using conventional standard errors
age.vec <- seq(from=18, to=90, by=1)
me.age <- coefficients(lin.model)[2] + coefficients(lin.model)[4]*age.vec
plot(me.age ~ age.vec, type="l", ylim=c(-0.1, 0.1), xlab="age",
ylab="ME of male gender on Pr(university degree)")
se.age <- sqrt( vcov(lin.model)[2,2] + vcov(lin.model)[4,4]*(age.vec)^2 +
2*vcov(lin.model)[2,4]*age.vec)
ci.h <- me.age + qt(0.975, lower.tail=T, df=lin.model$df.residual) * se.age
ci.l <- me.age - qt(0.975, lower.tail=T, df=lin.model$df.residual) * se.age
lines(ci.h ~ age.vec, lty=2)
lines(ci.l ~ age.vec, lty=2)
# cluster on country, compute CIs for marginal effect of gender on degree attainment
clust.wild.result <- cluster.wild.glm(lin.model, WVS, ~ country,
impose.null = F, report = T,
output.replicates=T)
replicates <- clust.wild.result$replicates
me.boot <- matrix(data=NA, nrow=dim(replicates)[1], ncol=length(age.vec))
for(i in 1:dim(replicates)[1]){
me.boot[i,] <- replicates[i,"gender.n"] + replicates[i,"genderXage"]*age.vec
}
ci.wild <- apply(FUN=quantile, X=me.boot, MARGIN=2, probs=c(0.025, 0.975))
# a little lowess smoothing applied to compensate for discontinuities
# arising from shifting between replicates
lines(lowess(ci.wild[1,] ~ age.vec), lty=3)
lines(lowess(ci.wild[2,] ~ age.vec), lty=3)
# finishing touches to plot
legend(lty=c(1,2,3), "topleft",
legend=c("Model Marginal Effect", "Conventional 95% CI",
"Wild BS 95% CI"))
# }
Run the code above in your browser using DataLab