# NOT RUN {
#
data(dynarski)
# Table 14.1 of "Design of Observational Studies" (2nd edition)
head(dynarski)
# Table 14.2 of "Design of Observational Studies" (2nd edition)
zb<-dynarski$zb
zbf<-factor(zb,levels=c(1,0),labels=c("Father deceased",
"Father not deceased"))
table(zbf)
Xb<-dynarski[,3:10]
# Estimate the propensity score, Rosenbaum (2010, Section 14.3)
p<-glm(zb~Xb$faminc+Xb$incmiss+Xb$black+Xb$hisp
+Xb$afqtpct+Xb$edmissm+Xb$edm+Xb$female,
family=binomial)$fitted.values
# Figure 14.1 in "Design of Observational Studies" (2nd edition)
boxplot(p~zbf,ylab="Propensity score",main="1979-1981 Cohort")
# Read about missing covariate values in section 14.4
# of "Design of Observational Studies" (2nd edition)
# Robust Mahalanobis distance matrix, treated x control
dmat<-smahal(zb,Xb)
dim(dmat)
# Table 14.3 in "Design of Observational Studies" (2nd edition)
round(dmat[1:5,1:5],2)
# Add a caliper on the propensity score using a penalty function
dmat<-addcaliper(dmat,zb,p,caliper=.2)
dim(dmat)
# Table 14.4 in "Design of Observational Studies" (2nd edition)
round(dmat[1:5,1:5],2)
# }
# NOT RUN {
# YOU MUST LOAD the 'optmatch' package and accept its license to continue.
# Note that the 'optmatch' package has changed since 2010. It now suggests
# that you indicate the data explicitly as data=dynarski.
# Creating a 1-to-10 match, as in section 14.6 of
# "Design of Observational Studies" (2nd edition)
# This may take a few minutes.
m<-fullmatch(dmat,data=dynarski,min.controls = 10,max.controls = 10,
omit.fraction = 1379/2689)
length(m)
sum(matched(m))
1441/11 # There are 131 matched sets, 1 treated, 10 controls
# Alternative, simpler code to do the same thing
m2<-pairmatch(dmat,controls=10,data=dynarski)
# Results are the same:
sum(m[matched(m)]!=m2[matched(m2)])
# Housekeeping
im<-as.integer(m)
dynarski<-cbind(dynarski,im)
dm<-dynarski[matched(m),]
dm<-dm[order(dm$im,1-dm$zb),]
# Table 14.5 in "Design of Observational Studies" (2nd edition)
which(dm$id==10)
dm[188:198,]
which(dm$id==396)
dm[23:33,]
which(dm$id==3051)
dm[1068:1078,]
# In principle, there can be a tie, in which several different
# matched samples all minimize the total distance. On my
# computer, this calculation reproduces Table 14.5, but were
# there a tie, 'optmatch' should return one of the tied optimal
# matches, but not any particular one.
# }
Run the code above in your browser using DataLab