# NOT RUN {
## IN THESE EXAMPLES YOU NEED TO LOAD THE R PACKAGE
# 'optmatch'. OTHERWISE, THEY WILL RUN IN A CODE CHECK MODE.
# library(optmatch)
###### Usage 1 ##########
data(nh0506)
# }
# NOT RUN {
## Covariates handed to makeblocks through its 'vars' argument.
block_covariates <- c("female", "age", "black", "hispanic",
                      "education", "povertyr")
## Block the nh0506 data into blocks of size 4, with maxItr = 15.
res <- makeblocks(nh0506, bsize = 4, maxItr = 15, vars = block_covariates)
# }
# NOT RUN {
###### Some diagnostics
## Covariate whose within-strata balance is inspected by the boxplots.
v <- "age"
## Boxplots
temp <- res$.data
## Name of the helper column that will hold the stratum-centred covariate.
v_instrata <- paste0(v, ".instrata")
temp[[v_instrata]] <- temp[[v]]
## Mean of the covariate per stratum (column 1: stratum id, column 2: mean).
meanbystrat <- aggregate(temp[[v]], by = list(strata = temp$strata), mean)
for (b in unique(temp$strata)) {
  who <- which(temp$strata == b)
  ## Subtract the stratum mean. The binary minus has to END the first line:
  ## a continuation line that starts with '-' is parsed as a separate
  ## statement, which silently turns the assignment into a no-op.
  temp[[v_instrata]][who] <- temp[[v_instrata]][who] -
    meanbystrat[meanbystrat[, 1] == b, 2]
}
## Add the overall mean back so the centred values are on the same scale
## as the raw covariate.
temp[[v_instrata]] <- temp[[v_instrata]] + mean(temp[[v]])
## Raw values for every row versus centred values for rows with strata > 0
## (presumably strata == 0 marks units left out of the blocks; confirm).
boxplot(v ~ x,
        data = data.frame(
          v = c(temp[[v]], temp[[v_instrata]][temp$strata > 0]),
          x = c(rep("prestratification", nrow(temp)),
                rep("poststratification", sum(temp$strata > 0)))))
## Anova
## NOTE: the response is hard-coded to 'age'; edit it together with 'v'.
summary(aov(age ~ factor(strata), data = res$.data[res$.data$strata > 0, ]))
# }
# NOT RUN {
###### Usage 2 ##########
## Compute the distance matrix from the covariate columns first, then pass
## it to makeblocks together with the data.
covariate_cols <- c("female", "age", "black", "hispanic",
                    "education", "povertyr")
distmat <- smahal(nh0506[, covariate_cols])
res <- makeblocks(distmat, bsize = 4, maxItr = 15, .data = nh0506)
# }
# NOT RUN {
# Other usages
covariate_cols <- c("female", "age", "black", "hispanic",
                    "education", "povertyr")
## Passing the covariate columns directly: makeblocks internally calls
## 'smahal' to create the distances.
res <- makeblocks(nh0506[, covariate_cols], 4)
## Without the data argument, the blocking structure is returned in
## matrix form.
distmat <- smahal(nh0506[, covariate_cols])
res <- makeblocks(distmat, bsize = 4, maxItr = 20)
# }
# NOT RUN {
###### Usage 3 ##########
data(wls)
# }
# NOT RUN {
data(wls)
library(optmatch)
wls4match <- wls
## This code replicates the blocking algorithm used in the paper
## Karmakar, Small, and Rosenbaum (2018).

## Build one distance matrix per group of covariates.
## IQ
distmat1 <- smahal(wls4match[, "gwiiq_bm"])
## Father's and mother's education and parents' income, followed by the
## indicators for income in the top 5% and 1% (incg400, incg250).
family_cols <- c("edfa57q.NoNA", "edmo57q.NoNA", "bmpin1.NoNA",
                 "incg400", "incg250")
distmat2 <- smahal(wls4match[, family_cols])
## Occupation scores
occupation_cols <- c("ocsf57.NoNA", "ocpf57.NoNA")
distmat2.2 <- smahal(wls4match[, occupation_cols])
## Missingness indicators
missing_cols <- c("edfa57q.miss", "edmo57q.miss",
                  "bmpin1.miss", "ocsf57.miss", "ocpf57.miss")
distmat3 <- smahal(wls4match[, missing_cols])

## Weighted combination: IQ (gwiiq_bm) gets the largest weight, then the
## parents' education and income, then occupation, then missingness.
distmat <- 10 * distmat1 + 6 * distmat2 + 3 * distmat2.2 + 2 * distmat3

## Create the blocks. This can take about 30 minutes to run, and possibly
## longer depending on the computational power of the system.
set.seed(841)
res.20.2 <- makeblocks(distmat, bsize = 25, Itr = 250, maxItr = 250,
                       .data = wls4match)
# }
# Run the code above in your browser using DataLab