#Generate a datasets with group indicator and four variables:
#- var1, continuous, sampled from normal distributions;
#- var2, continuous, sampled from beta distributions;
#- var3, categorical with 4 levels;
#- var4, binary.
set.seed(1234567)
dat <- data.frame(group = c(rep("A",10),rep("B",20),rep("C",30)),
var1 = c(rnorm(10,mean=0,sd=1),
rnorm(20,mean=1,sd=2),
rnorm(30,mean=-1,sd=2)),
var2 = c(rbeta(10,shape1=1,shape2=1),
rbeta(20,shape1=2,shape2=1),
rbeta(30,shape1=1,shape2=2)),
var3 = factor(c(rbinom(10,size=3,prob=.4),
rbinom(20,size=3,prob=.5),
rbinom(30,size=3,prob=.3))),
var4 = factor(c(rbinom(10,size=1,prob=.5),
rbinom(20,size=1,prob=.3),
rbinom(30,size=1,prob=.7))))
#Match on propensity score
#-------------------------
#With multiple groups, need a multinomial model for the PS
library(VGAM)
psModel <- vglm(group ~ var1 + var2 + var3 + var4,
family=multinomial, data=dat)
#Estimated logits - 2 for each unit: log(P(group=A)/P(group=C)), log(P(group=B)/P(group=C))
logitPS <- predict(psModel, type = "link")
dat$logit_AvsC <- logitPS[,1]
dat$logit_BvsC <- logitPS[,2]
#Match on logits of PS
resultPs <- polymatch(group ~ logit_AvsC + logit_BvsC, data = dat,
distance = "euclidean")
dat$match_id_ps <- resultPs$match_id
#Match on covariates
#--------------------
#Match on continuous covariates with exact match on categorical/binary variables
resultCov <- polymatch(group ~ var1 + var2, data = dat,
distance = "mahalanobis",
exactMatch = ~var3+var4)
dat$match_id_cov <- resultCov$match_id
Run the code above in your browser using DataLab