#############################################################################
# EXAMPLE 1: Plausible value imputation for data.ma04 | 2 scales
#############################################################################
# mice() is provided by the mice package; the data set data.ma04 ships with
# miceadds. Attach both so the example also runs in a fresh session.
library(mice)
library(miceadds)

data(data.ma04)
dat <- data.ma04

# Scale 1 consists of items A1,...,A4
# Scale 2 consists of items B1,...,B5
# Both scale scores start out completely missing; they are filled in by the
# plausible value imputation below.
dat$scale1 <- NA
dat$scale2 <- NA

# empty imputation (dry run, only used to obtain default settings)
# NOTE(review): recent mice releases may reject m=0; if so, m=1 together with
# maxit=0 should yield the same defaults -- confirm against installed version.
imp <- mice(dat, m = 0, maxit = 0)
summary(imp)

# define predictors (full element name instead of relying on `$` partial
# matching of "pred")
predM <- imp$predictorMatrix

# define imputation methods: start with "norm" for every variable
impMethod <- rep("norm", length(imp$method))
names(impMethod) <- names(imp$method)

# look at missing proportions
colSums(is.na(dat))

# redefine imputation methods for plausible value imputation
impMethod["scale1"] <- "2l.plausible.values"
predM["scale1", ] <- 1
# items corresponding to a scale should be declared by a 3 in the predictor matrix
predM["scale1", c("A1", "A2", "A3", "A4")] <- 3

impMethod["scale2"] <- "2l.plausible.values"
# scale2 is not used as a predictor for any variable at this point
predM[, "scale2"] <- 0
predM["scale2", c("A2", "A3", "A4", "V6", "V7")] <- 1
diag(predM) <- 0

# use imputed scale values as predictors for V5, V6 and V7
predM[c("V5", "V6", "V7"), c("scale1", "scale2")] <- 1
# exclude for V5, V6 and V7 the items of scales A and B as predictors
predM[c("V5", "V6", "V7"), c(paste0("A", 2:4), paste0("B", 1:5))] <- 0
# exclude 'group' as a predictor
predM[, "group"] <- 0

# look at imputation method and predictor matrix
impMethod
predM
#-------------------------------
# Parameter for imputation
#***
# scale 1 (A1,...,A4)
# known Cronbach's Alpha
alpha <- list("scale1" = .8)
alpha.se <- list("scale1" = .05) # sample alpha with a standard deviation of .05
#***
# scale 2 (B1,...,B5)
# means and SE's of scale scores are assumed to be known
M.scale2 <- rowMeans(dat[, paste0("B", 1:5)])
# M.scale2[ is.na( M.scale2 ) ] <- mean( M.scale2 , na.rm=TRUE )
# One common standard error for every row: sqrt( var(M) * (1 - .8) ).
# NOTE(review): the .8 presumably plays the role of an assumed reliability,
# mirroring alpha for scale 1 -- confirm.
SE.scale2 <- rep(sqrt(var(M.scale2, na.rm = TRUE) * (1 - .8)), nrow(dat))
# => heterogeneous measurement errors are allowed
scale.values <- list("scale2" = list("M" = M.scale2, "SE" = SE.scale2))
#*** Imputation Model 1: Imputation using four parallel chains
# The imputation methods are passed via `method`; the legacy
# `imputationMethod` spelling is not honoured by current mice releases.
# alpha, alpha.se, scale.values and allow.na are forwarded through `...`.
imp1 <- mice(dat,
  predictorMatrix = predM, m = 4, maxit = 5,
  alpha.se = alpha.se, method = impMethod, allow.na = TRUE, alpha = alpha,
  scale.values = scale.values
)
summary(imp1)
# extract first imputed dataset (from imp1, not from the empty dry run `imp`)
dat11 <- mice::complete(imp1, 1)

#*** Imputation Model 2: Imputation using one long chain
imp2 <- mice.1chain(dat,
  predictorMatrix = predM, burnin = 10, iter = 20, Nimp = 4,
  alpha.se = alpha.se, method = impMethod, allow.na = TRUE, alpha = alpha,
  scale.values = scale.values
)
summary(imp2)
#-------------
#*** Imputation Model 3: Imputation including group level variables
# use group indicator for plausible value estimation
predM["scale1", "group"] <- -2
# V7 and B1 should be aggregated at the group level
predM["scale1", c("V7", "B1")] <- 2
predM["scale2", "group"] <- -2
# for scale2, V7 and A1 are the variables flagged with a 2
predM["scale2", c("V7", "A1")] <- 2

# perform single imputation (m=1)
# Imputation methods are passed via `method`; the legacy `imputationMethod`
# spelling is not honoured by current mice releases.
imp <- mice(dat,
  predictorMatrix = predM, m = 1, maxit = 10,
  method = impMethod, allow.na = TRUE, alpha = alpha,
  scale.values = scale.values
)
dat10 <- mice::complete(imp)

# multilevel model (random intercept for group), fitted once on the first
# data set extracted after Model 1 and once on the Model 3 data set
library(lme4)
mod <- lmer(scale1 ~ (1 | group), data = dat11)
summary(mod)
mod <- lmer(scale1 ~ (1 | group), data = dat10)
summary(mod)
#############################################################################
# SIMULATED EXAMPLE 2: Plausible value imputation with chained equations
#############################################################################
# - simulate a latent variable theta and dichotomous item responses
# - two covariates X in which the second covariate has measurement error
library(sirt)
library(TAM)
library(lavaan)

set.seed(7756)
N <- 2000 # number of persons
I <- 10 # number of items

# simulate covariates (bivariate normal, unit variances, covariance .5)
# mvrnorm() lives in MASS, which is not attached above, so qualify the call.
X <- MASS::mvrnorm(N, mu = c(0, 0), Sigma = matrix(c(1, .5, .5, 1), 2, 2))
colnames(X) <- paste0("X", 1:2)

# second covariate with measurement error with variance var.err
var.err <- .3
X.err <- X
X.err[, 2] <- X[, 2] + rnorm(N, sd = sqrt(var.err))

# simulate theta from the true (error-free) covariates
theta <- .5 * X[, 1] + .4 * X[, 2] + rnorm(N, sd = .5)

# simulate item responses
itemdiff <- seq(-2, 2, length.out = I) # item difficulties
dat <- sirt::sim.raschtype(theta, b = itemdiff)

#***********************
#*** Model 0: Regression model with true variables
mod0 <- lm(theta ~ X)
summary(mod0)
#**********************
# Plausible value imputation for abilities and error-prone covariates
# using the mice package.

#-- individual likelihood for the ability plausible values:
#   fit an IRT model to the item responses and extract its likelihood
mod11 <- TAM::tam.mml(resp = dat)
likePV <- IRT.likelihood(object = mod11)

#-- individual likelihood for the error-prone covariate X2:
#   single-indicator factor model in which the residual variance of X2 is
#   fixed at the known measurement error variance of 0.3
lavmodel <- "
X2true =~ 1*X2
X2 ~~ 0.3*X2
"
mod12 <- lavaan::cfa(
  model = lavmodel,
  data = as.data.frame(X.err)
)
summary(mod12)
likeX2 <- IRTLikelihood.cfa(data = X.err, cfaobj = mod12)
str(likeX2)
#-- create data input for mice package
# PVA (ability) and X2 (error-prone covariate) are entirely missing and are
# generated by plausible value imputation; X1 is fully observed.
data <- data.frame("PVA" = NA, "X1" = X[, 1], "X2" = NA)
vars <- colnames(data)
V <- length(vars)
# every variable predicts every other variable
predictorMatrix <- 1 - diag(V)
rownames(predictorMatrix) <- colnames(predictorMatrix) <- vars
imputationMethod <- rep("norm", V)
names(imputationMethod) <- vars
imputationMethod[c("PVA", "X2")] <- "2l.plausible.values"

#-- create argument lists for plausible value imputation
# likelihood and theta grid of plausible value derived from IRT model
like <- list("PVA" = likePV, "X2" = likeX2)
# note: from here on `theta` holds the theta grids read from the "theta"
# attribute of the likelihood objects, no longer the simulated abilities
theta <- list(
  "PVA" = attr(likePV, "theta"),
  "X2" = attr(likeX2, "theta")
)

#-- initial imputations
data.init <- data
data.init$PVA <- mod11$person$EAP
data.init$X2 <- X.err[, "X2"]

#-- imputation using the mice and miceadds package
# Imputation methods are passed via `method`; the legacy `imputationMethod`
# argument name is not honoured by current mice releases.
imp1 <- mice::mice(as.matrix(data),
  predictorMatrix = predictorMatrix, m = 4, maxit = 6,
  method = imputationMethod, allow.na = TRUE,
  theta = theta, like = like, data.init = data.init
)
summary(imp1)

# compute linear regression on each imputed data set and pool the results
mod4a <- with(imp1, lm(PVA ~ X1 + X2))
summary(mice::pool(mod4a))
# Run the code above in your browser using DataLab