#############################################################################
# EXAMPLE 1: Dichotomous unidimensional data sim.rasch
#############################################################################
data(sim.rasch)
resp <- sim.rasch[ 1:500 , 1:15 ] # select subsample of students and items
# estimate Rasch model
mod <- tam.mml(resp)
# draw 5 plausible values without a normality
# assumption of the posterior and 2000 ability nodes
pv1a <- tam.pv( mod , nplausible=5 , ntheta=2000 )
# draw 5 plausible values with a normality
# assumption of the posterior and 500 ability nodes
pv1b <- tam.pv( mod , nplausible=5 , ntheta=500 , normal.approx=TRUE )
# distribution of first plausible value from imputation pv1
hist(pv1a$pv$PV1.Dim1 )
# boxplot of all plausible values from imputation pv2
boxplot(pv1b$pv[ , 2:6 ] )
#############################################################################
# EXAMPLE 2: Unidimensional plausible value imputation with
# background variables; dataset data.pisaRead from sirt package
#############################################################################
data(data.pisaRead, package="sirt")
dat <- data.pisaRead$data
## > colnames(dat)
## [1] "idstud" "idschool" "female" "hisei" "migra" "R432Q01"
## [7] "R432Q05" "R432Q06" "R456Q01" "R456Q02" "R456Q06" "R460Q01"
## [13] "R460Q05" "R460Q06" "R466Q02" "R466Q03" "R466Q06"
## Note that reading items have variable names starting with R4
# estimate 2PL model without covariates
items <- grep("R4" , colnames(dat) ) # select test items from data
mod2a <- tam.mml.2pl( resp=dat[,items] )
summary(mod2a)
# fix item parameters for plausible value imputation
# fix item intercepts by defining xsi.fixed
xsi0 <- mod2a$xsi$xsi
xsi.fixed <- cbind( seq(1,length(xsi0)) , xsi0 )
# fix item slopes using mod2$B
# matrix of latent regressors female, hisei and migra
Y <- dat[ , c("female" , "hisei" , "migra") ]
mod2b <- tam.mml( resp=dat[,items] , B=mod2a$B , xsi.fixed=xsi.fixed , Y=Y)
# plausible value imputation with normality assumption
# and ignoring uncertainty about regression coefficients
# -> the default is samp.regr=FALSE
pv2c <- tam.pv( mod2b , nplausible=10 , ntheta=500 , normal.approx=TRUE )
# sampling of regression coefficients
pv2d <- tam.pv( mod2b , nplausible=10 , ntheta=500 , samp.regr=TRUE)
# sampling of regression coefficients, normal approximation using the
# theta grid from the model
pv2e <- tam.pv( mod2b , samp.regr=TRUE , theta.model=TRUE , normal.approx=TRUE)
#--- create list of multiply imputed datasets with plausible values
# define dataset with covariates to be matched
Y <- dat[ , c("idstud" , "idschool" , "female" , "hisei" , "migra") ]
# define plausible value names
pvnames <- c("PVREAD")
# create list of imputed datasets
datlist1 <- tampv2datalist( pv2e , pvnames = pvnames , Y=Y , Y.pid="idstud")
str(datlist1)
# create a matrix of covariates with different set of students than in pv2e
Y1 <- Y[ seq( 1 , 600 , 2 ) , ]
# create list of multiply imputed datasets
datlist2 <- tampv2datalist( pv2e , pvnames = c("PVREAD"), Y=Y1 , Y.pid="idstud")
#--- fit some models in lavaan and semTools
library(lavaan)
library(semTools)
#*** Model 1: Linear regression
lavmodel <- "
PVREAD ~ migra + hisei
PVREAD ~~ PVREAD
"
mod1 <- semTools::lavaan.mi( lavmodel , data = datlist1 , m=0)
summary(mod1 , standardized=TRUE, rsquare=TRUE)
# apply lavaan for third imputed dataset
mod1a <- lavaan::lavaan( lavmodel , data = datlist1[[3]] )
summary(mod1a , standardized=TRUE, rsquare=TRUE)
# compare with mod1 by looping over all datasets
mod1b <- lapply( datlist1 , FUN = function(dat0){
mod1a <- lavaan( lavmodel , data = dat0 )
coef( mod1a)
} )
mod1b
mod1b <- matrix( unlist( mod1b ) , ncol= length( coef(mod1)) , byrow=TRUE )
mod1b
round( colMeans(mod1b) , 3 )
coef(mod1) # -> results coincide
#*** Model 2: Path model
lavmodel <- "
PVREAD ~ migra + hisei
hisei ~ migra
PVREAD ~~ PVREAD
hisei ~~ hisei
"
mod2 <- semTools::lavaan.mi( lavmodel , data = datlist1 )
summary(mod2 , standardized=TRUE, rsquare=TRUE)
# fit statistics
inspect( mod2 , what="fit")
#--- using mice
library(mice)
library(miceadds)
# convert datalist into a mids object
mids1 <- miceadds::datalist2mids( datlist1 )
# fit linear regression
mod1c <- with( mids1 , lm( PVREAD ~ migra + hisei ) )
summary( pool(mod1c) )
#############################################################################
# SIMULATED EXAMPLE 3: Multidimensional plausible value imputation
#############################################################################
# (1) simulate some data
set.seed(6778)
library(mvtnorm)
N <- 1000
Y <- cbind( rnorm( N ) , rnorm(N) )
theta <- rmvnorm( N,mean=c(0,0), sigma=matrix( c(1,.5,.5,1) , 2 , 2 ))
theta[,1] <- theta[,1] + .4 * Y[,1] + .2 * Y[,2] # latent regression model
theta[,2] <- theta[,2] + .8 * Y[,1] + .5 * Y[,2] # latent regression model
I <- 20
p1 <- plogis( outer( theta[,1] , seq( -2 , 2 , len=I ) , "-" ) )
resp1 <- 1 * ( p1 > matrix( runif( N*I ) , nrow=N , ncol=I ) )
p1 <- plogis( outer( theta[,2] , seq( -2 , 2 , len=I ) , "-" ) )
resp2 <- 1 * ( p1 > matrix( runif( N*I ) , nrow=N , ncol=I ) )
resp <- cbind(resp1,resp2)
colnames(resp) <- paste("I" , 1:(2*I), sep="")
# (2) define loading Matrix
Q <- array( 0 , dim = c( 2*I , 2 ))
Q[cbind(1:(2*I), c( rep(1,I) , rep(2,I) ))] <- 1
# (3) fit latent regression model
mod <- tam.mml( resp=resp , Y=Y , Q=Q , control=list(maxiter=5) )
# (4) draw plausible values with normal approximation using the orginal theta grid
pv1 <- tam.pv( mod , normal.approx=TRUE , theta.mod = TRUE )
# (5) convert plausible values to list of imputed datasets
Y1 <- data.frame(Y)
colnames(Y1) <- paste0("Y",1:2)
pvnames <- c("PVFA","PVFB")
# create list of imputed datasets
datlist1 <- tampv2datalist( pv1 , pvnames = pvnames , Y=Y1 )
str(datlist1)
# (6) apply statistical models
library(semTools)
# define linear regression
lavmodel <- "
PVFA ~ Y1 + Y2
PVFA ~~ PVFA
"
mod1 <- semTools::lavaan.mi( lavmodel , data = datlist1 )
summary(mod1 , standardized=TRUE, rsquare=TRUE)
#############################################################################
# SIMULATED EXAMPLE 4: Plausible value imputation with measurement
# errors in covariates
#############################################################################
library(sirt)
set.seed(7756)
N <- 2000 # number of persons
I <- 10 # number of items
# simulate covariates
X <- mvrnorm( N , mu=c(0,0) , Sigma = matrix( c(1,.5,.5,1) ,2 ,2 ) )
colnames(X) <- paste0("X",1:2)
# second covariate with measurement error with variance var.err
var.err <- .3
X.err <- X
X.err[,2] <-X[,2] + rnorm(N, sd = sqrt(var.err) )
# simulate theta
theta <- .5*X[,1] + .4*X[,2] + rnorm( N , sd = .5 )
# simulate item responses
itemdiff <- seq( -2 , 2 , length=I) # item difficulties
dat <- sirt::sim.raschtype( theta , b = itemdiff )
#***********************
#*** Model 0: Regression model with true variables
mod0 <- lm( theta ~ X )
summary(mod0)
#***********************
#*** Model 1: latent regression model with true covariates X
xsi.fixed <- cbind( 1:I , itemdiff )
mod1 <- tam.mml( dat , xsi.fixed=xsi.fixed , Y=X)
summary(mod1)
# draw plausible values
res1a <- tam.pv( mod1 , normal.approx=TRUE , ntheta=200 , samp.regr=TRUE)
# create list of multiply imputed datasets
library(miceadds)
datlist1a <- tampv2datalist( res1a , Y=X )
imp1a <- miceadds::datalist2mids( datlist1a )
# fit linear model
# linear regression with measurement errors in X
lavmodel <- "
PV.Dim1 ~ X1 + X2true
X2true =~ 1*X2
X2 ~~ 0.3*X2 # = var.err
PV.Dim1 ~~ PV.Dim1
X2true ~~ X2true
"
mod1a <- semTools::lavaan.mi( lavmodel , datlist1a)
summary(mod1a , standardized=TRUE, rsquare=TRUE)
#***********************
#*** Model 2: latent regression model with error-prone covariates X.err
mod2 <- tam.mml( dat , xsi.fixed=xsi.fixed , Y=X.err)
summary(mod2)
#***********************
#*** Model 3: Adjustment of covariates
cov.X.err <- cov( X.err )
# matrix of variance of measurement errors
measerr <- diag( c(0,var.err) )
# true covariance matrix
cov.X <- cov.X.err - measerr
# mean of X.err
mu <- colMeans(X.err)
muM <- matrix( mu , nrow=nrow(X.err) , ncol=ncol(X.err) , byrow=TRUE)
# reliability matrix
W <- solve( cov.X.err ) %*% cov.X
ident <- diag(2)
# adjusted scores of X
X.adj <- ( X.err - muM ) %*% W + muM %*% ( ident - W )
# fit latent regression model
mod3 <- tam.mml( dat , xsi.fixed=xsi.fixed , Y=X.adj)
summary(mod3)
# draw plausible values
res3a <- tam.pv( mod3 , normal.approx=TRUE , ntheta=200 , samp.regr=TRUE)
# create list of multiply imputed datasets
library(semTools)
#*** PV dataset 1
# datalist with error-prone covariates
datlist3a <- tampv2datalist( res3a , Y=X.err )
# datalist with adjusted covariates
datlist3b <- tampv2datalist( res3a , Y=X.adj )
# linear regression with measurement errors in X
lavmodel <- "
PV.Dim1 ~ X1 + X2true
X2true =~ 1*X2
X2 ~~ 0.3*X2 # = var.err
PV.Dim1 ~~ PV.Dim1
X2true ~~ X2true
"
mod3a <- semTools::lavaan.mi( lavmodel , datlist3a)
summary(mod3a , standardized=TRUE, rsquare=TRUE)
lavmodel <- "
PV.Dim1 ~ X1 + X2
PV.Dim1 ~~ PV.Dim1
"
mod3b <- semTools::lavaan.mi( lavmodel , datlist3b)
summary(mod3b , standardized=TRUE, rsquare=TRUE)
# => mod3b leads to the correct estimate.
#*********************************************
# plausible value imputation for abilities and error-prone
# covariates using the mice package
library(mice)
library(miceadds)
# creating the likelihood for plausible value for abilities
mod11 <- tam.mml( dat , xsi.fixed = xsi.fixed )
likePV <- IRT.likelihood(mod11)
# creating the likelihood for error-prone covariate X2
lavmodel <- "
X2true =~ 1*X2
X2 ~~ 0.3*X2
"
mod12 <- lavaan::cfa( lavmodel , data = as.data.frame(X.err) )
summary(mod12)
likeX2 <- IRTLikelihood.cfa( data= X.err , cfaobj=mod12)
str(likeX2)
#-- create data input for mice package
data <- data.frame( "PVA" = NA , "X1" = X[,1] , "X2" = NA )
vars <- colnames(data)
V <- length(vars)
predictorMatrix <- 1 - diag(V)
rownames(predictorMatrix) <- colnames(predictorMatrix) <- vars
imputationMethod <- rep("norm" , V )
names(imputationMethod) <- vars
imputationMethod[c("PVA","X2")] <- "2l.plausible.values"
#-- create argument lists for plausible value imputation
# likelihood and theta grid of plausible value derived from IRT model
like <- list( "PVA" = likePV , "X2" = likeX2 )
theta <- list( "PVA" = attr(likePV,"theta") ,
"X2" = attr(likeX2 , "theta") )
#-- initial imputations
data.init <- data
data.init$PVA <- mod11$person$EAP
data.init$X2 <- X.err[,"X2"]
#-- imputation using the mice and miceadds package
imp1 <- mice::mice( as.matrix(data) , predictorMatrix = predictorMatrix , m = 4, maxit = 6 ,
imputationMethod = imputationMethod , allow.na = TRUE ,
theta=theta , like=like , data.init=data.init )
summary(imp1)
# compute linear regression
mod4a <- with( imp1 , lm( PVA ~ X1 + X2 ) )
summary( pool(mod4a) )
Run the code above in your browser using DataLab