#############################################################################
# EXAMPLE 1: Dichotomous unidimensional data sim.rasch
#############################################################################
data(data.sim.rasch)
resp <- data.sim.rasch[ 1:500 , 1:15 ] # select subsample of students and items
# estimate Rasch model
mod <- tam.mml(resp)
# draw 5 plausible values without a normality
# assumption of the posterior and 2000 ability nodes
pv1a <- tam.pv( mod , nplausible=5 , ntheta=2000 )
# draw 5 plausible values with a normality
# assumption of the posterior and 500 ability nodes
pv1b <- tam.pv( mod , nplausible=5 , ntheta=500 , normal.approx=TRUE )
# distribution of first plausible value from imputation pv1
hist(pv1a$pv$PV1.Dim1 )
# boxplot of all plausible values from imputation pv2
boxplot(pv1b$pv[ , 2:6 ] )
## Not run:
# #############################################################################
# # EXAMPLE 2: Unidimensional plausible value imputation with
# # background variables; dataset data.pisaRead from sirt package
# #############################################################################
#
# data(data.pisaRead, package="sirt")
# dat <- data.pisaRead$data
# ## > colnames(dat)
# ## [1] "idstud" "idschool" "female" "hisei" "migra" "R432Q01"
# ## [7] "R432Q05" "R432Q06" "R456Q01" "R456Q02" "R456Q06" "R460Q01"
# ## [13] "R460Q05" "R460Q06" "R466Q02" "R466Q03" "R466Q06"
#
# ## Note that reading items have variable names starting with R4
#
# # estimate 2PL model without covariates
# items <- grep("R4" , colnames(dat) ) # select test items from data
# mod2a <- tam.mml.2pl( resp=dat[,items] )
# summary(mod2a)
#
# # fix item parameters for plausible value imputation
# # fix item intercepts by defining xsi.fixed
# xsi0 <- mod2a$xsi$xsi
# xsi.fixed <- cbind( seq(1,length(xsi0)) , xsi0 )
# # fix item slopes using mod2$B
# # matrix of latent regressors female, hisei and migra
# Y <- dat[ , c("female" , "hisei" , "migra") ]
# mod2b <- tam.mml( resp=dat[,items] , B=mod2a$B , xsi.fixed=xsi.fixed , Y=Y,
# pid = dat$idstud)
#
# # plausible value imputation with normality assumption
# # and ignoring uncertainty about regression coefficients
# # -> the default is samp.regr=FALSE
# pv2c <- tam.pv( mod2b , nplausible=10 , ntheta=500 , normal.approx=TRUE )
# # sampling of regression coefficients
# pv2d <- tam.pv( mod2b , nplausible=10 , ntheta=500 , samp.regr=TRUE)
# # sampling of regression coefficients, normal approximation using the
# # theta grid from the model
# pv2e <- tam.pv( mod2b , samp.regr=TRUE , theta.model=TRUE , normal.approx=TRUE)
#
# #--- create list of multiply imputed datasets with plausible values
# # define dataset with covariates to be matched
# Y <- dat[ , c("idstud" , "idschool" , "female" , "hisei" , "migra") ]
#
# # define plausible value names
# pvnames <- c("PVREAD")
# # create list of imputed datasets
# datlist1 <- tampv2datalist( pv2e , pvnames = pvnames , Y=Y , Y.pid="idstud")
# str(datlist1)
#
# # create a matrix of covariates with different set of students than in pv2e
# Y1 <- Y[ seq( 1 , 600 , 2 ) , ]
# # create list of multiply imputed datasets
# datlist2 <- tampv2datalist( pv2e , pvnames = c("PVREAD"), Y=Y1 , Y.pid="idstud")
#
# #--- fit some models in lavaan and semTools
# library(lavaan)
# library(semTools)
#
# #*** Model 1: Linear regression
# lavmodel <- "
# PVREAD ~ migra + hisei
# PVREAD ~~ PVREAD
# "
# mod1 <- semTools::lavaan.mi( lavmodel , data = datlist1 , m=0)
# summary(mod1 , standardized=TRUE, rsquare=TRUE)
#
# # apply lavaan for third imputed dataset
# mod1a <- lavaan::lavaan( lavmodel , data = datlist1[[3]] )
# summary(mod1a , standardized=TRUE, rsquare=TRUE)
#
# # compare with mod1 by looping over all datasets
# mod1b <- lapply( datlist1 , FUN = function(dat0){
# mod1a <- lavaan( lavmodel , data = dat0 )
# coef( mod1a)
# } )
# mod1b
# mod1b <- matrix( unlist( mod1b ) , ncol= length( coef(mod1)) , byrow=TRUE )
# mod1b
# round( colMeans(mod1b) , 3 )
# coef(mod1) # -> results coincide
#
# #*** Model 2: Path model
# lavmodel <- "
# PVREAD ~ migra + hisei
# hisei ~ migra
# PVREAD ~~ PVREAD
# hisei ~~ hisei
# "
# mod2 <- semTools::lavaan.mi( lavmodel , data = datlist1 )
# summary(mod2 , standardized=TRUE, rsquare=TRUE)
# # fit statistics
# inspect( mod2 , what="fit")
#
# #--- using mitools package
# library(mitools)
# # convert datalist into an object of class imputationList
# datlist1a <- mitools::imputationList( datlist1 )
# # fit linear regression
# mod1c <- with( datlist1a , stats::lm( PVREAD ~ migra + hisei ) )
# summary( mitools::MIcombine(mod1c) )
#
# #--- using mice package
# library(mice)
# library(miceadds)
# # convert datalist into a mids object
# mids1 <- miceadds::datalist2mids( datlist1 )
# # fit linear regression
# mod1c <- with( mids1 , stats::lm( PVREAD ~ migra + hisei ) )
# summary( mice::pool(mod1c) )
#
# #############################################################################
# # SIMULATED EXAMPLE 3: Multidimensional plausible value imputation
# #############################################################################
#
# # (1) simulate some data
# set.seed(6778)
# library(mvtnorm)
# N <- 1000
# Y <- cbind( stats::rnorm( N ) , stats::rnorm(N) )
# theta <- mvtnorm::rmvnorm( N,mean=c(0,0), sigma=matrix( c(1,.5,.5,1) , 2 , 2 ))
# theta[,1] <- theta[,1] + .4 * Y[,1] + .2 * Y[,2] # latent regression model
# theta[,2] <- theta[,2] + .8 * Y[,1] + .5 * Y[,2] # latent regression model
# I <- 20
# p1 <- stats::plogis( outer( theta[,1] , seq( -2 , 2 , len=I ) , "-" ) )
# resp1 <- 1 * ( p1 > matrix( stats::runif( N*I ) , nrow=N , ncol=I ) )
# p1 <- stats::plogis( outer( theta[,2] , seq( -2 , 2 , len=I ) , "-" ) )
# resp2 <- 1 * ( p1 > matrix( stats::runif( N*I ) , nrow=N , ncol=I ) )
# resp <- cbind(resp1,resp2)
# colnames(resp) <- paste("I" , 1:(2*I), sep="")
#
# # (2) define loading Matrix
# Q <- array( 0 , dim = c( 2*I , 2 ))
# Q[cbind(1:(2*I), c( rep(1,I) , rep(2,I) ))] <- 1
#
# # (3) fit latent regression model
# mod <- tam.mml( resp=resp , Y=Y , Q=Q , control=list(maxiter=5) )
#
# # (4) draw plausible values with normal approximation using the orginal theta grid
# pv1 <- tam.pv( mod , normal.approx=TRUE , theta.model = TRUE )
#
# # (5) convert plausible values to list of imputed datasets
# Y1 <- data.frame(Y)
# colnames(Y1) <- paste0("Y",1:2)
# pvnames <- c("PVFA","PVFB")
# # create list of imputed datasets
# datlist1 <- tampv2datalist( pv1 , pvnames = pvnames , Y=Y1 )
# str(datlist1)
#
# # (6) apply statistical models
# library(semTools)
# # define linear regression
# lavmodel <- "
# PVFA ~ Y1 + Y2
# PVFA ~~ PVFA
# "
# mod1 <- semTools::lavaan.mi( lavmodel , data = datlist1 )
# summary(mod1 , standardized=TRUE, rsquare=TRUE)
#
# #############################################################################
# # SIMULATED EXAMPLE 4: Plausible value imputation with measurement
# # errors in covariates
# #############################################################################
#
# library(sirt)
# set.seed(7756)
# N <- 2000 # number of persons
# I <- 10 # number of items
#
# # simulate covariates
# X <- mvrnorm( N , mu=c(0,0) , Sigma = matrix( c(1,.5,.5,1) ,2 ,2 ) )
# colnames(X) <- paste0("X",1:2)
# # second covariate with measurement error with variance var.err
# var.err <- .3
# X.err <- X
# X.err[,2] <-X[,2] + rnorm(N, sd = sqrt(var.err) )
# # simulate theta
# theta <- .5*X[,1] + .4*X[,2] + rnorm( N , sd = .5 )
# # simulate item responses
# itemdiff <- seq( -2 , 2 , length=I) # item difficulties
# dat <- sirt::sim.raschtype( theta , b = itemdiff )
#
# #***********************
# #*** Model 0: Regression model with true variables
# mod0 <- stats::lm( theta ~ X )
# summary(mod0)
#
# #***********************
# #*** Model 1: latent regression model with true covariates X
# xsi.fixed <- cbind( 1:I , itemdiff )
# mod1 <- tam.mml( dat , xsi.fixed=xsi.fixed , Y=X)
# summary(mod1)
#
# # draw plausible values
# res1a <- tam.pv( mod1 , normal.approx=TRUE , ntheta=200 , samp.regr=TRUE)
# # create list of multiply imputed datasets
# library(miceadds)
# datlist1a <- tampv2datalist( res1a , Y=X )
# imp1a <- miceadds::datalist2mids( datlist1a )
#
# # fit linear model
# # linear regression with measurement errors in X
# lavmodel <- "
# PV.Dim1 ~ X1 + X2true
# X2true =~ 1*X2
# X2 ~~ 0.3*X2 # = var.err
# PV.Dim1 ~~ PV.Dim1
# X2true ~~ X2true
# "
# mod1a <- semTools::lavaan.mi( lavmodel , datlist1a)
# summary(mod1a , standardized=TRUE, rsquare=TRUE)
#
# #***********************
# #*** Model 2: latent regression model with error-prone covariates X.err
# mod2 <- tam.mml( dat , xsi.fixed=xsi.fixed , Y=X.err)
# summary(mod2)
#
# #***********************
# #*** Model 3: Adjustment of covariates
#
# cov.X.err <- cov( X.err )
# # matrix of variance of measurement errors
# measerr <- diag( c(0,var.err) )
# # true covariance matrix
# cov.X <- cov.X.err - measerr
# # mean of X.err
# mu <- colMeans(X.err)
# muM <- matrix( mu , nrow=nrow(X.err) , ncol=ncol(X.err) , byrow=TRUE)
# # reliability matrix
# W <- solve( cov.X.err ) %*% cov.X
# ident <- diag(2)
# # adjusted scores of X
# X.adj <- ( X.err - muM ) %*% W + muM %*% ( ident - W )
#
# # fit latent regression model
# mod3 <- tam.mml( dat , xsi.fixed=xsi.fixed , Y=X.adj)
# summary(mod3)
#
# # draw plausible values
# res3a <- tam.pv( mod3 , normal.approx=TRUE , ntheta=200 , samp.regr=TRUE)
#
# # create list of multiply imputed datasets
# library(semTools)
#
# #*** PV dataset 1
# # datalist with error-prone covariates
# datlist3a <- tampv2datalist( res3a , Y=X.err )
# # datalist with adjusted covariates
# datlist3b <- tampv2datalist( res3a , Y=X.adj )
#
# # linear regression with measurement errors in X
# lavmodel <- "
# PV.Dim1 ~ X1 + X2true
# X2true =~ 1*X2
# X2 ~~ 0.3*X2 # = var.err
# PV.Dim1 ~~ PV.Dim1
# X2true ~~ X2true
# "
# mod3a <- semTools::lavaan.mi( lavmodel , datlist3a)
# summary(mod3a , standardized=TRUE, rsquare=TRUE)
#
# lavmodel <- "
# PV.Dim1 ~ X1 + X2
# PV.Dim1 ~~ PV.Dim1
# "
# mod3b <- semTools::lavaan.mi( lavmodel , datlist3b)
# summary(mod3b , standardized=TRUE, rsquare=TRUE)
# # => mod3b leads to the correct estimate.
#
# #*********************************************
# # plausible value imputation for abilities and error-prone
# # covariates using the mice package
#
# library(mice)
# library(miceadds)
#
# # creating the likelihood for plausible value for abilities
# mod11 <- tam.mml( dat , xsi.fixed = xsi.fixed )
# likePV <- IRT.likelihood(mod11)
# # creating the likelihood for error-prone covariate X2
# lavmodel <- "
# X2true =~ 1*X2
# X2 ~~ 0.3*X2
# "
# mod12 <- lavaan::cfa( lavmodel , data = as.data.frame(X.err) )
# summary(mod12)
# likeX2 <- IRTLikelihood.cfa( data= X.err , cfaobj=mod12)
# str(likeX2)
#
# #-- create data input for mice package
# data <- data.frame( "PVA" = NA , "X1" = X[,1] , "X2" = NA )
# vars <- colnames(data)
# V <- length(vars)
# predictorMatrix <- 1 - diag(V)
# rownames(predictorMatrix) <- colnames(predictorMatrix) <- vars
# imputationMethod <- rep("norm" , V )
# names(imputationMethod) <- vars
# imputationMethod[c("PVA","X2")] <- "2l.plausible.values"
#
# #-- create argument lists for plausible value imputation
# # likelihood and theta grid of plausible value derived from IRT model
# like <- list( "PVA" = likePV , "X2" = likeX2 )
# theta <- list( "PVA" = attr(likePV,"theta") ,
# "X2" = attr(likeX2 , "theta") )
# #-- initial imputations
# data.init <- data
# data.init$PVA <- mod11$person$EAP
# data.init$X2 <- X.err[,"X2"]
#
# #-- imputation using the mice and miceadds package
# imp1 <- mice::mice( as.matrix(data) , predictorMatrix = predictorMatrix , m = 4, maxit = 6 ,
# imputationMethod = imputationMethod , allow.na = TRUE ,
# theta=theta , like=like , data.init=data.init )
# summary(imp1)
#
# # compute linear regression
# mod4a <- with( imp1 , stats::lm( PVA ~ X1 + X2 ) )
# summary( mice::pool(mod4a) )
# ## End(Not run)
Run the code above in your browser using DataLab