tam.pv: Plausible Value Imputation

Description

Plausible value imputation for objects of the classes tam and tam.mml (Adams & Wu, 2007). For converting generated plausible values into a list of multiply imputed datasets see tampv2datalist and the Examples 2 and 3 of this function.

Usage

tam.pv(tamobj, nplausible = 10, ntheta = 2000, normal.approx = FALSE,  samp.regr = FALSE, theta.model=FALSE, np.adj=8 )

Arguments

tamobj

Object of class tam or tam.mml

nplausible

Number of plausible values to be drawn

ntheta

Number of ability nodes for plausible value imputation. Note that in this function ability nodes are simulated for the whole sample, not for every person (contrary to the software ConQuest).

normal.approx

An optional logical indicating whether the individual posterior distributions should be approximated by a normal distribution? The default is FALSE. In the case normal.approx=TRUE (normal distribution approximation), the number of ability nodes ntheta can be substantially smaller than 2000, say 200 or 500. The normal approximation is implemented for unidimensional and multidimensional models.

samp.regr

An optional logical indicating whether regression coefficients should be fixed in the plausible value imputation or also sampled from their posterior distribution? The default is FALSE. Sampled regression coefficients are obtained by nonparametric bootstrap.

theta.model

Logical indicating whether the theta grid from the tamobj object should be used for plausible value imputation. In case of normal.approx=TRUE, this should be sufficient in many applications.

np.adj

This parameter defines the "spread" of the random theta values for drawing plausible values when normal.approx=FALSE. If $s_{EAP}$ denotes the standard deviation of the posterior distribution of theta (in the one-dimensional case), then theta is simulated from a normal distribution with standard deviation np.adj times $s_{EAP}$.

...

Further arguments to be passed

Value

pv: A data frame containing a person identifier (pid) and plausible values denoted by PVxx.Dimyy which is the xxth plausible value of dimension yy.
hwt: Individual posterior distribution evaluated at the ability grid theta
hwt1: Cumulated individual posterior distribution
theta: Simulated ability nodes

References

Adams, R. J., & Wu, M. L. (2007). The mixed-coefficients multinomial logit model. A generalized form of the Rasch model. In M. von Davier & C. H. Carstensen (Eds.): Multivariate and mixture distribution Rasch models: Extensions and applications (pp. 55-76). New York: Springer.

Examples

Run this code

#############################################################################
# EXAMPLE 1: Dichotomous unidimensional data sim.rasch
#############################################################################

data(data.sim.rasch)
resp <- data.sim.rasch[ 1:500 , 1:15 ]  # select subsample of students and items

# estimate Rasch model
mod <- tam.mml(resp)

# draw 5 plausible values without a normality
# assumption of the posterior and 2000 ability nodes
pv1a <- tam.pv( mod , nplausible=5 , ntheta=2000 )

# draw 5 plausible values with a normality
# assumption of the posterior and 500 ability nodes
pv1b <- tam.pv( mod , nplausible=5 , ntheta=500 , normal.approx=TRUE )

# distribution of first plausible value from imputation pv1
hist(pv1a$pv$PV1.Dim1 )
# boxplot of all plausible values from imputation pv2
boxplot(pv1b$pv[ , 2:6 ] )

## Not run: 
# #############################################################################
# # EXAMPLE 2: Unidimensional plausible value imputation with
# #            background variables; dataset data.pisaRead from sirt package
# #############################################################################
# 
# data(data.pisaRead, package="sirt")
# dat <- data.pisaRead$data
#   ##   > colnames(dat)
#   ##    [1] "idstud"   "idschool" "female"   "hisei"    "migra"    "R432Q01" 
#   ##    [7] "R432Q05"  "R432Q06"  "R456Q01"  "R456Q02"  "R456Q06"  "R460Q01" 
#   ##   [13] "R460Q05"  "R460Q06"  "R466Q02"  "R466Q03"  "R466Q06" 
# 
# ## Note that reading items have variable names starting with R4
# 
# # estimate 2PL model without covariates
# items <- grep("R4" , colnames(dat) )    # select test items from data
# mod2a <- tam.mml.2pl( resp=dat[,items] )
# summary(mod2a)
# 
# # fix item parameters for plausible value imputation
#    # fix item intercepts by defining xsi.fixed
# xsi0 <- mod2a$xsi$xsi
# xsi.fixed <- cbind( seq(1,length(xsi0)) , xsi0 )
#    # fix item slopes using mod2$B
# # matrix of latent regressors female, hisei and migra
# Y <- dat[ , c("female" , "hisei" , "migra") ]
# mod2b <- tam.mml( resp=dat[,items] , B=mod2a$B , xsi.fixed=xsi.fixed , Y=Y, 
#             pid = dat$idstud)
# 
# # plausible value imputation with normality assumption
# # and ignoring uncertainty about regression coefficients
# #    -> the default is samp.regr=FALSE
# pv2c <- tam.pv( mod2b , nplausible=10 , ntheta=500 , normal.approx=TRUE )
# # sampling of regression coefficients
# pv2d <- tam.pv( mod2b , nplausible=10 , ntheta=500 , samp.regr=TRUE)
# # sampling of regression coefficients, normal approximation using the
# # theta grid from the model
# pv2e <- tam.pv( mod2b , samp.regr=TRUE , theta.model=TRUE , normal.approx=TRUE)
# 
# #--- create list of multiply imputed datasets with plausible values
# # define dataset with covariates to be matched
# Y <- dat[ , c("idstud" , "idschool" , "female" , "hisei" , "migra") ]
# 
# # define plausible value names
# pvnames <- c("PVREAD")
# # create list of imputed datasets
# datlist1 <- tampv2datalist( pv2e , pvnames = pvnames , Y=Y , Y.pid="idstud")
# str(datlist1)
# 
# # create a matrix of covariates with different set of students than in pv2e
# Y1 <- Y[ seq( 1 , 600 , 2 ) , ]
# # create list of multiply imputed datasets
# datlist2 <- tampv2datalist( pv2e , pvnames = c("PVREAD"), Y=Y1 , Y.pid="idstud")
# 
# #--- fit some models in lavaan and semTools
# library(lavaan)
# library(semTools)
# 
# #*** Model 1: Linear regression
# lavmodel <- "
#    PVREAD ~ migra + hisei
#    PVREAD ~~ PVREAD
#         "
# mod1 <- semTools::lavaan.mi( lavmodel , data = datlist1 , m=0)        
# summary(mod1 , standardized=TRUE, rsquare=TRUE)
# 
# # apply lavaan for third imputed dataset
# mod1a <- lavaan::lavaan( lavmodel , data = datlist1[[3]] )        
# summary(mod1a , standardized=TRUE, rsquare=TRUE)
# 
# # compare with mod1 by looping over all datasets
# mod1b <- lapply( datlist1 , FUN = function(dat0){
#     mod1a <- lavaan( lavmodel , data = dat0 )        
#     coef( mod1a)
#         } )
# mod1b        
# mod1b <- matrix( unlist( mod1b ) , ncol= length( coef(mod1)) , byrow=TRUE )
# mod1b
# round( colMeans(mod1b) , 3 )
# coef(mod1)   # -> results coincide
# 
# #*** Model 2: Path model
# lavmodel <- "
#    PVREAD ~ migra + hisei
#    hisei ~ migra
#    PVREAD ~~ PVREAD
#    hisei ~~ hisei
#         "
# mod2 <- semTools::lavaan.mi( lavmodel , data = datlist1 )        
# summary(mod2 , standardized=TRUE, rsquare=TRUE)
# # fit statistics
# inspect( mod2 , what="fit")
# 
# #--- using mitools package
# library(mitools)
# # convert datalist into an object of class imputationList
# datlist1a <- mitools::imputationList( datlist1 )
# # fit linear regression
# mod1c <- with( datlist1a , stats::lm( PVREAD ~ migra + hisei ) )
# summary( mitools::MIcombine(mod1c) )
# 
# #--- using mice package
# library(mice)
# library(miceadds)
# # convert datalist into a mids object
# mids1 <- miceadds::datalist2mids( datlist1 )
# # fit linear regression
# mod1c <- with( mids1 , stats::lm( PVREAD ~ migra + hisei ) )
# summary( mice::pool(mod1c) )
# 
# #############################################################################
# # SIMULATED EXAMPLE 3: Multidimensional plausible value imputation
# #############################################################################
# 
# # (1) simulate some data
# set.seed(6778)
# library(mvtnorm)
# N <- 1000      
# Y <- cbind( stats::rnorm( N ) , stats::rnorm(N) )
# theta <- mvtnorm::rmvnorm( N,mean=c(0,0), sigma=matrix( c(1,.5,.5,1) , 2 , 2 ))
# theta[,1] <- theta[,1] + .4 * Y[,1] + .2 * Y[,2]  # latent regression model
# theta[,2] <- theta[,2] + .8 * Y[,1] + .5 * Y[,2]  # latent regression model
# I <- 20
# p1 <- stats::plogis( outer( theta[,1] , seq( -2 , 2 , len=I ) , "-" ) )
# resp1 <- 1 * ( p1 > matrix( stats::runif( N*I ) , nrow=N , ncol=I ) )
# p1 <- stats::plogis( outer( theta[,2] , seq( -2 , 2 , len=I ) , "-" ) )
# resp2 <- 1 * ( p1 > matrix( stats::runif( N*I ) , nrow=N , ncol=I ) )
# resp <- cbind(resp1,resp2)
# colnames(resp) <- paste("I" , 1:(2*I), sep="")
#   
# # (2) define loading Matrix
# Q <- array( 0 , dim = c( 2*I , 2 ))
# Q[cbind(1:(2*I), c( rep(1,I) , rep(2,I) ))] <- 1
# 
# # (3) fit latent regression model
# mod <- tam.mml( resp=resp , Y=Y , Q=Q , control=list(maxiter=5) )
# 
# # (4) draw plausible values with normal approximation using the orginal theta grid
# pv1 <- tam.pv( mod , normal.approx=TRUE , theta.model = TRUE )
# 
# # (5) convert plausible values to list of imputed datasets
# Y1 <- data.frame(Y)
# colnames(Y1) <- paste0("Y",1:2)
# pvnames <- c("PVFA","PVFB")
# # create list of imputed datasets
# datlist1 <- tampv2datalist( pv1 , pvnames = pvnames , Y=Y1 )
# str(datlist1)
# 
# # (6) apply statistical models
# library(semTools)
# # define linear regression
# lavmodel <- "
#    PVFA ~ Y1 + Y2
#    PVFA ~~ PVFA
#         "
# mod1 <- semTools::lavaan.mi( lavmodel , data = datlist1 )        
# summary(mod1 , standardized=TRUE, rsquare=TRUE)
# 
# #############################################################################
# # SIMULATED EXAMPLE 4: Plausible value imputation with measurement 
# #                      errors in covariates
# #############################################################################
# 
# library(sirt)
# set.seed(7756)
# N <- 2000    # number of persons
# I <- 10     # number of items
# 
# # simulate covariates
# X <- mvrnorm( N , mu=c(0,0) , Sigma = matrix( c(1,.5,.5,1) ,2 ,2 ) )
# colnames(X) <- paste0("X",1:2)
# # second covariate with measurement error with variance var.err
# var.err <- .3
# X.err <- X
# X.err[,2] <-X[,2] + rnorm(N, sd = sqrt(var.err) )
# # simulate theta
# theta <- .5*X[,1] + .4*X[,2] + rnorm( N , sd = .5 )
# # simulate item responses
# itemdiff <- seq( -2 , 2 , length=I)  # item difficulties
# dat <- sirt::sim.raschtype( theta , b = itemdiff )
# 
# #***********************
# #*** Model 0: Regression model with true variables
# mod0 <- stats::lm( theta ~ X )
# summary(mod0)
# 
# #***********************
# #*** Model 1: latent regression model with true covariates X
# xsi.fixed <- cbind( 1:I , itemdiff )
# mod1 <- tam.mml( dat , xsi.fixed=xsi.fixed , Y=X)
# summary(mod1)
# 
# # draw plausible values
# res1a <- tam.pv( mod1 , normal.approx=TRUE , ntheta=200 , samp.regr=TRUE)
# # create list of multiply imputed datasets
# library(miceadds)
# datlist1a <- tampv2datalist( res1a , Y=X ) 
# imp1a <- miceadds::datalist2mids( datlist1a )
# 
# # fit linear model
# # linear regression with measurement errors in X
# lavmodel <- "
#    PV.Dim1 ~ X1 + X2true
#    X2true =~ 1*X2
#    X2 ~~ 0.3*X2  # = var.err
#    PV.Dim1 ~~ PV.Dim1
#    X2true ~~ X2true
#         "              
# mod1a <- semTools::lavaan.mi( lavmodel , datlist1a)
# summary(mod1a , standardized=TRUE, rsquare=TRUE)
# 
# #***********************
# #*** Model 2: latent regression model with error-prone covariates X.err
# mod2 <- tam.mml( dat , xsi.fixed=xsi.fixed , Y=X.err)
# summary(mod2)
# 
# #***********************
# #*** Model 3: Adjustment of covariates
# 
# cov.X.err <- cov( X.err )
# # matrix of variance of measurement errors
# measerr <- diag( c(0,var.err) )
# # true covariance matrix
# cov.X <- cov.X.err - measerr
# # mean of X.err
# mu <- colMeans(X.err)
# muM <- matrix( mu , nrow=nrow(X.err) , ncol=ncol(X.err) , byrow=TRUE)
# # reliability matrix
# W <- solve( cov.X.err ) %*% cov.X
# ident <- diag(2)
# # adjusted scores of X
# X.adj <- ( X.err - muM ) %*% W   + muM %*% ( ident - W )
# 
# # fit latent regression model
# mod3 <- tam.mml( dat , xsi.fixed=xsi.fixed , Y=X.adj)
# summary(mod3)
# 
# # draw plausible values
# res3a <- tam.pv( mod3 , normal.approx=TRUE , ntheta=200 , samp.regr=TRUE)
# 
# # create list of multiply imputed datasets
# library(semTools)
# 
# #*** PV dataset 1
# # datalist with error-prone covariates
# datlist3a <- tampv2datalist( res3a , Y=X.err ) 
# # datalist with adjusted covariates
# datlist3b <- tampv2datalist( res3a , Y=X.adj ) 
# 
# # linear regression with measurement errors in X
# lavmodel <- "
#    PV.Dim1 ~ X1 + X2true
#    X2true =~ 1*X2
#    X2 ~~ 0.3*X2  # = var.err
#    PV.Dim1 ~~ PV.Dim1
#    X2true ~~ X2true
#         "              
# mod3a <- semTools::lavaan.mi( lavmodel , datlist3a)
# summary(mod3a , standardized=TRUE, rsquare=TRUE)
# 
# lavmodel <- "
#    PV.Dim1 ~ X1 + X2
#    PV.Dim1 ~~ PV.Dim1
#         "     
# mod3b <- semTools::lavaan.mi( lavmodel , datlist3b)
# summary(mod3b , standardized=TRUE, rsquare=TRUE)
# # => mod3b leads to the correct estimate.
# 
# #*********************************************
# # plausible value imputation for abilities and error-prone
# # covariates using the mice package
# 
# library(mice)
# library(miceadds)
# 
# # creating the likelihood for plausible value for abilities
# mod11 <- tam.mml( dat , xsi.fixed = xsi.fixed )
# likePV <- IRT.likelihood(mod11)
# # creating the likelihood for error-prone covariate X2
# lavmodel <- "
#   X2true =~ 1*X2
#   X2 ~~ 0.3*X2
#     "
# mod12 <- lavaan::cfa( lavmodel , data = as.data.frame(X.err) )
# summary(mod12)
# likeX2 <- IRTLikelihood.cfa( data= X.err , cfaobj=mod12)
# str(likeX2)
# 
# #-- create data input for mice package
# data <- data.frame( "PVA" = NA , "X1" = X[,1] , "X2" = NA  ) 
# vars <- colnames(data)
# V <- length(vars)
# predictorMatrix <- 1 - diag(V)
# rownames(predictorMatrix) <- colnames(predictorMatrix) <- vars
# imputationMethod <- rep("norm" , V )
# names(imputationMethod) <- vars
# imputationMethod[c("PVA","X2")] <- "2l.plausible.values"
# 
# #-- create argument lists for plausible value imputation
# # likelihood and theta grid of plausible value derived from IRT model
# like <- list( "PVA" = likePV  , "X2" = likeX2 )
# theta <- list( "PVA" = attr(likePV,"theta") ,
#                 "X2" = attr(likeX2 , "theta") )                     
# #-- initial imputations
# data.init <- data
# data.init$PVA <- mod11$person$EAP
# data.init$X2 <- X.err[,"X2"]
# 
# #-- imputation using the mice and miceadds package
# imp1 <- mice::mice( as.matrix(data) , predictorMatrix = predictorMatrix , m = 4, maxit = 6 ,  
#              imputationMethod = imputationMethod ,  allow.na = TRUE ,
#              theta=theta , like=like , data.init=data.init )
# summary(imp1)
# 
# # compute linear regression
# mod4a <- with( imp1 , stats::lm( PVA ~ X1 + X2 ) )
# summary( mice::pool(mod4a) )
# ## End(Not run)