data.read: Dataset Reading

Description

This dataset contains $N=328$ students and $I=12$ items measuring reading competence. All 12 items are arranged into 3 testlets (items with common text stimulus) labeled as A, B and C. The allocation of items to testlets is indicated by their variable names.

Usage

data(data.read)

Arguments

format

A data frame with 328 persons on the following 12 variables. Rows correspond to persons and columns to items. The following items are included in data.read: Testlet A: A1, A2, A3, A4 Testlet B: B1, B2, B3, B4 Testlet C: C1, C2, C3, C4

Example Index

class.accuracy.rasch (Example 1), fit.isop (Example 1), greenyang.reliability (Example 1), isop.dich (Example 1), isop.scoring (Example 1), lavaan2mirt (Example 1), marginal.truescore.reliability (Example 3), mcmc.2pno (Example 1), mcmc.3pno.testlet (Example 1), mcmclist2coda (Example 1), mirt.wrapper (Example 2), mle.pcm.group (Example 2), modelfit.sirt (Example 1), noharm.sirt (Example 1), np.dich (Example 1), person.parameter.rasch.copula (Example 1), personfit.stat (Example 1), prmse.subscores.scales (Example 1), prob.guttman (Example 1), rasch.copula2 (Example 1), rasch.jml.biascorr (Example 1), rasch.mirtlc (Example 1), rasch.mml2 (Example 1), rasch.pairwise (Example 1), rasch.pml2 (Example 1), rasch.prox (Example 1), smirt (Example 2), stratified.cronbach.alpha (Example 1), tam2mirt (Example 1), testlet.yen.q3 (Example 1), tetrachoric2 (Example 1), truescore.irt (Example 2), unidim.test.csn (Example 1), wle.rasch (Example 1), wle.rasch.jackknife (Example 1), yen.q3 (Example 1),

Examples

Run this code

data(data.read)
dat <- data.read
I <- ncol(dat)

# list of needed packages for the following examples
packages <- scan(what="character")
     eRm  ltm  TAM mRm  CDM  mirt psychotools  IsingFit  igraph  qgraph  pcalg 
     poLCA  randomLCA psychomix MplusAutomation
     
# install packages     
if (FALSE){  # default is FALSE
   install.packages(packages)
   	  }
# load packages
for (pack in packages){ 
    library(pack, character.only=TRUE) 
        }

#*****************************************************
# Model 1: Rasch model
#*****************************************************

#----  M1a: rasch.mml2 (in sirt)
mod1a <- sirt::rasch.mml2(dat)
summary(mod1a)

#----  M1b: smirt (in sirt)
Qmatrix <- matrix(1,nrow=I , ncol=1)
mod1b <- sirt::smirt(dat,Qmatrix=Qmatrix)
summary(mod1b)

#----  M1c: gdm (in CDM)
theta.k <- seq(-6,6,len=21)
mod1c <- CDM::gdm(dat,theta.k=theta.k,irtmodel="1PL", skillspace="normal")
summary(mod1c)

#----  M1d: tam.mml (in TAM)
mod1d <- TAM::tam.mml( resp=dat )
summary(mod1d)

#----  M1e: RM (in eRm) 
mod1e <- eRm::RM( dat )
  # eRm uses Conditional Maximum Likelihood (CML) as the estimation method.
summary(mod1e)
eRm::plotPImap(mod1e)

#----  M1f: mrm (in mRm)
mod1f <- mRm::mrm( dat , cl=1)   # CML estimation
mod1f$beta  # item parameters

#----  M1g: mirt (in mirt)
mod1g <- mirt::mirt( dat , model=1 , itemtype="Rasch" , verbose=TRUE )
print(mod1g)
summary(mod1g)
coef(mod1g)
    # arrange coefficients in nicer layout
mirt.wrapper.coef(mod1g)$coef  

#----  M1h: rasch (in ltm)
mod1h <- ltm::rasch( dat  , control=list(verbose=TRUE ) )
summary(mod1h)
coef(mod1h)

#----  M1i: RaschModel.fit (in psychotools)
mod1i <- psychotools::RaschModel.fit(dat)  # CML estimation
summary(mod1i)
plot(mod1i)

#----  M1j: noharm.sirt (in sirt)
Fpatt <- matrix( 0 , I , 1 )
Fval <- 1 + 0*Fpatt
Ppatt <- Pval <- matrix(1,1,1)
mod1j <- sirt::noharm.sirt( dat=dat , Ppatt=Ppatt,Fpatt=Fpatt , Fval=Fval , Pval=Pval )
summary(mod1j)
  #   Normal-ogive model, multiply item discriminations with constant D=1.7. 
  #   The same holds for other examples with noharm.sirt and R2noharm.
plot(mod1j) 

#----  M1k: rasch.pml3 (in sirt)
mod1k <- sirt::rasch.pml3( dat=dat)
  #         pairwise marginal maximum likelihood estimation
summary(mod1k)

#----  M1l: running Mplus (using MplusAutomation package)
mplus_path <- "c:/Mplus7/Mplus.exe"    # locate Mplus executable
  # specify Mplus object
mplusmod <- MplusAutomation::mplusObject(
    TITLE = "1PL in Mplus ;" , 
    VARIABLE = paste0( "CATEGORICAL ARE " , paste0(colnames(dat),collapse=" ") ) , 
    MODEL = "
       ! fix all item loadings to 1
       F1 BY A1@1 A2@1 A3@1 A4@1 ;
       F1 BY B1@1 B2@1 B3@1 B4@1 ;
       F1 BY C1@1 C2@1 C3@1 C4@1 ;
       ! estimate variance
       F1 ;
            ",
    ANALYSIS = "ESTIMATOR=MLR;" , 
    OUTPUT = "stand;" ,
    usevariables = colnames(dat)  ,  rdata = dat )
  # write Mplus syntax
filename <- "mod1u"   # specify file name
  # create Mplus syntaxes
res2 <- MplusAutomation::mplusModeler(object = mplusmod , dataout = paste0(filename,".dat") , 
               modelout= paste0(filename,".inp"), run = 0 )         
  # run Mplus model
MplusAutomation::runModels( filefilter = inpfile ,  Mplus_command = mplus_path)
  # alternatively, the system() command can also be used
  # get results
mod1l <- MplusAutomation::readModels(target = getwd() , filefilter = filename )
mod1l$summaries    # summaries
mod1l$parameters$unstandardized   # parameter estimates

#*****************************************************
# Model 2: 2PL model
#*****************************************************

#----  M2a: rasch.mml2 (in sirt)
mod2a <- sirt::rasch.mml2(dat , est.a=1:I)
summary(mod2a)

#----  M2b: smirt (in sirt)
mod2b <- sirt::smirt(dat,Qmatrix=Qmatrix,est.a="2PL")
summary(mod2b)

#----  M2c: gdm (in CDM)
mod2c <- CDM::gdm(dat,theta.k=theta.k,irtmodel="2PL", skillspace="normal")
summary(mod2c)

#----  M2d: tam.mml (in TAM)
mod2d <- TAM::tam.mml.2pl( resp=dat )
summary(mod2d)

#----  M2e: mirt (in mirt)
mod2e <- mirt::mirt( dat , model=1 , itemtype="2PL" )
print(mod2e)
summary(mod2e)
mirt.wrapper.coef(mod1g)$coef

#----  M2f: ltm (in ltm)
mod2f <- ltm::ltm( dat ~ z1 , control=list(verbose=TRUE ) )
summary(mod2f)
coef(mod2f)
plot(mod2f)

#----  M2g: R2noharm (in NOHARM, running from within R using sirt package)
  # define noharm.path where 'NoharmCL.exe' is located
noharm.path <- "c:/NOHARM"
  # covariance matrix
P.pattern <- matrix( 1 , ncol=1 , nrow=1 )
P.init <- P.pattern
P.init[1,1] <- 1
  # loading matrix
F.pattern <- matrix(1,I,1)
F.init <- F.pattern
  # estimate model
mod2g <- sirt::R2noharm( dat = dat , model.type="CFA" , F.pattern = F.pattern , 
             F.init = F.init , P.pattern = P.pattern , P.init = P.init , 
             writename = "ex2g" , noharm.path = noharm.path  , dec ="," )
summary(mod2g)

#----  M2h: noharm.sirt (in sirt)
mod2h <- sirt::noharm.sirt( dat=dat , Ppatt=P.pattern,Fpatt=F.pattern , 
              Fval=F.init , Pval=P.init )
summary(mod2h)
plot(mod2h)

#----  M2i: rasch.pml2 (in sirt)
mod2i <- sirt::rasch.pml2(dat, est.a=1:I)
summary(mod2i)

#*****************************************************
# Model 3: 3PL model (note that results can be quite unstable!)
#*****************************************************

#----  M3a: rasch.mml2 (in sirt)
mod3a <- sirt::rasch.mml2(dat , est.a=1:I, est.c=1:I)
summary(mod3a)

#----  M3b: smirt (in sirt)
mod3b <- sirt::smirt(dat,Qmatrix=Qmatrix,est.a="2PL" , est.c=1:I)
summary(mod3b)

#----  M3c: mirt (in mirt)
mod3c <- mirt::mirt( dat , model=1 , itemtype="3PL" , verbose=TRUE)
summary(mod3c)
coef(mod3c)
  # stabilize parameter estimating using informative priors for guessing parameters
mirtmodel <- mirt::mirt.model("
            F = 1-12
            PRIOR = (1-12, g, norm, -1.38, 0.25)
            ")
  # a prior N(-1.38,.25) is specified for transformed guessing parameters: qlogis(g)
  # simulate values from this prior for illustration
N <- 100000
logit.g <- rnorm(N, mean=-1.38 , sd=sqrt(.5) )
plot( density(logit.g) )  # transformed qlogis(g)
plot( density(plogis(logit.g)) )  # g parameters
  # estimate 3PL with priors
mod3c1 <- mirt::mirt(dat, mirtmodel, itemtype = "3PL",verbose=TRUE)
coef(mod3c1)
  # In addition, set upper bounds for g parameters of .35
mirt.pars <- mirt::mirt( dat , mirtmodel , itemtype = "3PL" ,  pars="values")
ind <- which( mirt.pars$name == "g" )
mirt.pars[ ind , "value" ] <- plogis(-1.38)
mirt.pars[ ind , "ubound" ] <- .35
  # prior distribution for slopes
ind <- which( mirt.pars$name == "a1" )
mirt.pars[ ind , "prior_1" ] <- 1.3
mirt.pars[ ind , "prior_2" ] <- 2
mod3c2 <- mirt::mirt(dat, mirtmodel, itemtype = "3PL",
                pars=mirt.pars,verbose=TRUE , technical=list(NCYCLES=100) )
coef(mod3c2)
mirt.wrapper.coef(mod3c2)

#----  M3d: ltm (in ltm)
mod3d <- ltm::tpm( dat , control=list(verbose=TRUE ) , max.guessing=.3)
summary(mod3d)
coef(mod3d) # => numerical instabilities

#*****************************************************
# Model 4: 3-dimensional Rasch model
#*****************************************************

# define Q-matrix
Q <- matrix( 0 , nrow=12 , ncol=3 )
Q[ cbind(1:12 , rep(1:3,each=4) ) ] <- 1
rownames(Q) <- colnames(dat)
colnames(Q) <- c("A","B","C")

# define nodes
theta.k <- seq(-6,6,len=13 )

#----  M4a: smirt (in sirt)
mod4a <- sirt::smirt(dat,Qmatrix=Q,irtmodel="comp" , theta.k=theta.k , maxiter=30)
summary(mod4a)

#----  M4b: rasch.mml2 (in sirt)
mod4b <- sirt::rasch.mml2(dat,Q=Q,theta.k=theta.k , mmliter=30)
summary(mod4b)

#----  M4c: gdm (in CDM)
mod4c <- CDM::gdm( dat , irtmodel="1PL" , theta.k=theta.k , skillspace="normal" , 
            Qmatrix=Q , maxiter=30 , centered.latent=TRUE )
summary(mod4c)

#----  M4d: tam.mml (in TAM)
mod4d <- TAM::tam.mml( resp=dat , Q=Q , control=list(nodes=theta.k , maxiter=30) )
summary(mod4d)

#----  M4e: R2noharm (in NOHARM, running from within R using sirt package)
noharm.path <- "c:/NOHARM"
  # covariance matrix
P.pattern <- matrix( 1 , ncol=3 , nrow=3 )
P.init <- 0.8+0*P.pattern
diag(P.init) <- 1
  # loading matrix
F.pattern <- 0*Q
F.init <- Q
  # estimate model
mod4e <- sirt::R2noharm( dat = dat , model.type="CFA" , F.pattern = F.pattern , 
    F.init = F.init , P.pattern = P.pattern , P.init = P.init , 
    writename = "ex4e" , noharm.path = noharm.path  , dec ="," )
summary(mod4e)

#----  M4f: mirt (in mirt)
cmodel <- mirt::mirt.model("
     F1 = 1-4
     F2 = 5-8 
     F3 = 9-12
     # equal item slopes correspond to the Rasch model
     CONSTRAIN = (1-4, a1), (5-8, a2) , (9-12,a3)
     COV = F1*F2, F1*F3 , F2*F3 
     " )
mod4f <- mirt::mirt(dat, cmodel , verbose=TRUE)
summary(mod4f)

#*****************************************************
# Model 5: 3-dimensional 2PL model
#*****************************************************

#----  M5a: smirt (in sirt)
mod5a <- sirt::smirt(dat,Qmatrix=Q,irtmodel="comp" , est.a="2PL" , theta.k=theta.k , 
                 maxiter=30)
summary(mod5a)

#----  M5b: rasch.mml2 (in sirt)
mod5b <- sirt::rasch.mml2(dat,Q=Q,theta.k=theta.k ,est.a=1:12, mmliter=30)
summary(mod5b)

#----  M5c: gdm (in CDM)
mod5c <- CDM::gdm( dat , irtmodel="2PL" , theta.k=theta.k , skillspace="loglinear" , 
            Qmatrix=Q , maxiter=30 , centered.latent=TRUE ,
            standardized.latent=TRUE)
summary(mod5c)

#----  M5d: tam.mml (in TAM)
mod5d <- TAM::tam.mml.2pl( resp=dat , Q=Q , control=list(nodes=theta.k , maxiter=30) )
summary(mod5d)

#----  M5e: R2noharm (in NOHARM, running from within R using sirt package)
noharm.path <- "c:/NOHARM"
  # covariance matrix
P.pattern <- matrix( 1 , ncol=3 , nrow=3 )
diag(P.pattern) <- 0
P.init <- 0.8+0*P.pattern
diag(P.init) <- 1
  # loading matrix
F.pattern <- Q
F.init <- Q
  # estimate model
mod5e <- sirt::R2noharm( dat = dat , model.type="CFA" , F.pattern = F.pattern , 
    F.init = F.init , P.pattern = P.pattern , P.init = P.init , 
    writename = "ex5e" , noharm.path = noharm.path  , dec ="," )
summary(mod5e)

#----  M5f: mirt (in mirt)
cmodel <- mirt::mirt.model("
   F1 = 1-4
   F2 = 5-8
   F3 = 9-12
   COV = F1*F2, F1*F3 , F2*F3 
   "  )
mod5f <- mirt::mirt(dat, cmodel , verbose=TRUE)
summary(mod5f)

#*****************************************************
# Model 6: Network models (Graphical models)
#*****************************************************

#----  M6a: Ising model using the IsingFit package (undirected graph)
#        - fit Ising model using the "OR rule" (AND=FALSE)
mod6a <- IsingFit::IsingFit(x=dat, family="binomial" , AND=FALSE)
summary(mod6a)
##           Network Density:                 0.29 
##    Gamma:                  0.25 
##    Rule used:              Or-rule 
# plot results
qgraph(mod6a$weiadj,fade = FALSE)

#**-- graph estimation using pcalg package

# some packages from Bioconductor must be downloaded at first (if not yet done)
if (FALSE){  # set 'if (TRUE)' if packages should be downloaded 
     source("http://bioconductor.org/biocLite.R")
     biocLite("RBGL")
     biocLite("Rgraphviz")
	}

#----  M6b: graph estimation based on Pearson correlations
V <- colnames(dat)
n <- nrow(dat)
mod6b <- pcalg::pc(suffStat = list(C = cor(dat), n = n ),
             indepTest = gaussCItest, ## indep.test: partial correlations
             alpha=0.05, labels = V, verbose = TRUE)
plot(mod6b)
# plot in qgraph package
qgraph::qgraph(mod6b , label.color= rep( c( "red" , "blue","darkgreen" ) , each=4 ) ,
         edge.color="black")
summary(mod6b)

#----  M6c: graph estimation based on tetrachoric correlations
mod6c <- pcalg::pc(suffStat = list(C = tetrachoric2(dat)$rho, n = n ),
             indepTest = gaussCItest, alpha=0.05, labels = V, verbose = TRUE)
plot(mod6c)
summary(mod6c)

#----  M6d: Statistical implicative analysis (in sirt)
mod6d <- sirt::sia.sirt(dat , significance=.85 )
  # plot results with igraph and qgraph package
plot( mod6d$igraph.obj  , vertex.shape="rectangle" , vertex.size=30 )
qgraph::qgraph( mod6d$adj.matrix )

#*****************************************************
# Model 7: Latent class analysis with 3 classes
#*****************************************************

#----  M7a: randomLCA (in randomLCA)
  #        - use two trials of starting values
mod7a <- randomLCA::randomLCA(dat,nclass=3, notrials=2, verbose=TRUE)
summary(mod7a)
plot(mod7a,type="l" , xlab="Item")

#----  M7b: rasch.mirtlc (in sirt)
mod7b <- sirt::rasch.mirtlc( dat , Nclasses = 3 ,seed= -30 ,  nstarts=2 )   
summary(mod7b)
matplot( t(mod7b$pjk) , type="l" , xlab="Item" )

#----  M7c: poLCA (in poLCA)
  #   define formula for outcomes
f7c <- paste0( "cbind(" , paste0(colnames(dat),collapse=",") , ") ~ 1 " )
dat1 <- as.data.frame( dat + 1 ) # poLCA needs integer values from 1,2,..
mod7c <- poLCA::poLCA( as.formula(f7c),dat1,nclass=3 , verbose=TRUE)
plot(mod7c)

#----  M7d: gom.em (in sirt) 
  #    - the latent class model is a special grade of membership model
mod7d <- sirt::gom.em( dat , K=3 , problevels=c(0,1) ,  model="GOM"  )            
summary(mod7d)

#---- - M7e: mirt (in mirt)
  # define three latent classes
Theta <- diag(3)
  # define mirt model
I <- ncol(dat)  # I = 12
mirtmodel <- mirt::mirt.model("
        C1 = 1-12
        C2 = 1-12 
        C3 = 1-12
        ")
  # get initial parameter values
mod.pars <- mirt::mirt(dat, model=mirtmodel ,  pars = "values")   
  # modify parameters: only slopes refer to item-class probabilities
set.seed(9976)
  # set starting values for class specific item probabilities
mod.pars[ mod.pars$name == "d" ,"value" ]  <- 0
mod.pars[ mod.pars$name == "d" ,"est" ]  <- FALSE
b1 <- qnorm( colMeans( dat ) )
mod.pars[ mod.pars$name == "a1" ,"value" ]  <- b1
  # random starting values for other classes
mod.pars[ mod.pars$name %in% c("a2","a3") ,"value" ]  <- b1+runif( 12*2 , -1 ,1 )
mod.pars
  #** define prior for latent class analysis
lca_prior <- function(Theta,Etable){
  # number of latent Theta classes
  TP <- nrow(Theta)
  # prior in initial iteration
  if ( is.null(Etable) ){ prior <- rep( 1/TP , TP ) }    
  # process Etable (this is correct for datasets without missing data)
  if ( ! is.null(Etable) ){  
    # sum over correct and incorrect expected responses 
    prior <- ( rowSums(Etable[ , seq(1,2*I,2)]) + rowSums(Etable[,seq(2,2*I,2)]) )/I
                 }
  prior <- prior / sum(prior)  
  return(prior)
}
  #** estimate model
mod7e <- mirt::mirt(dat, mirtmodel , pars = mod.pars , verbose=TRUE , 
            technical = list( customTheta=Theta , customPriorFun = lca_prior) )
  # compare estimated results
print(mod7e)
summary(mod7b)
  # The number of estimated parameters is incorrect because mirt does not correctly count
  # estimated parameters from the user customized  prior distribution.
mod7e@nest <- as.integer(sum(mod.pars$est) + 2)  # two additional class probabilities
  # extract log-likelihood
mod7e@logLik
  # compute AIC and BIC
( AIC <- -2*mod7e@logLik+2*mod7e@nest )
( BIC <- -2*mod7e@logLik+log(mod7e@Data$N)*mod7e@nest )
  # RMSEA and SRMSR fit statistic
mirt::M2(mod7e)     # TLI and CFI does not make sense in this example            
  #** extract item parameters
mirt.wrapper.coef(mod7e)
  #** extract class-specific item-probabilities
probs <- apply( coef1[ , c("a1","a2","a3") ] , 2 , plogis )
matplot( probs , type="l" , xlab="Item" , main="mirt::mirt")
  #** inspect estimated distribution
mod7e@Theta
mod7e@Prior[[1]]

#*****************************************************
# Model 8: Mixed Rasch model with two classes
#*****************************************************

#----  M8a: raschmix (in psychomix)
mod8a <- psychomix::raschmix(data= as.matrix(dat) , k = 2, scores = "saturated")
summary(mod8a)

#----  M8b: mrm (in mRm)
mod8b <- mRm::mrm(data.matrix=dat, cl=2)
mod8b$conv.to.bound
plot(mod8b)
print(mod8b)

#----  M8c: mirt (in mirt)
  #* define theta grid
theta.k <- seq( -5 , 5 , len=9 )
TP <- length(theta.k)
Theta <- matrix( 0 , nrow=2*TP , ncol=4)
Theta[1:TP,1:2] <- cbind(theta.k , 1 )
Theta[1:TP + TP,3:4] <- cbind(theta.k , 1 )
Theta
  # define model
I <- ncol(dat)  # I = 12
mirtmodel <- mirt::mirt.model("
        F1a = 1-12  # slope Class 1
        F1b = 1-12  # difficulty Class 1
        F2a = 1-12  # slope Class 2
        F2b = 1-12  # difficulty Class 2
        CONSTRAIN = (1-12,a1),(1-12,a3)
        ")
  # get initial parameter values
mod.pars <- mirt::mirt(dat, model=mirtmodel ,  pars = "values")   
  # set starting values for class specific item probabilities
mod.pars[ mod.pars$name == "d" ,"value" ]  <- 0
mod.pars[ mod.pars$name == "d" ,"est" ]  <- FALSE
mod.pars[ mod.pars$name == "a1" ,"value" ]  <- 1
mod.pars[ mod.pars$name == "a3" ,"value" ]  <- 1
  # initial values difficulties
b1 <-  qlogis( colMeans(dat) )
mod.pars[ mod.pars$name == "a2" ,"value" ]  <- b1
mod.pars[ mod.pars$name == "a4" ,"value" ]  <- b1 + runif(I , -1 , 1)
  #* define prior for mixed Rasch analysis
mixed_prior <- function(Theta,Etable){
  NC <- 2   # number of theta classes
  TP <- nrow(Theta) / NC
  prior1 <- dnorm( Theta[1:TP,1] )
  prior1 <- prior1 / sum(prior1) 
  if ( is.null(Etable) ){   prior <- c( prior1 , prior1 ) }
  if ( ! is.null(Etable) ){  
    prior <- ( rowSums( Etable[ , seq(1,2*I,2)] ) + 
                   rowSums( Etable[,seq(2,2*I,2)]) )/I
    a1 <- aggregate( prior , list( rep(1:NC , each=TP) ) , sum )
    a1[,2] <- a1[,2] / sum( a1[,2])
    # print some information during estimation
    cat( paste0( " Class proportions: " , 
              paste0( round(a1[,2] , 3 ) , collapse= " " ) ) , "\n")
    a1 <- rep( a1[,2] , each=TP )
    # specify mixture of two normal distributions
    prior <- a1*c(prior1,prior1)
         }  
  prior <- prior / sum(prior)  
  return(prior)
     }
  #* estimate model
mod8c <- mirt::mirt(dat, mirtmodel , pars=mod.pars , verbose=TRUE ,
        technical = list(  customTheta=Theta , customPriorFun = mixed_prior ) )
  # Like in Model 7e, the number of estimated parameters must be included.
mod8c@nest <- as.integer(sum(mod.pars$est) + 1)  
      # two class proportions and therefore one probability is freely estimated.
  #* extract item parameters
mirt.wrapper.coef(mod8c)
  #* estimated distribution
mod8c@Theta
mod8c@Prior

#*****************************************************
# Model 9: Mixed 2PL model with two classes
#*****************************************************

#---- - TO BE INCLUDED.

#*****************************************************
# Model 10: Rasch testlet model
#*****************************************************

#----  M10a: tam.fa (in TAM)
dims <- substring( colnames(dat),1,1 )  # define dimensions
mod10a <- TAM::tam.fa( resp=dat , irtmodel="bifactor1" , dims=dims ,
                control=list(maxiter=60) )
summary(mod10a)

#----  M10b: mirt (in mirt)
cmodel <- mirt::mirt.model("
        G = 1-12 
        A = 1-4
        B = 5-8
        C = 9-12
        CONSTRAIN = (1-12,a1), (1-4, a2), (5-8, a3) , (9-12,a4)    
      ")
mod10b <- mirt::mirt(dat, model=cmodel , verbose=TRUE)
summary(mod10b)
coef(mod10b)
mod10b@logLik   # equivalent is slot( mod10b , "logLik")

#alternatively, using a dimensional reduction approach (faster and better accuracy)
cmodel <- mirt::mirt.model("
      G = 1-12
      CONSTRAIN = (1-12,a1), (1-4, a2), (5-8, a3) , (9-12,a4)
     ")
item_bundles <- rep(c(1,2,3), each = 4)
mod10b1 <- mirt::bfactor(dat, model=item_bundles, model2=cmodel , verbose=TRUE)
coef(mod10b1)

#----  M10c: smirt (in sirt)
  # define Q-matrix
Qmatrix <- matrix(0,12,4)
Qmatrix[,1] <- 1
Qmatrix[ cbind( 1:12 , match( dims , unique(dims)) +1 ) ]  <- 1
  # uncorrelated factors
variance.fixed <- cbind( c(1,1,1,2,2,3) , c(2,3,4,3,4,4) , 0 )
  # estimate model
mod10c <- sirt::smirt( dat , Qmatrix=Qmatrix , irtmodel="comp" ,
              variance.fixed=variance.fixed , qmcnodes=1000 , maxiter=60)
summary(mod10c)

#*****************************************************
# Model 11: Bifactor model
#*****************************************************

#----  M11a: tam.fa (in TAM)
dims <- substring( colnames(dat),1,1 )  # define dimensions
mod11a <- TAM::tam.fa( resp=dat , irtmodel="bifactor2" , dims=dims ,
                 control=list(maxiter=60) )
summary(mod11a)

#----  M11b: bfactor (in mirt)
dims1 <- match( dims , unique(dims) )
mod11b <- mirt::bfactor(dat, model=dims1 , verbose=TRUE)
summary(mod11b)
coef(mod11b)
mod11b@logLik

#----  M11c: smirt (in sirt)
  # define Q-matrix
Qmatrix <- matrix(0,12,4)
Qmatrix[,1] <- 1
Qmatrix[ cbind( 1:12 , match( dims , unique(dims)) +1 ) ]  <- 1
  # uncorrelated factors
variance.fixed <- cbind( c(1,1,1,2,2,3) , c(2,3,4,3,4,4) , 0 )
  # estimate model
mod11c <- sirt::smirt( dat , Qmatrix=Qmatrix , irtmodel="comp" , est.a="2PL" ,  
                variance.fixed=variance.fixed , qmcnodes=1000 , maxiter=60)
summary(mod11c)

#*****************************************************
# Model 12: Located latent class model: Rasch model with three theta classes
#*****************************************************

# use 10th item as the reference item
ref.item <- 10
# ability grid
theta.k <- seq(-4,4,len=9)

#----  M12a: rasch.mirtlc (in sirt)
mod12a <- sirt::rasch.mirtlc(dat , Nclasses=3, modeltype="MLC1" , ref.item=ref.item )
summary(mod12a)

#----  M12b: gdm (in CDM)
theta.k <- seq(-1 , 1 , len=3)      # initial matrix
b.constraint <- matrix( c(10,1,0) , nrow=1,ncol=3)
  # estimate model
mod12b <- CDM::gdm( dat , theta.k = theta.k , skillspace="est" , irtmodel="1PL",
              b.constraint=b.constraint , maxiter=200)
summary(mod12b)

#----  M12c: mirt (in mirt)
items <- colnames(dat)
  # define three latent classes
Theta <- diag(3)
  # define mirt model
I <- ncol(dat)  # I = 12
mirtmodel <- mirt::mirt.model("
        C1 = 1-12
        C2 = 1-12 
        C3 = 1-12
        CONSTRAIN = (1-12,a1),(1-12,a2),(1-12,a3)
        ")
  # get parameters
mod.pars <- mirt(dat, model=mirtmodel ,  pars = "values")   
 # set starting values for class specific item probabilities
mod.pars[ mod.pars$name == "d" ,"value" ]  <- qlogis( colMeans(dat,na.rm=TRUE) )
  # set item difficulty of reference item to zero
ind <- which( ( paste(mod.pars$item) == items[ref.item] ) & 
               ( ( paste(mod.pars$name) == "d" ) ) )                        
mod.pars[ ind ,"value" ]  <- 0
mod.pars[ ind ,"est" ]  <- FALSE
  # initial values for a1, a2 and a3
mod.pars[ mod.pars$name %in% c("a1","a2","a3") ,"value" ]  <- c(-1,0,1)
mod.pars
  #* define prior for latent class analysis
lca_prior <- function(Theta,Etable){
  # number of latent Theta classes
  TP <- nrow(Theta)
  # prior in initial iteration
  if ( is.null(Etable) ){
    prior <- rep( 1/TP , TP )
              }    
  # process Etable (this is correct for datasets without missing data)
  if ( ! is.null(Etable) ){  
    # sum over correct and incorrect expected responses 
    prior <- ( rowSums( Etable[ , seq(1,2*I,2)] ) + rowSums( Etable[ , seq(2,2*I,2)] ) )/I
            }
  prior <- prior / sum(prior)  
  return(prior)
   }
 #* estimate model
mod12c <- mirt(dat, mirtmodel , technical = list(
            customTheta=Theta , customPriorFun = lca_prior) ,
            pars = mod.pars , verbose=TRUE )
  # estimated parameters from the user customized  prior distribution.
mod12c@nest <- as.integer(sum(mod.pars$est) + 2)            
  #* extract item parameters
coef1 <- mirt.wrapper.coef(mod12c)
  #* inspect estimated distribution
mod12c@Theta
coef1$coef[1,c("a1","a2","a3")]
mod12c@Prior[[1]]

#*****************************************************
# Model 13: Multidimensional model with discrete traits
#*****************************************************
# define Q-Matrix
Q <- matrix( 0 , nrow=12,ncol=3)
Q[1:4,1] <- 1
Q[5:8,2] <- 1
Q[9:12,3] <- 1
# define discrete theta distribution with 3 dimensions
Theta <- scan(what="character",nlines=1)
  000 100 010 001 110 101 011 111
Theta <- as.numeric( unlist( lapply( Theta , strsplit , split="")   ) )
Theta <- matrix(Theta , 8 , 3 , byrow=TRUE )
Theta

#----  Model 13a: din (in CDM)
mod13a <- CDM::din( dat , q.matrix=Q , rule="DINA")
summary(mod13a)
# compare used Theta distributions
cbind( Theta , mod13a$attribute.patt.splitted)

#----  Model 13b: gdm (in CDM)
mod13b <- CDM::gdm( dat , Qmatrix=Q , theta.k=Theta , skillspace="full")
summary(mod13b)

#----  Model 13c: mirt (in mirt)
  # define mirt model
I <- ncol(dat)  # I = 12
mirtmodel <- mirt::mirt.model("
        F1 = 1-4
        F2 = 5-8
        F3 = 9-12
        ")
  # get parameters
mod.pars <- mirt(dat, model=mirtmodel ,  pars = "values")   
# starting values d parameters (transformed guessing parameters)
ind <- which(  mod.pars$name == "d"  )
mod.pars[ind,"value"] <- qlogis(.2)
# starting values transformed slipping parameters
ind <- which( ( mod.pars$name %in% paste0("a",1:3)  ) &  ( mod.pars$est ) )
mod.pars[ind,"value"] <- qlogis(.8) - qlogis(.2)
mod.pars

  #* define prior for latent class analysis
lca_prior <- function(Theta,Etable){
  TP <- nrow(Theta)
  if ( is.null(Etable) ){
    prior <- rep( 1/TP , TP )
              }    
  if ( ! is.null(Etable) ){  
    prior <- ( rowSums( Etable[ , seq(1,2*I,2)] ) + rowSums( Etable[ , seq(2,2*I,2)] ) )/I
            }
  prior <- prior / sum(prior)  
  return(prior)
}
 #* estimate model
mod13c <- mirt(dat, mirtmodel , technical = list(
            customTheta=Theta , customPriorFun = lca_prior) ,
            pars = mod.pars , verbose=TRUE )
  # estimated parameters from the user customized  prior distribution.
mod13c@nest <- as.integer(sum(mod.pars$est) + 2)            
  #* extract item parameters
coef13c <- mirt.wrapper.coef(mod13c)$coef
  #* inspect estimated distribution
mod13c@Theta
mod13c@Prior[[1]]

 #-* comparisons of estimated  parameters
# extract guessing and slipping parameters from din
dfr <- coef(mod13a)[ , c("guess","slip") ]
colnames(dfr) <- paste0("din.",c("guess","slip") )
# estimated parameters from gdm
dfr$gdm.guess <- plogis(mod13b$item$b)
dfr$gdm.slip <- 1 - plogis( rowSums(mod13b$item[,c("b.Cat1","a.F1","a.F2","a.F3")] ) )
# estimated parameters from mirt
dfr$mirt.guess <- plogis( coef13c$d )
dfr$mirt.slip <- 1 - plogis( rowSums(coef13c[,c("d","a1","a2","a3")]) )
# comparison
round(dfr[, c(1,3,5,2,4,6)],3)
  ##      din.guess gdm.guess mirt.guess din.slip gdm.slip mirt.slip
  ##   A1     0.691     0.684      0.686    0.000    0.000     0.000
  ##   A2     0.491     0.489      0.489    0.031    0.038     0.036
  ##   A3     0.302     0.300      0.300    0.184    0.193     0.190
  ##   A4     0.244     0.239      0.240    0.337    0.340     0.339
  ##   B1     0.568     0.579      0.577    0.163    0.148     0.151
  ##   B2     0.329     0.344      0.340    0.344    0.326     0.329
  ##   B3     0.817     0.827      0.825    0.014    0.007     0.009
  ##   B4     0.431     0.463      0.456    0.104    0.089     0.092
  ##   C1     0.188     0.191      0.189    0.013    0.013     0.013
  ##   C2     0.050     0.050      0.050    0.239    0.238     0.239
  ##   C3     0.000     0.002      0.001    0.065    0.065     0.065
  ##   C4     0.000     0.004      0.000    0.212    0.212     0.212

# estimated class sizes
dfr <- data.frame( "Theta" = Theta , "din"=mod13a$attribute.patt$class.prob ,
                 "gdm"=mod13b$pi.k , "mirt" = mod13c@Prior[[1]])
# comparison
round(dfr,3)
  ##     Theta.1 Theta.2 Theta.3   din   gdm  mirt
  ##   1       0       0       0 0.039 0.041 0.040
  ##   2       1       0       0 0.008 0.009 0.009
  ##   3       0       1       0 0.009 0.007 0.008
  ##   4       0       0       1 0.394 0.417 0.412
  ##   5       1       1       0 0.011 0.011 0.011
  ##   6       1       0       1 0.017 0.042 0.037
  ##   7       0       1       1 0.042 0.008 0.016
  ##   8       1       1       1 0.480 0.465 0.467   

#*****************************************************
# Model 14: DINA model with two skills
#*****************************************************

# define some simple Q-Matrix (does not really make in this application)
Q <- matrix( 0 , nrow=12,ncol=2)
Q[1:4,1] <- 1
Q[5:8,2] <- 1
Q[9:12,1:2] <- 1
# define discrete theta distribution with 3 dimensions
Theta <- scan(what="character",nlines=1)
  00 10 01 11
Theta <- as.numeric( unlist( lapply( Theta , strsplit , split="")   ) )
Theta <- matrix(Theta , 4 , 2 , byrow=TRUE )
Theta

#----  Model 14a: din (in CDM)
mod14a <- CDM::din( dat , q.matrix=Q , rule="DINA")
summary(mod14a)
# compare used Theta distributions
cbind( Theta , mod14a$attribute.patt.splitted)

#----  Model 14b: mirt (in mirt)
  # define mirt model
I <- ncol(dat)  # I = 12
mirtmodel <- mirt::mirt.model("
        F1 = 1-4
        F2 = 5-8
        (F1*F2) = 9-12
        ")    
#-> constructions like (F1*F2*F3) are also allowed in mirt.model
  # get parameters
mod.pars <- mirt(dat, model=mirtmodel ,  pars = "values")   
# starting values d parameters (transformed guessing parameters)
ind <- which(  mod.pars$name == "d"  )
mod.pars[ind,"value"] <- qlogis(.2)
# starting values transformed slipping parameters
ind <- which( ( mod.pars$name %in% paste0("a",1:3)  ) &  ( mod.pars$est ) )
mod.pars[ind,"value"] <- qlogis(.8) - qlogis(.2)
mod.pars
 #* use above defined prior lca_prior
 # lca_prior <- function(prior,Etable) ...
 #* estimate model
mod14b <- mirt(dat, mirtmodel , technical = list(
            customTheta=Theta , customPriorFun = lca_prior) ,
            pars = mod.pars , verbose=TRUE )
  # estimated parameters from the user customized  prior distribution.
mod14b@nest <- as.integer(sum(mod.pars$est) + 2)            
  #* extract item parameters
coef14b <- mirt.wrapper.coef(mod14b)$coef

 #-* comparisons of estimated  parameters
# extract guessing and slipping parameters from din
dfr <- coef(mod14a)[ , c("guess","slip") ]
colnames(dfr) <- paste0("din.",c("guess","slip") )
# estimated parameters from mirt
dfr$mirt.guess <- plogis( coef14b$d )
dfr$mirt.slip <- 1 - plogis( rowSums(coef14b[,c("d","a1","a2","a3")]) )
# comparison
round(dfr[, c(1,3,2,4)],3)
  ##      din.guess mirt.guess din.slip mirt.slip
  ##   A1     0.674      0.671    0.030     0.030
  ##   A2     0.423      0.420    0.049     0.050
  ##   A3     0.258      0.255    0.224     0.225
  ##   A4     0.245      0.243    0.394     0.395
  ##   B1     0.534      0.543    0.166     0.164
  ##   B2     0.338      0.347    0.382     0.380
  ##   B3     0.796      0.802    0.016     0.015
  ##   B4     0.421      0.436    0.142     0.140
  ##   C1     0.850      0.851    0.000     0.000
  ##   C2     0.480      0.480    0.097     0.097
  ##   C3     0.746      0.746    0.026     0.026
  ##   C4     0.575      0.577    0.136     0.137

# estimated class sizes
dfr <- data.frame( "Theta" = Theta , "din"=mod13a$attribute.patt$class.prob ,
                    "mirt" = mod14b@Prior[[1]])
# comparison
round(dfr,3)
  ##     Theta.1 Theta.2   din  mirt
  ##   1       0       0 0.357 0.369
  ##   2       1       0 0.044 0.049
  ##   3       0       1 0.047 0.031
  ##   4       1       1 0.553 0.551  
  
#*****************************************************
# Model 15: Rasch model with non-normal distribution
#*****************************************************

# A non-normal theta distributed is specified by log-linear smoothing
# the distribution as described in
# Xu, X., & von Davier, M. (2008). Fitting the structured general diagnostic model 
# to NAEP data. ETS Research Report ETS RR-08-27. Princeton, ETS. 

# define theta grid
theta.k <- matrix( seq(-4,4,len=15) , ncol=1 )
# define design matrix for smoothing (up to cubic moments)
delta.designmatrix <- cbind( 1 , theta.k , theta.k^2 , theta.k^3 )
# constrain item fifficulty of fifth item (item B1) to zero
b.constraint <- matrix( c(5,1,0) , ncol=3 )

#----  Model 15a: gdm (in CDM)
mod15a <- CDM::gdm( dat , irtmodel="1PL" , theta.k=theta.k , 
               b.constraint=b.constraint  )
summary(mod15a)
 # plot estimated distribution
barplot( mod15a$pi.k[,1] , space=0 , names.arg = round(theta.k[,1],2) ,
           main="Estimated Skewed Distribution (gdm function)")

#----  Model 15b: mirt (in mirt)
 # define mirt model
mirtmodel <- mirt::mirt.model("
    F = 1-12
    ")   
 # get parameters
mod.pars <- mirt(dat, model=mirtmodel , pars = "values" , itemtype="Rasch")
  # fix variance (just for correct counting of parameters)
mod.pars[ mod.pars$name=="COV_11" , "est"] <- FALSE
  # fix item difficulty
ind <- which( ( mod.pars$item == "B1" ) & ( mod.pars$name == "d" ) )
mod.pars[ ind , "value"] <- 0
mod.pars[ ind , "est"] <- FALSE

 # define prior
loglinear_prior <- function(Theta,Etable){
    TP <- nrow(Theta)
    if ( is.null(Etable) ){
    prior <- rep( 1/TP , TP )
           }
    # process Etable (this is correct for datasets without missing data)
    if ( ! is.null(Etable) ){
          # sum over correct and incorrect expected responses
       prior <- ( rowSums( Etable[ , seq(1,2*I,2)] ) + rowSums( Etable[ , seq(2,2*I,2)] ) )/I
       # smooth prior using the above design matrix and a log-linear model
       # see Xu & von Davier (2008).
       y <- log( prior + 1E-15 )
       lm1 <- lm( y ~ 0 + delta.designmatrix , weights = prior )
       prior <- exp(fitted(lm1))   # smoothed prior
           }
    prior <- prior / sum(prior)
    return(prior)
}

#* estimate model
mod15b <- mirt(dat, mirtmodel , technical = list(
                customTheta= theta.k , customPriorFun = loglinear_prior ) ,
                pars = mod.pars , verbose=TRUE )                                              
# estimated parameters from the user customized prior distribution.
mod15b@nest <- as.integer(sum(mod.pars$est) + 3)
#* extract item parameters
coef1 <- mirt.wrapper.coef(mod15b)$coef

#** compare estimated item parameters
dfr <- data.frame( "gdm"=mod15a$item$b.Cat1 , "mirt"=coef1$d )
rownames(dfr) <- colnames(dat)
round(t(dfr),4)
  ##            A1     A2      A3      A4 B1      B2     B3     B4     C1    C2     C3    C4
  ##   gdm  0.9818 0.1538 -0.7837 -1.3197  0 -1.0902 1.6088 -0.170 1.9778 0.006 1.1859 0.135
  ##   mirt 0.9829 0.1548 -0.7826 -1.3186  0 -1.0892 1.6099 -0.169 1.9790 0.007 1.1870 0.136
# compare estimated theta distribution
dfr <- data.frame( "gdm"=mod15a$pi.k , "mirt"= mod15b@Prior[[1]] )
round(t(dfr),4)
  ##        1 2     3     4      5      6      7      8      9     10     11     12     13
  ##   gdm  0 0 1e-04 9e-04 0.0056 0.0231 0.0652 0.1299 0.1881 0.2038 0.1702 0.1129 0.0612
  ##   mirt 0 0 1e-04 9e-04 0.0056 0.0232 0.0653 0.1300 0.1881 0.2038 0.1702 0.1128 0.0611
  ##            14    15
  ##   gdm  0.0279 0.011
  ##   mirt 0.0278 0.011

Run the code above in your browser using DataLab