# NOT RUN {
#############################################################################
# EXAMPLE 1: PISA data mathematics
#############################################################################
data(data.pisaMath)
dat <- data.pisaMath$data
dat <- dat[, grep("M", colnames(dat)) ]
#***
# Model 1: Discrete GOM with 3 classes and 5 probability levels
problevels <- seq( 0, 1, len=5 )
mod1 <- sirt::gom.em( dat, K=3, problevels, model="GOM" )
summary(mod1)
# }
# NOT RUN {
#***
# Model 2: Discrete GOM with 4 classes and 5 probability levels
problevels <- seq( 0, 1, len=5 )
mod2 <- sirt::gom.em( dat, K=4, problevels, model="GOM" )
summary(mod2)
# model comparison
smod1 <- IRT.modelfit(mod1)
smod2 <- IRT.modelfit(mod2)
IRT.compareModels(smod1,smod2)
#***
# Model 2a: Estimate discrete GOM with 4 classes and restricted space of probability levels
# the 2nd, 4th and 6th class correspond to "intermediate stages"
problevels <- scan()
1 0 0 0
.5 .5 0 0
0 1 0 0
0 .5 .5 0
0 0 1 0
0 0 .5 .5
0 0 0 1
problevels <- matrix( problevels, ncol=4, byrow=TRUE)
mod2a <- sirt::gom.em( dat, K=4, problevels, model="GOM" )
# probability distribution for latent classes
cbind( mod2a$theta.k, mod2a$pi.k )
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.0 0.0 0.0 0.0 0.17214630
## [2,] 0.5 0.5 0.0 0.0 0.04965676
## [3,] 0.0 1.0 0.0 0.0 0.09336660
## [4,] 0.0 0.5 0.5 0.0 0.06555719
## [5,] 0.0 0.0 1.0 0.0 0.27523678
## [6,] 0.0 0.0 0.5 0.5 0.08458620
## [7,] 0.0 0.0 0.0 1.0 0.25945016
# }
# NOT RUN {
#***
# Model 3: Rasch GOM
mod3 <- sirt::gom.em( dat, model="GOMRasch", maxiter=20 )
summary(mod3)
#***
# Model 4: 'Ordinary' Rasch model
mod4 <- sirt::rasch.mml2( dat )
summary(mod4)
# }
# NOT RUN {
#############################################################################
# EXAMPLE 2: Grade of membership model with 2 classes
#############################################################################
#********* DATASET 1 *************
# define an ordinary 2 latent class model
set.seed(8765)
I <- 10
prob.class1 <- stats::runif( I, 0, .35 )
prob.class2 <- stats::runif( I, .70, .95 )
probs <- cbind( prob.class1, prob.class2 )
# define classes
N <- 1000
latent.class <- c( rep( 1, 1/4*N ), rep( 2,3/4*N ) )
# simulate item responses
dat <- matrix( NA, nrow=N, ncol=I )
for (ii in 1:I){
dat[,ii] <- probs[ ii, latent.class ]
dat[,ii] <- 1 * ( stats::runif(N) < dat[,ii] )
}
colnames(dat) <- paste0( "I", 1:I)
# Model 1: estimate latent class model
mod1 <- sirt::gom.em(dat, K=2, problevels=c(0,1), model="GOM" )
summary(mod1)
# Model 2: estimate GOM
mod2 <- sirt::gom.em(dat, K=2, problevels=seq(0,1,0.5), model="GOM" )
summary(mod2)
# estimated distribution
cbind( mod2$theta.k, mod2$pi.k )
## [,1] [,2] [,3]
## [1,] 1.0 0.0 0.243925644
## [2,] 0.5 0.5 0.006534278
## [3,] 0.0 1.0 0.749540078
#********* DATASET 2 *************
# define a 2-class model with graded membership
set.seed(8765)
I <- 10
prob.class1 <- stats::runif( I, 0, .35 )
prob.class2 <- stats::runif( I, .70, .95 )
prob.class3 <- .5*prob.class1+.5*prob.class2 # probabilities for 'fuzzy class'
probs <- cbind( prob.class1, prob.class2, prob.class3)
# define classes
N <- 1000
latent.class <- c( rep(1,round(1/3*N)),rep(2,round(1/2*N)),rep(3,round(1/6*N)))
# simulate item responses
dat <- matrix( NA, nrow=N, ncol=I )
for (ii in 1:I){
dat[,ii] <- probs[ ii, latent.class ]
dat[,ii] <- 1 * ( stats::runif(N) < dat[,ii] )
}
colnames(dat) <- paste0( "I", 1:I)
#** Model 1: estimate latent class model
mod1 <- sirt::gom.em(dat, K=2, problevels=c(0,1), model="GOM" )
summary(mod1)
#** Model 2: estimate GOM
mod2 <- sirt::gom.em(dat, K=2, problevels=seq(0,1,0.5), model="GOM" )
summary(mod2)
# inspect distribution
cbind( mod2$theta.k, mod2$pi.k )
## [,1] [,2] [,3]
## [1,] 1.0 0.0 0.3335666
## [2,] 0.5 0.5 0.1810114
## [3,] 0.0 1.0 0.4854220
#***
# Model2m: estimate discrete GOM in mirt
# define latent classes
Theta <- scan( nlines=1)
1 0 .5 .5 0 1
Theta <- matrix( Theta, nrow=3, ncol=2,byrow=TRUE)
# define mirt model
I <- ncol(dat)
#*** create customized item response function for mirt model
name <- 'gom'
par <- c("a1"=-1, "a2"=1 )
est <- c(TRUE, TRUE)
P.gom <- function(par,Theta,ncat){
# GOM for two extremal classes
pext1 <- stats::plogis(par[1])
pext2 <- stats::plogis(par[2])
P1 <- Theta[,1]*pext1 + Theta[,2]*pext2
cbind(1-P1, P1)
}
# create item response function
icc_gom <- mirt::createItem(name, par=par, est=est, P=P.gom)
#** define prior for latent class analysis
lca_prior <- function(Theta,Etable){
# number of latent Theta classes
TP <- nrow(Theta)
# prior in initial iteration
if ( is.null(Etable) ){ prior <- rep( 1/TP, TP ) }
# process Etable (this is correct for datasets without missing data)
if ( ! is.null(Etable) ){
# sum over correct and incorrect expected responses
prior <- ( rowSums(Etable[, seq(1,2*I,2)]) + rowSums(Etable[,seq(2,2*I,2)]) )/I
}
prior <- prior / sum(prior)
return(prior)
}
#*** estimate discrete GOM in mirt package
mod2m <- mirt::mirt(dat, 1, rep( "icc_gom",I), customItems=list("icc_gom"=icc_gom),
technical=list( customTheta=Theta, customPriorFun=lca_prior) )
# correct number of estimated parameters
mod2m@nest <- as.integer(sum(mod.pars$est) + nrow(Theta)-1 )
# extract log-likelihood and compute AIC and BIC
mod2m@logLik
( AIC <- -2*mod2m@logLik+2*mod2m@nest )
( BIC <- -2*mod2m@logLik+log(mod2m@Data$N)*mod2m@nest )
# extract coefficients
( cmod2m <- sirt::mirt.wrapper.coef(mod2m) )
# compare estimated distributions
round( cbind( "sirt"=mod2$pi.k, "mirt"=mod2m@Prior[[1]] ), 5 )
## sirt mirt
## [1,] 0.33357 0.33627
## [2,] 0.18101 0.17789
## [3,] 0.48542 0.48584
# compare estimated item parameters
dfr <- data.frame( "sirt"=mod2$item[,4:5] )
dfr$mirt <- apply(cmod2m$coef[, c("a1", "a2") ], 2, stats::plogis )
round(dfr,4)
## sirt.lam.Cl1 sirt.lam.Cl2 mirt.a1 mirt.a2
## 1 0.1157 0.8935 0.1177 0.8934
## 2 0.0790 0.8360 0.0804 0.8360
## 3 0.0743 0.8165 0.0760 0.8164
## 4 0.0398 0.8093 0.0414 0.8094
## 5 0.1273 0.7244 0.1289 0.7243
## [...]
# }
Run the code above in your browser using DataLab