#############################################################################
# EXAMPLE 1: PISA data mathematics
#############################################################################
data(data.pisaMath)
dat <- data.pisaMath$data
dat <- dat[ , grep("M" , colnames(dat)) ]
#***
# Model 1: Discrete GOM with 3 classes and 5 probability levels
problevels <- seq( 0 , 1 , len=5 )
mod1 <- gom.em( dat , K=3 , problevels , model="GOM" )
summary(mod1)
#***
# Model 2: Discrete GOM with 4 classes and 5 probability levels
problevels <- seq( 0 , 1 , len=5 )
mod2 <- gom.em( dat , K=4 , problevels , model="GOM" )
summary(mod2)
# model comparison
smod1 <- IRT.modelfit(mod1)
smod2 <- IRT.modelfit(mod2)
IRT.compareModels(smod1,smod2)
#***
# Model 2a: Estimate discrete GOM with 4 classes and restricted space of probability levels
# the 2nd, 4th and 6th class correspond to "intermediate stages"
problevels <- scan()
1 0 0 0
.5 .5 0 0
0 1 0 0
0 .5 .5 0
0 0 1 0
0 0 .5 .5
0 0 0 1
problevels <- matrix( problevels, ncol=4 , byrow=TRUE)
mod2a <- gom.em( dat , K=4 , problevels , model="GOM" )
# probability distribution for latent classes
cbind( mod2a$theta.k , mod2a$pi.k )
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.0 0.0 0.0 0.0 0.17214630
## [2,] 0.5 0.5 0.0 0.0 0.04965676
## [3,] 0.0 1.0 0.0 0.0 0.09336660
## [4,] 0.0 0.5 0.5 0.0 0.06555719
## [5,] 0.0 0.0 1.0 0.0 0.27523678
## [6,] 0.0 0.0 0.5 0.5 0.08458620
## [7,] 0.0 0.0 0.0 1.0 0.25945016
#***
# Model 3: Rasch GOM
mod3 <- gom.em( dat , model="GOMRasch" , maxiter=20 )
summary(mod3)
#***
# Model 4: 'Ordinary' Rasch model
mod4 <- rasch.mml2( dat )
summary(mod4)
#############################################################################
# SIMULATED EXAMPLE 2: Grade of membership model with 2 classes
#############################################################################
#********* DATASET 1 *************
# define an ordinary 2 latent class model
set.seed(8765)
I <- 10
prob.class1 <- runif( I , 0 , .35 )
prob.class2 <- runif( I , .70 , .95 )
probs <- cbind( prob.class1 , prob.class2 )
# define classes
N <- 1000
latent.class <- c( rep( 1 , 1/4*N ) , rep( 2,3/4*N ) )
# simulate item responses
dat <- matrix( NA , nrow=N , ncol=I )
for (ii in 1:I){
dat[,ii] <- probs[ ii , latent.class ]
dat[,ii] <- 1 * ( runif(N) < dat[,ii] )
}
colnames(dat) <- paste0( "I" , 1:I)
# Model 1: estimate latent class model
mod1 <- gom.em(dat, K=2, problevels= c(0,1) , model="GOM" )
summary(mod1)
# Model 2: estimate GOM
mod2 <- gom.em(dat, K=2, problevels= seq(0,1,0.5) , model="GOM" )
summary(mod2)
# estimated distribution
cbind( mod2$theta.k , mod2$pi.k )
## [,1] [,2] [,3]
## [1,] 1.0 0.0 0.243925644
## [2,] 0.5 0.5 0.006534278
## [3,] 0.0 1.0 0.749540078
#********* DATASET 2 *************
# define a 2-class model with graded membership
set.seed(8765)
I <- 10
prob.class1 <- runif( I , 0 , .35 )
prob.class2 <- runif( I , .70 , .95 )
prob.class3 <- .5*prob.class1+.5*prob.class2 # probabilities for 'fuzzy class'
probs <- cbind( prob.class1 , prob.class2 , prob.class3)
# define classes
N <- 1000
latent.class <- c( rep(1,round(1/3*N)),rep(2,round(1/2*N)),rep(3,round(1/6*N)))
# simulate item responses
dat <- matrix( NA , nrow=N , ncol=I )
for (ii in 1:I){
dat[,ii] <- probs[ ii , latent.class ]
dat[,ii] <- 1 * ( runif(N) < dat[,ii] )
}
colnames(dat) <- paste0( "I" , 1:I)
#** Model 1: estimate latent class model
mod1 <- gom.em(dat, K=2, problevels= c(0,1) , model="GOM" )
summary(mod1)
#** Model 2: estimate GOM
mod2 <- gom.em(dat, K=2, problevels= seq(0,1,0.5) , model="GOM" )
summary(mod2)
# inspect distribution
cbind( mod2$theta.k , mod2$pi.k )
## [,1] [,2] [,3]
## [1,] 1.0 0.0 0.3335666
## [2,] 0.5 0.5 0.1810114
## [3,] 0.0 1.0 0.4854220
#***
# Model2m: estimate discrete GOM in mirt
# define latent classes
Theta <- scan( nlines=1)
1 0 .5 .5 0 1
Theta <- matrix( Theta , nrow=3 , ncol=2,byrow=TRUE)
# define mirt model
I <- ncol(dat)
#*** create customized item response function for mirt model
name <- 'gom'
par <- c("a1" = -1 , "a2" = 1 )
est <- c(TRUE, TRUE)
P.gom <- function(par,Theta,ncat){
# GOM for two extremal classes
pext1 <- plogis(par[1])
pext2 <- plogis(par[2])
P1 <- Theta[,1]*pext1 + Theta[,2]*pext2
cbind(1-P1, P1)
}
# create item response function
icc_gom <- mirt::createItem(name, par=par, est=est, P=P.gom)
#** define prior for latent class analysis
lca_prior <- function(Theta,Etable){
# number of latent Theta classes
TP <- nrow(Theta)
# prior in initial iteration
if ( is.null(Etable) ){ prior <- rep( 1/TP , TP ) }
# process Etable (this is correct for datasets without missing data)
if ( ! is.null(Etable) ){
# sum over correct and incorrect expected responses
prior <- ( rowSums(Etable[ , seq(1,2*I,2)]) + rowSums(Etable[,seq(2,2*I,2)]) )/I
}
prior <- prior / sum(prior)
return(prior)
}
#*** estimate discrete GOM in mirt package
mod2m <- mirt::mirt(dat, 1, rep( "icc_gom",I) , customItems=list("icc_gom"=icc_gom),
technical = list( customTheta=Theta , customPriorFun = lca_prior) )
# correct number of estimated parameters
mod2m@nest <- as.integer(sum(mod.pars$est) + nrow(Theta)-1 )
# extract log-likelihood and compute AIC and BIC
mod2m@logLik
( AIC <- -2*mod2m@logLik+2*mod2m@nest )
( BIC <- -2*mod2m@logLik+log(mod2m@Data$N)*mod2m@nest )
# extract coefficients
( cmod2m <- mirt.wrapper.coef(mod2m) )
# compare estimated distributions
round( cbind( "sirt" = mod2$pi.k , "mirt" = mod2m@Prior[[1]] ) , 5 )
## sirt mirt
## [1,] 0.33357 0.33627
## [2,] 0.18101 0.17789
## [3,] 0.48542 0.48584
# compare estimated item parameters
dfr <- data.frame( "sirt" = mod2$item[,4:5] )
dfr$mirt <- apply(cmod2m$coef[ , c("a1" , "a2") ] , 2 , plogis )
round(dfr,4)
## sirt.lam.Cl1 sirt.lam.Cl2 mirt.a1 mirt.a2
## 1 0.1157 0.8935 0.1177 0.8934
## 2 0.0790 0.8360 0.0804 0.8360
## 3 0.0743 0.8165 0.0760 0.8164
## 4 0.0398 0.8093 0.0414 0.8094
## 5 0.1273 0.7244 0.1289 0.7243
## [...]
Run the code above in your browser using DataLab