# load LSAT section 7 data and compute 1 and 2 factor models
data <- expand.table(LSAT7)
itemstats(data)
(mod1 <- mirt(data, 1))
coef(mod1)
summary(mod1)
plot(mod1)
plot(mod1, type = 'trace')
if (FALSE) {
(mod2 <- mirt(data, 1, SE = TRUE)) #standard errors via the Oakes method
(mod2 <- mirt(data, 1, SE = TRUE, SE.type = 'SEM')) #standard errors with SEM method
coef(mod2)
(mod3 <- mirt(data, 1, SE = TRUE, SE.type = 'Richardson')) #with numerical Richardson method
residuals(mod1)
plot(mod1) #test score function
plot(mod1, type = 'trace') #trace lines
plot(mod2, type = 'info') #test information
plot(mod2, MI=200) #expected total score with 95% confidence intervals
# estimated 3PL model for item 5 only
(mod1.3PL <- mirt(data, 1, itemtype = c('2PL', '2PL', '2PL', '2PL', '3PL')))
coef(mod1.3PL)
# internally g and u pars are stored as logits, so usually a good idea to include normal prior
# to help stabilize the parameters. For a value around .182 use a mean
# of -1.5 (since 1 / (1 + exp(-(-1.5))) == .182)
model <- 'F = 1-5
PRIOR = (5, g, norm, -1.5, 3)'
mod1.3PL.norm <- mirt(data, model, itemtype = c('2PL', '2PL', '2PL', '2PL', '3PL'))
coef(mod1.3PL.norm)
#limited information fit statistics
M2(mod1.3PL.norm)
# unidimensional ideal point model
idealpt <- mirt(data, 1, itemtype = 'ideal')
plot(idealpt, type = 'trace', facet_items = TRUE)
plot(idealpt, type = 'trace', facet_items = FALSE)
# two factors (exploratory)
mod2 <- mirt(data, 2)
coef(mod2)
summary(mod2, rotate = 'oblimin') #oblimin rotation
residuals(mod2)
plot(mod2)
plot(mod2, rotate = 'oblimin')
anova(mod1, mod2) #compare the two models
scoresfull <- fscores(mod2) #factor scores for each response pattern
head(scoresfull)
scorestable <- fscores(mod2, full.scores = FALSE) #save factor score table
head(scorestable)
# confirmatory (as an example, model is not identified since you need 3 items per factor)
# Two ways to define a confirmatory model: with mirt.model, or with a string
# these model definitions are equivalent
cmodel <- mirt.model('
F1 = 1,4,5
F2 = 2,3')
cmodel2 <- 'F1 = 1,4,5
F2 = 2,3'
cmod <- mirt(data, cmodel)
# cmod <- mirt(data, cmodel2) # same as above
coef(cmod)
anova(cmod, mod2)
# check if identified by computing information matrix
(cmod <- mirt(data, cmodel, SE = TRUE))
###########
# data from the 'ltm' package in numeric format
itemstats(Science)
pmod1 <- mirt(Science, 1)
plot(pmod1)
plot(pmod1, type = 'trace')
plot(pmod1, type = 'itemscore')
summary(pmod1)
# Constrain all slopes to be equal with the constrain = list() input or mirt.model() syntax
# first obtain parameter index
values <- mirt(Science,1, pars = 'values')
values #note that slopes are numbered 1,5,9,13, or index with values$parnum[values$name == 'a1']
(pmod1_equalslopes <- mirt(Science, 1, constrain = list(c(1,5,9,13))))
coef(pmod1_equalslopes)
# using mirt.model syntax, constrain all item slopes to be equal
model <- 'F = 1-4
CONSTRAIN = (1-4, a1)'
(pmod1_equalslopes <- mirt(Science, model))
coef(pmod1_equalslopes)
coef(pmod1_equalslopes)
anova(pmod1_equalslopes, pmod1) #significantly worse fit with almost all criteria
pmod2 <- mirt(Science, 2)
summary(pmod2)
plot(pmod2, rotate = 'oblimin')
itemplot(pmod2, 1, rotate = 'oblimin')
anova(pmod1, pmod2)
# unidimensional fit with a generalized partial credit and nominal model
(gpcmod <- mirt(Science, 1, 'gpcm'))
coef(gpcmod)
# for the nominal model the lowest and highest categories are assumed to be the
# theoretically lowest and highest categories that related to the latent trait(s)
(nomod <- mirt(Science, 1, 'nominal'))
coef(nomod) #ordering of ak values suggest that the items are indeed ordinal
anova(gpcmod, nomod)
itemplot(nomod, 3)
# generalized graded unfolding model
(ggum <- mirt(Science, 1, 'ggum'))
coef(ggum, simplify=TRUE)
plot(ggum)
plot(ggum, type = 'trace')
plot(ggum, type = 'itemscore')
# monotonic polyomial models
(monopoly <- mirt(Science, 1, 'monopoly'))
coef(monopoly, simplify=TRUE)
plot(monopoly)
plot(monopoly, type = 'trace')
plot(monopoly, type = 'itemscore')
# unipolar IRT model
unimod <- mirt(Science, itemtype = 'ULL')
coef(unimod, simplify=TRUE)
plot(unimod)
plot(unimod, type = 'trace')
itemplot(unimod, 1)
# following use the correct log-normal density for latent trait
itemfit(unimod)
M2(unimod, type = 'C2')
fs <- fscores(unimod)
hist(fs, 20)
fscores(unimod, method = 'EAPsum', full.scores = FALSE)
## example applying survey weights.
# weight the first half of the cases to be more representative of population
survey.weights <- c(rep(2, nrow(Science)/2), rep(1, nrow(Science)/2))
survey.weights <- survey.weights/sum(survey.weights) * nrow(Science)
unweighted <- mirt(Science, 1)
weighted <- mirt(Science, 1, survey.weights=survey.weights)
###########
# empirical dimensionality testing that includes 'guessing'
data(SAT12)
data <- key2binary(SAT12,
key = c(1,4,5,2,3,1,2,1,3,1,2,4,2,1,5,3,4,4,1,4,3,3,4,1,3,5,1,3,1,5,4,5))
itemstats(data)
mod1 <- mirt(data, 1)
extract.mirt(mod1, 'time') #time elapsed for each estimation component
# optionally use Newton-Raphson for (generally) faster convergence in the M-step's
mod1 <- mirt(data, 1, optimizer = 'NR')
extract.mirt(mod1, 'time')
mod2 <- mirt(data, 2, optimizer = 'NR')
# difficulty converging with reduced quadpts, reduce TOL
mod3 <- mirt(data, 3, TOL = .001, optimizer = 'NR')
anova(mod1,mod2)
anova(mod2, mod3) #negative AIC, 2 factors probably best
# same as above, but using the QMCEM method for generally better accuracy in mod3
mod3 <- mirt(data, 3, method = 'QMCEM', TOL = .001, optimizer = 'NR')
anova(mod2, mod3)
# with fixed guessing parameters
mod1g <- mirt(data, 1, guess = .1)
coef(mod1g)
###########
# graded rating scale example
# make some data
set.seed(1234)
a <- matrix(rep(1, 10))
d <- matrix(c(1,0.5,-.5,-1), 10, 4, byrow = TRUE)
c <- seq(-1, 1, length.out=10)
data <- simdata(a, d + c, 2000, itemtype = rep('graded',10))
itemstats(data)
mod1 <- mirt(data, 1)
mod2 <- mirt(data, 1, itemtype = 'grsm')
coef(mod2)
anova(mod2, mod1) #not sig, mod2 should be preferred
itemplot(mod2, 1)
itemplot(mod2, 5)
itemplot(mod2, 10)
###########
# 2PL nominal response model example (Suh and Bolt, 2010)
data(SAT12)
SAT12[SAT12 == 8] <- NA #set 8 as a missing value
head(SAT12)
# correct answer key
key <- c(1,4,5,2,3,1,2,1,3,1,2,4,2,1,5,3,4,4,1,4,3,3,4,1,3,5,1,3,1,5,4,5)
scoredSAT12 <- key2binary(SAT12, key)
mod0 <- mirt(scoredSAT12, 1)
# for first 5 items use 2PLNRM and nominal
scoredSAT12[,1:5] <- as.matrix(SAT12[,1:5])
mod1 <- mirt(scoredSAT12, 1, c(rep('nominal',5),rep('2PL', 27)))
mod2 <- mirt(scoredSAT12, 1, c(rep('2PLNRM',5),rep('2PL', 27)), key=key)
coef(mod0)$Item.1
coef(mod1)$Item.1
coef(mod2)$Item.1
itemplot(mod0, 1)
itemplot(mod1, 1)
itemplot(mod2, 1)
# compare added information from distractors
Theta <- matrix(seq(-4,4,.01))
par(mfrow = c(2,3))
for(i in 1:5){
info <- iteminfo(extract.item(mod0,i), Theta)
info2 <- iteminfo(extract.item(mod2,i), Theta)
plot(Theta, info2, type = 'l', main = paste('Information for item', i), ylab = 'Information')
lines(Theta, info, col = 'red')
}
par(mfrow = c(1,1))
# test information
plot(Theta, testinfo(mod2, Theta), type = 'l', main = 'Test information', ylab = 'Information')
lines(Theta, testinfo(mod0, Theta), col = 'red')
###########
# using the MH-RM algorithm
data(LSAT7)
fulldata <- expand.table(LSAT7)
(mod1 <- mirt(fulldata, 1, method = 'MHRM'))
# Confirmatory models
# simulate data
a <- matrix(c(
1.5,NA,
0.5,NA,
1.0,NA,
1.0,0.5,
NA,1.5,
NA,0.5,
NA,1.0,
NA,1.0),ncol=2,byrow=TRUE)
d <- matrix(c(
-1.0,NA,NA,
-1.5,NA,NA,
1.5,NA,NA,
0.0,NA,NA,
3.0,2.0,-0.5,
2.5,1.0,-1,
2.0,0.0,NA,
1.0,NA,NA),ncol=3,byrow=TRUE)
sigma <- diag(2)
sigma[1,2] <- sigma[2,1] <- .4
items <- c(rep('2PL',4), rep('graded',3), '2PL')
dataset <- simdata(a,d,2000,items,sigma)
# analyses
# CIFA for 2 factor crossed structure
model.1 <- '
F1 = 1-4
F2 = 4-8
COV = F1*F2'
# compute model, and use parallel computation of the log-likelihood
if(interactive()) mirtCluster()
mod1 <- mirt(dataset, model.1, method = 'MHRM')
coef(mod1)
summary(mod1)
residuals(mod1)
#####
# bifactor
model.3 <- '
G = 1-8
F1 = 1-4
F2 = 5-8'
mod3 <- mirt(dataset,model.3, method = 'MHRM')
coef(mod3)
summary(mod3)
residuals(mod3)
anova(mod1,mod3)
#####
# polynomial/combinations
data(SAT12)
data <- key2binary(SAT12,
key = c(1,4,5,2,3,1,2,1,3,1,2,4,2,1,5,3,4,4,1,4,3,3,4,1,3,5,1,3,1,5,4,5))
model.quad <- '
F1 = 1-32
(F1*F1) = 1-32'
model.combo <- '
F1 = 1-16
F2 = 17-32
(F1*F2) = 1-8'
(mod.quad <- mirt(data, model.quad))
summary(mod.quad)
(mod.combo <- mirt(data, model.combo))
anova(mod.combo, mod.quad)
# non-linear item and test plots
plot(mod.quad)
plot(mod.combo, type = 'SE')
itemplot(mod.quad, 1, type = 'score')
itemplot(mod.combo, 2, type = 'score')
itemplot(mod.combo, 2, type = 'infocontour')
## empirical histogram examples (normal, skew and bimodality)
# make some data
set.seed(1234)
a <- matrix(rlnorm(50, .2, .2))
d <- matrix(rnorm(50))
ThetaNormal <- matrix(rnorm(2000))
ThetaBimodal <- scale(matrix(c(rnorm(1000, -2), rnorm(1000,2)))) #bimodal
ThetaSkew <- scale(matrix(rchisq(2000, 3))) #positive skew
datNormal <- simdata(a, d, 2000, itemtype = '2PL', Theta=ThetaNormal)
datBimodal <- simdata(a, d, 2000, itemtype = '2PL', Theta=ThetaBimodal)
datSkew <- simdata(a, d, 2000, itemtype = '2PL', Theta=ThetaSkew)
normal <- mirt(datNormal, 1, dentype = "empiricalhist")
plot(normal, type = 'empiricalhist')
histogram(ThetaNormal, breaks=30)
bimodal <- mirt(datBimodal, 1, dentype = "empiricalhist")
plot(bimodal, type = 'empiricalhist')
histogram(ThetaBimodal, breaks=30)
skew <- mirt(datSkew, 1, dentype = "empiricalhist")
plot(skew, type = 'empiricalhist')
histogram(ThetaSkew, breaks=30)
#####
# non-linear parameter constraints with Rsolnp package (nloptr supported as well):
# Find Rasch model subject to the constraint that the intercepts sum to 0
dat <- expand.table(LSAT6)
itemstats(dat)
# free latent mean and variance terms
model <- 'Theta = 1-5
MEAN = Theta
COV = Theta*Theta'
# view how vector of parameters is organized internally
sv <- mirt(dat, model, itemtype = 'Rasch', pars = 'values')
sv[sv$est, ]
# constraint: create function for solnp to compute constraint, and declare value in eqB
eqfun <- function(p, optim_args) sum(p[1:5]) #could use browser() here, if it helps
LB <- c(rep(-15, 6), 1e-4) # more reasonable lower bound for variance term
mod <- mirt(dat, model, sv=sv, itemtype = 'Rasch', optimizer = 'solnp',
solnp_args=list(eqfun=eqfun, eqB=0, LB=LB))
print(mod)
coef(mod)
(ds <- sapply(coef(mod)[1:5], function(x) x[,'d']))
sum(ds)
# same likelihood location as: mirt(dat, 1, itemtype = 'Rasch')
#######
# latent regression Rasch model
# simulate data
set.seed(1234)
N <- 1000
# covariates
X1 <- rnorm(N); X2 <- rnorm(N)
covdata <- data.frame(X1, X2, X3 = rnorm(N))
Theta <- matrix(0.5 * X1 + -1 * X2 + rnorm(N, sd = 0.5))
# items and response data
a <- matrix(1, 20); d <- matrix(rnorm(20))
dat <- simdata(a, d, 1000, itemtype = '2PL', Theta=Theta)
# unconditional Rasch model
mod0 <- mirt(dat, 1, 'Rasch', SE=TRUE)
coef(mod0, printSE=TRUE)
# conditional model using X1, X2, and X3 (bad) as predictors of Theta
mod1 <- mirt(dat, 1, 'Rasch', covdata=covdata, formula = ~ X1 + X2 + X3, SE=TRUE)
coef(mod1, printSE=TRUE)
coef(mod1, simplify=TRUE)
anova(mod0, mod1) # jointly significant predictors of theta
# large sample z-ratios and p-values (if one cares)
cfs <- coef(mod1, printSE=TRUE)
(z <- cfs$lr.betas[[1]] / cfs$lr.betas[[2]])
round(pnorm(abs(z[,1]), lower.tail=FALSE)*2, 3)
# drop predictor for nested comparison
mod1b <- mirt(dat, 1, 'Rasch', covdata=covdata, formula = ~ X1 + X2)
anova(mod1b, mod1)
# compare to mixedmirt() version of the same model
mod1.mixed <- mixedmirt(dat, 1, itemtype='Rasch',
covdata=covdata, lr.fixed = ~ X1 + X2 + X3, SE=TRUE)
coef(mod1.mixed)
coef(mod1.mixed, printSE=TRUE)
# draw plausible values for secondary analyses
pv <- fscores(mod1, plausible.draws = 10)
pvmods <- lapply(pv, function(x, covdata) lm(x ~ covdata$X1 + covdata$X2),
covdata=covdata)
# population characteristics recovered well, and can be averaged over
so <- lapply(pvmods, summary)
so
# compute Rubin's multiple imputation average
par <- lapply(so, function(x) x$coefficients[, 'Estimate'])
SEpar <- lapply(so, function(x) x$coefficients[, 'Std. Error'])
averageMI(par, SEpar)
############
# Example using Gauss-Hermite quadrature with custom input functions
library(fastGHQuad)
data(SAT12)
data <- key2binary(SAT12,
key = c(1,4,5,2,3,1,2,1,3,1,2,4,2,1,5,3,4,4,1,4,3,3,4,1,3,5,1,3,1,5,4,5))
GH <- gaussHermiteData(50)
Theta <- matrix(GH$x)
# This prior works for uni- and multi-dimensional models
prior <- function(Theta, Etable){
P <- grid <- GH$w / sqrt(pi)
if(ncol(Theta) > 1)
for(i in 2:ncol(Theta))
P <- expand.grid(P, grid)
if(!is.vector(P)) P <- apply(P, 1, prod)
P
}
GHmod1 <- mirt(data, 1, optimizer = 'NR',
technical = list(customTheta = Theta, customPriorFun = prior))
coef(GHmod1, simplify=TRUE)
Theta2 <- as.matrix(expand.grid(Theta, Theta))
GHmod2 <- mirt(data, 2, optimizer = 'NR', TOL = .0002,
technical = list(customTheta = Theta2, customPriorFun = prior))
summary(GHmod2, suppress=.2)
############
# Davidian curve example
dat <- key2binary(SAT12,
key = c(1,4,5,2,3,1,2,1,3,1,2,4,2,1,5,3,4,4,1,4,3,3,4,1,3,5,1,3,1,5,4,5))
dav <- mirt(dat, 1, dentype = 'Davidian-4') # use four smoothing parameters
plot(dav, type = 'Davidian') # shape of latent trait distribution
coef(dav, simplify=TRUE)
fs <- fscores(dav) # assume normal prior
fs2 <- fscores(dav, use_dentype_estimate=TRUE) # use Davidian estimated prior shape
head(cbind(fs, fs2))
itemfit(dav) # assume normal prior
itemfit(dav, use_dentype_estimate=TRUE) # use Davidian estimated prior shape
###########
# 5PL and restricted 5PL example
dat <- expand.table(LSAT7)
mod2PL <- mirt(dat)
mod2PL
# Following does not converge without including strong priors
# mod5PL <- mirt(dat, itemtype = '5PL')
# mod5PL
# restricted version of 5PL (asymmetric 2PL)
model <- 'Theta = 1-5
FIXED = (1-5, g), (1-5, u)'
mod2PL_asym <- mirt(dat, model=model, itemtype = '5PL')
mod2PL_asym
coef(mod2PL_asym, simplify=TRUE)
coef(mod2PL_asym, simplify=TRUE, IRTpars=TRUE)
# no big difference statistically or visually
anova(mod2PL, mod2PL_asym)
plot(mod2PL, type = 'trace')
plot(mod2PL_asym, type = 'trace')
###################
# LLTM example
a <- matrix(rep(1,30))
d <- rep(c(1,0, -1),each = 10) # first easy, then medium, last difficult
dat <- simdata(a, d, 1000, itemtype = '2PL')
# unconditional model for intercept comparisons
mod <- mirt(dat, itemtype = 'Rasch')
coef(mod, simplify=TRUE)
# Suppose that the first 10 items were suspected to be easy, followed by 10 medium difficulty items,
# then finally the last 10 items are difficult,
# and we wish to test this item structure hypothesis (more intercept designs are possible
# by including more columns).
itemdesign <- data.frame(difficulty =
factor(c(rep('easy', 10), rep('medium', 10), rep('hard', 10))))
rownames(itemdesign) <- colnames(dat)
itemdesign
# LLTM with mirt()
lltm <- mirt(dat, itemtype = 'Rasch', SE=TRUE,
item.formula = ~ 0 + difficulty, itemdesign=itemdesign)
coef(lltm, simplify=TRUE)
coef(lltm, printSE=TRUE)
anova(lltm, mod) # models fit effectively the same; hence, intercept variability well captured
# additional information for LLTM
plot(lltm)
plot(lltm, type = 'trace')
itemplot(lltm, item=1)
itemfit(lltm)
head(fscores(lltm)) #EAP estimates
fscores(lltm, method='EAPsum', full.scores=FALSE)
M2(lltm) # goodness of fit
head(personfit(lltm))
residuals(lltm)
# intercept across items also possible by removing ~ 0 portion, just interpreted differently
lltm.int <- mirt(dat, itemtype = 'Rasch',
item.formula = ~ difficulty, itemdesign=itemdesign)
anova(lltm, lltm.int) # same
coef(lltm.int, simplify=TRUE)
# using unconditional modeling for first four items
itemdesign.sub <- itemdesign[5:nrow(itemdesign), , drop=FALSE]
itemdesign.sub # note that rownames are required in this case
lltm.4 <- mirt(dat, itemtype = 'Rasch',
item.formula = ~ 0 + difficulty, itemdesign=itemdesign.sub)
coef(lltm.4, simplify=TRUE) # first four items are the standard Rasch
anova(lltm, lltm.4) # similar fit, hence more constrained model preferred
# LLTM with mixedmirt() (more flexible in general, but slower)
LLTM <- mixedmirt(dat, model=1, fixed = ~ 0 + difficulty,
itemdesign=itemdesign, SE=FALSE)
summary(LLTM)
coef(LLTM)
# LLTM with random error estimate (not supported with mirt() )
LLTM.e <- mixedmirt(dat, model=1, fixed = ~ 0 + difficulty,
random = ~ 1|items, itemdesign=itemdesign, SE=FALSE)
coef(LLTM.e)
###################
# General MLTM example (Embretson, 1984)
set.seed(42)
as <- matrix(rep(1,60), ncol=2)
as[11:18,1] <- as[1:9,2] <- 0
d1 <- rep(c(3,1),each = 6) # first easy, then medium, last difficult for first trait
d2 <- rep(c(0,1,2),times = 4) # difficult to easy
d <- rnorm(18)
ds <- rbind(cbind(d1=NA, d2=d), cbind(d1, d2))
(pars <- data.frame(a=as, d=ds))
dat <- simdata(as, ds, 2500,
itemtype = c(rep('dich', 18), rep('partcomp', 12)))
itemstats(dat)
# unconditional model
syntax <- "theta1 = 1-9, 19-30
theta2 = 10-30
COV = theta1*theta2"
itemtype <- c(rep('Rasch', 18), rep('PC1PL', 12))
mod <- mirt(dat, syntax, itemtype=itemtype)
coef(mod, simplify=TRUE)
data.frame(est=coef(mod, simplify=TRUE)$items, pop=data.frame(a=as, d=ds))
itemplot(mod, 1)
itemplot(mod, 30)
# MLTM design only for PC1PL items
itemdesign <- data.frame(t1_difficulty= factor(d1, labels=c('medium', 'easy')),
t2_difficulty=factor(d2, labels=c('hard', 'medium', 'easy')))
rownames(itemdesign) <- colnames(dat)[19:30]
itemdesign
# fit MLTM design, leaving first 18 items as 'Rasch' type
mltm <- mirt(dat, syntax, itemtype=itemtype, itemdesign=itemdesign,
item.formula = list(theta1 ~ 0 + t1_difficulty,
theta2 ~ 0 + t2_difficulty), SE=TRUE)
coef(mltm, simplify=TRUE)
coef(mltm, printSE=TRUE)
anova(mltm, mod) # similar fit; hence more constrained version preferred
M2(mltm) # goodness of fit
head(personfit(mltm))
residuals(mltm)
# EAP estimates
fscores(mltm) |> head()
}
Run the code above in your browser using DataLab