if (FALSE) {
# single factor
set.seed(12345)
a <- matrix(abs(rnorm(15,1,.3)), ncol=1)
d <- matrix(rnorm(15,0,.7),ncol=1)
itemtype <- rep('2PL', nrow(a))
N <- 1000
dataset1 <- simdata(a, d, N, itemtype)
dataset2 <- simdata(a, d, N, itemtype, mu = .1, sigma = matrix(1.5))
dat <- rbind(dataset1, dataset2)
group <- c(rep('D1', N), rep('D2', N))
# marginal information
itemstats(dat)
# conditional information
itemstats(dat, group=group)
mod_configural <- multipleGroup(dat, 1, group = group) #completely separate analyses
# limited information fit statistics
M2(mod_configural)
mod_metric <- multipleGroup(dat, 1, group = group, invariance=c('slopes')) #equal slopes
# equal intercepts, free variance and means
mod_scalar2 <- multipleGroup(dat, 1, group = group,
invariance=c('slopes', 'intercepts', 'free_var','free_means'))
mod_scalar1 <- multipleGroup(dat, 1, group = group, #fixed means
invariance=c('slopes', 'intercepts', 'free_var'))
mod_fullconstrain <- multipleGroup(dat, 1, group = group,
invariance=c('slopes', 'intercepts'))
extract.mirt(mod_fullconstrain, 'time') #time of estimation components
# optionally use Newton-Raphson for (generally) faster convergence in the M-step's
mod_fullconstrain <- multipleGroup(dat, 1, group = group, optimizer = 'NR',
invariance=c('slopes', 'intercepts'))
extract.mirt(mod_fullconstrain, 'time') #time of estimation components
summary(mod_scalar2)
coef(mod_scalar2, simplify=TRUE)
residuals(mod_scalar2)
plot(mod_configural)
plot(mod_configural, type = 'info')
plot(mod_configural, type = 'trace')
plot(mod_configural, type = 'trace', which.items = 1:4)
itemplot(mod_configural, 2)
itemplot(mod_configural, 2, type = 'RE')
anova(mod_metric, mod_configural) #equal slopes only
anova(mod_scalar2, mod_metric) #equal intercepts, free variance and mean
anova(mod_scalar1, mod_scalar2) #fix mean
anova(mod_fullconstrain, mod_scalar1) #fix variance
# compared all at once (in order of most constrained to least)
anova(mod_fullconstrain, mod_scalar2, mod_configural)
# test whether first 6 slopes should be equal across groups
values <- multipleGroup(dat, 1, group = group, pars = 'values')
values
constrain <- list(c(1, 63), c(5,67), c(9,71), c(13,75), c(17,79), c(21,83))
equalslopes <- multipleGroup(dat, 1, group = group, constrain = constrain)
anova(equalslopes, mod_configural)
# same as above, but using mirt.model syntax
newmodel <- '
F = 1-15
CONSTRAINB = (1-6, a1)'
equalslopes <- multipleGroup(dat, newmodel, group = group)
coef(equalslopes, simplify=TRUE)
############
# vertical scaling (i.e., equating when groups answer items others do not)
dat2 <- dat
dat2[group == 'D1', 1:2] <- dat2[group != 'D1', 14:15] <- NA
head(dat2)
tail(dat2)
# items with missing responses need to be constrained across groups for identification
nms <- colnames(dat2)
mod <- multipleGroup(dat2, 1, group, invariance = nms[c(1:2, 14:15)])
# this will throw an error without proper constraints (SEs cannot be computed either)
# mod <- multipleGroup(dat2, 1, group)
# model still does not have anchors, therefore need to add a few (here use items 3-5)
mod_anchor <- multipleGroup(dat2, 1, group,
invariance = c(nms[c(1:5, 14:15)], 'free_means', 'free_var'))
coef(mod_anchor, simplify=TRUE)
# check if identified by computing information matrix
mod_anchor <- multipleGroup(dat2, 1, group, pars = mod2values(mod_anchor), TOL=NaN, SE=TRUE,
invariance = c(nms[c(1:5, 14:15)], 'free_means', 'free_var'))
mod_anchor
coef(mod_anchor)
coef(mod_anchor, printSE=TRUE)
#############
# DIF test for each item (using all other items as anchors)
itemnames <- colnames(dat)
refmodel <- multipleGroup(dat, 1, group = group, SE=TRUE,
invariance=c('free_means', 'free_var', itemnames))
# loop over items (in practice, run in parallel to increase speed). May be better to use ?DIF
estmodels <- vector('list', ncol(dat))
for(i in 1:ncol(dat))
estmodels[[i]] <- multipleGroup(dat, 1, group = group, verbose = FALSE,
invariance=c('free_means', 'free_var', itemnames[-i]))
anova(refmodel, estmodels[[1]])
(anovas <- lapply(estmodels, function(x, refmodel) anova(refmodel, x),
refmodel=refmodel))
# family-wise error control
p <- do.call(rbind, lapply(anovas, function(x) x[2, 'p']))
p.adjust(p, method = 'BH')
# same as above, except only test if slopes vary (1 df)
# constrain all intercepts
estmodels <- vector('list', ncol(dat))
for(i in 1:ncol(dat))
estmodels[[i]] <- multipleGroup(dat, 1, group = group, verbose = FALSE,
invariance=c('free_means', 'free_var', 'intercepts',
itemnames[-i]))
(anovas <- lapply(estmodels, function(x, refmodel) anova(refmodel, x),
refmodel=refmodel))
# quickly test with Wald test using DIF()
mod_configural2 <- multipleGroup(dat, 1, group = group, SE=TRUE)
DIF(mod_configural2, which.par = c('a1', 'd'), Wald=TRUE, p.adjust = 'fdr')
#############
# Three group model where the latent variable parameters are constrained to
# be equal in the focal groups
set.seed(12345)
a <- matrix(abs(rnorm(15,1,.3)), ncol=1)
d <- matrix(rnorm(15,0,.7),ncol=1)
itemtype <- rep('2PL', nrow(a))
N <- 1000
dataset1 <- simdata(a, d, N, itemtype)
dataset2 <- simdata(a, d, N, itemtype, mu = .1, sigma = matrix(1.5))
dataset3 <- simdata(a, d, N, itemtype, mu = .1, sigma = matrix(1.5))
dat <- rbind(dataset1, dataset2, dataset3)
group <- rep(c('D1', 'D2', 'D3'), each=N)
# marginal information
itemstats(dat)
# conditional information
itemstats(dat, group=group)
model <- 'F1 = 1-15
FREE[D2, D3] = (GROUP, MEAN_1), (GROUP, COV_11)
CONSTRAINB[D2,D3] = (GROUP, MEAN_1), (GROUP, COV_11)'
mod <- multipleGroup(dat, model, group = group, invariance = colnames(dat))
coef(mod, simplify=TRUE)
#############
# multiple factors
a <- matrix(c(abs(rnorm(5,1,.3)), rep(0,15),abs(rnorm(5,1,.3)),
rep(0,15),abs(rnorm(5,1,.3))), 15, 3)
d <- matrix(rnorm(15,0,.7),ncol=1)
mu <- c(-.4, -.7, .1)
sigma <- matrix(c(1.21,.297,1.232,.297,.81,.252,1.232,.252,1.96),3,3)
itemtype <- rep('2PL', nrow(a))
N <- 1000
dataset1 <- simdata(a, d, N, itemtype)
dataset2 <- simdata(a, d, N, itemtype, mu = mu, sigma = sigma)
dat <- rbind(dataset1, dataset2)
group <- c(rep('D1', N), rep('D2', N))
# group models
model <- '
F1 = 1-5
F2 = 6-10
F3 = 11-15'
# define mirt cluster to use parallel architecture
if(interactive()) mirtCluster()
# EM approach (not as accurate with 3 factors, but generally good for quick model comparisons)
mod_configural <- multipleGroup(dat, model, group = group) #completely separate analyses
mod_metric <- multipleGroup(dat, model, group = group, invariance=c('slopes')) #equal slopes
mod_fullconstrain <- multipleGroup(dat, model, group = group, #equal means, slopes, intercepts
invariance=c('slopes', 'intercepts'))
anova(mod_metric, mod_configural)
anova(mod_fullconstrain, mod_metric)
# same as above, but with MHRM (generally more accurate with 3+ factors, but slower)
mod_configural <- multipleGroup(dat, model, group = group, method = 'MHRM')
mod_metric <- multipleGroup(dat, model, group = group, invariance=c('slopes'), method = 'MHRM')
mod_fullconstrain <- multipleGroup(dat, model, group = group, method = 'MHRM',
invariance=c('slopes', 'intercepts'))
anova(mod_metric, mod_configural)
anova(mod_fullconstrain, mod_metric)
############
# polytomous item example
set.seed(12345)
a <- matrix(abs(rnorm(15,1,.3)), ncol=1)
d <- matrix(rnorm(15,0,.7),ncol=1)
d <- cbind(d, d-1, d-2)
itemtype <- rep('graded', nrow(a))
N <- 1000
dataset1 <- simdata(a, d, N, itemtype)
dataset2 <- simdata(a, d, N, itemtype, mu = .1, sigma = matrix(1.5))
dat <- rbind(dataset1, dataset2)
group <- c(rep('D1', N), rep('D2', N))
model <- 'F1 = 1-15'
mod_configural <- multipleGroup(dat, model, group = group)
plot(mod_configural)
plot(mod_configural, type = 'SE')
itemplot(mod_configural, 1)
itemplot(mod_configural, 1, type = 'info')
plot(mod_configural, type = 'trace') # messy, score function typically better
plot(mod_configural, type = 'itemscore')
fs <- fscores(mod_configural, full.scores = FALSE)
head(fs[["D1"]])
fscores(mod_configural, method = 'EAPsum', full.scores = FALSE)
# constrain slopes within each group to be equal (but not across groups)
model2 <- 'F1 = 1-15
CONSTRAIN = (1-15, a1)'
mod_configural2 <- multipleGroup(dat, model2, group = group)
plot(mod_configural2, type = 'SE')
plot(mod_configural2, type = 'RE')
itemplot(mod_configural2, 10)
############
## empirical histogram example (normal and bimodal groups)
set.seed(1234)
a <- matrix(rlnorm(50, .2, .2))
d <- matrix(rnorm(50))
ThetaNormal <- matrix(rnorm(2000))
ThetaBimodal <- scale(matrix(c(rnorm(1000, -2), rnorm(1000,2)))) #bimodal
Theta <- rbind(ThetaNormal, ThetaBimodal)
dat <- simdata(a, d, 4000, itemtype = '2PL', Theta=Theta)
group <- rep(c('G1', 'G2'), each=2000)
EH <- multipleGroup(dat, 1, group=group, dentype="empiricalhist", invariance = colnames(dat))
coef(EH, simplify=TRUE)
plot(EH, type = 'empiricalhist', npts = 60)
# DIF test for item 1
EH1 <- multipleGroup(dat, 1, group=group, dentype="empiricalhist", invariance = colnames(dat)[-1])
anova(EH, EH1)
#--------------------------------
# Mixture model (no prior group variable specified)
set.seed(12345)
nitems <- 20
a1 <- matrix(.75, ncol=1, nrow=nitems)
a2 <- matrix(1.25, ncol=1, nrow=nitems)
d1 <- matrix(rnorm(nitems,0,1),ncol=1)
d2 <- matrix(rnorm(nitems,0,1),ncol=1)
itemtype <- rep('2PL', nrow(a1))
N1 <- 500
N2 <- N1*2 # second class twice as large
dataset1 <- simdata(a1, d1, N1, itemtype)
dataset2 <- simdata(a2, d2, N2, itemtype)
dat <- rbind(dataset1, dataset2)
# group <- c(rep('D1', N1), rep('D2', N2))
# Mixture Rasch model (Rost, 1990)
models <- 'F1 = 1-20
CONSTRAIN = (1-20, a1)'
mod_mix <- multipleGroup(dat, models, dentype = 'mixture-2', GenRandomPars = TRUE)
coef(mod_mix, simplify=TRUE)
summary(mod_mix)
plot(mod_mix)
plot(mod_mix, type = 'trace')
itemplot(mod_mix, 1, type = 'info')
head(fscores(mod_mix)) # theta estimates
head(fscores(mod_mix, method = 'classify')) # classification probability
itemfit(mod_mix)
# Mixture 2PL model
mod_mix2 <- multipleGroup(dat, 1, dentype = 'mixture-2', GenRandomPars = TRUE)
anova(mod_mix, mod_mix2)
coef(mod_mix2, simplify=TRUE)
itemfit(mod_mix2)
# Compare to single group
mod <- mirt(dat)
anova(mod, mod_mix2)
########################################
# Zero-inflated 2PL IRT model
n <- 1000
nitems <- 20
a <- rep(2, nitems)
d <- rep(c(-2,-1,0,1,2), each=nitems/5)
zi_p <- 0.2 # Proportion of people in zero class
theta <- rnorm(n, 0, 1)
zeros <- matrix(0, n*zi_p, nitems)
nonzeros <- simdata(a, d, n*(1-zi_p), itemtype = '2PL',
Theta = as.matrix(theta[1:(n*(1-zi_p))]))
data <- rbind(nonzeros, zeros)
# define class with extreme theta but fixed item parameters
zi2PL <- "F = 1-20
START [MIXTURE_1] = (GROUP, MEAN_1, -100), (GROUP, COV_11, .00001),
(1-20, a1, 1.0), (1-20, d, 0)
FIXED [MIXTURE_1] = (GROUP, MEAN_1), (GROUP, COV_11),
(1-20, a1), (1-20, d)"
# define custom Theta integration grid that contains extreme theta + normal grid
technical <- list(customTheta = matrix(c(-100, seq(-6,6,length.out=61))))
# fit ZIM-IRT
zi2PL.fit <- multipleGroup(data, zi2PL, dentype = 'mixture-2', technical=technical)
coef(zi2PL.fit, simplify=TRUE)
}
Run the code above in your browser using DataLab