# Setup for Examples 1 and 2 ------------------------------------------------
# Fix the random number stream so the simulated data can be reproduced
set.seed(0)
# Sample sizes: N persons responding to n items
N <- 50
n <- 40
# Draw the examinees with preknowledge (10% of persons) first, then the
# compromised items (40% of items); the order of the draws matters for
# reproducibility
cv <- sample(seq_len(N), size = 0.10 * N)
ci <- sample(seq_len(n), size = 0.40 * n)
# Enumerate every unordered pair of persons, one pair per row
pair <- t(combn(N, 2))
# Flag a pair as similar (1) only when both of its members have preknowledge;
# all other pairs are non-similar (0). Elements are labelled "first-second".
ind <- as.numeric(pair[, 1] %in% cv & pair[, 2] %in% cv)
names(ind) <- paste(pair[, 1], pair[, 2], sep = "-")
# Example 1: Item Scores and Response Times ---------------------------------
# Person parameters for the 3PL model and lognormal model: theta and tau are
# drawn jointly from a zero-mean bivariate normal (variances 1.00 and 0.25,
# covariance 0.25)
xi <- MASS::mvrnorm(
  n = N,
  mu = c(theta = 0.00, tau = 0.00),
  Sigma = rbind(
    c(1.00, 0.25),
    c(0.25, 0.25)
  )
)
# Item parameters for the 3PL model and lognormal model. Columns b and beta
# are placeholders here and are filled in jointly below.
psi <- cbind(
  a = rlnorm(n, meanlog = 0.00, sdlog = 0.25),
  b = NA_real_,
  c = runif(n, min = 0.05, max = 0.30),
  alpha = runif(n, min = 1.50, max = 2.50),
  beta = NA_real_
)
# Difficulty (b) and time intensity (beta) are drawn together so that they
# are positively correlated (covariance 0.20)
psi[, c("b", "beta")] <- MASS::mvrnorm(
  n = n,
  mu = c(b = 0.00, beta = 3.50),
  Sigma = rbind(
    c(1.00, 0.20),
    c(0.20, 0.15)
  )
)
# Simulate the uncontaminated data and pull out the two components used
# below (x and y)
dat <- sim(psi, xi)
x <- dat$x
y <- dat$y
# Contaminate the data: for examinees with preknowledge on compromised items,
# redraw the item scores with a 0.90 success probability and scale the log
# response times by 0.75
x[cv, ci] <- rbinom(n = length(cv) * length(ci), size = 1, prob = 0.90)
y[cv, ci] <- 0.75 * y[cv, ci]
# Run the answer similarity statistics on the contaminated data
out <- detect_as(
  psi = psi,
  x = x,
  y = y,
  method = c("OMG_S", "WOMG_S", "GBT_S", "OMG_ST", "GBT_ST")
)
# Example 2: Polytomous Item Scores -----------------------------------------
# Person parameters for the generalized partial credit model: a one-column
# matrix of standard normal theta values
xi <- matrix(rnorm(N), ncol = 1, dimnames = list(NULL, "theta"))
# Item parameters for the generalized partial credit model: a is lognormal,
# c0 is fixed at zero, and c1 to c3 are normal with means -1, 0, and 1
psi <- cbind(
  a = rlnorm(n, meanlog = 0.00, sdlog = 0.25),
  c0 = rep(0, n),
  c1 = rnorm(n, mean = -1.00, sd = 0.50),
  c2 = rnorm(n, mean = 0.00, sd = 0.50),
  c3 = rnorm(n, mean = 1.00, sd = 0.50)
)
# Simulate the uncontaminated item scores
x <- sim(psi, xi)$x
# Contaminate the data: examinees with preknowledge receive the maximum
# score (3) on every compromised item
x[cv, ci] <- 3
# Run the answer similarity statistics on the contaminated item scores
out <- detect_as(
  psi = psi,
  x = x,
  method = c("OMG_S", "WOMG_S", "GBT_S")
)
# Setup for Example 3 -------------------------------------------------------
# Settings
set.seed(0) # seed for reproducibility
N <- 50 # number of persons
n <- 40 # number of items
# Randomly select 10% sources and 10% copiers. Copiers are drawn from the
# persons who are not sources, so the two groups never overlap. The v-th
# source is matched with the v-th copier.
# NOTE: the variable `c` masks nothing at call sites (R still resolves `c()`
# to the base function), and its name is kept because later code indexes it.
s <- sample(seq_len(N), size = N * 0.10)
c <- sample(setdiff(seq_len(N), s), size = N * 0.10)
# Create vector of indicators (1 = similar pair, 0 = non-similar pair).
# Rows of `pair` always list the smaller person index first, so each
# source-copier pair is looked up by its "low-high" label rather than by
# searching the rows for both orderings.
pair <- t(combn(N, 2))
ind <- rep(0, nrow(pair))
names(ind) <- paste(pair[, 1], pair[, 2], sep = "-")
ind[paste(pmin(s, c), pmax(s, c), sep = "-")] <- 1
# Example 3: Item Responses -------------------------------------------------
# Generate person parameters for the nominal response model: a one-column
# matrix of standard normal eta values
xi <- cbind(eta = rnorm(N, mean = 0.00, sd = 1.00))
# Generate item parameters for the nominal response model. The fourth lambda
# and zeta columns are centred at 1.50; the first three are centred at -0.50.
psi <- cbind(
  lambda1 = rnorm(n, mean = -0.50, sd = 0.50),
  lambda2 = rnorm(n, mean = -0.50, sd = 0.50),
  lambda3 = rnorm(n, mean = -0.50, sd = 0.50),
  lambda4 = rnorm(n, mean = 1.50, sd = 0.50),
  zeta1 = rnorm(n, mean = -0.50, sd = 0.50),
  zeta2 = rnorm(n, mean = -0.50, sd = 0.50),
  zeta3 = rnorm(n, mean = -0.50, sd = 0.50),
  zeta4 = rnorm(n, mean = 1.50, sd = 0.50)
)
# Simulate uncontaminated data
r <- sim(psi, xi)$r
# Modify contaminated data by replacing 40% of the copier responses with
# source responses. A fresh set of items is drawn for every source-copier
# pair. seq_along() keeps the loop from running when there are no copiers
# (1:length(c) would iterate over c(1, 0) in that case).
for (v in seq_along(c)) {
  ci <- sample(seq_len(n), size = n * 0.40)
  r[c[v], ci] <- r[s[v], ci]
}
# Detect answer similarity
out <- detect_as(
  method = c("OMG_R", "WOMG_R", "GBT_R"),
  psi = psi,
  r = r
)
# Run the code above in your browser using DataLab