# NOT RUN {
# Load the simulated response data and look at its first rows.
data("simulated.data")
head(simulated.data)
# Inspect the structure: the variables should all be "character".
str(simulated.data)
# Load the item parameters and look at their first rows.
data("ipar")
head(ipar)
# Due to time constraints, only the first 10 items (and the matching
# first 10 rows of item parameters) are kept here.
# You can skip the following two lines in your own run.
simulated.data <- simulated.data[, seq_len(10)]
ipar <- ipar[seq_len(10), ]
# Now, compute these indices for random pairs of examinees
# (a small type I error rate study).
# `replication` is the number of random pairs drawn; set it to 100 or 1000
# for an actual study. One replication takes about 15 seconds.
replication <- 1
# Preallocate one row per pair. The first argument of matrix() is the fill
# value, so the number of rows has to be given explicitly through `nrow`.
pairs <- as.data.frame(matrix(NA_integer_, nrow = replication, ncol = 2))
# seq_len() yields an empty sequence when replication is 0, whereas
# 1:replication would iterate over c(1, 0).
for (i in seq_len(replication)) {
  # Draw two distinct row indices of simulated.data.
  d <- sample(seq_len(nrow(simulated.data)), 2, replace = FALSE)
  pairs[i, 1] <- d[1]
  pairs[i, 2] <- d[2]
}
# Add one result column per index; the loop below fills them pair by pair.
pairs$W <- NA
pairs$GBT <- NA
pairs$K <- NA
pairs$K1 <- NA
pairs$K2 <- NA
pairs$S1 <- NA
pairs$S2 <- NA
# seq_len() gives an empty loop when replication is 0 (1:0 would not).
for (i in seq_len(replication)) {
  # Run CopyDetect2 on the i-th pair (rows pairs[i, 1] and pairs[i, 2]
  # of simulated.data) with the item parameters in ipar.
  x <- CopyDetect2(
    data = simulated.data,
    item.par = ipar,
    pair = c(pairs[i, 1], pairs[i, 2]),
    options = c("A", "B", "C", "D", "E")
  )
  # Assign directly into element i of each column rather than extracting
  # and re-inserting the whole row for every value.
  pairs$W[i] <- x$W.index$p.value
  pairs$GBT[i] <- x$GBT.index$p.value
  pairs$K[i] <- x$K.index$k.index
  pairs$K1[i] <- x$K.variants$K1.index
  pairs$K2[i] <- x$K.variants$K2.index
  pairs$S1[i] <- x$K.variants$S1.index
  pairs$S2[i] <- x$K.variants$S2.index
}
# Empirical type I error rates: the share of pairs whose index value falls
# below the alpha level of .05 (false detections).
# About 5% of the pairs are expected to be detected just by chance.
# Missing values are not counted as detections but stay in the denominator.
sum(pairs$W < .05, na.rm = TRUE) / nrow(pairs)
sum(pairs$GBT < .05, na.rm = TRUE) / nrow(pairs)
sum(pairs$K < .05, na.rm = TRUE) / nrow(pairs)
sum(pairs$K1 < .05, na.rm = TRUE) / nrow(pairs)
sum(pairs$K2 < .05, na.rm = TRUE) / nrow(pairs)
sum(pairs$S1 < .05, na.rm = TRUE) / nrow(pairs)
sum(pairs$S2 < .05, na.rm = TRUE) / nrow(pairs)
# Now, compute these indices for answer copying pairs
# (a tiny empirical power study).
# First, a cheater examinee is chosen at random.
# Second, a corresponding source examinee is chosen at random.
# Third, 5 items are selected at random (with the data subset to 10 items
# as above, that is 50% copying).
# Finally, the source examinee's responses on those items overwrite the
# cheater examinee's responses.
# This mimics the scenario in which the cheater examinee looks at the
# source examinee's sheet and copies 5 items.
# `replication` is the number of copying pairs; set it to 100 or 1000 for
# an actual study. One replication takes about 15 seconds.
replication <- 1
# Preallocate one row per pair. The first argument of matrix() is the fill
# value, so the number of rows has to be given explicitly through `nrow`.
copy.pairs <- as.data.frame(matrix(NA_integer_, nrow = replication, ncol = 2))
# seq_len() gives an empty loop when replication is 0 (1:0 would not).
for (i in seq_len(replication)) {
  d <- sample(seq_len(nrow(simulated.data)), 2, replace = FALSE)
  copy.pairs[i, 1] <- d[1] # hypothetical cheater examinee
  copy.pairs[i, 2] <- d[2] # hypothetical source examinee
}
# Work on a copy so that simulated.data keeps the original responses.
new.data <- simulated.data
# Simulate answer copying for each answer copying pair.
for (i in seq_len(replication)) {
  copy.items <- sample(seq_len(ncol(simulated.data)), 5, replace = FALSE)
  # Overwrite the cheater's responses on the sampled items with the
  # source's responses on the same items.
  new.data[copy.pairs[i, 1], copy.items] <-
    new.data[copy.pairs[i, 2], copy.items]
}
# Compute the indices on the original response vectors (simulated.data,
# i.e. before any simulated copying). Results go into the *1 / *_1 columns.
copy.pairs$W1 <- NA
copy.pairs$GBT1 <- NA
copy.pairs$K_1 <- NA
copy.pairs$K1_1 <- NA
copy.pairs$K2_1 <- NA
copy.pairs$S1_1 <- NA
copy.pairs$S2_1 <- NA
# seq_len() gives an empty loop when replication is 0 (1:0 would not).
for (i in seq_len(replication)) {
  # Column 1 holds the hypothetical cheater, column 2 the source.
  x <- CopyDetect2(
    data = simulated.data,
    item.par = ipar,
    pair = c(copy.pairs[i, 1], copy.pairs[i, 2]),
    options = c("A", "B", "C", "D", "E")
  )
  # Assign directly into element i of each column rather than extracting
  # and re-inserting the whole row for every value.
  copy.pairs$W1[i] <- x$W.index$p.value
  copy.pairs$GBT1[i] <- x$GBT.index$p.value
  copy.pairs$K_1[i] <- x$K.index$k.index
  copy.pairs$K1_1[i] <- x$K.variants$K1.index
  copy.pairs$K2_1[i] <- x$K.variants$K2.index
  copy.pairs$S1_1[i] <- x$K.variants$S1.index
  copy.pairs$S2_1[i] <- x$K.variants$S2.index
}
# Compute the indices for the same pairs on the response vectors with
# simulated answer copying (new.data). Results go into the *2 / *_2 columns.
copy.pairs$W2 <- NA
copy.pairs$GBT2 <- NA
copy.pairs$K_2 <- NA
copy.pairs$K1_2 <- NA
copy.pairs$K2_2 <- NA
copy.pairs$S1_2 <- NA
copy.pairs$S2_2 <- NA
# seq_len() gives an empty loop when replication is 0 (1:0 would not).
for (i in seq_len(replication)) {
  # Column 1 holds the hypothetical cheater, column 2 the source.
  x <- CopyDetect2(
    data = new.data,
    item.par = ipar,
    pair = c(copy.pairs[i, 1], copy.pairs[i, 2]),
    options = c("A", "B", "C", "D", "E")
  )
  # Assign directly into element i of each column rather than extracting
  # and re-inserting the whole row for every value.
  copy.pairs$W2[i] <- x$W.index$p.value
  copy.pairs$GBT2[i] <- x$GBT.index$p.value
  copy.pairs$K_2[i] <- x$K.index$k.index
  copy.pairs$K1_2[i] <- x$K.variants$K1.index
  copy.pairs$K2_2[i] <- x$K.variants$K2.index
  copy.pairs$S1_2[i] <- x$K.variants$S1.index
  copy.pairs$S2_2[i] <- x$K.variants$S2.index
}
# See what happens! Print the results with 8 significant digits.
# `digits` has to be passed by name: print.data.frame() declares `...`
# before `digits`, so a bare positional 8 is never matched to it.
print(copy.pairs, digits = 8)
# }
# Run the code above in your browser using DataCamp Workspace