simCRM: Generating Data under the Continuous Response Model

Description

Generating data under the Continuous Response Model

Usage

simCRM(thetas, true.param, max.item)

Value

a data frame with N rows and m columns with N denoting the number of observations and m denoting the number of items.

Arguments

thetas: a vector of length N with N denoting the number of examinees. Each element of the vector is the true ability level for an examinee
true.param: a matrix of true item parameters with m rows and three columns, with m denoting the number of items. The first column is the a parameters, the second column is the b parameters, and the third column is the alpha parameters
max.item: a vector of length m indicating the maximum possible score for each hypothetical item.

Author

Cengiz Zopluoglu

Details

The simCRM generates data under Continuous Response Model as described in Shojima(2005). Given the true ability level for person i and the true item parameters for item j, the transformed response of person i for item j follows a normal distribution with a mean of α((θ-β)) and a standard deviation of α^2/a^-2.

References

Shojima, K.(2005). A Noniterative Item Parameter Solution in Each EM Cycyle of the Continuous Response Model. Educational Technology Research, 28, 11-22.

Examples

Run this code

  
  if (FALSE) {
  
  #####################################################
  #                      Example 1:                   #
  #   Basic data generation and parameter recovery    #
  #####################################################
  
  #Generate true person ability parameters for 1000 examinees from 
  #a standard normal distribution
  
  true.thetas <- rnorm(1000,0,1)
  
  #Generate the true item parameter matrix for the hypothetical items
  
  true.par <- matrix(c(.5,1,1.5,2,2.5,
  -1,-.5,0,.5,1,1,.8,1.5,.9,1.2),
  nrow=5,ncol=3)
  true.par
  
  #Generate the vector maximum possible scores that students can 
  #get for the items
  
  max.item <- c(30,30,30,30,30)
  
  #Generate the response matrix
  
  simulated.data <- simCRM(true.thetas,true.par,max.item)
  
  #Let's examine the simulated data
  
  head(simulated.data)
  summary(simulated.data)
  
  #Let's try to recover the item parameters
  
  min.item <- c(0,0,0,0,0)
  
  CRM <- EstCRMitem(simulated.data,max.item, min.item,
  max.EMCycle=500,converge=0.01)
  
  #Compare the true item parameters with the estimated item parameters. 
  #The first three column is the true item parameters, and the second
  #three column is the estimated item parameters
  
  cbind(true.par,CRM$param)
  
  #Let's recover the person parameters
  
  par <- CRM$param
  CRMthetas <- EstCRMperson(simulated.data,par,min.item,max.item)
  theta.par <- CRMthetas$thetas
  
  #Compare the true person ability parameters to the estimated person
  ##ability parameters.The first column is the true parameters and the
  ##second column is the estimated parameters
  
  thetas <- cbind(true.thetas,theta.par[,2])
  head(thetas)
  cor(thetas)
  plot(thetas[,1],thetas[,2])
  
  #RMSE for the estimated person parameters
  
  sqrt(sum((thetas[,1]-thetas[,2])^2)/nrow(thetas))
  
  #RMSE is comparable and similar to the standard error of the 
  #theta estimates. Standard error of the theta estimate is the square
  #root of the reciprocal of the total test information which is the sum
  #of square of the "a" parameters
  
  sqrt(1/sum(CRM$param[,1]^2))
  
  #####################################################
  #                      Example 2:                   #
  #   Item fit Residuals, Empirical and Theoretical   #
  #  Item Category Response Curves for the Simulated  #
  #                    Data Above                     #
  #####################################################
  
  #Because of the run time issues during the package development,
  #I run the fit analysis for a subset of simulated data above.
  #The simulated data has 1000 examinees, but I run the fit analysis
  #for the first 100 subjects of the simulated data. Please ignore the 
  #following line and run the analysis for whole data
  
  simulated.data <- simulated.data[1:100,] #Ignore this line 
  
  par <- CRM$param
  max.item <- c(30,30,30,30,30)
  min.item <- c(0,0,0,0,0)
  CRMthetas <- EstCRMperson(simulated.data,par,min.item,max.item)
  theta.par <- CRMthetas$thetas
  mean(theta.par[,2])
  sd(theta.par[,2])
  hist(theta.par[,2])
  
  fit <- fitCRM(simulated.data,par, CRMthetas, max.item, group=10)
  
  #Item-Fit Residuals
  
  fit$fit.stat
  
  #Empirical Item Category Response Curves
  fit$emp.irf[[1]]   #Item 1
  fit$emp.irf[[5]]   #Item 5
  
  #Theoretical Item Category Response Curves
  plotCRM(par,1,min.item, max.item)   #Item 1
  
  
  
  #####################################################
  #                      Example 3:                   #
  #   The replication of Shojima's simulation study   #
  #                         2005                      #
  #####################################################
  
  #In Shojima's simulation study published in 2005
  
   #true person parameters were generated from a standard normal distribution.
   #The natural logarithm of the true "a" parameters were generated from a N(0,0.09)
   #The true "b" parameters were generated from a N(0,1)
   #The natural logarithm of the true "alpha" parameters were generated from a N(0,0.09)
  
  #The independent variables were the number of items and sample size 
  #in the simulation study
  
  #There were 9 different conditions and 100 replications for each condition.
  
  #In Table 1 (Shojima,2005), the RMSD statistics were reported for each condition.
  
  #The code below replicates the same study. The results are comparable to the Table 1. 
  #The user 
  #should only specify the sample size and the number of items. Then, the user should 
  #run the rest of the code. 
  #At the end, RMSEa, RMSEb, RMSEalp are the item parameter recovery statistics which is 
  #comparable to Table 1
  
  
  #Set the conditions for the simulation study. 
  #It takes longer to run for big number of replications
  
  N=500              #sample size
  n=10               #number of items
  replication=1      #number if replications for each condition  
  
  ############################################################
  #       Run the rest of the code from START to END         #
  ############################################################
  
  #START
  
  true.person <- vector("list",replication)
  true.item <- vector("list",replication)
  est.person <- vector("list",replication)
  est.item <- vector("list",replication)
  simulated.datas <- vector("list",replication)
  
  for(i in 1:replication) {
  true.person[[i]] <- rnorm(N,0,1)
  true.item[[i]] <- cbind(exp(rnorm(n,0,.09)),rnorm(n,0,1),1/exp(rnorm(n,0,.09)))
  }
  
  max.item <- rep(50,n)
  min.item <- rep(0,n)
  
  for(i in 1:replication) {
  simulated.datas[[i]] <- simCRM(true.person[[i]],true.item[[i]],max.item)
  }
  
  for(i in 1:replication) {
  CRM<-EstCRMitem(simulated.datas[[i]],max.item,min.item,max.EMCycle= 500,converge=0.01)
  est.item[[i]]=CRM$par
  }
  for(i in 1:replication) {
  persontheta <- EstCRMperson(simulated.datas[[i]],est.item[[i]],min.item,max.item)
  est.person[[i]]<- persontheta$thetas[,2]
  }
  
  #END
  ############################################################
  
  #RMSE for parameter "a"
  
  RMSEa <- c()
  for(i in 1:replication) {
  RMSEa[i]=sqrt(sum((true.item[[i]][,1]-est.item[[i]][,1])^2)/n)
  }
  
  mean(RMSEa)
  
  
  #RMSE for parameter "b"
  
  RMSEb <- c()
  for(i in 1:replication) {
  RMSEb[i]=sqrt(sum((true.item[[i]][,2]-est.item[[i]][,2])^2)/n)
  }
  
  mean(RMSEb)
  
  #RMSE for parameter "alpha" 
  RMSEalp <- c()
  for(i in 1:replication) {
  RMSEalp[i]=sqrt(sum((true.item[[i]][,3]-est.item[[i]][,3])^2)/n)
  }
  
  mean(RMSEalp)
  
  #RMSE for person parameter
  
  RMSEtheta <- c()
  for(i in 1:replication) {
  RMSEtheta[i]=sqrt(sum((true.person[[i]]-est.person[[i]])^2)/N)
  }
  
  mean(RMSEtheta)
  }

Run the code above in your browser using DataLab