#############################################################################
# EXAMPLE 1: PISA Reading | Rasch model for dichotomous data
#############################################################################
data( data.pisaRead)
dat <- data.pisaRead$data
items <- grep("R" , colnames(dat))
# define matrix of covariates
X <- cbind( 1 , dat[ , c("female","hisei","migra" ) ] )
#***
# Model 1: Latent regression model in the Rasch model
# estimate Rasch model
mod1 <- rasch.mml2( dat[,items] )
# latent regression model
lm1 <- latent.regression.em.raschtype( data=dat[,items ], X = X , b = mod1$item$b )
## Not run:
# #***
# # Model 2: Latent regression with generalized link function
# # estimate alpha parameters for link function
# mod2 <- rasch.mml2( dat[,items] , est.alpha=TRUE)
# # use model estimated likelihood for latent regression model
# lm2 <- latent.regression.em.raschtype( f.yi.qk=mod2$f.yi.qk ,
# X = X , theta.list=mod2$theta.k)
#
# #***
# # Model 3: Latent regression model based on Rasch copula model
# testlets <- paste( data.pisaRead$item$testlet)
# itemclusters <- match( testlets , unique(testlets) )
# # estimate Rasch copula model
# mod3 <- rasch.copula2( dat[,items] , itemcluster=itemclusters )
# # use model estimated likelihood for latent regression model
# lm3 <- latent.regression.em.raschtype( f.yi.qk=mod3$f.yi.qk ,
# X = X , theta.list=mod3$theta.k)
#
# #############################################################################
# # EXAMPLE 2: Simulated data according to the Rasch model
# #############################################################################
#
# set.seed(899)
# I <- 21 # number of items
# b <- seq(-2,2, len=I) # item difficulties
# n <- 2000 # number of students
#
# # simulate theta and covariates
# theta <- stats::rnorm( n )
# x <- .7 * theta + stats::rnorm( n , .5 )
# y <- .2 * x+ .3*theta + stats::rnorm( n , .4 )
# dfr <- data.frame( theta , 1 , x , y )
#
# # simulate Rasch model
# dat1 <- sim.raschtype( theta = theta , b = b )
#
# # estimate latent regression
# mod <- latent.regression.em.raschtype( data = dat1 , X = dfr[,-1] , b = b )
# ## Regression Parameters
# ##
# ## est se.simple se t p beta fmi N.simple pseudoN.latent
# ## X1 -0.2554 0.0208 0.0248 -10.2853 0 0.0000 0.2972 2000 1411.322
# ## x 0.4113 0.0161 0.0193 21.3037 0 0.4956 0.3052 2000 1411.322
# ## y 0.1715 0.0179 0.0213 8.0438 0 0.1860 0.2972 2000 1411.322
# ##
# ## Residual Variance = 0.685
# ## Explained Variance = 0.3639
# ## Total Variance = 1.049
# ## R2 = 0.3469
#
# # compare with linear model (based on true scores)
# summary( stats::lm( theta ~ x + y , data = dfr ) )
# ## Coefficients:
# ## Estimate Std. Error t value Pr(>|t|)
# ## (Intercept) -0.27821 0.01984 -14.02 <2e-16 ***
# ## x 0.40747 0.01534 26.56 <2e-16 ***
# ## y 0.18189 0.01704 10.67 <2e-16 ***
# ## ---
# ##
# ## Residual standard error: 0.789 on 1997 degrees of freedom
# ## Multiple R-squared: 0.3713, Adjusted R-squared: 0.3707
#
# #***********
# # define guessing parameters (lower asymptotes) and
# # upper asymptotes ( 1 minus slipping parameters)
# cI <- rep(.2, I) # all items get a guessing parameter of .2
# cI[ c(7,9) ] <- .25 # 7th and 9th get a guessing parameter of .25
# dI <- rep( .95 , I ) # upper asymptote of .95
# dI[ c(7,11) ] <- 1 # 7th and 9th item have an asymptote of 1
#
# # latent regression model
# mod1 <- latent.regression.em.raschtype( data = dat1 , X = dfr[,-1] ,
# b = b , c = cI , d = dI )
# ## Regression Parameters
# ##
# ## est se.simple se t p beta fmi N.simple pseudoN.latent
# ## X1 -0.7929 0.0243 0.0315 -25.1818 0 0.0000 0.4044 2000 1247.306
# ## x 0.5025 0.0188 0.0241 20.8273 0 0.5093 0.3936 2000 1247.306
# ## y 0.2149 0.0209 0.0266 8.0850 0 0.1960 0.3831 2000 1247.306
# ##
# ## Residual Variance = 0.9338
# ## Explained Variance = 0.5487
# ## Total Variance = 1.4825
# ## R2 = 0.3701
#
# #############################################################################
# # EXAMPLE 3: Measurement error in dependent variable
# #############################################################################
#
# set.seed(8766)
# N <- 4000 # number of persons
# X <- stats::rnorm(N) # independent variable
# Z <- stats::rnorm(N) # independent variable
# y <- .45 * X + .25 * Z + stats::rnorm(N) # dependent variable true score
# sig.e <- stats::runif( N , .5 , .6 ) # measurement error standard deviation
# yast <- y + stats::rnorm( N , sd = sig.e ) # dependent variable measured with error
#
# #****
# # Model 1: Estimation with latent.regression.em.raschtype using
# # individual likelihood
# # define theta grid for evaluation of density
# theta.list <- mean(yast) + stats::sd(yast) * seq( - 5 , 5 , length=21)
# # compute individual likelihood
# f.yi.qk <- stats::dnorm( outer( yast , theta.list , "-" ) / sig.e )
# f.yi.qk <- f.yi.qk / rowSums(f.yi.qk)
# # define predictor matrix
# X1 <- as.matrix(data.frame( "intercept"=1 , "X"=X , "Z"=Z ))
#
# # latent regression model
# res <- latent.regression.em.raschtype( f.yi.qk=f.yi.qk ,
# X= X1 , theta.list=theta.list)
# ## Regression Parameters
# ##
# ## est se.simple se t p beta fmi N.simple pseudoN.latent
# ## intercept 0.0112 0.0157 0.0180 0.6225 0.5336 0.0000 0.2345 4000 3061.998
# ## X 0.4275 0.0157 0.0180 23.7926 0.0000 0.3868 0.2350 4000 3061.998
# ## Z 0.2314 0.0156 0.0178 12.9868 0.0000 0.2111 0.2349 4000 3061.998
# ##
# ## Residual Variance = 0.9877
# ## Explained Variance = 0.2343
# ## Total Variance = 1.222
# ## R2 = 0.1917
#
# #****
# # Model 2: Estimation with latent.regression.em.normal
# res2 <- latent.regression.em.normal( y = yast , sig.e = sig.e , X = X1)
# ## Regression Parameters
# ##
# ## est se.simple se t p beta fmi N.simple pseudoN.latent
# ## intercept 0.0112 0.0157 0.0180 0.6225 0.5336 0.0000 0.2345 4000 3062.041
# ## X 0.4275 0.0157 0.0180 23.7927 0.0000 0.3868 0.2350 4000 3062.041
# ## Z 0.2314 0.0156 0.0178 12.9870 0.0000 0.2111 0.2349 4000 3062.041
# ##
# ## Residual Variance = 0.9877
# ## Explained Variance = 0.2343
# ## Total Variance = 1.222
# ## R2 = 0.1917
#
# ## -> Results between Model 1 and Model 2 are identical because they use
# ## the same input.
#
# #***
# # Model 3: Regression model based on true scores y
# mod3 <- stats::lm( y ~ X + Z )
# summary(mod3)
# ## Coefficients:
# ## Estimate Std. Error t value Pr(>|t|)
# ## (Intercept) 0.02364 0.01569 1.506 0.132
# ## X 0.42401 0.01570 27.016 <2e-16 ***
# ## Z 0.23804 0.01556 15.294 <2e-16 ***
# ## Residual standard error: 0.9925 on 3997 degrees of freedom
# ## Multiple R-squared: 0.1923, Adjusted R-squared: 0.1919
# ## F-statistic: 475.9 on 2 and 3997 DF, p-value: < 2.2e-16
#
# #***
# # Model 4: Regression model based on observed scores yast
# mod4 <- stats::lm( yast ~ X + Z )
# summary(mod4)
# ## Coefficients:
# ## Estimate Std. Error t value Pr(>|t|)
# ## (Intercept) 0.01101 0.01797 0.613 0.54
# ## X 0.42716 0.01797 23.764 <2e-16 ***
# ## Z 0.23174 0.01783 13.001 <2e-16 ***
# ## Residual standard error: 1.137 on 3997 degrees of freedom
# ## Multiple R-squared: 0.1535, Adjusted R-squared: 0.1531
# ## F-statistic: 362.4 on 2 and 3997 DF, p-value: < 2.2e-16
# ## End(Not run)
Run the code above in your browser using DataLab