# Simulate data --------------------------------------------------------------
# Sample size and misrepresentation probability (p0 = chance that a true
# V = 1 record is misreported as V_star = 0).
n <- 1000
p0 <- 0.25

# Covariates: a binary indicator, a three-level categorical predictor, and
# a standard-normal continuous predictor.
X1 <- rbinom(n, 1, 0.4)
X2 <- sample(x = c("a", "b", "c"), size = n, replace = TRUE)
X3 <- rnorm(n, 0, 1)

# True status V with prevalence theta0; V_star is the observed (possibly
# misrepresented) version -- only true V = 1 records can be flipped to 0.
theta0 <- 0.3
V <- rbinom(n, 1, theta0)
V_star <- V
flagged <- V == 1
V_star[flagged] <- rbinom(sum(flagged), 1, 1 - p0)

# True regression coefficients for the log-mean model.
a0 <- 1
a1 <- 2
a2 <- 0
a3 <- -1
a4 <- 4
a5 <- 2
# Mean response on the original scale: log-linear in X1, X3, and the true
# status V, with an X2 category effect (levels "a" and "b" share a2, any
# other level -- here "c" -- gets a3).
#
# Vectorised fix: the original looped over i and evaluated
# exp(a0 + a1*X1 + a4*X3 + a5*V)[i] inside the loop, rebuilding the full
# n-vector on every iteration (accidentally O(n^2)).  A single vectorised
# expression produces identical values in O(n).
x2_effect <- ifelse(X2 == "a" | X2 == "b", a2, a3)
mu <- exp(a0 + a1 * X1 + a4 * X3 + a5 * V + x2_effect)
# Draw lognormal responses whose mean on the original scale is mu:
# E[Y] = exp(meanlog + sigma^2 / 2), so the meanlog is shifted down by
# sigma^2 / 2.
sigma <- 0.427
mu.norm <- log(mu) - sigma^2 / 2
Y <- rlnorm(n, meanlog = mu.norm, sdlog = sigma)

# Assemble the analysis data set.  X2 is converted to a factor; "a" is the
# reference level (first alphabetically).
data <- data.frame(
  Y = Y,
  X1 = X1,
  X2 = factor(X2),
  X3 = X3,
  V_star = V_star
)
# Model with main effects only: LnRegMisrepEM fits the lognormal regression
# treating V_star as the possibly-misrepresented version of the true status.
LN_mod <- LnRegMisrepEM(
  formula = log(Y) ~ X1 + X2 + X3 + V_star,
  v_star = "V_star",
  data = data
)

# Theoretical prevalence of misrepresentation among V_star = 0 records:
# P(V = 1 | V_star = 0) = theta0 * p0 / (1 - theta0 * (1 - p0)).
(theta0 * p0) / (1 - theta0 * (1 - p0))  # 0.09677419

# Parameter estimates and estimated prevalence of misrepresentation
# (lambda); reference output from one simulated run:
summary(LN_mod)
# Coefficients:
#             Estimate Std. Error   t value Pr(>|t|)
# (Intercept)  1.00664    0.02874  35.02082   <2e-16 ***
# X1           1.95903    0.02825  69.35263   <2e-16 ***
# X2b          0.04106    0.03413   1.20303  0.22925
# X2c         -1.00367    0.03418 -29.36328   <2e-16 ***
# X3           4.00031    0.01366 292.75312   <2e-16 ***
# V_star       2.01422    0.02922  68.93902   <2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ---
#      AIC     AICc      BIC
# 5555.224 5555.370 5594.486
# ---
# Log-Likelihood
# -2769.612
# ---
# Lambda: 0.11085 std.err: 0.01150365
# Fitting an interaction between X2 and X3 -----------------------------------
a6 <- -2
a7 <- 2

# Add a category-specific slope on X3 to the existing mu: level "c" gets
# slope a6, level "b" gets slope a7, and the reference level "a" is left
# unchanged (slope 0, so exp(0 * X3) = 1).
#
# Vectorised fix: the original used a for-loop over 1:n with nested ifs,
# touching one element per iteration; this single expression produces
# identical values.
x3_slope <- ifelse(X2 == "c", a6, ifelse(X2 == "b", a7, 0))
mu <- mu * exp(x3_slope * X3)

# Re-draw the responses under the new mean and refit with the interaction
# term included in the formula.
mu.norm <- log(mu) - sigma^2 / 2
Y <- rlnorm(n, mu.norm, sigma)
data$Y <- Y
LN_mod <- LnRegMisrepEM(
  formula = log(Y) ~ X1 + X2 + X3 + V_star + X2 * X3,
  v_star = "V_star",
  data = data
)
summary(LN_mod)
# Coefficients:
#             Estimate Std. Error   t value Pr(>|t|)
# (Intercept)  0.95064    0.02905  32.71943   <2e-16 ***
# X1           2.04258    0.02876  71.02228   <2e-16 ***
# X2b          0.00204    0.03463   0.05879  0.95314
# X2c         -0.97738    0.03469 -28.17315   <2e-16 ***
# X3           3.97014    0.02341 169.61122   <2e-16 ***
# V_star       2.01894    0.02967  68.04786   <2e-16 ***
# X2b:X3       2.00436    0.03459  57.95433   <2e-16 ***
# X2c:X3      -1.97573    0.03431 -57.59173   <2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ---
#      AIC     AICc      BIC
# 5505.180 5505.402 5554.257
# ---
# Log-Likelihood
# -2742.59
# ---
# Lambda: 0.1055629 std.err: 0.01134298
# Model fitting with a polynomial effect --------------------------------------
# Add a quadratic effect of X3 to the log-mean, re-draw the responses, and
# refit with an I(X3^2) term in the formula.
a8 <- -0.5
mu <- mu * exp(a8 * X3^2)
mu.norm <- log(mu) - sigma^2 / 2
Y <- rlnorm(n, mu.norm, sigma)
data$Y <- Y
LN_mod <- LnRegMisrepEM(
  formula = log(Y) ~ X1 + X2 + X3 + V_star + X2 * X3 + I(X3^2),
  v_star = "V_star",
  data = data
)
summary(LN_mod)
# Coefficients:
#             Estimate Std. Error   t value Pr(>|t|)
# (Intercept)  0.95591    0.03084  30.99533   <2e-16 ***
# X1           2.00070    0.02878  69.52672   <2e-16 ***
# X2b          0.09309    0.03480   2.67464   0.0076 **
# X2c         -0.96572    0.03455 -27.95530   <2e-16 ***
# X3           3.96765    0.02378 166.82860   <2e-16 ***
# V_star       2.00513    0.02967  67.58486   <2e-16 ***
# I(X3^2)     -0.49043    0.00983 -49.90063   <2e-16 ***
# X2b:X3       2.04614    0.03454  59.24411   <2e-16 ***
# X2c:X3      -1.97248    0.03383 -58.30378   <2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ---
#      AIC     AICc      BIC
# 4537.485 4537.752 4591.470
# ---
# Log-Likelihood
# -2257.742
# ---
# Lambda: 0.1061872 std.err: 0.01138758
# Run the code above in your browser using DataLab