# \donttest{
# -------------------------------
# CPS data example
# -------------------------------
# Set seed for reproducibility
set.seed(123)
# Upload the data
data(cps)
# Estimate the employment model
model <- msel(work ~ age + I(age ^ 2) + bachelor + master, data = cps)
summary(model)
# Use Wald test to test the hypothesis that age has no
# effect on the conditional probability of employment:
# H0: coef age = 0
# coef age ^ 2 = 0
age_fn <- function(object)
{
lwage_coef <- coef(object, type = "coef")[[1]]
val <- c(lwage_coef["age"], lwage_coef["I(age^2)"])
return(val)
}
age_test <- test_msel(object = model, fn = age_fn, test = "wald")
summary(age_test)
# Use t-test to test for each individual the hypothesis:
# P(work = 1 | x) = 0.8
prob_fn <- function(object)
{
prob <- predict(object, group = 1, type = "prob")
val <- prob - 0.8
return(val)
}
prob_test <- test_msel(object = model, fn = prob_fn, test = "t")
summary(prob_test)
# -------------------------------
# Simulated data example
# Model with continuous outcome
# and ordinal selection
# -------------------------------
# ---
# Step 1
# Simulation of the data
# ---
# Set seed for reproducibility
set.seed(123)
# Load required package
library("mnorm")
# The number of observations
n <- 10000
# Regressors (covariates)
s1 <- runif(n = n, min = -1, max = 1)
s2 <- runif(n = n, min = -1, max = 1)
s3 <- runif(n = n, min = -1, max = 1)
s4 <- runif(n = n, min = -1, max = 1)
# Random errors
sigma <- matrix(c(1, 0.4, 0.45, 0.7,
0.4, 1, 0.54, 0.8,
0.45, 0.54, 0.81, 0.81,
0.7, 0.8, 0.81, 1), nrow = 4)
errors <- mnorm::rmnorm(n = n, mean = c(0, 0, 0, 0), sigma = sigma)
u1 <- errors[, 1]
u2 <- errors[, 2]
eps0 <- errors[, 3]
eps1 <- errors[, 4]
# Coefficients
gamma1 <- c(-1, 2)
gamma2 <- c(1, 1)
gamma1_het <- c(0.5, -1)
beta0 <- c(1, -1, 1, -1.2)
beta1 <- c(2, -1.5, 0.5, 1.2)
# Linear index of the ordinal equations
# mean part
li1 <- gamma1[1] * s1 + gamma1[2] * s2
li2 <- gamma2[1] * s1 + gamma2[2] * s3
# variance part
li1_het <- gamma1_het[1] * s2 + gamma1_het[2] * s3
# Linear index of the continuous equation
# regime 0
li_y0 <- beta0[1] + beta0[2] * s1 + beta0[3] * s3 + beta0[4] * s4
# regime 1
li_y1 <- beta1[1] + beta1[2] * s1 + beta1[3] * s3 + beta1[4] * s4
# Latent variables
z1_star <- li1 + u1 * exp(li1_het)
z2_star <- li2 + u2
y0_star <- li_y0 + eps0
y1_star <- li_y1 + eps1
# Cuts
cuts1 <- c(-1)
cuts2 <- c(0, 1)
# Observable ordinal outcome
# first
z1 <- rep(0, n)
z1[z1_star > cuts1[1]] <- 1
# second
z2 <- rep(0, n)
z2[(z2_star > cuts2[1]) & (z2_star <= cuts2[2])] <- 1
z2[z2_star > cuts2[2]] <- 2
z2[z1 == 0] <- NA
# Observable continuous outcome
y <- rep(NA, n)
y[which(z2 == 0)] <- y0_star[which(z2 == 0)]
y[which(z2 != 0)] <- y1_star[which(z2 != 0)]
y[which(z1 == 0)] <- NA
# Data
data <- data.frame(s1 = s1, s2 = s2, s3 = s3, s4 = s4,
z1 = z1, z2 = z2, y = y)
# ---
# Step 2
# Estimation of the parameters
# ---
# Assign the groups
groups <- matrix(c(1, 2,
1, 1,
1, 0,
0, -1),
byrow = TRUE, ncol = 2)
groups2 <- matrix(c(1, 1, 0, -1), ncol = 1)
# Estimate the model
model <- msel(list(z1 ~ s1 + s2 | s2 + s3,
z2 ~ s1 + s3),
list(y ~ s1 + s3 + s4),
groups = groups, groups2 = groups2,
data = data)
# ---
# Step 3
# Hypotheses testing
# ---
# Use t-test to test for each observation the hypothesis
# H0: P(z1 = 0, z2 = 2 | Xi) = 0
prob02_fn <- function(object)
{
val <- predict(object, group = c(1, 0))
return(val)
}
prob02_test <- test_msel(object = model, fn = prob02_fn, test = "t")
summary(prob02_test)
# Use t-test to test the hypothesis
# H0: E(y1|z1=0, z2=2) - E(y0|z1=0, z2=2)
ATE_fn <- function(object)
{
val1 <- predict(object, group = c(0, 2), group2 = 1)
val0 <- predict(object, group = c(0, 2), group2 = 0)
val <- mean(val1 - val0)
return(val)
}
ATE_test <- test_msel(object = model, fn = ATE_fn)
summary(ATE_test)
# Use Wald to test the hypothesis
# H0: beta1 = beta0
coef_fn <- function(object)
{
coef1 <- coef(object, regime = 1, type = "coef2")
coef0 <- coef(object, regime = 0, type = "coef2")
coef_difference <- coef1 - coef0
return(coef_difference)
}
coef_test <- test_msel(object = model, fn = coef_fn, test = "wald")
summary(coef_test)
# Use t-test to test for each 'k' the hypothesis
# H0: beta1k = beta0k
coef_test2 <- test_msel(object = model, fn = coef_fn, test = "t")
summary(coef_test2)
# Use Wald test to test the hypothesis
# H0: beta11 + beta12 - 0.5 = 0
# beta11 * beta13 - beta03 = 0
test_fn <- function(object)
{
coef1 <- coef(object, regime = 1, type = "coef2")
coef0 <- coef(object, regime = 0, type = "coef2")
val <- c(coef1[1] + coef1[2] - 0.5,
coef1[1] * coef1[3] - coef0[3])
return(val)
}
# classic Wald test
wald1 <- test_msel(object = model, fn = test_fn,
test = "wald", method = "classic")
summary(wald1)
# score bootstrap Wald test
wald2 <- test_msel(object = model, fn = test_fn,
test = "wald", method = "score")
summary(wald2)
# Replicate the latter test with the 2-step estimator
model2 <- msel(list(z1 ~ s1 + s2 | s2 + s3,
z2 ~ s1 + s3),
list(y ~ s1 + s3 + s4),
groups = groups, groups2 = groups2,
data = data, estimator = "2step")
# classic Wald test
wald1_2step <- test_msel(object = model2, fn = test_fn,
test = "wald", method = "classic")
summary(wald1_2step)
# score bootstrap Wald test
wald2_2step <- test_msel(object = model2, fn = test_fn,
test = "wald", method = "score")
summary(wald2_2step)
# }
Run the code above in your browser using DataLab