# \donttest{
# ---------------------------------------
# CPS data example
# ---------------------------------------
# Set seed for reproducibility
set.seed(123)
# Upload data
data(cps)
# Prepare the variable for education
cps$educ <- NA
cps$educ[cps$basic == 1] <- 0
cps$educ[cps$bachelor == 1] <- 1
cps$educ[cps$master == 1] <- 2
# Labor supply (probit) model
f_work <- work ~ age + I(age ^ 2) + bachelor + master + health +
slwage + nchild
model1 <- msel(f_work, data = cps)
summary(model1)
# Education choice (ordered probit) model
f_educ <- educ ~ age + I(age ^ 2) + sbachelor + smaster
model2 <- msel(f_educ, data = cps)
summary(model2)
# Education choice (multinomial logit) model
model3 <- msel(formula3 = f_educ, data = cps, type3 = "logit")
summary(model3)
# Education choice (multinomial probit) model
model4 <- msel(formula3 = f_educ, data = cps, type3 = "probit")
summary(model4)
# Labor supply with endogenous ordinal education
# treatment (recursive or hierarchical ordered probit) model
model5 <- msel(list(f_work, f_educ), data = cps)
summary(model5)
# Sample selection (on employment) Heckman's model
f_lwage <- lwage ~ age + I(age ^ 2) +
bachelor + master + health
model6 <- msel(f_work, f_lwage, data = cps)
summary(model6)
# Ordinal endogenous education treatment with non-random
# sample selection into employment
model7 <- msel(list(f_work, f_educ), f_lwage, data = cps)
summary(model7)
# Ordinal endogenous switching on education model with
# non-random selection into employment
groups <- cbind(c(1, 1, 1, 0, 0, 0),
c(0, 1, 2, 0, 1, 2))
groups2 <- matrix(c(0, 1, 2, -1, -1, -1), ncol = 1)
f_lwage2 <- lwage ~ age + I(age ^ 2) + health
model8 <- msel(list(f_work, f_educ), f_lwage2,
groups = groups, groups2 = groups2,
data = cps)
summary(model8)
# Multinomial endogenous switching on education model with
# non-random selection into employment
groups <- matrix(c(1, 1, 1, 0, 0, 0), ncol = 1)
groups2 <- matrix(c(0, 1, 2, -1, -1, -1), ncol = 1)
groups3 <- c(0, 1, 2, 0, 1, 2)
model9 <- msel(f_work, f_lwage2, f_educ,
groups = groups, groups2 = groups2,
groups3 = groups3, data = cps,
estimator = "2step")
summary(model9)
# }
# ---------------------------------------
# Simulated data example 1
# Ordered probit and other univariate
# ordered choice models
# --------------------------------------
# ---
# Step 1
# Simulation of the data
# ---
# Load required package
library("mnorm")
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
# Random errors
u <- rnorm(n = n, mean = 0, sd = 1)
# Coefficients
gamma <- c(-1, 2)
# Linear predictor (index)
li <- gamma[1] * w1 + gamma[2] * w2
# Latent variable
z_star <- li + u
# Cuts
cuts <- c(-1, 0.5, 2)
# Observable ordinal outcome
z <- rep(0, n)
z[(z_star > cuts[1]) & (z_star <= cuts[2])] <- 1
z[(z_star > cuts[2]) & (z_star <= cuts[3])] <- 2
z[z_star > cuts[3]] <- 3
table(z)
# Data
data <- data.frame(w1 = w1, w2 = w2, z = z)
# ---
# Step 2
# Estimation of the parameters
# ---
# Estimation
model <- msel(z ~ w1 + w2, data = data)
summary(model)
# \donttest{
# Compare the estimates and true values of the parameters
# regression coefficients
gamma_est <- coef(model, type = "coef", eq = 1)
cbind(true = gamma, estimate = gamma_est)
# cuts
cuts_est <- coef(model, type = "cuts", eq = 1)
cbind(true = cuts, estimate = cuts_est)
# ---
# Step 3
# Estimation of the probabilities and marginal effects
# ---
# Predict conditional probability of the dependent variable
# equals to 2 for every observation in a sample.
# P(z = 2 | w)
prob <- predict(model, group = 2, type = "prob")
head(prob)
# Calculate mean marginal effect of w2 on P(z = 1 | w)
mean(predict(model, group = 1, type = "prob", me = "w2"))
# Calculate probabilities and marginal effects
# for manually provided observations.
# new data
newdata <- data.frame(z = c(1, 1),
w1 = c(0.5, 0.2),
w2 = c(-0.3, 0.8))
# probability P(z = 2 | w)
predict(model, group = 2, type = "prob", newdata = newdata)
# linear predictor (index)
predict(model, type = "li", newdata = newdata)
# marginal effect of w1 on P(z = 2 | w)
predict(model, group = 2, type = "prob", newdata = newdata, me = "w1")
# marginal effect of w1 and w2 on P(z = 3 | w)
predict(model, group = 3, type = "prob",
newdata = newdata, me = c("w1", "w2"))
# marginal effect of w2 on the linear predictor (index)
predict(model, group = 2, type = "li", newdata = newdata, me = "w2")
# discrete marginal effect:
# P(z = 2 | w1 = 0.5, w2) - P(z = 2 | w1 = 0.2, w2)
predict(model, group = 2, type = "prob", newdata = newdata,
me = "w2", eps = c(0.2, 0.5))
# ---
# Step 4
# Ordered logit model
# ---
# Estimate ordered logit model with a unit variance
# that is just a matter of reparametrization i.e.,
# do not affect signs and significance of the coefficients
# and dot not affect at all the marginal effects
logit <- msel(z ~ w1 + w2, data = data, marginal = list("logistic" = 0))
summary(logit)
# Compare ordered probit and ordered logit models
# using Akaike and Bayesian information criteria
# AIC
c(probit = AIC(model), logit = AIC(logit))
# BIC
c(probit = BIC(model), logit = BIC(logit))
# Estimate some probabilities and marginal effects
# probability P(z = 1 | w)
predict(logit, group = 1, type = "prob", newdata = newdata)
# marginal effect of w2 on P(z = 1 | w)
predict(logit, group = 1, type = "prob", newdata = newdata, me = "w2")
# ---
# Step 5
# Semiparametric ordered choice model with
# Gallant and Nychka distribution
# ---
# Estimate semiparametric model
pgn <- msel(z ~ w1 + w2, data = data, marginal = list("PGN" = 2))
summary(pgn)
# Estimate some probabilities and marginal effects
# probability P(z = 3 | w)
predict(pgn, group = 3, type = "prob", newdata = newdata)
# marginal effect of w2 on P(z = 3 | w)
predict(pgn, group = 3, type = "prob", newdata = newdata, me = "w2")
# Test the normality assumption via the likelihood ratio test
summary(lrtest_msel(model, pgn))
# Test the normality assumption via the Wald test
test_fn <- function(object)
{
marginal_par <- coef(object, type = "marginal", eq = 1)
return(marginal_par)
}
test_result <- test_msel(object = pgn, test = "wald", fn = test_fn)
summary(test_result)
# }
# \donttest{
# ---------------------------------------
# Simulated data example 2
# Heteroscedastic ordered probit model
# ---------------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of the data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
w3 <- runif(n = n, min = -1, max = 1)
# Random errors
u <- rnorm(n, mean = 0, sd = 1)
# Coefficients of the mean equation
gamma <- c(-1, 2)
# Coefficients of the variance equation
gamma_het <- c(0.5, -1)
# Linear predictor (index) of the mean equation
li <- gamma[1] * w1 + gamma[2] * w2
# Linear predictor (index) of the variance equation
li_het <- gamma_het[1] * w2 + gamma_het[2] * w3
# Heteroscedastic stdandard deviation
# i.e., value of the variance equation
sd_het <- exp(li_het)
# Latent variable
z_star <- li + u * sd_het
# Cuts
cuts <- c(-1, 0.5, 2)
# Observable ordinal outcome
z <- rep(0, n)
z[(z_star > cuts[1]) & (z_star <= cuts[2])] <- 1
z[(z_star > cuts[2]) & (z_star <= cuts[3])] <- 2
z[z_star > cuts[3]] <- 3
table(z)
# Data
data <- data.frame(w1 = w1, w2 = w2, w3 = w3, z = z)
# ---
# Step 2
# Estimation of the parameters
# ---
# Estimation
model <- msel(z ~ w1 + w2 | w2 + w3,
data = data)
summary(model)
# Compare the estimates and true values of the parameters
# regression coefficients of the mean equation
gamma_est <- coef(model, type = "coef", eq = 1)
cbind(true = gamma, estimate = gamma_est)
# regression coefficients of the variance equation
gamma_het_est <- coef(model, type = "coef_var", eq = 1)
cbind(true = gamma_het, estimate = gamma_het_est)
# cuts
cuts_est <- coef(model, type = "cuts", eq = 1)
cbind(true = cuts, estimate = cuts_est)
# Likelihood-ratio test for the homoscedasticity
model0 <- msel(z ~ w1 + w2, data = data)
summary(lrtest_msel(model, model0))
# Wald test for the homoscedasticity
test_fn <- function(object)
{
val <- coef(object, type = "coef_var", eq = 1)
return(val)
}
test_result <- test_msel(model, test = "wald", fn = test_fn)
summary(test_result)
# ---
# Step 3
# Estimation of the probabilities and marginal effects
# ---
# Predict probability of the dependent variable
# equals to 2 for every observation in a sample
# P(z = 2 | w)
prob <- predict(model, group = 2, type = "prob")
head(prob)
# Calculate mean marginal effect of w2 on P(z = 1 | w)
mean(predict(model, group = 1, type = "prob", me = "w2"))
# Estimate conditional probabilities, linear predictors (indexes) and
# heteroscedastic standard deviations for manually
# provided observations.
# new data
newdata <- data.frame(z = c(1, 1),
w1 = c(0.5, 0.2),
w2 = c(-0.3, 0.8),
w3 = c(0.6, 0.1))
# probability P(z = 2 | w)
predict(model, group = 2, type = "prob", newdata = newdata)
# linear predictor (index)
predict(model, type = "li", newdata = newdata)
# standard deviation
predict(model, type = "sd", newdata = newdata)
# marginal effect of w3 on P(z = 3 | w)
predict(model, group = 3, type = "prob", newdata = newdata, me = "w3")
# marginal effect of w2 on the standard error
predict(model, group = 2, type = "sd", newdata = newdata, me = "w2")
# discrete marginal effect:
# P(Z = 2 | w1 = 0.5, w2) - P(Z = 2 | w1 = 0.2, w2)
predict(model, group = 2, type = "prob", newdata = newdata,
me = "w2", eps = c(0.2, 0.5))
# }
# \donttest{
# ---------------------------------------
# Simulated data example 3
# Bivariate ordered probit model with
# heteroscedastic second equation
# ---------------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of the data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
w3 <- runif(n = n, min = -1, max = 1)
w4 <- runif(n = n, min = -1, max = 1)
# Covariance matrix of random errors
rho <- 0.5
sigma <- matrix(c(1, rho,
rho, 1),
nrow = 2)
# Random errors
u <- mnorm::rmnorm(n = n, mean = c(0, 0), sigma = sigma)
# Coefficients
gamma1 <- c(-1, 2)
gamma2 <- c(1, 1.5)
# Coefficients of the variance equation
gamma2_het <- c(0.5, -1)
# Linear predictors (indexes)
li1 <- gamma1[1] * w1 + gamma1[2] * w2
li2 <- gamma2[1] * w2 + gamma2[2] * w3
# Linear predictor (index) of the variance equation
li2_het <- gamma2_het[1] * w2 + gamma2_het[2] * w4
# Heteroscedastic stdandard deviation
# i.e. value of variance equation
sd2_het <- exp(li2_het)
# Latent variables
z1_star <- li1 + u[, 1]
z2_star <- li2 + u[, 2] * sd2_het
# Cuts
cuts1 <- c(-1, 0.5, 2)
cuts2 <- c(-2, 0)
# Observable ordinal outcome
# first outcome
z1 <- rep(0, n)
z1[(z1_star > cuts1[1]) & (z1_star <= cuts1[2])] <- 1
z1[(z1_star > cuts1[2]) & (z1_star <= cuts1[3])] <- 2
z1[z1_star > cuts1[3]] <- 3
# second outcome
z2 <- rep(0, n)
z2[(z2_star > cuts2[1]) & (z2_star <= cuts2[2])] <- 1
z2[z2_star > cuts2[2]] <- 2
# distribution
table(z1, z2)
# Data
data <- data.frame(w1 = w1, w2 = w2, w3 = w3,
w4 = w4, z1 = z1, z2 = z2)
# ---
# Step 2
# Estimation of the parameters
# ---
# Estimation
model <- msel(list(z1 ~ w1 + w2,
z2 ~ w2 + w3 | w2 + w4),
data = data)
summary(model)
# Compare the estimates and true values of parameters
# regression coefficients of the first equation
gamma1_est <- coef(model, type = "coef", eq = 1)
cbind(true = gamma1, estimate = gamma1_est)
# regression coefficients of the second equation
gamma2_est <- coef(model, type = "coef", eq = 2)
cbind(true = gamma2, estimate = gamma2_est)
# cuts of the first equation
cuts1_est <- coef(model, type = "cuts", eq = 1)
cbind(true = cuts1, estimate = cuts1_est)
# cuts of the second equation
cuts2_est <- coef(model, type = "cuts", eq = 2)
cbind(true = cuts2, estimate = cuts2_est)
# correlation coefficients
rho_est <- coef(model, type = "cov1", eq = c(1, 2))
cbind(true = rho, estimate = rho_est)
# regression coefficients of the variance equation
gamma2_het_est <- coef(model, type = "coef_var", eq = 2)
cbind(true = gamma2_het, estimate = gamma2_het_est)
# ---
# Step 3
# Estimation of the probabilities and linear predictors (indexes)
# ---
# Predict probability P(z1 = 2, z2 = 0 | w)
prob <- predict(model, group = c(2, 0), type = "prob")
head(prob)
# Calculate mean marginal effect of w2 on:
# P(z1 = 1 | w)
mean(predict(model, group = c(1, -1), type = "prob", me = "w2"))
# P(z1 = 1, z2 = 0 | w)
mean(predict(model, group = c(1, 0), type = "prob", me = "w2"))
# Calculate conditional probabilities and linear predictors (indexes)
# for the manually provided observations.
# new data
newdata <- data.frame(z1 = c(1, 1),
z2 = c(1, 1),
w1 = c(0.5, 0.2),
w2 = c(-0.3, 0.8),
w3 = c(0.6, 0.1),
w4 = c(0.3, -0.5))
# probability P(z1 = 2, z2 = 0 | w)
predict(model, group = c(2, 0), type = "prob", newdata = newdata)
# linear predictor (index)
predict(model, type = "li", newdata = newdata)
# marginal probability P(z2 = 1 | w)
predict(model, group = c(-1, 1), type = "prob", newdata = newdata)
# marginal probability P(z1 = 3 | w)
predict(model, group = c(3, -1), type = "prob", newdata = newdata)
# conditional probability P(z1 = 2 | z2 = 0, w)
predict(model, group = c(2, 0), given_ind = 2,
type = "prob", newdata = newdata)
# conditional probability P(z2 = 1 | z1 = 3, w)
predict(model, group = c(3, 1), given_ind = 1,
type = "prob", newdata = newdata)
# marginal effect of w4 on P(Z2 = 2 | w)
predict(model, group = c(-1, 2),
type = "prob", newdata = newdata, me = "w4")
# marginal effect of w4 on P(z1 = 3, Z2 = 2 | w)
predict(model, group = c(3, 2),
type = "prob", newdata = newdata, me = "w4")
# marginal effect of w4 on P(z1 = 3 | z2 = 2, w)
predict(model, group = c(3, 2), given_ind = 2,
type = "prob", newdata = newdata, me = "w4")
# ---
# Step 4
# Replication under the non-random sample selection
# ---
# Suppose that z2 is unobservable when z1 = 1 or z1 = 3
z2[(z1 == 1) | (z1 == 3)] <- -1
data$z2 <- z2
# Replicate the estimation procedure
model <- msel(list(z1 ~ w1 + w2,
z2 ~ w2 + w3 | w2 + w4),
cov_type = "gop", data = data)
summary(model)
# Compare estimates and true values of the parameters
# regression coefficients of the first equation
gamma1_est <- coef(model, type = "coef", eq = 1)
cbind(true = gamma1, estimate = gamma1_est)
# regression coefficients of the second equation
gamma2_est <- coef(model, type = "coef", eq = 2)
cbind(true = gamma2, estimate = gamma2_est)
# cuts of the first equation
cuts1_est <- coef(model, type = "cuts", eq = 1)
cbind(true = cuts1, estimate = cuts1_est)
# cuts of the second equation
cuts2_est <- coef(model, type = "cuts", eq = 2)
cbind(true = cuts2, estimate = cuts2_est)
# correlation coefficient
rho_est <- coef(model, type = "cov1", eq = c(1, 2))
cbind(true = rho, estimate = rho_est)
# regression coefficients of the variance equation
gamma2_het_est <- coef(model, type = "coef_var", eq = 2)
cbind(true = gamma2_het, estimate = gamma2_het_est)
# ---
# Step 5
# Semiparametric model with marginal logistic and PGN distributions
# ---
# Estimate the model
model <- msel(list(z1 ~ w1 + w2,
z2 ~ w2 + w3 | w2 + w4),
data = data, marginal = list(PGN = 3, logistic = NULL))
summary(model)
# }
# \donttest{
# ---------------------------------------
# Simulated data example 4
# Heckman model with ordinal
# selection mechanism
# ---------------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of the data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
w3 <- runif(n = n, min = -1, max = 1)
# Random errors
rho <- 0.5
var.y <- 0.3
sd.y <- sqrt(var.y)
sigma <- matrix(c(1, rho * sd.y,
rho * sd.y, var.y),
nrow = 2)
errors <- mnorm::rmnorm(n = n, mean = c(0, 0), sigma = sigma)
u <- errors[, 1]
eps <- errors[, 2]
# Coefficients
gamma <- c(-1, 2)
beta <- c(1, -1, 1)
# Linear predictor (index)
li <- gamma[1] * w1 + gamma[2] * w2
li.y <- beta[1] + beta[2] * w1 + beta[3] * w3
# Latent variable
z_star <- li + u
y_star <- li.y + eps
# Cuts
cuts <- c(-1, 0.5, 2)
# Observable ordered outcome
z <- rep(0, n)
z[(z_star > cuts[1]) & (z_star <= cuts[2])] <- 1
z[(z_star > cuts[2]) & (z_star <= cuts[3])] <- 2
z[z_star > cuts[3]] <- 3
table(z)
# Observable continuous outcome such
# that outcome 'y' is observable only
# when 'z > 1' and unobservable otherwise
# i.e. when 'z <= 1' we code 'y' as 'NA'
y <- y_star
y[z <= 1] <- NA
# Data
data <- data.frame(w1 = w1, w2 = w2, w3 = w3,
z = z, y = y)
# ---
# Step 2
# Estimation of parameters
# ---
# Estimation
model <- msel(z ~ w1 + w2, y ~ w1 + w3, data = data)
summary(model)
# Compare estimates and true values of the parameters
# regression coefficients of the ordinal equation
gamma_est <- coef(model, type = "coef", eq = 1)
cbind(true = gamma, estimate = gamma_est)
# cuts
cuts_est <- coef(model, type = "cuts", eq = 1)
cbind(true = cuts, estimate = cuts_est)
# regression coefficients of the continuous equation
beta_est <- coef(model, type = "coef2", eq2 = 1, regime = 0)
cbind(true = beta, estimate = beta_est)
# variance
var.y_est <- coef(model, type = "var", eq2 = 1, regime = 0)
cbind(true = var.y, estimate = var.y_est)
# covariance
cov_est <- coef(model, type = "cov12", eq = 1, eq2 = 1)
cbind(true = rho * sd.y, estimate = cov_est)
# ---
# Step 3
# Estimation of the expectations and marginal effects
# ---
# New data
newdata <- data.frame(z = 1,
y = 1,
w1 = 0.1,
w2 = 0.2,
w3 = 0.3)
# Predict the unconditional expectation of the continuous outcome
# E(y | w)
predict(model, group = -1, group2 = 0, newdata = newdata)
# Predict the conditional expectations of the continuous outcome
# E(y | z = 2, w)
predict(model, group = 2, group2 = 0, newdata = newdata)
# E(y | z = 0, w)
predict(model, group = 0, group2 = 0, newdata = newdata)
# ---
# Step 4
# Classic Heckman's two-step estimation procedure
# ---
# Estimate the model by using the two-step estimator
model_ts <- msel(z ~ w1 + w2, y ~ w1 + w3,
data = data, estimator = "2step")
summary(model_ts)
# Check the estimates accuracy
tbl <- cbind(true = beta,
ml = model$coef2[[1]][1, ],
twostep = model_ts$coef2[[1]][1, -4])
print(tbl)
# ---
# Step 5
# Semiparametric estimation procedure
# ---
# Estimate the model using Lee's method
# assuming logistic distribution of the
# random errors of the selection equation
model_Lee <- msel(z ~ w1 + w2,
y ~ w1 + w3,
data = data, estimator = "2step",
marginal = list(logistic = NULL))
summary(model_Lee)
# One step estimation is also available as well
# as more complex marginal distributions.
# Consider random errors in selection equation
# following PGN distribution with three parameters.
model_sp <- msel(z ~ w1 + w2,
y ~ w1 + w3,
data = data, marginal = list(PGN = 3))
summary(model_sp)
# To additionally relax normality assumption of
# random error of continuous equation it is possible
# to use Newey's two-step procedure.
model_Newey <- msel(z ~ w1 + w2,
y ~ w1 + w3,
data = data, marginal = list(logistic = 0),
estimator = "2step", degrees = 2)
summary(model_Newey)
# }
# \donttest{
# ---------------------------------------
# Simulated data example 5
# Endogenous switching model with
# heteroscedastic ordered selection
# mechanism
# ---------------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of the data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
w3 <- runif(n = n, min = -1, max = 1)
# Random errors
rho_0 <- -0.8
rho_1 <- -0.7
var2_0 <- 0.9
var2_1 <- 1
sd_y_0 <- sqrt(var2_0)
sd_y_1 <- sqrt(var2_1)
cor_y_01 <- 0.7
cov2_01 <- sd_y_0 * sd_y_1 * cor_y_01
cov2_z_0 <- rho_0 * sd_y_0
cov2_z_1 <- rho_1 * sd_y_1
sigma <- matrix(c(1, cov2_z_0, cov2_z_1,
cov2_z_0, var2_0, cov2_01,
cov2_z_1, cov2_01, var2_1),
nrow = 3)
errors <- mnorm::rmnorm(n = n, mean = c(0, 0, 0), sigma = sigma)
u <- errors[, 1]
eps_0 <- errors[, 2]
eps_1 <- errors[, 3]
# Coefficients
gamma <- c(-1, 2)
gamma_het <- c(0.5, -1)
beta_0 <- c(1, -1, 1)
beta_1 <- c(2, -1.5, 0.5)
# Linear predictor (index) of the ordinal equation
# mean
li <- gamma[1] * w1 + gamma[2] * w2
# variance
li_het <- gamma_het[1] * w2 + gamma_het[2] * w3
# Linear predictor (index) of the continuous equation
# regime 0
li_y_0 <- beta_0[1] + beta_0[2] * w1 + beta_0[3] * w3
# regime 1
li_y_1 <- beta_1[1] + beta_1[2] * w1 + beta_1[3] * w3
# Latent variable
z_star <- li + u * exp(li_het)
y_0_star <- li_y_0 + eps_0
y_1_star <- li_y_1 + eps_1
# Cuts
cuts <- c(-1, 0.5, 2)
# Observable ordinal outcome
z <- rep(0, n)
z[(z_star > cuts[1]) & (z_star <= cuts[2])] <- 1
z[(z_star > cuts[2]) & (z_star <= cuts[3])] <- 2
z[z_star > cuts[3]] <- 3
table(z)
# Observable continuous outcome such that y' is
# observable in regime 1 when 'z = 1',
# observable in regime 0 when 'z <= 1',
# unobservable when 'z = 0'
y <- rep(NA, n)
y[z == 0] <- NA
y[z == 1] <- y_0_star[z == 1]
y[z > 1] <- y_1_star[z > 1]
# Data
data <- data.frame(w1 = w1, w2 = w2, w3 = w3,
z = z, y = y)
# ---
# Step 2
# Estimation of the parameters
# ---
# Assign groups
groups <- matrix(0:3, ncol = 1)
groups2 <- matrix(c(-1, 0, 1, 1), ncol = 1)
# Estimation
model <- msel(list(z ~ w1 + w2 | w2 + w3),
list(y ~ w1 + w3),
groups = groups, groups2 = groups2,
data = data)
summary(model)
# Compare estimates and true values of parameters
# regression coefficients of the ordinal equation
gamma_est <- coef(model, type = "coef", eq = 1)
gamma_het_est <- coef(model, type = "coef_var", eq = 1)
cbind(true = gamma, estimate = gamma_est)
cbind(true = gamma_het, estimate = gamma_het_est)
# cuts
cuts_est <- coef(model, type = "cuts", eq = 1)
cbind(true = cuts, estimate = cuts_est)
# regression coefficients of the continuous equation
beta_0_test <- coef(model, type = "coef2", eq2 = 1, regime = 0)
beta_1_test <- coef(model, type = "coef2", eq2 = 1, regime = 1)
cbind(true = beta_0, estimate = beta_0_test)
cbind(true = beta_1, estimate = beta_1_test)
# variances
var2_0_est <- coef(model, type = "var", eq2 = 1, regime = 0)
var2_1_est <- coef(model, type = "var", eq2 = 1, regime = 1)
cbind(true = c(var2_0, var2_1), estimate = c(var2_0_est, var2_1_est))
# covariances between the random errors
cov2_z_0_est <- coef(model, type = "cov12", eq = 1, eq2 = 1, regime = 0)
cov2_z_1_est <- coef(model, type = "cov12", eq = 1, eq2 = 1, regime = 1)
cbind(true = c(cov2_z_0, cov2_z_1),
estimate = c(cov2_z_0_est, cov2_z_1_est))
# ---
# Step 3
# Estimation of the expectations and marginal effects
# ---
# New data
newdata <- data.frame(z = 1, y = 1,
w1 = 0.1, w2 = 0.2, w3 = 0.3)
# Predict the unconditional expectation of the
# continuous outcome E(yr | w)
# regime 0
predict(model, group = -1, group2 = 0, newdata = newdata)
# regime 1
predict(model, group = -1, group2 = 1, newdata = newdata)
# Predict the conditional expectations of the continuous outcome
# given condition 'z == 0' for regime 1 i.e., E(y1 | z = 0, w)
predict(model, group = 0, group2 = 1, newdata = newdata)
# }
# \donttest{
# ---------------------------------------
# Simulated data example 6
# Endogenous switching model with
# multivariate heteroscedastic ordered
# selection mechanism
# ---------------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of the data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
w3 <- runif(n = n, min = -1, max = 1)
w4 <- runif(n = n, min = -1, max = 1)
# Random errors
rho_z1_z2 <- 0.5
rho_y0_z1 <- 0.6
rho_y0_z2 <- 0.7
rho_y1_z1 <- 0.65
rho_y1_z2 <- 0.75
var20 <- 0.9
var21 <- 1
sd_y0 <- sqrt(var20)
sd_y1 <- sqrt(var21)
cor_y01 <- 0.7
cov201 <- sd_y0 * sd_y1 * cor_y01
cov20_z1 <- rho_y0_z1 * sd_y0
cov21_z1 <- rho_y1_z1 * sd_y1
cov20_z2 <- rho_y0_z2 * sd_y0
cov21_z2 <- rho_y1_z2 * sd_y1
sigma <- matrix(c(1, rho_z1_z2, cov20_z1, cov21_z1,
rho_z1_z2, 1, cov20_z2, cov21_z2,
cov20_z1, cov20_z2, var20, cov201,
cov21_z1, cov21_z2, cov201, var21),
nrow = 4)
errors <- mnorm::rmnorm(n = n, mean = c(0, 0, 0, 0), sigma = sigma)
u1 <- errors[, 1]
u2 <- errors[, 2]
eps0 <- errors[, 3]
eps1 <- errors[, 4]
# Coefficients
gamma1 <- c(-1, 2)
gamma1_het <- c(0.5, -1)
gamma2 <- c(1, 1)
beta0 <- c(1, -1, 1, -1.2)
beta1 <- c(2, -1.5, 0.5, 1.2)
# Linear index (predictor) of the ordinal equation
# mean
li1 <- gamma1[1] * w1 + gamma1[2] * w2
li2 <- gamma2[1] * w1 + gamma2[2] * w3
# variance
li1_het <- gamma1_het[1] * w2 + gamma1_het[2] * w3
# Linear predictor (index) of the continuous equation
# regime 0
li_y0 <- beta0[1] + beta0[2] * w1 + beta0[3] * w3 + beta0[4] * w4
# regime 1
li_y1 <- beta1[1] + beta1[2] * w1 + beta1[3] * w3 + beta1[4] * w4
# Latent variables
z1_star <- li1 + u1 * exp(li1_het)
z2_star <- li2 + u2
y0_star <- li_y0 + eps0
y1_star <- li_y1 + eps1
# Cuts
cuts1 <- c(-1, 1)
cuts2 <- c(0)
# Observable ordered outcome
# first
z1 <- rep(0, n)
z1[(z1_star > cuts1[1]) & (z1_star <= cuts1[2])] <- 1
z1[z1_star > cuts1[2]] <- 2
# second
z2 <- rep(0, n)
z2[z2_star > cuts2[1]] <- 1
table(z1, z2)
# Observable continuous outcome such
# that outcome 'y' is
# in regime 0 when 'z1 == 1',
# in regime 1 when 'z1 == 0' or 'z1 == 2',
# unobservable when 'z2 == 0'
y <- rep(NA, n)
y[z1 == 1] <- y0_star[z1 == 1]
y[z1 != 1] <- y1_star[z1 != 1]
y[z2 == 0] <- NA
# Data
data <- data.frame(w1 = w1, w2 = w2, w3 = w3, w4 = w4,
z1 = z1, z2 = z2, y = y)
# ---
# Step 2
# Estimation of the parameters
# ---
# Assign groups
groups <- matrix(c(0, 0,
0, 1,
1, 0,
1, 1,
2, 0,
2, 1),
byrow = TRUE, ncol = 2)
groups2 <- matrix(c(-1, 1, -1, 0, -1, 1), ncol = 1)
# Estimation
model <- msel(list(z1 ~ w1 + w2 | w2 + w3,
z2 ~ w1 + w3),
y ~ w1 + w3 + w4,
groups = groups, groups2 = groups2,
data = data)
summary(model)
# Compare estimates and true values of the parameters
# regression coefficients of the first ordered equation
gamma1_est <- coef(model, type = "coef", eq = 1)
gamma1__het_est <- coef(model, type = "coef_var", eq = 1)
cbind(true = gamma1, estimate = gamma1_est)
cbind(true = gamma1_het, estimate = gamma1__het_est)
# regression coefficients of the second ordered equation
gamma2_est <- coef(model, type = "coef", eq = 2)
cbind(true = gamma2, estimate = gamma2_est)
# cuts
cuts1_est <- coef(model, type = "cuts", eq = 1)
cuts2_est <- coef(model, type = "cuts", eq = 2)
cbind(true = cuts1, estimate = cuts1_est)
cbind(true = cuts2, estimate = cuts2_est)
# regression coefficients of the continuous equation
beta0_est <- coef(model, type = "coef2", eq2 = 1, regime = 0)
beta1_est <- coef(model, type = "coef2", eq2 = 1, regime = 1)
cbind(true = beta0, estimate = beta0_est)
cbind(true = beta1, estimate = beta1_est)
# variances
var20_est <- coef(model, type = "var", eq2 = 1, regime = 0)
var21_est <- coef(model, type = "var", eq2 = 1, regime = 1)
cbind(true = c(var20, var21), estimate = c(var20_est, var21_est))
# covariances
cov_y0_z1_est <- coef(model, type = "cov12", eq = 1, eq2 = 1, regime = 0)
cov_y0_z2_est <- coef(model, type = "cov12", eq = 2, eq2 = 1, regime = 0)
cov_y1_z1_est <- coef(model, type = "cov12", eq = 1, eq2 = 1, regime = 1)
cov_y1_z2_est <- coef(model, type = "cov12", eq = 2, eq2 = 1, regime = 1)
cbind(true = c(cov20_z1, cov20_z2),
estimate = c(cov_y0_z1_est, cov_y0_z2_est))
cbind(true = c(cov21_z1, cov21_z2),
estimate = c(cov_y1_z1_est, cov_y1_z2_est))
# ---
# Step 3
# Estimation of the expectations and marginal effects
# ---
# New data
newdata <- data.frame(z1 = 1, z2 = 1, y = 1,
w1 = 0.1, w2 = 0.2, w3 = 0.3, w4 = 0.4)
# Predict the unconditional expectation of the continuous outcome
# regime 0
predict(model, group = c(-1, -1), group2 = 0, newdata = newdata)
# regime 1
predict(model, group = c(-1, -1), group2 = 1, newdata = newdata)
# Predict the conditional expectations of the continuous outcome
# E(y1 | z1 = 2, z2 = 1, w)
predict(model, group = c(2, 1), group2 = 1, newdata = newdata)
# Marginal effect of w3 on E(y1 | z1 = 2, z2 = 1, w)
predict(model, group = c(2, 1), group2 = 1, newdata = newdata, me = "w3")
# ---
# Step 4
# Two-step estimation procedure
# ---
# For a comparison reasons let's estimate the model
# via the least squares
model.ls.0 <- lm(y ~ w1 + w3 + w4,
data = data[!is.na(data$y) & (data$z1 == 1), ])
model.ls.1 <- lm(y ~ w1 + w3 + w4,
data = data[!is.na(data$y) & (data$z1 != 1), ])
# Apply the two-step procedure
model_ts <- msel(list(z1 ~ w1 + w2 | w2 + w3,
z2 ~ w1 + w3),
y ~ w1 + w3 + w4,
groups = groups, groups2 = groups2,
estimator = "2step", data = data)
summary(model_ts)
# Use the two-step procedure with logistic marginal distributions
# that is multivariate generalization of the Lee's method
model_Lee <- msel(list(z1 ~ w1 + w2 | w2 + w3,
z2 ~ w1 + w3),
y ~ w1 + w3 + w4,
marginal = list(logistic = NULL, logistic = NULL),
groups = groups, groups2 = groups2,
estimator = "2step", data = data)
# Apply the Newey's method
model_Newey <- msel(list(z1 ~ w1 + w2 | w2 + w3,
z2 ~ w1 + w3),
y ~ w1 + w3 + w4,
marginal = list(logistic = NULL, logistic = NULL),
degrees = c(2, 3), groups = groups, groups2 = groups2,
estimator = "2step", data = data)
# Compare accuracy of the methods
# beta0
tbl0 <- cbind(true = beta0,
ls = coef(model.ls.0),
ml = model$coef2[[1]][1, 1:length(beta0)],
twostep = model_ts$coef2[[1]][1, 1:length(beta0)],
Lee = model_Lee$coef2[[1]][1, 1:length(beta0)],
Newey = model_Newey$coef2[[1]][1, 1:length(beta0)])
print(tbl0)
# beta1
tbl1 <- cbind(true = beta1,
ls = coef(model.ls.1),
ml = model$coef2[[1]][2, 1:length(beta1)],
twostep = model_ts$coef2[[1]][2, 1:length(beta1)],
Lee = model_Lee$coef2[[1]][2, 1:length(beta1)],
Newey = model_Newey$coef2[[1]][2, 1:length(beta1)])
print(tbl1)
# }
# \donttest{
# ---------------------------------------
# Simulated data example 7
# Endogenous multivariate switching model
# with multivariate heteroscedastic
# ordered selection mechanism
# ---------------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of the data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
w3 <- runif(n = n, min = -1, max = 1)
w4 <- runif(n = n, min = -1, max = 1)
w5 <- runif(n = n, min = -1, max = 1)
# Random errors
var_y0 <- 0.9
var_y1 <- 1
var_g0 <- 1.1
var_g1 <- 1.2
var_g2 <- 1.3
A <- rWishart(1, 7, diag(7))[, , 1]
B <- diag(sqrt(c(1, 1, var_y0, var_y1,
var_g0, var_g1, var_g2)))
sigma <- B %*% cov2cor(A) %*% B
errors <- mnorm::rmnorm(n = n, mean = rep(0, nrow(sigma)), sigma = sigma)
u1 <- errors[, 1]
u2 <- errors[, 2]
eps0_y <- errors[, 3]
eps1_y <- errors[, 4]
eps0_g <- errors[, 5]
eps1_g <- errors[, 6]
eps2_g <- errors[, 7]
# Coefficients
gamma1 <- c(-1, 2)
gamma1_het <- c(0.5, -1)
gamma2 <- c(1, 1)
beta0_y <- c(1, -1, 1, -1.2)
beta1_y <- c(2, -1.5, 0.5, 1.2)
beta0_g <- c(-1, 1, 1, 1)
beta1_g <- c(1, -1, 1, 1)
beta2_g <- c(1, 1, -1, 1)
# Linear predictor (index) of the ordinal equation
# mean
li1 <- gamma1[1] * w1 + gamma1[2] * w2
li2 <- gamma2[1] * w1 + gamma2[2] * w3
# variance
li1_het <- gamma1_het[1] * w2 + gamma1_het[2] * w3
# Linear predictor (index) of the first continuous equation
# regime 0
li_y0 <- beta0_y[1] + beta0_y[2] * w1 + beta0_y[3] * w3 + beta0_y[4] * w4
# regime 1
li_y1 <- beta1_y[1] + beta1_y[2] * w1 + beta1_y[3] * w3 + beta1_y[4] * w4
# Linear predictor (index) of the second continuous equation
# regime 0
li_g0 <- beta0_g[1] + beta0_g[2] * w2 + beta0_g[3] * w3 + beta0_g[4] * w5
# regime 1
li_g1 <- beta1_g[1] + beta1_g[2] * w2 + beta1_g[3] * w3 + beta1_g[4] * w5
# regime 2
li_g2 <- beta2_g[1] + beta2_g[2] * w2 + beta2_g[3] * w3 + beta2_g[4] * w5
# Latent variables
z1_star <- li1 + u1 * exp(li1_het)
z2_star <- li2 + u2
y0_star <- li_y0 + eps0_y
y1_star <- li_y1 + eps1_y
g0_star <- li_g0 + eps0_g
g1_star <- li_g1 + eps1_g
g2_star <- li_g2 + eps2_g
# Cuts
cuts1 <- c(-1, 1)
cuts2 <- c(0)
# Observable ordered outcome
# first
z1 <- rep(0, n)
z1[(z1_star > cuts1[1]) & (z1_star <= cuts1[2])] <- 1
z1[z1_star > cuts1[2]] <- 2
# second
z2 <- rep(0, n)
z2[z2_star > cuts2[1]] <- 1
table(z1, z2)
# Observable continuous outcome such that outcome 'y' is
# in regime 0 when 'z1 == 1',
# in regime 1 when 'z1 == 0' or 'z1 == 2',
# unobservable when 'z2 == 0'
y <- rep(NA, n)
y[z1 == 1] <- y0_star[z1 == 1]
y[z1 != 1] <- y1_star[z1 != 1]
y[z2 == 0] <- NA
#' # Observable continuous outcome such
# that outcome 'g' is
# in regime 0 when 'z1 == z2',
# in regime 1 when 'z1 > z2',
# in regime 2 when 'z1 < z2',
g <- rep(NA, n)
g[z1 == z2] <- g0_star[z1 == z2]
g[z1 > z2] <- g1_star[z1 > z2]
g[z1 < z2] <- g2_star[z1 < z2]
# Data
data <- data.frame(w1 = w1, w2 = w2, w3 = w3, w4 = w4, w5 = w5,
z1 = z1, z2 = z2, y = y, g = g)
# ---
# Step 2
# Estimation of the parameters
# ---
# Assign groups
groups <- matrix(c(0, 0,
0, 1,
1, 0,
1, 1,
2, 0,
2, 1),
byrow = TRUE, ncol = 2)
# Assign groups 2
# prepare the matrix
groups2 <- matrix(NA, nrow = nrow(groups), ncol = 2)
# fill the matrix
groups2[groups[, 1] == 1, 1] <- 0
groups2[(groups[, 1] == 0) | (groups[, 1] == 2), 1] <- 1
groups2[groups[, 2] == 0, 1] <- -1
groups2[groups[, 1] == groups[, 2], 2] <- 0
groups2[groups[, 1] > groups[, 2], 2] <- 1
groups2[groups[, 1] < groups[, 2], 2] <- 2
# The structure of the model
cbind(groups, groups2)
# Estimation
model <- msel(list(z1 ~ w1 + w2 | w2 + w3, z2 ~ w1 + w3),
list(y ~ w1 + w3 + w4, g ~ w2 + w3 + w5),
groups = groups, groups2 = groups2, data = data)
summary(model)
# Compare estimates and true values of the parameters
# regression coefficients of the first ordered equation
gamma1_est <- coef(model, type = "coef", eq = 1)
gamma1_het_est <- coef(model, type = "coef_var", eq = 1)
cbind(true = gamma1, estimate = gamma1_est)
cbind(true = gamma1_het, estimate = gamma1_het_est)
# regression coefficients of the second ordered equation
gamma2_est <- coef(model, type = "coef", eq = 2)
cbind(true = gamma2, estimate = gamma2_est)
# cuts
cuts1_est <- coef(model, type = "cuts", eq = 1)
cuts2_est <- coef(model, type = "cuts", eq = 2)
cbind(true = cuts1, estimate = cuts1_est)
cbind(true = cuts2, estimate = cuts2_est)
# regression coefficients of the first continuous equation
beta0_y_est <- coef(model, type = "coef2", eq2 = 1, regime = 0)
beta1_y_est <- coef(model, type = "coef2", eq2 = 1, regime = 1)
cbind(true = beta0_y, estimate = beta0_y_est)
cbind(true = beta1_y, estimate = beta1_y_est)
# regression coefficients of the second continuous equation
beta0_g_est <- coef(model, type = "coef2", eq2 = 2, regime = 0)
beta1_g_est <- coef(model, type = "coef2", eq2 = 2, regime = 1)
beta2_g_est <- coef(model, type = "coef2", eq2 = 2, regime = 2)
cbind(true = beta0_g, estimate = beta0_g_est)
cbind(true = beta1_g, estimate = beta1_g_est)
cbind(true = beta2_g, estimate = beta2_g_est)
# variances of the first continuous equation
var_y0_est <- coef(model, type = "var", eq2 = 1, regime = 0)
var_y1_est <- coef(model, type = "var", eq2 = 1, regime = 1)
cbind(true = c(var_y0, var_y1), estimate = c(var_y0_est, var_y1_est))
# variances of the second continuous equation
var_g0_est <- coef(model, type = "var", eq2 = 2, regime = 0)
var_g1_est <- coef(model, type = "var", eq2 = 2, regime = 1)
var_g2_est <- coef(model, type = "var", eq2 = 2, regime = 2)
cbind(true = c(var_g0, var_g1, var_g2),
estimate = c(var_g0_est, var_g1_est, var_g2_est))
# correlation between the ordinal equations
sigma12_est <- coef(model, type = "cov1", eq = c(1, 2))
cbind(true = c(sigma[1, 2]), estimate = sigma12_est)
# covariances between the continuous and ordinal equations
cbind(true = sigma[1:2, 3], estimate = model$cov2[[1]][1, ])
cbind(true = sigma[1:2, 4], estimate = model$cov2[[1]][2, ])
cbind(true = sigma[1:2, 5], estimate = model$cov2[[2]][1, ])
cbind(true = sigma[1:2, 6], estimate = model$cov2[[2]][2, ])
cbind(true = sigma[1:2, 7], estimate = model$cov2[[2]][3, ])
# covariances between the continuous equations
sigma2_est <- coef(model, type = "cov2")[[1]]
cbind(true = c(sigma[4, 7], sigma[3, 5], sigma[4, 6]),
estimate = sigma2_est)
# ---
# Step 3
# Estimation of the expectations and marginal effects
# ---
# New data
newdata <- data.frame(z1 = 1, z2 = 1, y = 1, g = 1,
w1 = 0.1, w2 = 0.2, w3 = 0.3, w4 = 0.4, w5 = 0.5)
# Predict unconditional expectation of the dependent variable
# regime 0 for 'y' and regime 1 for 'g' i.e. E(y0 | w), E(g1 | w)
predict(model, group = c(-1, -1), group2 = c(0, 1), newdata = newdata)
# Predict conditional expectations of the dependent variable
# E(y0 | z1 = 2, z2 = 1, w), E(g1 | z1 = 2, z2 = 1, w)
predict(model, group = c(2, 1), group2 = c(0, 1), newdata = newdata)
# Marginal effect of w3 on
# E(y1 | z1 = 2, z2 = 1, w) and E(g1 | z1 = 2, z2 = 1, w)
predict(model, group = c(2, 1), group2 = c(0, 1),
newdata = newdata, me = "w3")
# ---
# Step 4
# Two-step estimation procedure
# ---
# Provide manually selectivity terms
model2 <- msel(list(z1 ~ w1 + w2 | w2 + w3, z2 ~ w1 + w3),
list(y ~ w1 + w3 + w4 +
lambda1 + lambda2 + I(lambda1 * lambda2),
g ~ w2 + w3 + w5 + lambda1 + lambda2),
groups = groups, groups2 = groups2,
data = data, estimator = "2step")
summary(model2)
# }
# \donttest{
# ---------------------------------------
# Simulated data example 8
# Multinomial endogenous switching and
# selection model (probit)
# ---------------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of the data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 10000
# Random errors
# variances and correlations
sd.z2 <- sqrt(0.9)
cor.z <- 0.3
sd.y0 <- sqrt(2)
cor.z1y0 <- 0.4
cor.z2y0 <- 0.7
sd.y1 <- sqrt(1.8)
cor.z1y1 <- 0.3
cor.z2y1 <- 0.6
cor.y <- 0.8
# the covariance matrix
sigma <- matrix(c(
1, cor.z * sd.z2, cor.z1y0 * sd.y0, cor.z1y1 * sd.y1,
cor.z * sd.z2, sd.z2 ^ 2, cor.z2y0 * sd.z2 * sd.y0, cor.z2y1 * sd.z2 * sd.y1,
cor.z1y0 * sd.y0, cor.z2y0 * sd.z2 * sd.y0, sd.y0 ^ 2, cor.y * sd.y0 * sd.y1,
cor.z1y1 * sd.y1, cor.z2y1 * sd.z2 * sd.y1, cor.y * sd.y0 * sd.y1, sd.y1 ^ 2),
ncol = 4, byrow = TRUE)
colnames(sigma) <- c("z1", "z2", "y0", "y1")
rownames(sigma) <- colnames(sigma)
# Simulate the random errors
errors <- rmnorm(n, c(0, 0, 0, 0), sigma)
u <- errors[, 1:2]
eps <- errors[, 3:4]
# Regressors (covariates)
x1 <- runif(n, -1, 1)
x2 <- runif(n, -1, 1)
x3 <- (x2 + runif(n, -1, 1)) / 2
W <- cbind(1, x1, x2)
X <- cbind(1, x1, x3)
# Coefficients
gamma0 <- c(0.1, 1, 1)
gamma1 <- c(0.2, -1.2, 0.8)
beta0 <- c(1, -3, 4)
beta1 <- c(1, 4, -3)
# Linear predictors (indexes)
z1.li <- W %*% gamma0
z2.li <- W %*% gamma1
y0.li <- X %*% beta0
y1.li <- X %*% beta1
# Latent variables
z1.star <- z1.li + u[, 1]
z2.star <- z2.li + u[, 2]
y0.star <- y0.li + eps[, 1]
y1.star <- y1.li + eps[, 2]
# Obvservable variable as a dummy
z1 <- (z1.star > z2.star) & (z1.star > 0)
z2 <- (z2.star > z1.star) & (z2.star > 0)
z3 <- (z1 != 1) & (z2 != 1)
# Observable multinomial variable
z <- rep(0, n)
z[z1] <- 0
z[z2] <- 1
z[z3] <- 2
table(z)
# Make unobservable values of the continuous outcome
y <- rep(NA, n)
y[z == 1] <- y0.star[z == 1]
y[z == 2] <- y1.star[z == 2]
# Data
data <- data.frame(z = z, y = y, x1 = x1, x2 = x2, x3 = x3)
# ---
# Step 2
# Estimation of the parameters
# ---
# Define the groups
groups3 <- c(0, 1, 2)
groups2 <- matrix(c(-1, 0, 1), ncol = 1)
# Two-step method
model <- msel(formula3 = z ~ x1 + x2, formula2 = y ~ x1 + x3,
groups3 = groups3, groups2 = groups2,
data = data, estimator = "2step",
type3 = "probit")
summary(model)
# Compare estimates and true values of parameters
# regression coefficients of the continuous equation
beta0_est <- coef(model, type = "coef2", eq2 = 1, regime = 0)
beta1_est <- coef(model, type = "coef2", eq2 = 1, regime = 1)
cbind(true = beta0, est = beta0_est[1:length(beta0)])
cbind(true = beta1, est = beta1_est[1:length(beta1)])
# regression coefficients of the multinomial equations
gamma0_est <- coef(model, type = "coef3", eq3 = 0)
gamma1_est <- coef(model, type = "coef3", eq3 = 1)
cbind(true = gamma0, est = gamma0_est)
cbind(true = gamma1, est = gamma1_est)
# compare the covariances between
# z1 and z2
cbind(true = cor.z * sd.z2,
est = coef(model, type = "cov3", eq3 = c(0, 1)))
# z1 and y0
cbind(true = cor.z1y0 * sd.y0,
est = beta0_est["lambda1_mn"])
# z2 and y0
cbind(true = cor.z2y0 * sd.y0,
est = beta0_est["lambda2_mn"])
# z1 and y1
cbind(true = cor.z1y1 * sd.y1,
est = beta1_est["lambda1_mn"])
# z2 and y1
cbind(true = cor.z2y1 * sd.y1,
est = beta1_est["lambda2_mn"])
# ---
# Step 3
# Predictions and marginal effects
# ---
# Unconditional expectation E(y1 | w) for every observation in a sample
predict(model, type = "val", group2 = 1, group3 = -1)
# Marginal effect of x1 on conditional expectation E(y0 | z = 1, w)
# for every observation in a sample
predict(model, type = "val", group2 = 0, group3 = 1, me = "x1")
# Calculate predictions and marginal effects
# for manually provided observations
# using aforementioned models.
newdata <- data.frame(z = c(1, 1),
y = c(1, 1),
x1 = c(0.5, 0.2),
x2 = c(-0.3, 0.8),
x3 = c(0.6, -0.7))
# Unconditional expectation E(y0 | w)
predict(model, type = "val", group2 = 0, group3 = -1, newdata = newdata)
# Conditional expectation E(y1 | z=2, w)
predict(model, type = "val", group2 = 1, group3 = 2, newdata = newdata)
# Marginal effect of x2 on E(y0 | z = 1, w)
predict(model, type = "val", group2 = 0, group3 = 1,
me = "x2", newdata = newdata)
# ---
# Step 4
# Multinomial logit selection
# ---
# Two-step method
model2 <- msel(formula3 = z ~ x1 + x2, formula2 = y ~ x1 + x3,
groups3 = groups3, groups2 = groups2,
data = data, estimator = "2step",
type3 = "logit")
summary(model2)
# Compare the estimates
beta0_est2 <- coef(model2, type = "coef2", eq2 = 1, regime = 0)[]
beta1_est2 <- coef(model2, type = "coef2", eq2 = 1, regime = 1)
# beta0 coefficients
cbind(true = beta0, probit = beta0_est[1:3], logit = beta0_est2[1:3])
# beta1 coefficients
cbind(true = beta1, probit = beta1_est[1:3], logit = beta1_est2[1:3])
# }
# \donttest{
# ---------------------------------------
# Simulated data example 9
# Multinomial endogenous switching and
# selection model (logit)
# ---------------------------------------
# ---
# Step 1
# Simulation of the data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 10000
# Random errors
u <- matrix(-log(-log(runif(n * 3))), nrow = n, ncol = 3)
tau0 <- matrix(c(0.6, -0.4, 0.3), ncol = 1)
tau1 <- matrix(c(-0.3, 0.5, 0.2), ncol = 1)
eps0 <- (u - 0.57721656649) %*% tau0 + rnorm(n)
eps1 <- (u - 0.57721656649) %*% tau1 + rnorm(n)
# Regressors (covariates)
x1 <- runif(n = n, min = -1, max = 1)
x2 <- runif(n = n, min = -1, max = 1)
x3 <- runif(n = n, min = -1, max = 1)
# Coefficients
gamma.0 <- c(0.2, -2, 2)
gamma.1 <- c(0.1, 2, -2)
beta.0 <- c(2, 2, 2)
beta.1 <- c(1, -2, 2)
# Linear predictors (indexes)
z0.li <- gamma.0[1] + gamma.0[2] * x1 + gamma.0[3] * x2
z1.li <- gamma.1[1] + gamma.1[2] * x1 + gamma.1[3] * x2
# Latent variables
z0.star <- z0.li + u[, 1]
z1.star <- z1.li + u[, 2]
z2.star <- u[, 3]
y0.star <- beta.0[1] + beta.0[2] * x1 + beta.0[3] * x3 + eps0
y1.star <- beta.1[1] + beta.1[2] * x1 + beta.1[3] * x3 + eps1
# Observable multinomial variable
z <- rep(2, n)
z[(z0.star > z1.star) & (z0.star > z2.star)] <- 0
z[(z1.star > z0.star) & (z1.star > z2.star)] <- 1
table(z)
# Unobservable values of the continuous outcome
y <- rep(NA, n)
y[z == 0] <- y0.star[z == 0]
y[z == 1] <- y1.star[z == 1]
# Data
data <- data.frame(x1 = x1, x2 = x2, x3 = x3, z = z, y = y)
# ---
# Step 2
# Estimation of the parameters
# ---
# Define the groups
groups3 <- c(0, 1, 2)
groups2 <- c(0, 1, -1)
# Two-step estimator of Dubin-McFadden
model <- msel(formula3 = z ~ x1 + x2, formula2 = y ~ x1 + x3,
groups3 = groups3, groups2 = groups2,
data = data, estimator = "2step",
type3 = "logit")
summary(model)
# Least squaes estimates (benchmark)
lm0 <- lm(y ~ x1 + x3, data = data[data$z == 0, ])
lm1 <- lm(y ~ x1 + x3, data = data[data$z == 1, ])
# Compare the estimates of beta0
cbind(true = beta.0,
DMF = coef(model, type = "coef2", eq2 = 1, regime = 0),
LS = coef(lm0))
# Compare the estimates of beta1
cbind(true = beta.1,
DMF = coef(model, type = "coef2", eq2 = 1, regime = 1),
LS = coef(lm1))
# }
Run the code above in your browser using DataLab