# \donttest{
# -------------------------------
# CPS data example
# -------------------------------
# Set seed for reproducibility
set.seed(123)
# Upload data
data(cps)
# Prepare ordered variable for education
cps$educ <- NA
cps$educ[cps$basic == 1] <- 0
cps$educ[cps$bachelor == 1] <- 1
cps$educ[cps$master == 1] <- 2
# Labor supply (probit) model
f_work <- work ~ age + I(age ^ 2) + bachelor + master + health +
slwage + nchild
model1 <- mvoprobit(f_work, data = cps)
summary(model1)
# Education choice (ordered probit) model
f_educ <- educ ~ age + I(age ^ 2) + sbachelor + smaster
model2 <- mvoprobit(f_educ, data = cps)
summary(model2)
# Labor supply with endogenous education
# treatment (recursive or hierarchical ordered probit) model
model3 <- mvoprobit(list(f_work, f_educ), data = cps)
summary(model3)
# Sample selection (on employment) Heckman's model
f_lwage <- lwage ~ age + I(age ^ 2) + bachelor + master + health
cps$lwage[cps$work == 0] <- Inf
model4 <- mvoprobit(f_work, f_lwage, data = cps)
summary(model4)
# Endogenous education treatment with non-random sample selection
model5 <- mvoprobit(list(f_work, f_educ), f_lwage, data = cps)
summary(model5)
# Endogenous switching model with non-random sample selection
groups <- cbind(c(1, 1, 1, 0, 0, 0),
c(0, 1, 2, 0, 1, 2))
groups2 <- matrix(c(0, 1, 2, -1, -1, -1), ncol = 1)
f_lwage2 <- lwage ~ age + I(age ^ 2) + health
model6 <- mvoprobit(list(f_work, f_educ), f_lwage2,
groups = groups, groups2 = groups2,
data = cps)
summary(model6)
# }
# -------------------------------
# Simulated data example 1
# Ordered probit model
# -------------------------------
# ---
# Step 1
# Simulation of data
# ---
# Load required package
library("mnorm")
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
# Random errors
u <- rnorm(n = n, mean = 0, sd = 1)
# Coefficients
gamma <- c(-1, 2)
# Linear index
li <- gamma[1] * w1 + gamma[2] * w2
# Latent variable
z_star <- li + u
# Cuts
cuts <- c(-1, 0.5, 2)
# Observable ordered outcome
z <- rep(0, n)
z[(z_star > cuts[1]) & (z_star <= cuts[2])] <- 1
z[(z_star > cuts[2]) & (z_star <= cuts[3])] <- 2
z[z_star > cuts[3]] <- 3
table(z)
# Data
data <- data.frame(w1 = w1, w2 = w2, z = z)
# ---
# Step 2
# Estimation of parameters
# ---
# Estimation
model <- mvoprobit(z ~ w1 + w2,
data = data)
summary(model)
# \donttest{
# Compare estimates and true values of parameters
# regression coefficients
cbind(true = gamma, estimate = model$coef[[1]])
# cuts
cbind(true = cuts, estimate = model$cuts[[1]])
# ---
# Step 3
# Estimation of probabilities and marginal effects
# ---
# Predict probability of dependent variable
# equals to 2 for every observation in a sample.
# P(z = 2)
prob <- predict(model, group = 2, type = "prob")
head(prob)
# Calculate mean marginal effect of w2 on P(z = 1)
mean(predict(model, group = 1, type = "prob", me = "w2"))
# Calculate probabilities and marginal effects
# for manually provided observations.
# new data
newdata <- data.frame(z = c(1, 1),
w1 = c(0.5, 0.2),
w2 = c(-0.3, 0.8))
# probability P(z = 2)
predict(model, group = 2, type = "prob", newdata = newdata)
# linear index
predict(model, type = "li", newdata = newdata)
# marginal effect of w1 on P(z = 2)
predict(model, group = 2, type = "prob", newdata = newdata, me = "w1")
# marginal effect of w1 and w2 on P(z = 3)
predict(model, group = 3, type = "prob",
newdata = newdata, me = c("w1", "w2"))
# marginal effect of w2 on the linear index
predict(model, group = 2, type = "li", newdata = newdata, me = "w2")
# discrete marginal effect i.e. P(z = 2 | w1 = 0.5) - P(z = 2 | w1 = 0.2)
predict(model, group = 2, type = "prob", newdata = newdata,
me = "w2", eps = c(0.2, 0.5))
# adjusted conditional expectation for endogenous switching and
# sample selection models with continuous outcome with random error 'e'
# E(e | z == 2) / cov(e, u)
# where joint distribution of 'e' and 'u' determined by
# Gaussian copula and 'e' is normally distributed
predict(model, group = 2, type = "lambda", newdata = newdata)
# ---
# Step 4
# Ordered logit model
# ---
# Estimate ordered logit model with a unit variance
# that is just a matter of reparametrization i.e.
# do not affect signs and significance of coefficients
# and dot not affect at all marginal effects
logit <- mvoprobit(z ~ w1 + w2,
data = data,
marginal = "logistic")
summary(logit)
# Compare ordered probit and ordered logit models
# using Akaike and Bayesian information criteria
# AIC
c(probit = AIC(model), logit = AIC(logit))
# BIC
c(probit = BIC(model), logit = BIC(logit))
# Calculation of probabilities and marginal effects is identical
# to the previous example
# probability P(z = 1)
predict(logit, group = 1, type = "prob", newdata = newdata)
# marginal effect of w2 on P(z = 1)
predict(logit, group = 1, type = "prob", newdata = newdata, me = "w2")
# E(e | z == 1) / cov(e, u)
predict(logit, group = 1, type = "lambda", newdata = newdata)
# ---
# Step 5
# Semiparametric model with Gallant and Nychka distribution
# ---
pgn <- mvoprobit(z ~ w1 + w2,
data = data,
marginal = list("PGN" = 3))
summary(pgn)
# Calculation of probabilities and marginal effects is identical
# to the previous example
# probability P(z = 3)
predict(pgn, group = 3, type = "prob", newdata = newdata)
# marginal effect of w2 on P(z = 3)
predict(pgn, group = 3, type = "prob", newdata = newdata, me = "w2")
# E(e | z == 3) / cov(e, u)
predict(pgn, group = 3, type = "lambda", newdata = newdata)
# Test normality assumption via likelihood ratio test
lrtest(model, pgn)
# }
# \donttest{
# -------------------------------
# Simulated data example 2
# Heteroscedastic ordered
# probit model
# -------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
w3 <- runif(n = n, min = -1, max = 1)
# Random errors
u <- rnorm(n, mean = 0, sd = 1)
# Coefficients of mean equation
gamma <- c(-1, 2)
# Coefficients of variance equation
gamma_het <- c(0.5, -1)
# Linear index of mean equation
li <- gamma[1] * w1 + gamma[2] * w2
# Linear index of variance equation
li_het <- gamma_het[1] * w2 + gamma_het[2] * w3
# Heteroscedastic stdandard deviation
# i.e. value of variance equation
sd_het <- exp(li_het)
# Latent variable
z_star <- li + u * sd_het
# Cuts
cuts <- c(-1, 0.5, 2)
# Observable ordered outcome
z <- rep(0, n)
z[(z_star > cuts[1]) & (z_star <= cuts[2])] <- 1
z[(z_star > cuts[2]) & (z_star <= cuts[3])] <- 2
z[z_star > cuts[3]] <- 3
table(z)
# Data
data <- data.frame(w1 = w1, w2 = w2, w3 = w3, z = z)
# ---
# Step 2
# Estimation of parameters
# ---
# Estimation
model <- mvoprobit(z ~ w1 + w2 | w2 + w3,
data = data)
summary(model)
# Compare estimates and true values of parameters
# regression coefficients of mean equation
cbind(true = gamma, estimate = model$coef[[1]])
# regression coefficients of variance equation
cbind(true = gamma_het, estimate = model$coef_var[[1]])
# cuts
cbind(true = cuts, estimate = model$cuts[[1]])
# Test for homoscedasticity
model0 <- mvoprobit(z ~ w1 + w2, data = data)
lrtest(model, model0)
# ---
# Step 3
# Estimation of probabilities and marginal effects
# ---
# Predict probability of dependent variable
# equals to 2 for every observation in a sample.
# P(z = 2)
prob <- predict(model, group = 2, type = "prob")
head(prob)
# Calculate mean marginal effect of w2 on P(z = 1)
mean(predict(model, group = 1, type = "prob", me = "w2"))
# Calculate corresponding probability, linear
# index and heteroscedastic standard deviations for
# manually provided observations.
# new data
newdata <- data.frame(z = c(1, 1),
w1 = c(0.5, 0.2),
w2 = c(-0.3, 0.8),
w3 = c(0.6, 0.1))
# probability P(z = 2)
predict(model, group = 2, type = "prob", newdata = newdata)
# linear index
predict(model, type = "li", newdata = newdata)
# standard deviation
predict(model, type = "sd", newdata = newdata)
# marginal effect of w3 on P(Z = 3)
predict(model, group = 3, type = "prob", newdata = newdata, me = "w3")
# marginal effect of w2 on the standard error
predict(model, group = 2, type = "sd", newdata = newdata, me = "w2")
# discrete marginal effect i.e. P(Z = 2 | w1 = 0.5) - P(Z = 2 | w1 = 0.2)
predict(model, group = 2, type = "prob", newdata = newdata,
me = "w2", eps = c(0.2, 0.5))
# }
# \donttest{
# -------------------------------
# Simulated data example 3
# Bivariate ordered probit model
# with heteroscedastic second
# equation
# -------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
w3 <- runif(n = n, min = -1, max = 1)
w4 <- runif(n = n, min = -1, max = 1)
# Covariance matrix of random errors
rho <- 0.5
sigma <- matrix(c(1, rho,
rho, 1),
nrow = 2)
# Random errors
u <- mnorm::rmnorm(n = n, mean = c(0, 0), sigma = sigma)
# Coefficients
gamma1 <- c(-1, 2)
gamma2 <- c(1, 1.5)
# Coefficients of variance equation
gamma2_het <- c(0.5, -1)
# Linear indexes
li1 <- gamma1[1] * w1 + gamma1[2] * w2
li2 <- gamma2[1] * w2 + gamma2[2] * w3
# Linear index of variance equation
li2_het <- gamma2_het[1] * w2 + gamma2_het[2] * w4
# Heteroscedastic stdandard deviation
# i.e. value of variance equation
sd2_het <- exp(li2_het)
# Latent variables
z1_star <- li1 + u[, 1]
z2_star <- li2 + u[, 2] * sd2_het
# Cuts
cuts1 <- c(-1, 0.5, 2)
cuts2 <- c(-2, 0)
# Observable ordered outcome
# first outcome
z1 <- rep(0, n)
z1[(z1_star > cuts1[1]) & (z1_star <= cuts1[2])] <- 1
z1[(z1_star > cuts1[2]) & (z1_star <= cuts1[3])] <- 2
z1[z1_star > cuts1[3]] <- 3
# second outcome
z2 <- rep(0, n)
z2[(z2_star > cuts2[1]) & (z2_star <= cuts2[2])] <- 1
z2[z2_star > cuts2[2]] <- 2
# distribution
table(z1, z2)
# Data
data <- data.frame(w1 = w1, w2 = w2,
w3 = w3, w4 = w4,
z1 = z1, z2 = z2)
# ---
# Step 2
# Estimation of parameters
# ---
# Estimation
model <- mvoprobit(list(z1 ~ w1 + w2,
z2 ~ w2 + w3 | w2 + w4),
data = data)
summary(model)
# Compare estimates and true values of parameters
# regression coefficients of the first equation
cbind(true = gamma1, estimate = model$coef[[1]])
# regression coefficients of the second equation
cbind(true = gamma2, estimate = model$coef[[2]])
# cuts of the first equation
cbind(true = cuts1, estimate = model$cuts[[1]])
# cuts of the second equation
cbind(true = cuts2, estimate = model$cuts[[2]])
# correlation coefficients
cbind(true = rho, estimate = model$sigma[1, 2])
# regression coefficients of variance equation
cbind(true = gamma2_het, estimate = model$coef_var[[2]])
# ---
# Step 3
# Estimation of probabilities and linear indexes
# ---
# Predict probability P(z1 = 2, z2 = 0)
prob <- predict(model, group = c(2, 0), type = "prob")
head(prob)
# Calculate mean marginal effect of w2 on:
# P(z1 = 1)
mean(predict(model, group = c(1, -1), type = "prob", me = "w2"))
# P(z1 = 1, z2 = 0)
mean(predict(model, group = c(1, 0), type = "prob", me = "w2"))
# Calculate corresponding probability and linear
# index for manually provided observations.
# new data
newdata <- data.frame(z1 = c(1, 1),
z2 = c(1, 1),
w1 = c(0.5, 0.2),
w2 = c(-0.3, 0.8),
w3 = c(0.6, 0.1),
w4 = c(0.3, -0.5))
# probability P(z1 = 2, z2 = 0)
predict(model, group = c(2, 0), type = "prob", newdata = newdata)
# linear index
predict(model, type = "li", newdata = newdata)
# marginal probability P(z2 = 1)
predict(model, group = c(-1, 1), type = "prob", newdata = newdata)
# marginal probability P(z1 = 3)
predict(model, group = c(3, -1), type = "prob", newdata = newdata)
# conditional probability P(z1 = 2 | z2 = 0)
predict(model, group = c(2, 0), given_ind = 2,
type = "prob", newdata = newdata)
# conditional probability P(z2 = 1 | z1 = 3)
predict(model, group = c(3, 1), given_ind = 1,
type = "prob", newdata = newdata)
# marginal effect of w4 on P(Z2 = 2)
predict(model, group = c(-1, 2),
type = "prob", newdata = newdata, me = "w4")
# marginal effect of w4 on P(z1 = 3, Z2 = 2)
predict(model, group = c(3, 2),
type = "prob", newdata = newdata, me = "w4")
# marginal effect of w4 on P(z1 = 3 | z2 = 2)
predict(model, group = c(3, 2), given_ind = 2,
type = "prob", newdata = newdata, me = "w4")
# ---
# Step 4
# Replication under non-random sample selection
# ---
# Suppose that z2 is unobservable when z1 = 1 or z1 = 3
z2[(z1 == 1) | (z1 == 3)] <- -1
data$z2 <- z2
# Replicate estimation procedure
model <- mvoprobit(list(z1 ~ w1 + w2,
z2 ~ w2 + w3 | w2 + w4),
cov_type = "GOP",
data = data)
summary(model)
# Compare estimates and true values of parameters
# regression coefficients of the first equation
cbind(true = gamma1, estimate = model$coef[[1]])
# regression coefficients of the second equation
cbind(true = gamma2, estimate = model$coef[[2]])
# cuts of the first equation
cbind(true = cuts1, estimate = model$cuts[[1]])
# cuts of the second equation
cbind(true = cuts2, estimate = model$cuts[[2]])
# correlation coefficients
cbind(true = rho, estimate = model$sigma[1, 2])
# regression coefficients of variance equation
cbind(true = gamma2_het, estimate = model$coef_var[[2]])
# ---
# Step 5
# Semiparametric model with marginal logistic and PGN distributions
# ---
# Estimate the model
model <- mvoprobit(list(z1 ~ w1 + w2,
z2 ~ w2 + w3 | w2 + w4),
data = data,
marginal = list(PGN = 3, logistic = NULL))
summary(model)
# }
# \donttest{
# -------------------------------
# Simulated data example 4
# Heckman model with
# ordered selection mechanism
# -------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
w3 <- runif(n = n, min = -1, max = 1)
# Random errors
rho <- 0.5
var.y <- 0.3
sd.y <- sqrt(var.y)
sigma <- matrix(c(1, rho * sd.y,
rho * sd.y, var.y),
nrow = 2)
errors <- mnorm::rmnorm(n = n, mean = c(0, 0), sigma = sigma)
u <- errors[, 1]
eps <- errors[, 2]
# Coefficients
gamma <- c(-1, 2)
beta <- c(1, -1, 1)
# Linear index
li <- gamma[1] * w1 + gamma[2] * w2
li.y <- beta[1] + beta[2] * w1 + beta[3] * w3
# Latent variable
z_star <- li + u
y_star <- li.y + eps
# Cuts
cuts <- c(-1, 0.5, 2)
# Observable ordered outcome
z <- rep(0, n)
z[(z_star > cuts[1]) & (z_star <= cuts[2])] <- 1
z[(z_star > cuts[2]) & (z_star <= cuts[3])] <- 2
z[z_star > cuts[3]] <- 3
table(z)
# Observable continuous outcome such
# that outcome 'y' is observable only
# when 'z > 1' and unobservable otherwise
# i.e. when 'z <= 1' we code 'y' as 'Inf'
y <- y_star
y[z <= 1] <- Inf
# Data
data <- data.frame(w1 = w1, w2 = w2, w3 = w3,
z = z, y = y)
# ---
# Step 2
# Estimation of parameters
# ---
# Estimation
model <- mvoprobit(z ~ w1 + w2,
y ~ w1 + w3,
data = data)
summary(model)
# Compare estimates and true values of parameters
# regression coefficients of ordered equation
cbind(true = gamma, estimate = model$coef[[1]])
# cuts
cbind(true = cuts, estimate = model$cuts[[1]])
# regression coefficients of continuous equation
cbind(true = beta, estimate = as.numeric(model$coef2[[1]]))
# variance
cbind(true = var.y, estimate = as.numeric(model$var2[[1]]))
# covariance
cbind(true = rho * sd.y, estimate = as.numeric(model$cov2[[1]]))
# ---
# Step 3
# Estimation of expectations and marginal effects
# ---
# New data
newdata <- data.frame(z = 1,
y = 1,
w1 = 0.1,
w2 = 0.2,
w3 = 0.3)
# Predict unconditional expectation of the dependent variable
predict(model, group2 = 0, newdata = newdata)
# Predict conditional expectations of the dependent variable
# E(y | z == 2)
predict(model, group = 2, group2 = 0, newdata = newdata)
# E(y | z == 0)
predict(model, group = 0, group2 = 0, newdata = newdata)
# ---
# Step 4
# Classical Heckman's two-step estimation procedure
# ---
# Predict adjusted conditional expectations
lambda2 <- predict(model, group = 2, type = "lambda")
lambda3 <- predict(model, group = 3, type = "lambda")
# Construct variable responsible for adjusted
# conditional expectation in linear regression equation
data$lambda <- NA
data$lambda[model$data$z == 2] <- lambda2[model$data$z == 2]
data$lambda[model$data$z == 3] <- lambda3[model$data$z == 3]
# Alternatively simply get this variable from the estimation output
# of a selection part of the model
model_probit <- mvoprobit(z ~ w1 + w2, data = data)
data$lambda <- model_probit$lambda
# Estimate model via classical least squares
model_lm <- lm(y ~ w1 + w3, data = data[!is.infinite(data$y), ])
summary(model_lm)
# Estimate model via two-step procedure
model_ts <- lm(y ~ w1 + w3 + lambda, data = data[!is.infinite(data$y), ])
summary(model_ts)
# Automatic estimation of two-step model with robust standard errors
model_ts <- mvoprobit(z ~ w1 + w2,
y ~ w1 + w3,
data = data,
estimator = "2step")
summary(model_ts)
# Check estimates accuracy
tbl <- cbind(true = beta,
ls = coef(model_lm),
ml = model$coef2[[1]][1, ],
twostep = model_ts$coef2[[1]][1, ])
print(tbl)
# ---
# Step 5
# Semiparametric estimation procedure
# ---
# Estimate the model using Lee's method
# assuming logistic distribution of
# random errors of the selection equation
model_Lee <- mvoprobit(z ~ w1 + w2,
y ~ w1 + w3,
data = data,
marginal = list(logistic = NULL),
estimator = "2step")
summary(model_Lee)
# One step estimation is also available as well
# as more complex marginal distributions.
# Consider random errors in selection equation
# following PGN distribution with three parameters.
model_sp <- mvoprobit(z ~ w1 + w2,
y ~ w1 + w3,
data = data,
marginal = list(PGN = 3))
summary(model_sp)
# To additionally relax normality assumption of
# random error of continuous equation it is possible
# to use Newey's two-step procedure.
model_Newey <- mvoprobit(z ~ w1 + w2,
y ~ w1 + w3,
data = data,
marginal = list(PGN = 3),
estimator = "2step",
degrees = 2,
cov_type = "nonparametric")
summary(model_Newey)
# }
# \donttest{
# -------------------------------
# Simulated data example 5
# Endogenous switching model
# with heteroscedastic
# ordered selection mechanism
# -------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
w3 <- runif(n = n, min = -1, max = 1)
# Random errors
rho_0 <- -0.8
rho_1 <- -0.7
var2_0 <- 0.9
var2_1 <- 1
sd_y_0 <- sqrt(var2_0)
sd_y_1 <- sqrt(var2_1)
cor_y_01 <- 0.7
cov2_01 <- sd_y_0 * sd_y_1 * cor_y_01
cov2_z_0 <- rho_0 * sd_y_0
cov2_z_1 <- rho_1 * sd_y_1
sigma <- matrix(c(1, cov2_z_0, cov2_z_1,
cov2_z_0, var2_0, cov2_01,
cov2_z_1, cov2_01, var2_1),
nrow = 3)
errors <- mnorm::rmnorm(n = n, mean = c(0, 0, 0), sigma = sigma)
u <- errors[, 1]
eps_0 <- errors[, 2]
eps_1 <- errors[, 3]
# Coefficients
gamma <- c(-1, 2)
gamma_het <- c(0.5, -1)
beta_0 <- c(1, -1, 1)
beta_1 <- c(2, -1.5, 0.5)
# Linear index of ordered equation
# mean
li <- gamma[1] * w1 + gamma[2] * w2
# variance
li_het <- gamma_het[1] * w2 + gamma_het[2] * w3
# Linear index of continuous equation
# regime 0
li_y_0 <- beta_0[1] + beta_0[2] * w1 + beta_0[3] * w3
# regime 1
li_y_1 <- beta_1[1] + beta_1[2] * w1 + beta_1[3] * w3
# Latent variable
z_star <- li + u * exp(li_het)
y_0_star <- li_y_0 + eps_0
y_1_star <- li_y_1 + eps_1
# Cuts
cuts <- c(-1, 0.5, 2)
# Observable ordered outcome
z <- rep(0, n)
z[(z_star > cuts[1]) & (z_star <= cuts[2])] <- 1
z[(z_star > cuts[2]) & (z_star <= cuts[3])] <- 2
z[z_star > cuts[3]] <- 3
table(z)
# Observable continuous outcome such
# that outcome 'y' is
# in regime 1 when 'z == 1',
# in regime 0 when 'z <= 1',
# unobservable when 'z == 0'
y <- rep(NA, n)
y[z == 0] <- Inf
y[z == 1] <- y_0_star[z == 1]
y[z > 1] <- y_1_star[z > 1]
# Data
data <- data.frame(w1 = w1, w2 = w2, w3 = w3,
z = z, y = y)
# ---
# Step 2
# Estimation of parameters
# ---
# Assign groups
groups <- matrix(0:3, ncol = 1)
groups2 <- matrix(c(-1, 0, 1, 1), ncol = 1)
# Estimation
model <- mvoprobit(list(z ~ w1 + w2 | w2 + w3),
list(y ~ w1 + w3),
groups = groups, groups2 = groups2,
data = data)
summary(model)
# Compare estimates and true values of parameters
# regression coefficients of ordered equation
cbind(true = gamma, estimate = model$coef[[1]])
cbind(true = gamma_het, estimate = model$coef_var[[1]])
# cuts
cbind(true = cuts, estimate = model$cuts[[1]])
# regression coefficients of continuous equation
cbind(true = beta_0, estimate = model$coef2[[1]][1, ])
cbind(true = beta_1, estimate = model$coef2[[1]][2, ])
# variances
cbind(true = c(var2_0, var2_1), estimate = model$var2[[1]])
# covariances
cbind(true = c(cov2_z_0, cov2_z_1), estimate = model$cov2[[1]])
# ---
# Step 3
# Estimation of expectations and marginal effects
# ---
# New data
newdata <- data.frame(z = 1,
y = 1,
w1 = 0.1,
w2 = 0.2,
w3 = 0.3)
# Predict unconditional expectation of the dependent variable
# regime 0
predict(model, group2 = 0, newdata = newdata)
# regime 1
predict(model, group2 = 1, newdata = newdata)
# Predict conditional expectations of the dependent variable
# given condition 'z == 0' for regime 1 i.e. E(y1 | z = 0)
predict(model, group = 0, group2 = 1, newdata = newdata)
# }
# \donttest{
# -------------------------------
# Simulated data example 6
# Endogenous switching model with
# multivariate heteroscedastic
# ordered selection mechanism
# -------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
w3 <- runif(n = n, min = -1, max = 1)
w4 <- runif(n = n, min = -1, max = 1)
# Random errors
rho_z1_z2 <- 0.5
rho_y0_z1 <- 0.6
rho_y0_z2 <- 0.7
rho_y1_z1 <- 0.65
rho_y1_z2 <- 0.75
var20 <- 0.9
var21 <- 1
sd_y0 <- sqrt(var20)
sd_y1 <- sqrt(var21)
cor_y01 <- 0.7
cov201 <- sd_y0 * sd_y1 * cor_y01
cov20_z1 <- rho_y0_z1 * sd_y0
cov21_z1 <- rho_y1_z1 * sd_y1
cov20_z2 <- rho_y0_z2 * sd_y0
cov21_z2 <- rho_y1_z2 * sd_y1
sigma <- matrix(c(1, rho_z1_z2, cov20_z1, cov21_z1,
rho_z1_z2, 1, cov20_z2, cov21_z2,
cov20_z1, cov20_z2, var20, cov201,
cov21_z1, cov21_z2, cov201, var21),
nrow = 4)
errors <- mnorm::rmnorm(n = n, mean = c(0, 0, 0, 0), sigma = sigma)
u1 <- errors[, 1]
u2 <- errors[, 2]
eps0 <- errors[, 3]
eps1 <- errors[, 4]
# Coefficients
gamma1 <- c(-1, 2)
gamma1_het <- c(0.5, -1)
gamma2 <- c(1, 1)
beta0 <- c(1, -1, 1, -1.2)
beta1 <- c(2, -1.5, 0.5, 1.2)
# Linear index of ordered equation
# mean
li1 <- gamma1[1] * w1 + gamma1[2] * w2
li2 <- gamma2[1] * w1 + gamma2[2] * w3
# variance
li1_het <- gamma1_het[1] * w2 + gamma1_het[2] * w3
# Linear index of continuous equation
# regime 0
li_y0 <- beta0[1] + beta0[2] * w1 + beta0[3] * w3 + beta0[4] * w4
# regime 1
li_y1 <- beta1[1] + beta1[2] * w1 + beta1[3] * w3 + beta1[4] * w4
# Latent variables
z1_star <- li1 + u1 * exp(li1_het)
z2_star <- li2 + u2
y0_star <- li_y0 + eps0
y1_star <- li_y1 + eps1
# Cuts
cuts1 <- c(-1, 1)
cuts2 <- c(0)
# Observable ordered outcome
# first
z1 <- rep(0, n)
z1[(z1_star > cuts1[1]) & (z1_star <= cuts1[2])] <- 1
z1[z1_star > cuts1[2]] <- 2
# second
z2 <- rep(0, n)
z2[z2_star > cuts2[1]] <- 1
table(z1, z2)
# Observable continuous outcome such
# that outcome 'y' is
# in regime 0 when 'z1 == 1',
# in regime 1 when 'z1 == 0' or 'z1 == 2',
# unobservable when 'z2 == 0'
y <- rep(NA, n)
y[z1 == 1] <- y0_star[z1 == 1]
y[z1 != 1] <- y1_star[z1 != 1]
y[z2 == 0] <- Inf
# Data
data <- data.frame(w1 = w1, w2 = w2, w3 = w3, w4 = w4,
z1 = z1, z2 = z2, y = y)
# ---
# Step 2
# Estimation of parameters
# ---
# Assign groups
groups <- matrix(c(0, 0,
0, 1,
1, 0,
1, 1,
2, 0,
2, 1),
byrow = TRUE, ncol = 2)
groups2 <- matrix(c(-1, 1, -1, 0, -1, 1), ncol = 1)
# Estimation
model <- mvoprobit(list(z1 ~ w1 + w2 | w2 + w3,
z2 ~ w1 + w3),
list(y ~ w1 + w3 + w4),
groups = groups, groups2 = groups2,
data = data)
summary(model)
# Compare estimates and true values of parameters
# regression coefficients of the first ordered equation
cbind(true = gamma1, estimate = model$coef[[1]])
cbind(true = gamma1_het, estimate = model$coef_var[[1]])
# regression coefficients of the second ordered equation
cbind(true = gamma2, estimate = model$coef[[2]])
# cuts
cbind(true = cuts1, estimate = model$cuts[[1]])
cbind(true = cuts2, estimate = model$cuts[[2]])
# regression coefficients of continuous equation
cbind(true = beta0, estimate = model$coef2[[1]][1, ])
cbind(true = beta1, estimate = model$coef2[[1]][2, ])
# variances
cbind(true = c(var20, var21), estimate = model$var2[[1]])
# covariances
cbind(true = c(cov20_z1, cov20_z2), estimate = model$cov2[[1]][1, ])
cbind(true = c(cov21_z1, cov21_z2), estimate = model$cov2[[1]][2, ])
# ---
# Step 3
# Estimation of expectations and marginal effects
# ---
# New data
newdata <- data.frame(z1 = 1,
z2 = 1,
y = 1,
w1 = 0.1,
w2 = 0.2,
w3 = 0.3,
w4 = 0.4)
# Predict unconditional expectation of the dependent variable
# regime 0
predict(model, group2 = 0, newdata = newdata)
# regime 1
predict(model, group2 = 1, newdata = newdata)
# Predict conditional expectations of the dependent variable
# E(y1 | z1 = 2, z2 = 1)
predict(model, group = c(2, 1), group2 = 1, newdata = newdata)
# Marginal effect of w3 on E(y1 | z1 = 2, z2 = 1)
predict(model, group = c(2, 1), group2 = 1, newdata = newdata, me = "w3")
# ---
# Step 4
# Two-step estimation procedure
# ---
# For comparison reasons let's estimate the model
# via least squres
# Estimate model via classical least squares for a benchmark
model.ls.0 <- lm(y ~ w1 + w3 + w4,
data = data[!is.infinite(data$y) & (data$z1 == 1), ])
model.ls.1 <- lm(y ~ w1 + w3 + w4,
data = data[!is.infinite(data$y) & (data$z1 != 1), ])
# Apply two-step procedure
model_ts <- mvoprobit(list(z1 ~ w1 + w2 | w2 + w3,
z2 ~ w1 + w3),
y ~ w1 + w3 + w4,
groups = groups, groups2 = groups2,
estimator = "2step",
data = data)
summary(model_ts)
# Use two-step procedure with logistic marginal distributions
# that is multivariate generalization of Lee's method
model_Lee <- mvoprobit(list(z1 ~ w1 + w2 | w2 + w3,
z2 ~ w1 + w3),
y ~ w1 + w3 + w4,
marginal = list(logistic = NULL, logistic = NULL),
groups = groups, groups2 = groups2,
estimator = "2step",
data = data)
# Apply multivariate generalization of Newey's method
model_Newey <- mvoprobit(list(z1 ~ w1 + w2 | w2 + w3,
z2 ~ w1 + w3),
y ~ w1 + w3 + w4,
marginal = list(logistic = NULL, logistic = NULL),
degrees = c(2, 3),
groups = groups, groups2 = groups2,
estimator = "2step",
data = data)
# Compare accuracy of the methods
# beta0
tbl <- cbind(true = beta0,
ls = coef(model.ls.0),
ml = model$coef2[[1]][1, ],
twostep = model_ts$coef2[[1]][1, ],
Lee = model_Lee$coef2[[1]][1, ],
Newey = model_Newey$coef2[[1]][1, ])
print(tbl)
# beta1
tbl <- cbind(true = beta1,
ls = coef(model.ls.1),
ml = model$coef2[[1]][2, ],
twostep = model_ts$coef2[[1]][2, ],
Lee = model_Lee$coef2[[1]][2, ],
Newey = model_Newey$coef2[[1]][2, ])
print(tbl)
# }
# \donttest{
# -------------------------------
# Simulated data example 7
# Endogenous multivariate
# switching model with
# multivariate heteroscedastic
# ordered selection mechanism
# -------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
w3 <- runif(n = n, min = -1, max = 1)
w4 <- runif(n = n, min = -1, max = 1)
w5 <- runif(n = n, min = -1, max = 1)
# Random errors
var20 <- 0.9
var21 <- 1
var_g0 <- 1.1
var_g1 <- 1.2
var_g2 <- 1.3
A <- rWishart(1, 7, diag(7))[, , 1]
B <- diag(sqrt(c(1, 1, var20, var21,
var_g0, var_g1, var_g2)))
sigma <- B %*% cov2cor(A) %*% B
errors <- mnorm::rmnorm(n = n, mean = rep(0, nrow(sigma)), sigma = sigma)
u1 <- errors[, 1]
u2 <- errors[, 2]
eps0_y <- errors[, 3]
eps1_y <- errors[, 4]
eps0_g <- errors[, 5]
eps1_g <- errors[, 6]
eps2_g <- errors[, 7]
# Coefficients
gamma1 <- c(-1, 2)
gamma1_het <- c(0.5, -1)
gamma2 <- c(1, 1)
beta0_y <- c(1, -1, 1, -1.2)
beta1_y <- c(2, -1.5, 0.5, 1.2)
beta0_g <- c(-1, 1, 1, 1)
beta1_g <- c(1, -1, 1, 1)
beta2_g <- c(1, 1, -1, 1)
# Linear index of ordered equation
# mean
li1 <- gamma1[1] * w1 + gamma1[2] * w2
li2 <- gamma2[1] * w1 + gamma2[2] * w3
# variance
li1_het <- gamma1_het[1] * w2 + gamma1_het[2] * w3
# Linear index of the first continuous equation
# regime 0
li_y0 <- beta0_y[1] + beta0_y[2] * w1 + beta0_y[3] * w3 + beta0_y[4] * w4
# regime 1
li_y1 <- beta1_y[1] + beta1_y[2] * w1 + beta1_y[3] * w3 + beta1_y[4] * w4
# Linear index of the second continuous equation
# regime 0
li_g0 <- beta0_g[1] + beta0_g[2] * w2 + beta0_g[3] * w3 + beta0_g[4] * w5
# regime 1
li_g1 <- beta1_g[1] + beta1_g[2] * w2 + beta1_g[3] * w3 + beta1_g[4] * w5
# regime 2
li_g2 <- beta2_g[1] + beta2_g[2] * w2 + beta2_g[3] * w3 + beta2_g[4] * w5
# Latent variables
z1_star <- li1 + u1 * exp(li1_het)
z2_star <- li2 + u2
y0_star <- li_y0 + eps0_y
y1_star <- li_y1 + eps1_y
g0_star <- li_g0 + eps0_g
g1_star <- li_g1 + eps1_g
g2_star <- li_g2 + eps2_g
# Cuts
cuts1 <- c(-1, 1)
cuts2 <- c(0)
# Observable ordered outcome
# first
z1 <- rep(0, n)
z1[(z1_star > cuts1[1]) & (z1_star <= cuts1[2])] <- 1
z1[z1_star > cuts1[2]] <- 2
# second
z2 <- rep(0, n)
z2[z2_star > cuts2[1]] <- 1
table(z1, z2)
# Observable continuous outcome such
# that outcome 'y' is
# in regime 0 when 'z1 == 1',
# in regime 1 when 'z1 == 0' or 'z1 == 2',
# unobservable when 'z2 == 0'
y <- rep(NA, n)
y[z1 == 1] <- y0_star[z1 == 1]
y[z1 != 1] <- y1_star[z1 != 1]
y[z2 == 0] <- Inf
#' # Observable continuous outcome such
# that outcome 'g' is
# in regime 0 when 'z1 == z2',
# in regime 1 when 'z1 > z2',
# in regime 2 when 'z1 < z2',
g <- rep(NA, n)
g[z1 == z2] <- g0_star[z1 == z2]
g[z1 > z2] <- g1_star[z1 > z2]
g[z1 < z2] <- g2_star[z1 < z2]
# Data
data <- data.frame(w1 = w1, w2 = w2, w3 = w3, w4 = w4, w5 = w5,
z1 = z1, z2 = z2, y = y, g = g)
# ---
# Step 2
# Estimation of parameters
# ---
# Assign groups
groups <- matrix(c(0, 0,
0, 1,
1, 0,
1, 1,
2, 0,
2, 1),
byrow = TRUE, ncol = 2)
groups2 <- matrix(NA, nrow = nrow(groups), ncol = 2)
groups2[groups[, 1] == 1, 1] <- 0
groups2[(groups[, 1] == 0) | (groups[, 1] == 2), 1] <- 1
groups2[groups[, 2] == 0, 1] <- -1
groups2[groups[, 1] == groups[, 2], 2] <- 0
groups2[groups[, 1] > groups[, 2], 2] <- 1
groups2[groups[, 1] < groups[, 2], 2] <- 2
cbind(groups, groups2)
# Estimation
model <- mvoprobit(list(z1 ~ w1 + w2 | w2 + w3,
z2 ~ w1 + w3),
list(y ~ w1 + w3 + w4,
g ~ w2 + w3 + w5),
groups = groups, groups2 = groups2,
data = data)
summary(model)
# Compare estimates and true values of parameters
# regression coefficients of the first ordered equation
cbind(true = gamma1, estimate = model$coef[[1]])
cbind(true = gamma1_het, estimate = model$coef_var[[1]])
# regression coefficients of the second ordered equation
cbind(true = gamma2, estimate = model$coef[[2]])
# cuts
cbind(true = cuts1, estimate = model$cuts[[1]])
cbind(true = cuts2, estimate = model$cuts[[2]])
# regression coefficients of the first continuous equation
cbind(true = beta0_y, estimate = model$coef2[[1]][1, ])
cbind(true = beta1_y, estimate = model$coef2[[1]][2, ])
# regression coefficients of the second continuous equation
cbind(true = beta0_g, estimate = model$coef2[[2]][1, ])
cbind(true = beta1_g, estimate = model$coef2[[2]][2, ])
cbind(true = beta2_g, estimate = model$coef2[[2]][3, ])
# variances
cbind(true = c(var20, var21), estimate = model$var2[[1]])
cbind(true = c(var_g0, var_g1, var_g2), estimate = model$var2[[2]])
# correlation between ordered equations
cbind(true = c(sigma[1, 2]), estimate = model$sigma[1, 2])
# covariances between continious and ordered equations
cbind(true = sigma[1:2, 3], estimate = model$cov2[[1]][1, ])
cbind(true = sigma[1:2, 4], estimate = model$cov2[[1]][2, ])
cbind(true = sigma[1:2, 5], estimate = model$cov2[[2]][1, ])
cbind(true = sigma[1:2, 6], estimate = model$cov2[[2]][2, ])
cbind(true = sigma[1:2, 7], estimate = model$cov2[[2]][3, ])
# covariances between continuous equations
cbind(true = c(sigma[4, 7], sigma[3, 5], sigma[4, 6]),
estimate = model$sigma2[[1]])
# ---
# Step 3
# Estimation of expectations and marginal effects
# ---
# New data
newdata <- data.frame(z1 = 1,
z2 = 1,
y = 1,
g = 1,
w1 = 0.1,
w2 = 0.2,
w3 = 0.3,
w4 = 0.4,
w5 = 0.5)
# Predict unconditional expectation of the dependent variable
# regime 0 for 'y' and regime 1 for 'g' i.e. E(y0), E(g1)
predict(model, group2 = c(0, 1), newdata = newdata)
# Predict conditional expectations of the dependent variable
# E(y0 | z1 = 2, z2 = 1), E(g1 | z1 = 2, z2 = 1)
predict(model, group = c(2, 1), group2 = c(0, 1), newdata = newdata)
# Marginal effect of w3 on E(y1 | z1 = 2, z2 = 1) and E(g1 | z1 = 2, z2 = 1)
predict(model, group = c(2, 1), group2 = c(0, 1),
newdata = newdata, me = "w3")
# }
Run the code above in your browser using DataLab