# \donttest{
# -------------------------------
# CPS data example
# -------------------------------
# Set seed for reproducibility
set.seed(123)
# Load data
data(cps)
# Prepare multinomial variable for education
cps$educ <- NA
cps$educ[cps$basic == 1] <- 1
cps$educ[cps$bachelor == 1] <- 2
cps$educ[cps$master == 1] <- 3
# Multinomial probit model for education
f_educ <- educ ~ age + I(age ^ 2) + sbachelor + smaster
model1 <- mnprobit(f_educ, data = cps)
summary(model1)
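# A quick look at the fitted probabilities (a sketch relying on the same
# predict() interface illustrated in the simulated examples below)
prob.educ <- predict(model1, alt = NULL, type = "prob")
head(prob.educ)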
# Endogenous education treatment model
f_lwage <- lwage ~ age + I(age ^ 2) + bachelor + master + health
model2 <- mnprobit(f_educ, f_lwage, data = cps, cov_type = "gop")
summary(model2)
# Endogenous education switching model
f_lwage2 <- lwage ~ age + I(age ^ 2) + health
model3 <- mnprobit(f_educ, f_lwage2, data = cps,
                   regimes = c(0, 1, 2), cov_type = "gop")
summary(model3)
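# A possible follow-up (a sketch using the 'val' prediction type shown in the
# simulated selection example below): unconditional expected log-wage in
# regime 1 for every observation
head(predict(model3, type = "val", regime = 1, alt = NULL))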
# }
# -------------------------------
# Simulated data example 1
# Multinomial probit model
# -------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Regressors (covariates)
w1 <- runif(n = n, min = -1, max = 1)
w2 <- runif(n = n, min = -1, max = 1)
# Random errors
sigma.1 <- 1
sigma.2 <- 0.9
rho <- 0.7
sigma <- matrix(c(sigma.1 ^ 2,             sigma.1 * sigma.2 * rho,
                  sigma.1 * sigma.2 * rho, sigma.2 ^ 2),
                ncol = 2, byrow = TRUE)
u <- rmnorm(n = n, mean = c(0, 0), sigma = sigma)
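# Sanity check (not part of the original example): the sample covariance
# matrix of the simulated errors should be close to 'sigma'
round(cov(u), 3)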
# Coefficients
gamma.1 <- c(0.1, 2, 3)
gamma.2 <- c(-0.1, 3, -2)
# Linear index
z1.li <- gamma.1[1] + gamma.1[2] * w1 + gamma.1[3] * w2
z2.li <- gamma.2[1] + gamma.2[2] * w1 + gamma.2[3] * w2
# Latent variables
z1.star <- z1.li + u[, 1]
z2.star <- z2.li + u[, 2]
z3.star <- rep(0, n)
# Observable multinomial outcome
z <- rep(3, n)
z[(z1.star > z2.star) & (z1.star > z3.star)] <- 1
z[(z2.star > z1.star) & (z2.star > z3.star)] <- 2
table(z)
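# Sample shares of the alternatives (not part of the original example)
prop.table(table(z))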
# Data
data <- data.frame(w1 = w1, w2 = w2, z = z)
# ---
# Step 2
# Estimation of parameters
# ---
# Estimation
model <- mnprobit(z ~ w1 + w2, data = data)
summary(model)
# \donttest{
# Compare estimates and true values of parameters
# regression coefficients
cbind(true = gamma.1, estimate = model$coef[, 1])
cbind(true = gamma.2, estimate = model$coef[, 2])
# elements of the covariance matrix
cbind(true = c(sigma[1, 2], sigma[2, 2]),
      estimate = c(model$sigma[1, 2], model$sigma[2, 2]))
# ---
# Step 3
# Estimation of probabilities and marginal effects
# ---
# For every observation in the sample predict the
# probability of the 2-nd alternative, i.e. P(z = 2)
prob <- predict(model, alt = 2, type = "prob")
head(prob)
# probability of each alternative
prob <- predict(model, alt = NULL, type = "prob")
head(prob)
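# Check (a sketch assuming the previous call returns one column per
# alternative): probabilities across alternatives should sum to one
head(rowSums(prob))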
# Calculate mean marginal effect of w2 on P(z = 1)
mean(predict(model, alt = 1, type = "prob", me = "w2"))
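# A rough numerical check of this marginal effect (a sketch, not part of the
# original example): approximate it by a finite difference in w2
delta <- 1e-5
data.delta <- data
data.delta$w2 <- data.delta$w2 + delta
p.base <- predict(model, alt = 1, type = "prob")
p.delta <- predict(model, alt = 1, type = "prob", newdata = data.delta)
mean((p.delta - p.base) / delta)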
# Calculate probabilities and marginal effects
# for manually provided observations.
# new data
newdata <- data.frame(z = c(1, 1),
                      w1 = c(0.5, 0.2),
                      w2 = c(-0.3, 0.8))
# probability P(z = 2)
predict(model, alt = 2, type = "prob", newdata = newdata)
# linear index
predict(model, type = "li", newdata = newdata)
# marginal effect of w1 on P(z = 2)
predict(model, alt = 2, type = "prob", newdata = newdata, me = "w1")
# marginal effect of w1 and w2 on P(z = 3)
predict(model, alt = 3, type = "prob",
        newdata = newdata, me = c("w1", "w2"))
# marginal effect of w2 on the linear index
predict(model, alt = 2, type = "li", newdata = newdata, me = "w2")
# discrete marginal effect i.e. P(z = 2 | w2 = 0.5) - P(z = 2 | w2 = 0.2)
predict(model, alt = 2, type = "prob", newdata = newdata,
        me = "w2", eps = c(0.2, 0.5))
# adjusted conditional expectation for endogenous switching and
# sample selection models with a continuous outcome whose random error is 'e'
# i.e. E(e | z = 2) / cov(e, u)
# where the joint distribution of 'e' and 'u' is determined by a
# Gaussian copula and 'e' is normally distributed
predict(model, alt = 2, type = "lambda", newdata = newdata)
# }
# \donttest{
# -------------------------------
# Simulated data example 2
# Multinomial selection model
# -------------------------------
# Load required package
library("mnorm")
# ---
# Step 1
# Simulation of data
# ---
# Set seed for reproducibility
set.seed(123)
# The number of observations
n <- 1000
# Random errors
sd.z2 <- sqrt(0.9)
cor.z <- 0.3
sd.y0 <- sqrt(2)
cor.z1y0 <- 0.4
cor.z2y0 <- 0.7
sd.y1 <- sqrt(1.8)
cor.z1y1 <- 0.3
cor.z2y1 <- 0.6
cor.y <- 0.8
sigma <- matrix(c(
  1,                cor.z * sd.z2,            cor.z1y0 * sd.y0,         cor.z1y1 * sd.y1,
  cor.z * sd.z2,    sd.z2 ^ 2,                cor.z2y0 * sd.z2 * sd.y0, cor.z2y1 * sd.z2 * sd.y1,
  cor.z1y0 * sd.y0, cor.z2y0 * sd.z2 * sd.y0, sd.y0 ^ 2,                cor.y * sd.y0 * sd.y1,
  cor.z1y1 * sd.y1, cor.z2y1 * sd.z2 * sd.y1, cor.y * sd.y0 * sd.y1,    sd.y1 ^ 2),
  ncol = 4, byrow = TRUE)
colnames(sigma) <- c("z1", "z2", "y0", "y1")
rownames(sigma) <- colnames(sigma)
# Simulate random errors
errors <- rmnorm(n, c(0, 0, 0, 0), sigma)
u <- errors[, 1:2]
eps <- errors[, 3:4]
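# As above, a quick check (not part of the original example): the sample
# covariance matrix of the simulated errors should be close to 'sigma'
round(cov(errors), 3)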
# Regressors (covariates)
x1 <- runif(n, -1, 1)
x2 <- runif(n, -1, 1)
x3 <- (x2 + runif(n, -1, 1)) / 2
W <- cbind(1, x1, x2)
X <- cbind(1, x1, x3)
# Coefficients
gamma <- cbind(c(0.1, 1, 1),
               c(0.2, -1.2, 0.8))
beta <- cbind(c(1, -1, 2),
              c(1, -2, 1))
# Linear indexes
z1.li <- W %*% gamma[, 1]
z2.li <- W %*% gamma[, 2]
y0.li <- X %*% beta[, 1]
y1.li <- X %*% beta[, 2]
# Latent variables
z1.star <- z1.li + u[, 1]
z2.star <- z2.li + u[, 2]
y0.star <- y0.li + eps[, 1]
y1.star <- y1.li + eps[, 2]
# Observable variables as dummies
z1 <- (z1.star > z2.star) & (z1.star > 0)
z2 <- (z2.star > z1.star) & (z2.star > 0)
z3 <- (z1 != 1) & (z2 != 1)
# Aggregate observable variable
z <- rep(0, n)
z[z1] <- 1
z[z2] <- 2
z[z3] <- 3
table(z)
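# Shares of the alternatives in the simulated sample
# (not part of the original example)
prop.table(table(z))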
# Continuous variable observed only for the 1-st and 2-nd alternatives
# (unobserved values are set to infinity)
y <- rep(Inf, n)
y[z == 1] <- y0.star[z == 1]
y[z == 2] <- y1.star[z == 2]
# Data
data <- data.frame(z = z, y = y,
                   x1 = x1, x2 = x2, x3 = x3)
# ---
# Step 2
# Estimation of parameters
# ---
# Maximum likelihood method
model.ml <- mnprobit(z ~ x1 + x2,
                     y ~ x1 + x3, regimes = c(0, 1, -1),
                     data = data, cov_type = "gop")
summary(model.ml)
# Two-step method
model.2step <- mnprobit(z ~ x1 + x2,
                        y ~ x1 + x3, regimes = c(0, 1, -1),
                        data = data, estimator = "2step")
summary(model.2step)
# Semiparametric estimator using polynomials of degrees 2 and 3
model.snp <- mnprobit(z ~ x1 + x2,
                      y ~ x1 + x3, regimes = c(0, 1, -1),
                      data = data, estimator = "2step",
                      degrees = c(2, 3))
summary(model.snp)
# Simple least squares as a benchmark
model.lm0 <- lm(y ~ x1 + x3, data = data[z == 1, ])
model.lm1 <- lm(y ~ x1 + x3, data = data[z == 2, ])
# Compare coefficients of continuous equations
# y0
cbind(true = beta[, 1],
      ml = model.ml$coef2[, 1],
      twostep = model.2step$coef2[, 1],
      semiparametric = model.snp$coef2[, 1],
      ls = coef(model.lm0))
# y1
cbind(true = beta[, 2],
      ml = model.ml$coef2[, 2],
      twostep = model.2step$coef2[, 2],
      semiparametric = model.snp$coef2[, 2],
      ls = coef(model.lm1))
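# A compact accuracy summary (a sketch, not part of the original example):
# mean absolute deviation of the estimated y1 coefficients from the true values
colMeans(abs(cbind(ml = model.ml$coef2[, 2] - beta[, 2],
                   twostep = model.2step$coef2[, 2] - beta[, 2],
                   semiparametric = model.snp$coef2[, 2] - beta[, 2],
                   ls = coef(model.lm1) - beta[, 2])))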
# Compare coefficients of multinomial equations
# 1-st alternative
cbind(true = gamma[, 1],
      ml = model.ml$coef[, 1],
      twostep = model.2step$coef[, 1])
# 2-nd alternative
cbind(true = gamma[, 2],
      ml = model.ml$coef[, 2],
      twostep = model.2step$coef[, 2])
# Compare variances of random errors associated with
# z2
cbind(true = sigma[2, 2], ml = model.ml$sigma[2, 2])
# y0
cbind(true = sd.y0 ^ 2, ml = model.ml$var2[1])
# y1
cbind(true = sd.y1 ^ 2, ml = model.ml$var2[2])
# Compare covariances between
# z1 and z2
cbind(true = cor.z * sd.z2,
      ml = model.ml$sigma[1, 2],
      twostep = model.2step$sigma[1, 2])
# z1 and y0
cbind(true = cor.z1y0 * sd.y0,
      ml = model.ml$cov2[1, 1],
      twostep = model.2step$cov2[1, 1])
# z2 and y0
cbind(true = cor.z2y0 * sd.y0, ml = model.ml$cov2[2, 1])
# z1 and y1
cbind(true = cor.z1y1 * sd.y1, ml = model.ml$cov2[1, 2])
# z2 and y1
cbind(true = cor.z2y1 * sd.y1, ml = model.ml$cov2[2, 2])
# ---
# Step 3
# Predictions and marginal effects
# ---
# Unconditional expectation E(y1) for every observation in the sample
predict(model.ml, type = "val", regime = 1, alt = NULL)
# Marginal effect of x1 on the conditional expectation E(y0|z = 2)
# for every observation in the sample
predict(model.ml, type = "val", regime = 0, alt = 2, me = "x1")
# Calculate predictions and marginal effects
# for manually provided observations
# using the models estimated above.
newdata <- data.frame(z = c(1, 1),
                      y = c(1, 1),
                      x1 = c(0.5, 0.2),
                      x2 = c(-0.3, 0.8),
                      x3 = c(0.6, -0.7))
# Unconditional expectation E(y0)
predict(model.ml, type = "val", regime = 0, alt = NULL, newdata = newdata)
predict(model.2step, type = "val", regime = 0, alt = NULL, newdata = newdata)
predict(model.snp, type = "val", regime = 0, alt = NULL, newdata = newdata)
# Conditional expectation E(y1|z=3)
predict(model.ml, type = "val", regime = 1, alt = 3, newdata = newdata)
predict(model.2step, type = "val", regime = 1, alt = 3, newdata = newdata)
predict(model.snp, type = "val", regime = 1, alt = 3, newdata = newdata)
# Marginal effect of x2 on E(y0|z = 1)
predict(model.ml, type = "val", regime = 0,
        alt = 1, me = "x2", newdata = newdata)
predict(model.2step, type = "val", regime = 0,
        alt = 1, me = "x2", newdata = newdata)
predict(model.snp, type = "val", regime = 0,
        alt = 1, me = "x2", newdata = newdata)
# }