library(SemiParBIVProbit)
############
## EXAMPLE 1
############
## Generate data
## Correlation between the two equations 0.5 - Sample size 400
set.seed(0)
n <- 400
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rmvnorm(n, rep(0,2), Sigma)
x1 <- round(runif(n)); x2 <- runif(n); x3 <- runif(n)
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
f2 <- function(x) x+exp(-30*(x-0.5)^2)
y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
y2 <- ifelse(-0.25 - 1.25*x1 + f2(x2) + u[,2] > 0, 1, 0)
dataSim <- data.frame(y1, y2, x1, x2, x3)
#
#
## CLASSIC BIVARIATE PROBIT
out <- SemiParBIVProbit(y1 ~ x1 + x2 + x3,
y2 ~ x1 + x2 + x3,
data = dataSim)
summary(out)
AIC(out)
BIC(out)
## SEMIPARAMETRIC BIVARIATE PROBIT
## "cr" cubic regression spline basis - "cs" shrinkage version of "cr"
## "tp" thin plate regression spline basis - "ts" shrinkage version of "tp"
## for smooths of one variable, "cr/cs" and "tp/ts" achieve similar results
## k is the basis dimension - default is 10
## m is the order of the penalty for the specific term - default is 2
out <- SemiParBIVProbit(y1 ~ x1 + s(x2, bs = "tp", k = 10, m = 2) + s(x3),
y2 ~ x1 + s(x2) + s(x3),
data = dataSim)
sem.checks(out)
summary(out)
AIC(out)
## estimated smooth function plots - red lines are true curves
x2 <- sort(x2)
f1.x2 <- f1(x2)[order(x2)] - mean(f1(x2))
f2.x2 <- f2(x2)[order(x2)] - mean(f2(x2))
f3.x3 <- rep(0, length(x3))
par(mfrow=c(2,2),mar=c(4.5,4.5,2,2))
plot(out, eq = 1, select = 1, scale = 0); lines(x2, f1.x2, col = "red")
plot(out, eq = 1, select = 2, scale = 0); lines(x3, f3.x3, col = "red")
plot(out, eq = 2, select = 1, scale = 0); lines(x2, f2.x2, col = "red")
plot(out, eq = 2, select = 2, scale = 0); lines(x3, f3.x3, col = "red")
#
## same plots but CIs 'with intercept'
par(mfrow=c(2,2),mar=c(4.5,4.5,2,2))
plot(out, eq = 1, select = 1, seWithMean = TRUE, scale = 0); lines(x2, f1.x2, col = "red")
plot(out, eq = 1, select = 2, seWithMean = TRUE, scale = 0); lines(x3, f3.x3, col = "red")
plot(out, eq = 2, select = 1, seWithMean = TRUE, scale = 0); lines(x2, f2.x2, col = "red")
plot(out, eq = 2, select = 2, seWithMean = TRUE, scale = 0); lines(x3, f3.x3, col = "red")
## p-values suggest to drop x3 from both equations, with a stronger
## evidence for eq. 2. This can be also achieved using shrinkage smoothers
outSS <- SemiParBIVProbit(y1 ~ x1 + s(x2, bs = "cs") + s(x3, bs = "cs"),
y2 ~ x1 + s(x2, bs = "cs") + s(x3, bs = "cs"),
data = dataSim)
par(mfrow = c(2,2), mar = c(4.5,4.5,2,2))
plot(outSS, eq = 1, select = 1, scale = 0)
plot(outSS, eq = 1, select = 2, ylim = c(-0.1,0.1))
plot(outSS, eq = 2, select = 1, scale = 0)
plot(outSS, eq = 2, select = 2, ylim = c(-0.1,0.1))
#
#
############
## EXAMPLE 2
############
## Generate data with one endogenous variable and exclusion restriction
set.seed(0)
n <- 300
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rmvnorm(n, rep(0,2), Sigma)
cov <- rmvnorm(n, rep(0,2), Sigma, method = "svd")
cov <- pnorm(cov)
x1 <- round(cov[,1]); x2 <- cov[,2]
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
f2 <- function(x) x+exp(-30*(x-0.5)^2)
y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
y2 <- ifelse(-0.25 - 1.25*y1 + f2(x2) + u[,2] > 0, 1, 0)
dataSim <- data.frame(y1, y2, x1, x2)
#
## Testing the hypothesis of absence of endogeneity...
LM.bpm(y1 ~ x1 + s(x2), y2 ~ y1 + s(x2), dataSim)
# p-value suggests presence of endogeneity, hence fit a bivariate model
## CLASSIC RECURSIVE BIVARIATE PROBIT
out <- SemiParBIVProbit(y1 ~ x1 + x2,
y2 ~ y1 + x2,
data = dataSim)
summary(out)
## SEMIPARAMETRIC RECURSIVE BIVARIATE PROBIT
out <- SemiParBIVProbit(y1 ~ x1 + s(x2),
y2 ~ y1 + s(x2),
data = dataSim)
summary(out)
#
## Testing the hypothesis of absence of endogeneity post estimation...
gt.bpm(out)
#
## average treatment effect with CIs
AT(out, eq = 2, nm.bin = "y1")
## try a Clayton copula model...
outC <- SemiParBIVProbit(y1 ~ x1 + s(x2),
y2 ~ y1 + s(x2),
data = dataSim, BivD = "C0")
summary(outC)
AT(outC, eq = 2, nm.bin = "y1")
## try a Joe copula model...
outJ <- SemiParBIVProbit(y1 ~ x1 + s(x2),
y2 ~ y1 + s(x2),
data = dataSim, BivD = "J0")
summary(outJ)
AT(outJ, eq = 2, nm.bin = "y1")
VuongClarke.bcm(out, outJ)
#
## recursive bivariate probit modelling with unpenalized splines
## can be achieved as follows
outFP <- SemiParBIVProbit(y1 ~ x1 + s(x2, bs = "cr", k = 5),
y2 ~ y1 + s(x2, bs = "cr", k = 6), fp = TRUE,
data = dataSim)
summary(outFP)
#
#
############
## EXAMPLE 3
############
## Generate data with a non-random sample selection mechanism and exclusion restriction
set.seed(0)
n <- 1100
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rmvnorm(n, rep(0,2), Sigma)
SigmaC <- matrix(0.5, 3, 3); diag(SigmaC) <- 1
cov <- rmvnorm(n, rep(0,3), SigmaC, method = "svd")
cov <- pnorm(cov)
bi <- round(cov[,1]); x1 <- cov[,2]; x2 <- cov[,3]
f11 <- function(x) -0.7*(4*x + 2.5*x^2 + 0.7*sin(5*x) + cos(7.5*x))
f12 <- function(x) -0.4*( -0.3 - 1.6*x + sin(5*x))
f21 <- function(x) 0.6*(exp(x) + sin(2.9*x))
ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + u[, 1] > 0
y <- -0.68 - 1.5*bi + f21(x1) + + u[, 2] > 0
yo <- y*(ys > 0)
dataSim <- data.frame(y, ys, yo, bi, x1, x2)
## Testing the hypothesis of absence of non-random sample selection...
LM.bpm(ys ~ bi + s(x1) + s(x2), yo ~ bi + s(x1), dataSim, selection = TRUE)
# p-value suggests presence of sample selection, hence fit a bivariate model
#
## SEMIPARAMETRIC SAMPLE SELECTION BIVARIATE PROBIT
## the first equation MUST be the selection equation
out <- SemiParBIVProbit(ys ~ bi + s(x1) + s(x2),
yo ~ bi + s(x1),
data = dataSim, selection = TRUE)
gt.bpm(out)
## compare the two summary outputs
## the second output produces a summary of the results obtained when
## selection bias is not accounted for
summary(out)
summary(out$gam2)
## corrected predicted probability that 'yo' is equal to 1
est.prev(out)
## estimated smooth function plots
## the red line is the true curve
## the blue line is the naive curve not accounting for selection bias
x1.s <- sort(x1[dataSim$ys>0])
f21.x1 <- f21(x1.s)[order(x1.s)]-mean(f21(x1.s))
x11()
plot(out, eq = 2, ylim = c(-1.65,0.95)); lines(x1.s, f21.x1, col="red")
par(new = TRUE)
plot(out$gam2, se = FALSE, col = "blue", ylim = c(-1.65,0.95), ylab = "", rug = FALSE)
#
#
## try a Clayton copula model...
outC <- SemiParBIVProbit(ys ~ bi + s(x1) + s(x2),
yo ~ bi + s(x1),
data = dataSim, selection = TRUE, BivD = "C0")
sem.checks(outC)
summary(outC)
est.prev(outC)
#
#
Run the code above in your browser using DataLab