library(SemiParBIVProbit)
############
## EXAMPLE 1
############
## Generate data
## Correlation between the two equations 0.5 - Sample size 400
set.seed(0)
n <- 400
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rMVN(n, rep(0,2), Sigma)
x1 <- round(runif(n)); x2 <- runif(n); x3 <- runif(n)
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
f2 <- function(x) x+exp(-30*(x-0.5)^2)
y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
y2 <- ifelse(-0.25 - 1.25*x1 + f2(x2) + u[,2] > 0, 1, 0)
dataSim <- data.frame(y1, y2, x1, x2, x3)
#
#
## CLASSIC BIVARIATE PROBIT
out <- SemiParBIVProbit(list(y1 ~ x1 + x2 + x3,
y2 ~ x1 + x2 + x3),
data = dataSim)
conv.check(out)
summary(out)
AIC(out)
BIC(out)
## SEMIPARAMETRIC BIVARIATE PROBIT
## "cr" cubic regression spline basis - "cs" shrinkage version of "cr"
## "tp" thin plate regression spline basis - "ts" shrinkage version of "tp"
## for smooths of one variable, "cr/cs" and "tp/ts" achieve similar results
## k is the basis dimension - default is 10
## m is the order of the penalty for the specific term - default is 2
## For COPULA models use BivD argument
out <- SemiParBIVProbit(list(y1 ~ x1 + s(x2, bs = "tp", k = 10, m = 2) + s(x3),
y2 ~ x1 + s(x2) + s(x3)),
data = dataSim)
conv.check(out)
summary(out, cm.plot = TRUE)
AIC(out)
## estimated smooth function plots - red lines are true curves
x2 <- sort(x2)
f1.x2 <- f1(x2)[order(x2)] - mean(f1(x2))
f2.x2 <- f2(x2)[order(x2)] - mean(f2(x2))
f3.x3 <- rep(0, length(x3))
par(mfrow=c(2,2),mar=c(4.5,4.5,2,2))
plot(out, eq = 1, select = 1, seWithMean = TRUE, scale = 0)
lines(x2, f1.x2, col = "red")
plot(out, eq = 1, select = 2, seWithMean = TRUE, scale = 0)
lines(x3, f3.x3, col = "red")
plot(out, eq = 2, select = 1, seWithMean = TRUE, scale = 0)
lines(x2, f2.x2, col = "red")
plot(out, eq = 2, select = 2, seWithMean = TRUE, scale = 0)
lines(x3, f3.x3, col = "red")
## p-values suggest to drop x3 from both equations, with a stronger
## evidence for eq. 2. This can be also achieved using shrinkage smoothers
outSS <- SemiParBIVProbit(list(y1 ~ x1 + s(x2, bs = "ts") + s(x3, bs = "cs"),
y2 ~ x1 + s(x2, bs = "cs") + s(x3, bs = "ts")),
data = dataSim)
conv.check(outSS)
plot(outSS, eq = 1, select = 1, scale = 0, shade = TRUE)
plot(outSS, eq = 1, select = 2, ylim = c(-0.1,0.1))
plot(outSS, eq = 2, select = 1, scale = 0, shade = TRUE)
plot(outSS, eq = 2, select = 2, ylim = c(-0.1,0.1))
## Not run:
#
# ## SEMIPARAMETRIC BIVARIATE PROBIT with association parameter
# ## depending on covariates as well
#
# eq.mu.1 <- y1 ~ x1 + s(x2)
# eq.mu.2 <- y2 ~ x1 + s(x2)
# eq.theta <- ~ x1 + s(x2)
#
# fl <- list(eq.mu.1, eq.mu.2, eq.theta)
#
# outD <- SemiParBIVProbit(fl, data = dataSim)
# conv.check(outD)
# summary(outD)
# outD$theta
#
# plot(outD, eq = 1, seWithMean = TRUE)
# plot(outD, eq = 2, seWithMean = TRUE)
# plot(outD, eq = 3, seWithMean = TRUE)
# graphics.off()
#
# #
# #
#
# ############
# ## EXAMPLE 2
# ############
# ## Generate data with one endogenous variable
# ## and exclusion restriction
#
# set.seed(0)
#
# n <- 400
#
# Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
# u <- rMVN(n, rep(0,2), Sigma)
#
# cov <- rMVN(n, rep(0,2), Sigma)
# cov <- pnorm(cov)
# x1 <- round(cov[,1]); x2 <- cov[,2]
#
# f1 <- function(x) cos(pi*2*x) + sin(pi*x)
# f2 <- function(x) x+exp(-30*(x-0.5)^2)
#
# y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
# y2 <- ifelse(-0.25 - 1.25*y1 + f2(x2) + u[,2] > 0, 1, 0)
#
# dataSim <- data.frame(y1, y2, x1, x2)
#
# #
#
# ## Testing the hypothesis of absence of endogeneity...
#
# LM.bpm(list(y1 ~ x1 + s(x2), y2 ~ y1 + s(x2)), dataSim, Model = "B")
#
# # p-value suggests presence of endogeneity, hence fit a bivariate model
#
#
# ## CLASSIC RECURSIVE BIVARIATE PROBIT
#
# out <- SemiParBIVProbit(list(y1 ~ x1 + x2,
# y2 ~ y1 + x2),
# data = dataSim)
# conv.check(out)
# summary(out)
# AIC(out); BIC(out)
#
# ## SEMIPARAMETRIC RECURSIVE BIVARIATE PROBIT
#
# out <- SemiParBIVProbit(list(y1 ~ x1 + s(x2),
# y2 ~ y1 + s(x2)),
# data = dataSim)
# conv.check(out)
# summary(out)
# AIC(out); BIC(out)
#
# #
#
# ## Testing the hypothesis of absence of endogeneity post estimation...
#
# gt.bpm(out)
#
# #
# ## reatment effect, risk ratio and odds ratio with CIs
#
# mb(y1, y2, Model = "B")
# AT(out, nm.end = "y1", hd.plot = TRUE)
# RR(out, nm.end = "y1")
# OR(out, nm.end = "y1")
# AT(out, nm.end = "y1", type = "univariate")
#
#
# ## try a Clayton copula model...
#
# outC <- SemiParBIVProbit(list(y1 ~ x1 + s(x2),
# y2 ~ y1 + s(x2)),
# data = dataSim, BivD = "C0")
# conv.check(outC)
# summary(outC)
# AT(outC, nm.end = "y1")
#
# ## try a Joe copula model...
#
# outJ <- SemiParBIVProbit(list(y1 ~ x1 + s(x2),
# y2 ~ y1 + s(x2)),
# data = dataSim, BivD = "J0")
# conv.check(outJ)
# summary(outJ, cm.plot = TRUE)
# AT(outJ, "y1")
#
#
# VuongClarke(out, outJ)
#
# #
# ## recursive bivariate probit modelling with unpenalized splines
# ## can be achieved as follows
#
# outFP <- SemiParBIVProbit(list(y1 ~ x1 + s(x2, bs = "cr", k = 5),
# y2 ~ y1 + s(x2, bs = "cr", k = 6)),
# fp = TRUE, data = dataSim)
# conv.check(outFP)
# summary(outFP)
#
# # in the above examples a third equation could be introduced
# # as illustrated in Example 1
#
# #
# #################
# ## See also ?meps
# #################
#
# ############
# ## EXAMPLE 3
# ############
# ## Generate data with a non-random sample selection mechanism
# ## and exclusion restriction
#
# set.seed(0)
#
# n <- 2000
#
# Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
# u <- rMVN(n, rep(0,2), Sigma)
#
# SigmaC <- matrix(0.5, 3, 3); diag(SigmaC) <- 1
# cov <- rMVN(n, rep(0,3), SigmaC)
# cov <- pnorm(cov)
# bi <- round(cov[,1]); x1 <- cov[,2]; x2 <- cov[,3]
#
# f11 <- function(x) -0.7*(4*x + 2.5*x^2 + 0.7*sin(5*x) + cos(7.5*x))
# f12 <- function(x) -0.4*( -0.3 - 1.6*x + sin(5*x))
# f21 <- function(x) 0.6*(exp(x) + sin(2.9*x))
#
# ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + u[, 1] > 0
# y <- -0.68 - 1.5*bi + f21(x1) + + u[, 2] > 0
# yo <- y*(ys > 0)
#
# dataSim <- data.frame(y, ys, yo, bi, x1, x2)
#
# ## Testing the hypothesis of absence of non-random sample selection...
#
# LM.bpm(list(ys ~ bi + s(x1) + s(x2), yo ~ bi + s(x1)), dataSim, Model = "BSS")
#
# # p-value suggests presence of sample selection, hence fit a bivariate model
#
# #
# ## SEMIPARAMETRIC SAMPLE SELECTION BIVARIATE PROBIT
# ## the first equation MUST be the selection equation
#
# out <- SemiParBIVProbit(list(ys ~ bi + s(x1) + s(x2),
# yo ~ bi + s(x1)),
# data = dataSim, Model = "BSS")
# conv.check(out)
# gt.bpm(out)
#
# ## compare the two summary outputs
# ## the second output produces a summary of the results obtained when
# ## selection bias is not accounted for
#
# summary(out)
# summary(out$gam2)
#
# ## corrected predicted probability that 'yo' is equal to 1
#
# mb(ys, yo, Model = "BSS")
# prev(out, hd.plot = TRUE)
# prev(out, type = "univariate", hd.plot = TRUE)
#
# ## estimated smooth function plots
# ## the red line is the true curve
# ## the blue line is the univariate model curve not accounting for selection bias
#
# x1.s <- sort(x1[dataSim$ys>0])
# f21.x1 <- f21(x1.s)[order(x1.s)]-mean(f21(x1.s))
#
# plot(out, eq = 2, ylim = c(-1.65,0.95)); lines(x1.s, f21.x1, col="red")
# par(new = TRUE)
# plot(out$gam2, se = FALSE, col = "blue", ylim = c(-1.65,0.95),
# ylab = "", rug = FALSE)
#
# #
# #
# ## try a Clayton copula model...
#
# outC <- SemiParBIVProbit(list(ys ~ bi + s(x1) + s(x2),
# yo ~ bi + s(x1)),
# data = dataSim, Model = "BSS", BivD = "C0")
# conv.check(outC)
# summary(outC, cm.plot = TRUE)
# prev(outC)
#
# # in the above examples a third equation could be introduced
# # as illustrated in Example 1
#
# #
# ################
# ## See also ?hiv
# ################
#
# ############
# ## EXAMPLE 4
# ############
# ## Generate data with partial observability
#
# set.seed(0)
#
# n <- 10000
#
# Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
# u <- rMVN(n, rep(0,2), Sigma)
#
# x1 <- round(runif(n)); x2 <- runif(n); x3 <- runif(n)
#
# y1 <- ifelse(-1.55 + 2*x1 + x2 + u[,1] > 0, 1, 0)
# y2 <- ifelse( 0.45 - x3 + u[,2] > 0, 1, 0)
# y <- y1*y2
#
# dataSim <- data.frame(y, x1, x2, x3)
#
#
# ## BIVARIATE PROBIT with Partial Observability
#
# out <- SemiParBIVProbit(list(y ~ x1 + x2,
# y ~ x3),
# data = dataSim, Model = "BPO")
# conv.check(out)
# summary(out)
#
# # first ten estimated probabilities for the four events from object out
#
# cbind(out$p11, out$p10, out$p00, out$p01)[1:10,]
#
#
# # case with smooth function
# # (more computationally intensive)
#
# f1 <- function(x) cos(pi*2*x) + sin(pi*x)
#
# y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
# y2 <- ifelse( 0.45 - x3 + u[,2] > 0, 1, 0)
# y <- y1*y2
#
# dataSim <- data.frame(y, x1, x2, x3)
#
# out <- SemiParBIVProbit(list(y ~ x1 + s(x2),
# y ~ x3),
# data = dataSim, Model = "BPO")
#
# conv.check(out)
# summary(out, cm.plot = TRUE)
#
#
# # plot estimated and true functions
#
# x2 <- sort(x2); f1.x2 <- f1(x2)[order(x2)] - mean(f1(x2))
# plot(out, eq = 1, scale = 0); lines(x2, f1.x2, col = "red")
#
# #
# ################
# ## See also ?war
# ################
#
# ############
# ## EXAMPLE 5
# ############
# ## Generate data with one endogenous binary variable
# ## and continuous outcome
#
# set.seed(0)
#
# n <- 1000
#
# Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
# u <- rMVN(n, rep(0,2), Sigma)
#
# cov <- rMVN(n, rep(0,2), Sigma)
# cov <- pnorm(cov)
# x1 <- round(cov[,1]); x2 <- cov[,2]
#
# f1 <- function(x) cos(pi*2*x) + sin(pi*x)
# f2 <- function(x) x+exp(-30*(x-0.5)^2)
#
# y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
# y2 <- -0.25 - 1.25*y1 + f2(x2) + u[,2]
#
# dataSim <- data.frame(y1, y2, x1, x2)
#
#
# ## RECURSIVE Model
#
# rc <- resp.check(y2, margin = "N", print.par = TRUE, loglik = TRUE)
# AIC(rc); BIC(rc)
#
# out <- SemiParBIVProbit(list(y1 ~ x1 + x2,
# y2 ~ y1 + x2),
# data = dataSim, margins = c("probit","N"))
# conv.check(out)
# summary(out)
# post.check(out)
#
# ## SEMIPARAMETRIC RECURSIVE Model
#
# eq.mu.1 <- y1 ~ x1 + s(x2)
# eq.mu.2 <- y2 ~ y1 + s(x2)
# eq.sigma2 <- ~ 1
# eq.theta <- ~ 1
#
# fl <- list(eq.mu.1, eq.mu.2, eq.sigma2, eq.theta)
#
# out <- SemiParBIVProbit(fl, data = dataSim,
# margins = c("probit","N"), gamlssfit = TRUE)
# conv.check(out)
# summary(out)
# post.check(out)
# jc.probs(out, 1, 1.5, intervals = TRUE)[1:4,]
# AT(out, nm.end = "y1")
# AT(out, nm.end = "y1", type = "univariate")
#
#
# #
# #
#
# ############
# ## EXAMPLE 6
# ############
# ## Generate data with one endogenous continuous exposure
# ## and binary outcome
#
# set.seed(0)
#
# n <- 1000
#
# Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
# u <- rMVN(n, rep(0,2), Sigma)
#
# cov <- rMVN(n, rep(0,2), Sigma)
# cov <- pnorm(cov)
# x1 <- round(cov[,1]); x2 <- cov[,2]
#
# f1 <- function(x) cos(pi*2*x) + sin(pi*x)
# f2 <- function(x) x+exp(-30*(x-0.5)^2)
#
# y1 <- -0.25 - 2*x1 + f2(x2) + u[,2]
# y2 <- ifelse(-0.25 - 0.25*y1 + f1(x2) + u[,1] > 0, 1, 0)
#
# dataSim <- data.frame(y1, y2, x1, x2)
#
# eq.mu.1 <- y2 ~ y1 + s(x2)
# eq.mu.2 <- y1 ~ x1 + s(x2)
# eq.sigma2 <- ~ 1
# eq.theta <- ~ 1
#
# fl <- list(eq.mu.1, eq.mu.2, eq.sigma2, eq.theta)
#
# out <- SemiParBIVProbit(fl, data = dataSim,
# margins = c("probit","N"))
# conv.check(out)
# summary(out)
# post.check(out)
# AT(out, nm.end = "y1")
# AT(out, nm.end = "y1", type = "univariate")
# RR(out, nm.end = "y1", rr.plot = TRUE)
# RR(out, nm.end = "y1", type = "univariate")
# OR(out, nm.end = "y1", or.plot = TRUE)
# OR(out, nm.end = "y1", type = "univariate")
#
# ## End(Not run)
Run the code above in your browser using DataLab