library(SemiParBIVProbit)
library(mvtnorm)  ## for rmvnorm() below; attached explicitly in case it is not loaded automatically
############
## EXAMPLE 1
############
## Generate data
## Correlation between the two equations 0.5 - Sample size 400
set.seed(0)
n <- 400
Sigma <- matrix(c(1,0.5,0.5,1),2,2)
u <- rmvnorm(n,rep(0,2),Sigma)
x1 <- round(runif(n))
x2 <- runif(n)
x3 <- runif(n)
f1 <- function(x) (cos(pi*2*x)) + sin(pi*x)
f2 <- function(x) (x+exp(-30*(x-0.5)^2))
y1 <- rep(0,n)
y1 <- replace(y1, -1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1)
y2 <- rep(0,n)
y2 <- replace(y2, -0.25 - 1.25*x1 + f2(x2) + u[,2] > 0, 1)
dataSim <- data.frame(y1,y2,x1,x2,x3)
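## (optional check, not part of the original example) cross-tabulate the two
## simulated binary responses to get a feel for their joint distribution
table(y1, y2)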
#
#
## CLASSIC BIVARIATE PROBIT
out <- SemiParBIVProbit(y1 ~ x1 + x2 + x3,
                        y2 ~ x1 + x2 + x3,
                        data=dataSim)
summary(out)
InfCr(out)
InfCr(out,cr="BIC")
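## For comparison only (illustrative, not part of the package output): the two
## marginal probit fits via glm(), which ignore the correlation between the
## equations
glm1 <- glm(y1 ~ x1 + x2 + x3, family = binomial(link = "probit"), data = dataSim)
glm2 <- glm(y2 ~ x1 + x2 + x3, family = binomial(link = "probit"), data = dataSim)
summary(glm1); summary(glm2)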
## SEMIPARAMETRIC BIVARIATE PROBIT
## "cr" cubic regression spline basis - "cs" shrinkage version of "cr"
## "tp" thin plate regression spline basis - "ts" shrinkage version of "tp"
## for smooths of one variable, "cr/cs" and "tp/ts" achieve similar results
## k is the basis dimension - default is 10
## m is the order of the penalty for the specific term - default is 2
out <- SemiParBIVProbit(y1 ~ x1 + s(x2,bs="cr",k=10,m=2) + s(x3,bs="cr",k=10),
                        y2 ~ x1 + s(x2,bs="cr",k=10) + s(x3,bs="cr",k=10),
                        data=dataSim)
summary(out)
InfCr(out)
## estimated smooth function plots - red lines are true curves
x2 <- sort(x2)
f1.x2 <- f1(x2) - mean(f1(x2))  ## true curves, centred as the fitted smooths are
f2.x2 <- f2(x2) - mean(f2(x2))
f3.x3 <- rep(0, length(x3))     ## x3 has no effect, so its true curve is the zero line
par(mfrow=c(2,2),mar=c(4.5,4.5,2,2))
plot(out, eq=1, select=1); lines(x2, f1.x2, col="red")
plot(out, eq=1, select=2); lines(x3, f3.x3, col="red")
plot(out, eq=2, select=1); lines(x2, f2.x2, col="red")
plot(out, eq=2, select=2); lines(x3, f3.x3, col="red")
#
## same plots, but the CIs include the uncertainty about the model intercept (seWithMean=TRUE)
par(mfrow=c(2,2),mar=c(4.5,4.5,2,2))
plot(out, eq=1, select=1, seWithMean=TRUE); lines(x2, f1.x2, col="red")
plot(out, eq=1, select=2, seWithMean=TRUE); lines(x3, f3.x3, col="red")
plot(out, eq=2, select=1, seWithMean=TRUE); lines(x2, f2.x2, col="red")
plot(out, eq=2, select=2, seWithMean=TRUE); lines(x3, f3.x3, col="red")
#
## set gamma=1.4 (an inflation factor for the degrees of freedom in the
## smoothness selection criterion) to obtain a smoother model
outS <- SemiParBIVProbit(y1 ~ x1 + s(x2,bs="cr",k=10) + s(x3,bs="cr",k=10),
                         y2 ~ x1 + s(x2,bs="cr",k=10) + s(x3,bs="cr",k=10),
                         data=dataSim, gamma=1.4)
summary(outS)
## compare the two summary outputs
## p-values suggest dropping x3 from both equations, with stronger
## evidence for eq. 2. This can also be achieved via shrinkage smoothers
outSS <- SemiParBIVProbit(y1 ~ x1 + s(x2,bs="cs",k=10) + s(x3,bs="cs",k=10),
                          y2 ~ x1 + s(x2,bs="cs",k=10) + s(x3,bs="cs",k=10),
                          data=dataSim, gamma=1.4)
par(mfrow=c(2,2),mar=c(4.5,4.5,2,2))
plot(outSS, eq=1, select=1)
plot(outSS, eq=1, select=2, ylim=c(-0.1,0.1))
plot(outSS, eq=2, select=1)
plot(outSS, eq=2, select=2, ylim=c(-0.1,0.1))
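## (illustrative) the summary of the shrinkage fit is expected to show effective
## degrees of freedom close to zero for the x3 smooths, consistent with the
## nearly flat curves above
summary(outSS)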
#
#
############
## EXAMPLE 2
############
## Generate data with one endogenous variable and an exclusion restriction
set.seed(0)
n <- 400
Sigma <- matrix(c(1,0.5,0.5,1),2,2)
u <- rmvnorm(n,rep(0,2),Sigma)
x1 <- round(runif(n))
x2 <- runif(n)
x3 <- runif(n)  ## used only in the 2D-smooth illustration further below
f1 <- function(x) (cos(pi*2*x)) + sin(pi*x)
f2 <- function(x) (x+exp(-30*(x-0.5)^2))
y1 <- rep(0,n)
y1 <- replace(y1, -1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1)
y2 <- rep(0,n)
y2 <- replace(y2, -0.25 - 1.25*y1 + f2(x2) + u[,2] > 0, 1)
dataSim <- data.frame(y1,y2,x1,x2)
#
#
## CLASSIC RECURSIVE BIVARIATE PROBIT
out <- SemiParBIVProbit(y1 ~ x1 + x2,
                        y2 ~ y1 + x2,
                        data=dataSim)
summary(out)
## SEMIPARAMETRIC RECURSIVE BIVARIATE PROBIT
out <- SemiParBIVProbit(y1 ~ x1 + s(x2,bs="cr",k=10),
                        y2 ~ y1 + s(x2,bs="cr",k=10),
                        data=dataSim)
summary(out)
AT(out,eq=2,nm.bin="y1") ## average treatment effect (ATE) of y1 with CIs
AT(out,eq=2,nm.bin="y1",E=FALSE) ## average treatment effect on the treated (ATT)
#
## recursive bivariate probit modelling with unpenalized splines
## can be achieved as follows
outFP <- SemiParBIVProbit(y1 ~ x1 + s(x2,bs="cr",k=5),
                          y2 ~ y1 + s(x2,bs="cr",k=6),
                          aut.sp=FALSE, fp=TRUE, data=dataSim)
summary(outFP)
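## (illustrative) compare the information criteria of the penalized and
## unpenalized spline fits
InfCr(out); InfCr(outFP)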
#
## example using a 2D smooth
f.biv <- function(x,z) {-0.7*exp(((3*x+3) + 0.7*(3*z-3)^2)/5)}
y1 <- rep(0,n)
y1 <- replace(y1, -1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1)
y2 <- rep(0,n)
y2 <- replace(y2, 3.5 - 1.25*y1 + f.biv(x2,x3) + u[,2] > 0, 1)
dataSim <- data.frame(y1,y2,x1,x2,x3)
outSb <- SemiParBIVProbit(y1 ~ x1 + s(x2,bs="cr"),
                          y2 ~ y1 + s(x2,x3,bs="tp"),
                          data=dataSim)
summary(outSb)
par(mfrow=c(1,2),mar=c(4.5,4.5,2,2))
x2 <- x3 <- seq(0, 1, length=40)
z <- outer(x2, x3, f.biv)
persp(x2, x3, z, theta = 30, phi = 25,
      xlab="x2", ylab="x3", zlab="f.biv")
plot(outSb, eq=2, select=1, theta = 30, phi = 25)
#
#
############
## EXAMPLE 3
############
## Generate data with a sample selection mechanism and an exclusion restriction
set.seed(0)
n <- 2000
Sigma <- matrix(c(1,0.5,0.5,1),2,2)
u <- rmvnorm(n,rep(0,2),Sigma)
SigmaC <- matrix(c(1,0.5,0.5, 0.5,1,0.5, 0.5,0.5,1), 3, 3)
cov <- rmvnorm(n,rep(0,3),SigmaC, method="svd")
cov <- pnorm(cov)
bi <- round(cov[,1]); x1 <- cov[,2]; x2 <- cov[,3]
f11 <- function(x) -0.7*(4*x + 2.5*x^2 + 0.7*sin(5*x) + cos(7.5*x))
f12 <- function(x) -0.4*( -0.3 - 1.6*x + sin(5*x))
f21 <- function(x) 0.6*(exp(x) + sin(2.9*x))
ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + u[, 1] > 0
y <- -0.68 - 1.5*bi + f21(x1) + u[, 2] > 0
yo <- y*(ys > 0)
dataSim <- data.frame(y,ys,yo,bi,x1,x2)
## SEMIPARAMETRIC SAMPLE SELECTION BIVARIATE PROBIT
## the first equation MUST be the selection equation
out <- SemiParBIVProbit(ys ~ bi + s(x1,bs="cr",k=10) + s(x2,bs="cr",k=10),
                        yo ~ bi + s(x1,bs="cr",k=10),
                        data=dataSim, selection=TRUE)
AT(out,eq=2,nm.bin="bi",E=FALSE) ## average treatment effect on the treated (ATT)
## compare the two summary outputs
## the second output produces a summary of the results obtained when only
## the outcome equation is fitted, i.e. selection bias is not accounted for
summary(out)
summary(out$gam2)
## mean predicted probabilities that 'yo' is equal to 1
## the second predicted probability does not account for selection bias
mean(pnorm(out$eta2[out$eta2 != 0]))
mean(pnorm(predict(out$gam2)))
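## As a rough, illustrative check, compare with the observed proportion of
## ones among the selected observations
mean(dataSim$yo[dataSim$ys == 1])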
## estimated smooth function plots
## the red line is the true curve
## the blue line is the naive curve not accounting for selection bias
x1 <- sort(x1)
f21.x1 <- f21(x1) - mean(f21(x1))  ## true curve, centred as the fitted smooth is
plot(out, eq=2, select=1, ylim=c(-1.2,1)); lines(x1, f21.x1, col="red")
par(new=TRUE)
plot(out$gam2, select=1, se=FALSE, col="blue", ylim=c(-1.2,1), ylab="", rug=FALSE)
#
#
############
## EXAMPLE 4
############
## Generate data with one endogenous variable, an exclusion restriction
## and subject-specific features
set.seed(0)
n <- 2000
sre <- round(runif(n,2,4))
t.n <- sum(sre)
id <- rep(seq(1,n),sre)
time <- NULL
for (i in 1:n) time <- c(time,seq(1,sre[i]))
time <- (time-mean(time))/2
Sigma <- matrix(c(1,0.5,0.5,1),2,2)
u <- rmvnorm(t.n,rep(0,2),Sigma)
# random effects distribution (three mass points with probabilities 0.4, 0.2 and 0.4)
mp1 <- c(-2,0,2)
mp2 <- c(-3,0,3)
m <- c(0.4,0.2,0.4)
mp.ind <- sample(c(1,2,3), n, replace = TRUE, prob = m)
mpr1 <- mp1[mp.ind]
mpr2 <- mp2[mp.ind]
mpre1 <- rep(mpr1,sre)
mpre2 <- rep(mpr2,sre)
#
x1 <- round(runif(t.n))
x2 <- runif(t.n)
x3 <- runif(t.n)
f1 <- function(x) (cos(pi*2*x)) + sin(pi*x)
f2 <- function(x) (x+exp(-30*(x-0.5)^2))
# responses
y1 <- rep(0,t.n)
y2 <- rep(0,t.n)
y1 <- replace(y1, mpre1 + 2*x1 + 0.75*x2 - time + u[,1] > 0, 1)
y2 <- replace(y2, mpre2 - 1.25*y1 + 2.55*x2 + time + u[,2] > 0, 1)
dataSim <- data.frame(id,time,y1,y2,x1,x2,x3)
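## (optional check, not in the original example) distribution of the simulated
## cluster sizes in the panel
table(sre)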
## CASE WITH NO SMOOTH COMPONENTS
out <- SemiParBIVProbit(y1 ~ x1 + x2 + x3 + time,
                        y2 ~ y1 + x2 + x3 + time,
                        data=dataSim,
                        npRE=TRUE, K=3, id=id)
summary(out)
## CASE WITH SMOOTH COMPONENTS
y1 <- rep(0,t.n)
y2 <- rep(0,t.n)
y1 <- replace(y1, mpre1 + 2*x1 + f1(x2) - time + u[,1] > 0, 1)
y2 <- replace(y2, mpre2 - 1.25*y1 + f2(x2) + time + u[,2] > 0, 1)
dataSim <- data.frame(id,time,y1,y2,x1,x2,x3)
out <- SemiParBIVProbit(y1 ~ x1 + s(x2) + x3 + time,
                        y2 ~ y1 + s(x2) + x3 + time,
                        data=dataSim, aut.sp=FALSE,
                        npRE=TRUE, K=3, id=id)
summary(out)
x2 <- sort(x2)
f1.x2 <- f1(x2) - mean(f1(x2))
f2.x2 <- f2(x2) - mean(f2(x2))
par(mfrow=c(2,1))
plot(out, eq=1, select=1); lines(x2, f1.x2, col="red")
plot(out, eq=2, select=1); lines(x2, f2.x2, col="red")
#