library(SemiParBIVProbit)
library(mvtnorm)  ## for rmvnorm(), used below to simulate correlated errors
############
## EXAMPLE 1
############
## Generate data
## Correlation between the two equations 0.5 - Sample size 400
set.seed(0)
n <- 400
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rmvnorm(n,rep(0,2),Sigma)
x1 <- round(runif(n)); x2 <- runif(n); x3 <- runif(n)
f1 <- function(x) (cos(pi*2*x)) + sin(pi*x)
f2 <- function(x) (x+exp(-30*(x-0.5)^2))
y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
y2 <- ifelse(-0.25 - 1.25*x1 + f2(x2) + u[,2] > 0, 1, 0)
dataSim <- data.frame(y1,y2,x1,x2,x3)
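## quick sanity check of the simulated responses (not part of the original
## example): cross-tabulation of the two binary outcomes
table(y1, y2)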
#
#
## CLASSIC BIVARIATE PROBIT
out <- SemiParBIVProbit(y1 ~ x1 + x2 + x3,
                        y2 ~ x1 + x2 + x3,
                        data=dataSim)
summary(out)
InfCr(out)            ## default information criterion
InfCr(out, cr="BIC")  ## BIC
## SEMIPARAMETRIC BIVARIATE PROBIT
## "cr" cubic regression spline basis - "cs" shrinkage version of "cr"
## "tp" thin plate regression spline basis - "ts" shrinkage version of "tp"
## for smooths of one variable, "cr/cs" and "tp/ts" achieve similar results
## k is the basis dimension - default is 10
## m is the order of the penalty for the specific term - default is 2
out <- SemiParBIVProbit(y1 ~ x1 + s(x2, bs="cr", k=10, m=2) + s(x3, bs="cr", k=10),
                        y2 ~ x1 + s(x2, bs="cr", k=10) + s(x3, bs="cr", k=10),
                        data=dataSim)
summary(out)
InfCr(out)
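#
## as the comments above note, "tp"/"ts" bases give results similar to
## "cr"/"cs" for smooths of one variable; a sketch of the same model using
## thin plate splines (the object name outTP is illustrative, not part of
## the original example)
outTP <- SemiParBIVProbit(y1 ~ x1 + s(x2, bs="tp", k=10) + s(x3, bs="tp", k=10),
                          y2 ~ x1 + s(x2, bs="tp", k=10) + s(x3, bs="tp", k=10),
                          data=dataSim)
InfCr(outTP)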
## estimated smooth function plots - red lines are true curves
x2 <- sort(x2)
f1.x2 <- f1(x2) - mean(f1(x2))
f2.x2 <- f2(x2) - mean(f2(x2))
f3.x3 <- rep(0,length(x3))
par(mfrow=c(2,2),mar=c(4.5,4.5,2,2))
plot(out, eq=1, select=1); lines(x2, f1.x2, col="red")
plot(out, eq=1, select=2); lines(x3, f3.x3, col="red")
plot(out, eq=2, select=1); lines(x2, f2.x2, col="red")
plot(out, eq=2, select=2); lines(x3, f3.x3, col="red")
#
## same plots, but with CIs that also account for the uncertainty about the
## model intercept (seWithMean=TRUE)
par(mfrow=c(2,2),mar=c(4.5,4.5,2,2))
plot(out, eq=1, select=1, seWithMean=TRUE); lines(x2, f1.x2, col="red")
plot(out, eq=1, select=2, seWithMean=TRUE); lines(x3, f3.x3, col="red")
plot(out, eq=2, select=1, seWithMean=TRUE); lines(x2, f2.x2, col="red")
plot(out, eq=2, select=2, seWithMean=TRUE); lines(x3, f3.x3, col="red")
## p-values suggest dropping x3 from both equations, with stronger
## evidence for eq. 2. This can also be achieved using shrinkage smoothers
outSS <- SemiParBIVProbit(y1 ~ x1 + s(x2, bs="cs") + s(x3, bs="cs"),
                          y2 ~ x1 + s(x2, bs="cs") + s(x3, bs="cs"),
                          data=dataSim)
par(mfrow=c(2,2),mar=c(4.5,4.5,2,2))
plot(outSS, eq=1, select=1)
plot(outSS, eq=1, select=2, ylim=c(-0.1,0.1))
plot(outSS, eq=2, select=1)
plot(outSS, eq=2, select=2, ylim=c(-0.1,0.1))
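#
## with shrinkage smoothers the effective degrees of freedom of the s(x3)
## terms should be shrunk towards zero; this can be checked in the summary
## (this extra check is not part of the original example)
summary(outSS)
InfCr(outSS)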
#
#
############
## EXAMPLE 2
############
## Generate data with one endogenous variable and exclusion restriction
set.seed(0)
n <- 400
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rmvnorm(n,rep(0,2),Sigma)
cov <- rmvnorm(n,rep(0,2),Sigma, method="svd")
cov <- pnorm(cov)
x1 <- round(cov[,1]); x2 <- cov[,2]
f1 <- function(x) (cos(pi*2*x)) + sin(pi*x)
f2 <- function(x) (x+exp(-30*(x-0.5)^2))
y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
y2 <- ifelse(-0.25 - 1.25*y1 + f2(x2) + u[,2] > 0, 1, 0)
dataSim <- data.frame(y1,y2,x1,x2)
#
## Testing the hypothesis of absence of endogeneity...
LM.bpm(y1 ~ x1 + s(x2), y2 ~ y1 + s(x2), dataSim)
# p-value suggests presence of endogeneity, hence fit a bivariate model
## CLASSIC RECURSIVE BIVARIATE PROBIT
# Algorithm based on Fisher Scoring
out <- SemiParBIVProbit(y1 ~ x1 + x2,
                        y2 ~ y1 + x2,
                        data=dataSim)
summary(out)
# Algorithm based on Newton's method
out <- SemiParBIVProbit(y1 ~ x1 + x2,
                        y2 ~ y1 + x2,
                        data=dataSim, H=TRUE)
summary(out)
## SEMIPARAMETRIC RECURSIVE BIVARIATE PROBIT
out <- SemiParBIVProbit(y1 ~ x1 + s(x2),
                        y2 ~ y1 + s(x2),
                        data=dataSim)
summary(out)
#
## average treatment effect on the treated (ATT) with CIs
AT(out,eq=2,nm.bin="y1",E=FALSE)
#
## recursive bivariate probit modelling with unpenalized splines
## can be achieved as follows
outFP <- SemiParBIVProbit(y1 ~ x1 + s(x2, bs="cr", k=5),
                          y2 ~ y1 + s(x2, bs="cr", k=6), fp=TRUE,
                          data=dataSim)
summary(outFP)
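#
## rough comparison of the penalized and unpenalized recursive fits using
## the information criterion introduced above (this comparison is not part
## of the original example)
InfCr(out); InfCr(outFP)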
#
#
############
## EXAMPLE 3
############
## Generate data with sample selection mechanism and exclusion restriction
set.seed(0)
n <- 1200
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rmvnorm(n,rep(0,2),Sigma)
SigmaC <- matrix(0.5, 3, 3); diag(SigmaC) <- 1
cov <- rmvnorm(n,rep(0,3),SigmaC, method="svd")
cov <- pnorm(cov)
bi <- round(cov[,1]); x1 <- cov[,2]; x2 <- cov[,3]
f11 <- function(x) -0.7*(4*x + 2.5*x^2 + 0.7*sin(5*x) + cos(7.5*x))
f12 <- function(x) -0.4*( -0.3 - 1.6*x + sin(5*x))
f21 <- function(x) 0.6*(exp(x) + sin(2.9*x))
ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + u[, 1] > 0
y  <- -0.68 - 1.5*bi + f21(x1) + u[, 2] > 0
yo <- y*(ys > 0)
dataSim <- data.frame(y,ys,yo,bi,x1,x2)
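## proportion of observations selected into the sample (ys = TRUE); not part
## of the original example, but useful to gauge how much of the outcome is
## unobserved
mean(dataSim$ys)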
## Testing the hypothesis of absence of sample selection...
LM.bpm(ys ~ bi + s(x1) + s(x2), yo ~ bi + s(x1), dataSim, selection=TRUE)
# p-value suggests presence of sample selection, hence fit a bivariate model
#
## SEMIPARAMETRIC SAMPLE SELECTION BIVARIATE PROBIT
## the first equation MUST be the selection equation
out <- SemiParBIVProbit(ys ~ bi + s(x1) + s(x2),
                        yo ~ bi + s(x1),
                        data=dataSim, selection=TRUE)
AT(out,eq=2,nm.bin="bi",E=FALSE) ## ATT
## compare the two summary outputs
## the second output produces a summary of the results obtained when only
## the outcome equation is fitted, i.e. selection bias is not accounted for
summary(out)
summary(out$gam2)
## approximate predicted probability that 'yo' is equal to 1
## the second predicted probability does not account for selection bias
mean(pnorm(out$eta2[out$eta2!=0]))
mean(pnorm(predict(out$gam2)))
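## the gap between the two estimates above gives a rough, informal indication
## of the impact of selection bias on the predicted probability (this
## comparison is not part of the original example)
mean(pnorm(out$eta2[out$eta2!=0])) - mean(pnorm(predict(out$gam2)))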
## estimated smooth function plots
## the red line is the true curve
## the blue line is the naive curve not accounting for selection bias
x1.s <- sort(x1[dataSim$ys > 0])
f21.x1 <- f21(x1.s) - mean(f21(x1.s))
plot(out, eq=2, select=1,ylim=c(-1.2,1)); lines(x1.s, f21.x1, col="red")
par(new=TRUE)
plot(out$gam2, select=1, se=FALSE, col="blue",ylim=c(-1.2,1),ylab="",rug=FALSE)
#
#
############
## EXAMPLE 4
############
## Generate data with one endogenous variable, an exclusion restriction
## and subject-specific features
set.seed(0)
n.cl <- 100
n.ob.cl <- 13
sre <- round( runif(n.cl,n.ob.cl,n.ob.cl+2) )
t.n <- sum(sre)
id <- rep(seq(1,n.cl),sre)
sigma1 <- 1
sigma2 <- 2
rho <- rho.re <- 0.5
SigmaRE <- matrix(c(sigma1^2, sigma1*sigma2*rho.re,
                    sigma1*sigma2*rho.re, sigma2^2), 2, 2)
u1 <- rmvnorm(n.cl,rep(0,2),SigmaRE,method="svd")
u <- cbind(rep(u1[,1],sre),rep(u1[,2],sre))
Sigma <- matrix(rho, 2, 2); diag(Sigma) <- 1
eps <- rmvnorm(t.n,rep(0,2),Sigma)
SigmaC <- matrix(0.5, 3, 3); diag(SigmaC) <- 1
cov <- rmvnorm(t.n,rep(0,3),SigmaC, method="svd")
x1 <- round(pnorm(cov[,1]))
x2 <- pnorm(cov[,2])
x3 <- cov[,3]
f1 <- function(x) 0.5*(exp(x) + sin(2.9*x))
f2 <- function(x) 1.8*(0.25*exp(x) - x^3)
f1.x2 <- f1(x2) - mean(f1(x2))
f2.x2 <- f2(x2) - mean(f2(x2))
y1 <- ifelse(u[,1] - 0.75*x1 + f1.x2 + x3 + eps[,1] > 0, 1, 0)
y2 <- ifelse(u[,2] - 1.5*y1 + 1*x1 + f2.x2 + eps[,2] > 0, 1, 0)
dataSim <- data.frame(id,y1,y2,x1,x2,x3)
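## quick check of the simulated panel structure (not part of the original
## example): number of clusters and distribution of cluster sizes
length(unique(id)); table(sre)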
out <- SemiParBIVProbit(y1 ~ x1 + s(x2) + x3,
                        y2 ~ y1 + x1 + s(x2),
                        data=dataSim,
                        npRE=TRUE, K=3, id=dataSim$id)
summary(out)
AT(out,eq=2,nm.bin="y1",E=FALSE)
x2s <- sort(x2)
f1.x2 <- f1.x2[order(x2)]
f2.x2 <- f2.x2[order(x2)]
par(mfrow=c(1,2))
plot(out, eq=1, select=1); lines(x2s, f1.x2, col="red")
plot(out, eq=2, select=1); lines(x2s, f2.x2, col="red")
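#
## K (the number of mass points for the nonparametric random effects) is a
## modelling choice; as a rough sensitivity check one might refit with a
## different K and compare the fits (the object name outK4 is illustrative,
## not part of the original example)
outK4 <- SemiParBIVProbit(y1 ~ x1 + s(x2) + x3,
                          y2 ~ y1 + x1 + s(x2),
                          data=dataSim,
                          npRE=TRUE, K=4, id=dataSim$id)
InfCr(out); InfCr(outK4)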
#
#