library(SemiParSampleSel)
######################################################################
## Generate data
## Correlation between the two equations and covariate correlation 0.5
## Sample size 2000
######################################################################
set.seed(0)
n <- 2000
rhC <- rhU <- 0.5
SigmaU <- matrix(c(1,rhU,rhU,1),2,2)
U <- rmvnorm(n,rep(0,2),SigmaU)
SigmaC <- matrix( c(1,rhC,rhC,
rhC,1,rhC,
rhC,rhC,1), 3 , 3)
cov <- rmvnorm(n,rep(0,3),SigmaC, method="svd")
cov <- pnorm(cov)
bi <- round(cov[,1]); x1 <- cov[,2]; x2 <- cov[,3]
f11 <- function(x) -0.7*(4*x + 2.5*x^2 + 0.7*sin(5*x) + cos(7.5*x))
f12 <- function(x) -0.4*( -0.3 - 1.6*x + sin(5*x))
f21 <- function(x) 0.6*(exp(x) + sin(2.9*x))
ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + U[, 1] > 0
y <- -0.68 - 1.5*bi + f21(x1) + + U[, 2]
yo <- y*(ys > 0)
dataSim <- data.frame(ys,yo,bi,x1,x2)
## CLASSIC SAMPLE SELECTION MODEL
## the first equation must be the selection equation
out <- SemiParSampleSel(ys ~ bi + x1 + x2,
yo ~ bi + x1,
data=dataSim)
ss.checks(out)
summary(out)
InfCr(out)
InfCr(out,cr="BIC")
## SEMIPARAMETRIC SAMPLE SELECTION MODEL
## the first equation MUST be the selection equation
## "cr" cubic regression spline basis - "cs" shrinkage version of "cr"
## "tp" thin plate regression spline basis - "ts" shrinkage version of "tp"
## for smooths of one variable, "cr/cs" and "tp/ts" achieve similar results
## k is the basis dimension - default is 10
## m is the order of the penalty for the specific term - default is 2
out <- SemiParSampleSel(ys ~ bi + s(x1,bs="cr",k=10,m=2) + s(x2,bs="cr",k=10),
yo ~ bi + s(x1,bs="cr",k=10),
data=dataSim)
InfCr(out)
## compare the two summary outputs
## the second output produces a summary of the results obtained when only
## the outcome equation is fitted, i.e. selection bias is not accounted for
summary(out)
summary(out$gam2)
## estimated smooth function plots
## the red line is the true curve
## the blue line is the naive curve not accounting for selection bias
x1.s <- sort(x1[dataSim$ys>0])
f21.x1 <- f21(x1.s)[order(x1.s)]-mean(f21(x1.s))
plot(out, eq=2, ylim=c(-1,0.8)); lines(x1.s, f21.x1, col="red")
par(new=TRUE)
plot(out$gam2, se=FALSE, col="blue", ylim=c(-1,0.8), ylab="",rug=FALSE)
#
#
###################################################
## example using Clayton copula with normal margins
###################################################
set.seed(0)
teta <- 10
sig <- 1.5
myCop <- archmCopula(family = "clayton", dim = 2, param = teta)
# other copula options are for instance: "amh", "frank", "gumbel", "joe"
# for FGM use the following code:
# myCop <- fgmCopula(teta, dim=2)
bivg <- mvdc(copula = myCop, c("norm","norm"), list(list(mean=0,sd=1), list(mean=0,sd=sig)))
er <- rMvdc(n,bivg)
ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + er[, 1] > 0
y <- -0.68 - 1.5*bi + f21(x1) + + er[, 2]
yo <- y*(ys > 0)
dataSim <- data.frame(ys,yo,bi,x1,x2)
out <- SemiParSampleSel(ys ~ bi + s(x1) + s(x2),
yo ~ bi + s(x1),
data=dataSim, BivD="C")
ss.checks(out)
summary(out)
x1.s <- sort(x1[dataSim$ys>0])
f21.x1 <- f21(x1.s)[order(x1.s)]-mean(f21(x1.s))
plot(out, eq=2, ylim=c(-1.1,1.6)); lines(x1.s, f21.x1, col="red")
par(new=TRUE)
plot(out$gam2, se=FALSE, col="blue", ylim=c(-1.1,1.6), ylab="",rug=FALSE)
#
#Run the code above in your browser using DataLab