library(SemiParSampleSel)
######################################################################
## Generate data
## Correlation between the two equations and covariate correlation 0.5
## Sample size 2000
######################################################################
set.seed(0)
n <- 2000
rhC <- rhU <- 0.5
SigmaU <- matrix(c(1, rhU, rhU, 1), 2, 2)
U <- rmvnorm(n, rep(0,2), SigmaU)
SigmaC <- matrix(rhC, 3, 3); diag(SigmaC) <- 1
cov <- rmvnorm(n, rep(0,3), SigmaC, method = "svd")
cov <- pnorm(cov)
bi <- round(cov[,1]); x1 <- cov[,2]; x2 <- cov[,3]
f11 <- function(x) -0.7*(4*x + 2.5*x^2 + 0.7*sin(5*x) + cos(7.5*x))
f12 <- function(x) -0.4*( -0.3 - 1.6*x + sin(5*x))
f21 <- function(x) 0.6*(exp(x) + sin(2.9*x))
ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + U[, 1] > 0
y <- -0.68 - 1.5*bi + f21(x1) + + U[, 2]
yo <- y*(ys > 0)
dataSim <- data.frame(ys, yo, bi, x1, x2)
## CLASSIC SAMPLE SELECTION MODEL
## the first equation MUST be the selection equation
out <- SemiParSampleSel(list(ys ~ bi + x1 + x2,
yo ~ bi + x1),
data = dataSim)
ss.checks(out)
summary(out)
AIC(out)
BIC(out)
est.aver(out)
## SEMIPARAMETRIC SAMPLE SELECTION MODEL
## "cr" cubic regression spline basis - "cs" shrinkage version of "cr"
## "tp" thin plate regression spline basis - "ts" shrinkage version of "tp"
## for smooths of one variable, "cr/cs" and "tp/ts" achieve similar results
## k is the basis dimension - default is 10
## m is the order of the penalty for the specific term - default is 2
out <- SemiParSampleSel(list(ys ~ bi + s(x1, bs = "tp", k = 10, m = 2) + s(x2),
yo ~ bi + s(x1)),
data = dataSim)
ss.checks(out)
AIC(out)
est.aver(out)
## compare the two summary outputs
## the second output produces a summary of the results obtained when only
## the outcome equation is fitted, i.e. selection bias is not accounted for
summary(out)
summary(out$gam2)
## estimated smooth function plots
## the red line is the true curve
## the blue line is the naive curve not accounting for selection bias
x1.s <- sort(x1[dataSim$ys>0])
f21.x1 <- f21(x1.s)[order(x1.s)] - mean(f21(x1.s))
plot(out, eq = 2, ylim = c(-1, 0.8)); lines(x1.s, f21.x1, col = "red")
par(new = TRUE)
plot(out$gam2, se = FALSE, col = "blue", ylim = c(-1, 0.8), ylab = "", rug = FALSE)
#
#
###################################################
## example using Clayton copula with normal margins
###################################################
set.seed(0)
teta <- 10
sig <- 1.5
myCop <- archmCopula(family = "clayton", dim = 2, param = teta)
# other copula options are for instance: "amh", "frank", "gumbel", "joe"
# for FGM use the following code:
# myCop <- fgmCopula(teta, dim=2)
bivg <- mvdc(copula = myCop, c("norm", "norm"),
list(list(mean = 0, sd = 1),
list(mean = 0, sd = sig)))
er <- rMvdc(n, bivg)
ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + er[, 1] > 0
y <- -0.68 - 1.5*bi + f21(x1) + + er[, 2]
yo <- y*(ys > 0)
dataSim <- data.frame(ys, yo, bi, x1, x2)
out <- SemiParSampleSel(list(ys ~ bi + s(x1) + s(x2),
yo ~ bi + s(x1)),
data = dataSim, BivD = "C0")
ss.checks(out)
summary(out)
est.aver(out)
x1.s <- sort(x1[dataSim$ys>0])
f21.x1 <- f21(x1.s)[order(x1.s)] - mean(f21(x1.s))
plot(out, eq = 2, ylim = c(-1.1, 1.6)); lines(x1.s, f21.x1, col = "red")
par(new = TRUE)
plot(out$gam2, se = FALSE, col = "blue", ylim = c(-1.1, 1.6), ylab = "", rug = FALSE)
#
#
########################################################
## example using Gumbel copula with normal-gamma margins
########################################################
set.seed(0)
k <- 2 # shape of gamma distribution
miu <- exp(-0.68 - 1.5*bi + f21(x1)) # mean values of y's (log m = Xb)
lambda <- k/miu # rate of gamma distribution
theta <- 6
# Two-dimensional Gumbel copula with unif margins
gumbel.cop <- onacopula("Gumbel", C(theta, 1:2))
# Random sample from two-dimensional Gumbel copula with uniform margins
U <- rnacopula(n = n, gumbel.cop)
# Margins: normal and gamma
er <- cbind(qnorm(U[,1], 0, 1), qgamma(U[, 2], shape = k, rate = lambda))
ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + er[, 1] > 0
y <- er[, 2]
yo <- y*(ys > 0)
dataSim <- data.frame(ys, yo, bi, x1, x2)
out <- SemiParSampleSel(list(ys ~ bi + s(x1) + s(x2),
yo ~ bi + s(x1)),
data = dataSim, BivD = "G0", margins = c("N", "G"))
ss.checks(out)
summary(out)
est.aver(out)
x1.s <- sort(x1[dataSim$ys>0])
f21.x1 <- f21(x1.s)[order(x1.s)] - mean(f21(x1.s))
plot(out, eq = 2, ylim = c(-1.1, 1)); lines(x1.s, f21.x1, col = "red")
par(new = TRUE)
plot(out$gam2, se = FALSE, col = "blue", ylim = c(-1.1, 1), ylab = "", rug = FALSE)
#
#Run the code above in your browser using DataLab