## Not run:
#
# library(SemiParBIVProbit)
#
# ######################################################################
# ## Generate data
# ## Correlation between the two equations and covariate correlation 0.5
# ## Sample size 2000
# ######################################################################
#
# set.seed(0)
#
# n <- 2000
#
# rh <- 0.5
#
# sigmau <- matrix(c(1, rh, rh, 1), 2, 2)
# u <- rMVN(n, rep(0,2), sigmau)
#
# sigmac <- matrix(rh, 3, 3); diag(sigmac) <- 1
# cov <- rMVN(n, rep(0,3), sigmac)
# cov <- pnorm(cov)
#
# bi <- round(cov[,1]); x1 <- cov[,2]; x2 <- cov[,3]
#
# f11 <- function(x) -0.7*(4*x + 2.5*x^2 + 0.7*sin(5*x) + cos(7.5*x))
# f12 <- function(x) -0.4*( -0.3 - 1.6*x + sin(5*x))
# f21 <- function(x) 0.6*(exp(x) + sin(2.9*x))
#
# ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + u[, 1] > 0
# y <- -0.68 - 1.5*bi + f21(x1) + u[, 2]
# yo <- y*(ys > 0)
#
# dataSim <- data.frame(ys, yo, bi, x1, x2)
#
# ## CLASSIC SAMPLE SELECTION MODEL
# ## the first equation MUST be the selection equation
#
# resp.check(yo[ys > 0], "N")
#
# out <- copulaSampleSel(list(ys ~ bi + x1 + x2,
# yo ~ bi + x1),
# data = dataSim)
# conv.check(out)
# post.check(out)
# summary(out)
#
# AIC(out)
# BIC(out)
#
#
# ## SEMIPARAMETRIC SAMPLE SELECTION MODEL
#
# ## "cr" cubic regression spline basis - "cs" shrinkage version of "cr"
# ## "tp" thin plate regression spline basis - "ts" shrinkage version of "tp"
# ## for smooths of one variable, "cr/cs" and "tp/ts" achieve similar results
# ## k is the basis dimension - default is 10
# ## m is the order of the penalty for the specific term - default is 2
#
# out <- copulaSampleSel(list(ys ~ bi + s(x1, bs = "tp", k = 10, m = 2) + s(x2),
# yo ~ bi + s(x1)),
# data = dataSim)
# conv.check(out)
# post.check(out)
# AIC(out)
#
# ## compare the two summary outputs
# ## the second output produces a summary of the results obtained when only
# ## the outcome equation is fitted, i.e. selection bias is not accounted for
#
# summary(out)
# summary(out$gam2)
#
# ## estimated smooth function plots
# ## the red line is the true curve
# ## the blue line is the naive curve not accounting for selection bias
#
# x1.s <- sort(x1[dataSim$ys>0])
# f21.x1 <- f21(x1.s)[order(x1.s)] - mean(f21(x1.s))
#
# plot(out, eq = 2, ylim = c(-1, 0.8)); lines(x1.s, f21.x1, col = "red")
# par(new = TRUE)
# plot(out$gam2, se = FALSE, lty = 3, lwd = 2, ylim = c(-1, 0.8),
# ylab = "", rug = FALSE)
#
#
# ## SEMIPARAMETRIC SAMPLE SELECTION MODEL with association
# ## and dispersion parameters
# ## depending on covariates as well
#
# eq.mu.1 <- ys ~ bi + s(x1) + s(x2)
# eq.mu.2 <- yo ~ bi + s(x1)
# eq.sigma2 <- ~ bi
# eq.theta <- ~ bi + x1
#
# fl <- list(eq.mu.1, eq.mu.2, eq.sigma2, eq.theta)
#
# out <- copulaSampleSel(fl, data = dataSim)
# conv.check(out)
# post.check(out)
# summary(out)
# out$sigma2
# out$theta
#
# jc.probs(out, 0, 0.3, intervals = TRUE)[1:4,]
#
# outC0 <- copulaSampleSel(fl, data = dataSim, BivD = "C0")
# conv.check(outC0)
# post.check(outC0)
# AIC(out, outC0)
# BIC(out, outC0)
#
# #
# #
#
# #######################################################
# ## example using Gumbel copula and normal-gamma margins
# #######################################################
#
# y <- rgamma(n, shape = 1/2^2, exp(-0.68 - 1.5*bi + f21(x1) + u[, 2])*2^2)
# yo <- y*(ys > 0)
#
# dataSim <- data.frame(ys, yo, bi, x1, x2)
#
#
# out <- copulaSampleSel(list(ys ~ bi + s(x1) + s(x2),
# yo ~ bi + s(x1)),
# data = dataSim, BivD = "G0",
# margins = c("probit", "GA"))
# conv.check(out)
# post.check(out)
# summary(out)
#
#
# #
# #
# ## End(Not run)
Run the code above in your browser using DataLab