# NOT RUN {
library(GJRM)
####################################
####################################
####################################
# JOINT MODELS WITH BINARY MARGINS #
####################################
####################################
####################################
############
## Example 1
############
set.seed(0)
n <- 400
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rMVN(n, rep(0,2), Sigma)
x1 <- round(runif(n)); x2 <- runif(n); x3 <- runif(n)
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
f2 <- function(x) x+exp(-30*(x-0.5)^2)
y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
y2 <- ifelse(-0.25 - 1.25*x1 + f2(x2) + u[,2] > 0, 1, 0)
dataSim <- data.frame(y1, y2, x1, x2, x3)
## CLASSIC BIVARIATE PROBIT
out <- gjrm(list(y1 ~ x1 + x2 + x3,
y2 ~ x1 + x2 + x3),
data = dataSim,
margins = c("probit", "probit"),
Model = "B")
conv.check(out)
summary(out)
AIC(out)
BIC(out)
# }
# NOT RUN {
## BIVARIATE PROBIT with Splines
out <- gjrm(list(y1 ~ x1 + s(x2) + s(x3),
y2 ~ x1 + s(x2) + s(x3)),
data = dataSim,
margins = c("probit", "probit"),
Model = "B")
conv.check(out)
summary(out)
AIC(out)
## estimated smooth function plots - red lines are true curves
x2 <- sort(x2)
f1.x2 <- f1(x2)[order(x2)] - mean(f1(x2))
f2.x2 <- f2(x2)[order(x2)] - mean(f2(x2))
f3.x3 <- rep(0, length(x3))
par(mfrow=c(2,2),mar=c(4.5,4.5,2,2))
plot(out, eq = 1, select = 1, seWithMean = TRUE, scale = 0)
lines(x2, f1.x2, col = "red")
plot(out, eq = 1, select = 2, seWithMean = TRUE, scale = 0)
lines(x3, f3.x3, col = "red")
plot(out, eq = 2, select = 1, seWithMean = TRUE, scale = 0)
lines(x2, f2.x2, col = "red")
plot(out, eq = 2, select = 2, seWithMean = TRUE, scale = 0)
lines(x3, f3.x3, col = "red")
## BIVARIATE PROBIT with Splines and
## varying dependence parameter
eq.mu.1 <- y1 ~ x1 + s(x2)
eq.mu.2 <- y2 ~ x1 + s(x2)
eq.theta <- ~ x1 + s(x2)
fl <- list(eq.mu.1, eq.mu.2, eq.theta)
outD <- gjrm(fl, data = dataSim,
margins = c("probit", "probit"),
Model = "B")
conv.check(outD)
summary(outD)
outD$theta
plot(outD, eq = 1, seWithMean = TRUE)
plot(outD, eq = 2, seWithMean = TRUE)
plot(outD, eq = 3, seWithMean = TRUE)
graphics.off()
############
## Example 2
############
## Generate data with one endogenous variable
## and exclusion restriction
set.seed(0)
n <- 400
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rMVN(n, rep(0,2), Sigma)
cov <- rMVN(n, rep(0,2), Sigma)
cov <- pnorm(cov)
x1 <- round(cov[,1]); x2 <- cov[,2]
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
f2 <- function(x) x+exp(-30*(x-0.5)^2)
y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
y2 <- ifelse(-0.25 - 1.25*y1 + f2(x2) + u[,2] > 0, 1, 0)
dataSim <- data.frame(y1, y2, x1, x2)
#
## Testing the hypothesis of absence of endogeneity...
#
LM.bpm(list(y1 ~ x1 + s(x2), y2 ~ y1 + s(x2)), dataSim, Model = "B")
## CLASSIC RECURSIVE BIVARIATE PROBIT
out <- gjrm(list(y1 ~ x1 + x2,
y2 ~ y1 + x2),
data = dataSim,
margins = c("probit", "probit"),
Model = "B")
conv.check(out)
summary(out)
AIC(out); BIC(out)
## FLEXIBLE RECURSIVE BIVARIATE PROBIT
out <- gjrm(list(y1 ~ x1 + s(x2),
y2 ~ y1 + s(x2)),
data = dataSim,
margins = c("probit", "probit"),
Model = "B")
conv.check(out)
summary(out)
AIC(out); BIC(out)
#
## Testing the hypothesis of absence of endogeneity post estimation...
gt.bpm(out)
#
## treatment effect, risk ratio and odds ratio with CIs
mb(y1, y2, Model = "B")
AT(out, nm.end = "y1", hd.plot = TRUE)
RR(out, nm.end = "y1")
OR(out, nm.end = "y1")
AT(out, nm.end = "y1", type = "univariate")
re.imp <- imputeCounter(out, m = 10, "y1")
re.imp$AT
## try a Clayton copula model...
outC <- gjrm(list(y1 ~ x1 + s(x2),
y2 ~ y1 + s(x2)),
data = dataSim, BivD = "C0",
margins = c("probit", "probit"),
Model = "B")
conv.check(outC)
summary(outC)
AT(outC, nm.end = "y1")
re.imp <- imputeCounter(outC, m = 10, "y1")
re.imp$AT
## try a Joe copula model...
outJ <- gjrm(list(y1 ~ x1 + s(x2),
y2 ~ y1 + s(x2)),
data = dataSim, BivD = "J0",
margins = c("probit", "probit"),
Model = "B")
conv.check(outJ)
summary(outJ)
AT(outJ, "y1")
re.imp <- imputeCounter(outJ, m = 10, "y1")
re.imp$AT
VuongClarke(out, outJ)
#
## recursive bivariate probit modelling with unpenalized splines
## can be achieved as follows
outFP <- gjrm(list(y1 ~ x1 + s(x2, bs = "cr", k = 5),
y2 ~ y1 + s(x2, bs = "cr", k = 6)),
fp = TRUE, data = dataSim,
margins = c("probit", "probit"),
Model = "B")
conv.check(outFP)
summary(outFP)
# in the above examples a third equation could be introduced
# as illustrated in Example 1
#
#################
## See also ?meps
#################
############
## Example 3
############
## Generate data with a non-random sample selection mechanism
## and exclusion restriction
set.seed(0)
n <- 2000
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rMVN(n, rep(0,2), Sigma)
SigmaC <- matrix(0.5, 3, 3); diag(SigmaC) <- 1
cov <- rMVN(n, rep(0,3), SigmaC)
cov <- pnorm(cov)
bi <- round(cov[,1]); x1 <- cov[,2]; x2 <- cov[,3]
f11 <- function(x) -0.7*(4*x + 2.5*x^2 + 0.7*sin(5*x) + cos(7.5*x))
f12 <- function(x) -0.4*( -0.3 - 1.6*x + sin(5*x))
f21 <- function(x) 0.6*(exp(x) + sin(2.9*x))
ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + u[, 1] > 0
y <- -0.68 - 1.5*bi + f21(x1) + + u[, 2] > 0
yo <- y*(ys > 0)
dataSim <- data.frame(y, ys, yo, bi, x1, x2)
#
## Testing the hypothesis of absence of non-random sample selection...
LM.bpm(list(ys ~ bi + s(x1) + s(x2), yo ~ bi + s(x1)), dataSim, Model = "BSS")
# p-value suggests presence of sample selection, hence fit a bivariate model
#
## SEMIPARAMETRIC SAMPLE SELECTION BIVARIATE PROBIT
## the first equation MUST be the selection equation
out <- gjrm(list(ys ~ bi + s(x1) + s(x2),
yo ~ bi + s(x1)),
data = dataSim, Model = "BSS",
margins = c("probit", "probit"))
conv.check(out)
gt.bpm(out)
## compare the two summary outputs
## the second output produces a summary of the results obtained when
## selection bias is not accounted for
summary(out)
summary(out$gam2)
## corrected predicted probability that 'yo' is equal to 1
mb(ys, yo, Model = "BSS")
prev(out, hd.plot = TRUE)
prev(out, type = "univariate", hd.plot = TRUE)
## estimated smooth function plots
## the red line is the true curve
## the blue line is the univariate model curve not accounting for selection bias
x1.s <- sort(x1[dataSim$ys>0])
f21.x1 <- f21(x1.s)[order(x1.s)]-mean(f21(x1.s))
plot(out, eq = 2, ylim = c(-1.65,0.95)); lines(x1.s, f21.x1, col="red")
par(new = TRUE)
plot(out$gam2, se = FALSE, col = "blue", ylim = c(-1.65,0.95),
ylab = "", rug = FALSE)
#
#
## try a Clayton copula model...
outC <- gjrm(list(ys ~ bi + s(x1) + s(x2),
yo ~ bi + s(x1)),
data = dataSim, Model = "BSS", BivD = "C0",
margins = c("probit", "probit"))
conv.check(outC)
summary(outC)
prev(outC)
#######################
# Impute using Mice
#######################
library(mice)
ys <- dataSim$ys
dataSim$yo[dataSim$ys == FALSE] <- NA
dataSim <- dataSim[, -c(1:2)]
imp <- mice(dataSim, m = 1, method = c("copulaSS", "", "", ""),
mice.formula = outC$mice.formula, margins = outC$margins,
BivD = outC$BivD, maxit = 1)
comp.yo <- dataSim$yo
comp.yo[ys == 0] <- imp$imp$yo[[1]]
mean(comp.yo)
#
################
## See also ?hiv
################
############
## Example 4
############
## Generate data with partial observability
set.seed(0)
n <- 10000
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rMVN(n, rep(0,2), Sigma)
x1 <- round(runif(n)); x2 <- runif(n); x3 <- runif(n)
y1 <- ifelse(-1.55 + 2*x1 + x2 + u[,1] > 0, 1, 0)
y2 <- ifelse( 0.45 - x3 + u[,2] > 0, 1, 0)
y <- y1*y2
dataSim <- data.frame(y, x1, x2, x3)
## BIVARIATE PROBIT with Partial Observability
out <- gjrm(list(y ~ x1 + x2,
y ~ x3),
data = dataSim, Model = "BPO",
margins = c("probit", "probit"))
conv.check(out)
summary(out)
# first ten estimated probabilities for the four events from object out
cbind(out$p11, out$p10, out$p00, out$p01)[1:10,]
# case with smooth function
# (more computationally intensive)
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
y2 <- ifelse( 0.45 - x3 + u[,2] > 0, 1, 0)
y <- y1*y2
dataSim <- data.frame(y, x1, x2, x3)
out <- gjrm(list(y ~ x1 + s(x2),
y ~ x3),
data = dataSim, Model = "BPO",
margins = c("probit", "probit"))
conv.check(out)
summary(out)
# plot estimated and true functions
x2 <- sort(x2); f1.x2 <- f1(x2)[order(x2)] - mean(f1(x2))
plot(out, eq = 1, scale = 0); lines(x2, f1.x2, col = "red")
#
################
## See also ?war
################
# }
# NOT RUN {
# }
# NOT RUN {
######################################################
######################################################
######################################################
# JOINT MODELS WITH BINARY AND/OR CONTINUOUS MARGINS #
######################################################
######################################################
######################################################
library(GJRM)
############
## Example 5
## Generate data
## Correlation between the two equations 0.5 - Sample size 400
set.seed(0)
n <- 400
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rMVN(n, rep(0,2), Sigma)
x1 <- round(runif(n)); x2 <- runif(n); x3 <- runif(n)
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
f2 <- function(x) x+exp(-30*(x-0.5)^2)
y1 <- -1.55 + 2*x1 + f1(x2) + u[,1]
y2 <- -0.25 - 1.25*x1 + f2(x2) + u[,2]
dataSim <- data.frame(y1, y2, x1, x2, x3)
resp.check(y1, "N")
resp.check(y2, "N")
eq.mu.1 <- y1 ~ x1 + s(x2) + s(x3)
eq.mu.2 <- y2 ~ x1 + s(x2) + s(x3)
eq.sigma2.1 <- ~ 1
eq.sigma2.2 <- ~ 1
eq.theta <- ~ x1
fl <- list(eq.mu.1, eq.mu.2, eq.sigma2.1, eq.sigma2.2, eq.theta)
# the order above is the one to follow when
# using more than two equations
out <- gjrm(fl, data = dataSim, margins = c("N", "N"),
Model = "B")
conv.check(out)
post.check(out)
summary(out)
AIC(out)
BIC(out)
jc.probs(out, 1.4, 2.3, intervals = TRUE)[1:4,]
############
## Example 6
############
## Generate data with one endogenous binary variable
## and continuous outcome
set.seed(0)
n <- 1000
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rMVN(n, rep(0,2), Sigma)
cov <- rMVN(n, rep(0,2), Sigma)
cov <- pnorm(cov)
x1 <- round(cov[,1]); x2 <- cov[,2]
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
f2 <- function(x) x+exp(-30*(x-0.5)^2)
y1 <- ifelse(-1.55 + 2*x1 + f1(x2) + u[,1] > 0, 1, 0)
y2 <- -0.25 - 1.25*y1 + f2(x2) + u[,2]
dataSim <- data.frame(y1, y2, x1, x2)
## RECURSIVE Model
rc <- resp.check(y2, margin = "N", print.par = TRUE, loglik = TRUE)
AIC(rc); BIC(rc)
out <- gjrm(list(y1 ~ x1 + x2,
y2 ~ y1 + x2),
data = dataSim, margins = c("probit","N"),
Model = "B")
conv.check(out)
summary(out)
post.check(out)
## SEMIPARAMETRIC RECURSIVE Model
eq.mu.1 <- y1 ~ x1 + s(x2)
eq.mu.2 <- y2 ~ y1 + s(x2)
eq.sigma2 <- ~ 1
eq.theta <- ~ 1
fl <- list(eq.mu.1, eq.mu.2, eq.sigma2, eq.theta)
out <- gjrm(fl, data = dataSim,
margins = c("probit","N"), gamlssfit = TRUE,
Model = "B")
conv.check(out)
summary(out)
post.check(out)
jc.probs(out, 1, 1.5, intervals = TRUE)[1:4,]
AT(out, nm.end = "y1")
AT(out, nm.end = "y1", type = "univariate")
#
#
############
## Example 7
############
## Generate data with one endogenous continuous exposure
## and binary outcome
set.seed(0)
n <- 1000
Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1
u <- rMVN(n, rep(0,2), Sigma)
cov <- rMVN(n, rep(0,2), Sigma)
cov <- pnorm(cov)
x1 <- round(cov[,1]); x2 <- cov[,2]
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
f2 <- function(x) x+exp(-30*(x-0.5)^2)
y1 <- -0.25 - 2*x1 + f2(x2) + u[,2]
y2 <- ifelse(-0.25 - 0.25*y1 + f1(x2) + u[,1] > 0, 1, 0)
dataSim <- data.frame(y1, y2, x1, x2)
eq.mu.1 <- y2 ~ y1 + s(x2)
eq.mu.2 <- y1 ~ x1 + s(x2)
eq.sigma2 <- ~ 1
eq.theta <- ~ 1
fl <- list(eq.mu.1, eq.mu.2, eq.sigma2, eq.theta)
out <- gjrm(fl, data = dataSim,
margins = c("probit","N"),
Model = "B")
conv.check(out)
summary(out)
post.check(out)
AT(out, nm.end = "y1")
AT(out, nm.end = "y1", type = "univariate")
RR(out, nm.end = "y1", rr.plot = TRUE)
RR(out, nm.end = "y1", type = "univariate")
OR(out, nm.end = "y1", or.plot = TRUE)
OR(out, nm.end = "y1", type = "univariate")
#
#
############
## Example 8
##################
## Survival models
##################
set.seed(0)
n <- 2000
c <- runif(n, 3, 8)
u <- runif(n, 0, 1)
z1 <- rbinom(n, 1, 0.5)
z2 <- runif(n, 0, 1)
t <- rep(NA, n)
beta_0 <- -0.2357
beta_1 <- 1
f <- function(t, beta_0, beta_1, u, z1, z2){
S_0 <- 0.7 * exp(-0.03*t^1.9) + 0.3*exp(-0.3*t^2.5)
exp(-exp(log(-log(S_0))+beta_0*z1 + beta_1*z2))-u
}
for (i in 1:n){
t[i] <- uniroot(f, c(0, 8), tol = .Machine$double.eps^0.5,
beta_0 = beta_0, beta_1 = beta_1, u = u[i],
z1 = z1[i], z2 = z2[i], extendInt = "yes" )$root
}
delta1 <- ifelse(t < c, 1, 0)
u1 <- apply(cbind(t, c), 1, min)
dataSim <- data.frame(u1, delta1, z1, z2)
c <- runif(n, 4, 8)
u <- runif(n, 0, 1)
z <- rbinom(n, 1, 0.5)
beta_0 <- -1.05
t <- rep(NA, n)
f <- function(t, beta_0, u, z){
S_0 <- 0.7 * exp(-0.03*t^1.9) + 0.3*exp(-0.3*t^2.5)
1/(1 + exp(log((1-S_0)/S_0)+beta_0*z))-u
}
for (i in 1:n){
t[i] <- uniroot(f, c(0, 8), tol = .Machine$double.eps^0.5,
beta_0 = beta_0, u = u[i], z = z[i],
extendInt="yes" )$root
}
delta2 <- ifelse(t < c,1, 0)
u2 <- apply(cbind(t, c), 1, min)
dataSim$delta2 <- delta2
dataSim$u2 <- u2
dataSim$z <- z
eq1 <- u1 ~ s(u1, bs = "mpi") + z1 + s(z2)
eq2 <- u2 ~ s(u2, bs = "mpi") + z
eq3 <- ~ s(z2)
out <- gjrm(list(eq1, eq2), data = dataSim, surv = TRUE,
margins = c("PH", "PO"),
cens1 = delta1, cens2 = delta2, Model = "B")
# PH margin fit can also be compared with cox.ph from mgcv
conv.check(out)
res <- post.check(out)
## martingale residuals
mr1 <- out$cens1 - res$qr1
mr2 <- out$cens2 - res$qr2
# can be plotted against covariates
# obs index, survival time, rank order of
# surv times
# to determine func form, one may use
# res from null model against covariate
# to test for PH, use:
# library(survival)
# fit <- coxph(Surv(u1, delta1) ~ z1 + z2, data = dataSim)
# temp <- cox.zph(fit)
# print(temp)
# plot(temp, resid = FALSE)
summary(out)
AIC(out); BIC(out)
plot(out, eq = 1, scale = 0, pages = 1)
plot(out, eq = 2, scale = 0, pages = 1)
hazsurv.plot(out, eq = 1, newdata = data.frame(z1 = 0, z2 = 0),
shade = TRUE, n.sim = 1000)
hazsurv.plot(out, eq = 1, newdata = data.frame(z1 = 0, z2 = 0),
shade = TRUE, n.sim = 1000, type = "hazard")
hazsurv.plot(out, eq = 2, newdata = data.frame(z = 0),
shade = TRUE, n.sim = 1000)
hazsurv.plot(out, eq = 2, newdata = data.frame(z = 0),
shade = TRUE, n.sim = 1000, type = "hazard")
jc.probs(out, type = "joint", intervals = TRUE)[1:5,]
newd0 <- newd1 <- data.frame(z = 0, z1 = mean(dataSim$z1),
z2 = mean(dataSim$z2),
u1 = mean(dataSim$u1) + 1,
u2 = mean(dataSim$u2) + 1)
newd1$z <- 1
jc.probs(out, type = "joint", newdata = newd0, intervals = TRUE)
jc.probs(out, type = "joint", newdata = newd1, intervals = TRUE)
out1 <- gjrm(list(eq1, eq2, eq3), data = dataSim, surv = TRUE,
margins = c("PH", "PO"),
cens1 = delta1, cens2 = delta2, gamlssfit = TRUE,
Model = "B")
# eq1 <- u1 ~ z1 + s(z2)
# eq2 <- u2 ~ z
# eq3 <- ~ s(z2)
# note that Weibull is implemented as AFT model (test case)
# out2 <- gjrm(list(eq1, eq2, ~ 1, ~ 1, eq3), data = dataSim, surv = TRUE,
# margins = c("WEI", "WEI"),
# cens1 = delta1, cens2 = delta2,
# Model = "B")
#########################################
## Joint continuous and survival outcomes
#########################################
# work in progress
#
# eq1 <- z2 ~ z1
# eq2 <- u2 ~ s(u2, bs = "mpi") + z
# eq3 <- ~ s(z2)
# eq4 <- ~ s(z2)
#
# f.l <- list(eq1, eq2, eq3, eq4)
#
# out3 <- gjrm(f.l, data = dataSim, surv = TRUE,
# margins = c("N", "PO"),
# cens1 = NULL, cens2 = delta2,
# gamlssfit = TRUE, Model = "B")
#
# conv.check(out3)
# post.check(out3)
# summary(out3)
# AIC(out3); BIC(out3)
# plot(out3, eq = 2, scale = 0, pages = 1)
# plot(out3, eq = 3, scale = 0, pages = 1)
# plot(out3, eq = 4, scale = 0, pages = 1)
#
# newd <- newd1 <- data.frame(z = 0, z1 = mean(dataSim$z1),
# z2 = mean(dataSim$z2),
# u2 = mean(dataSim$u2) + 1)
#
# jc.probs(out3, y1 = 0.6, type = "joint", newdata = newd, intervals = TRUE)
# }
# NOT RUN {
# }
# NOT RUN {
##########################################
##########################################
##########################################
# JOINT MODELS WITH THREE BINARY MARGINS #
##########################################
##########################################
##########################################
library(GJRM)
############
## Example 9
############
## Generate data
## Correlation between the two equations 0.5 - Sample size 400
set.seed(0)
n <- 400
Sigma <- matrix(0.5, 3, 3); diag(Sigma) <- 1
u <- rMVN(n, rep(0,3), Sigma)
x1 <- round(runif(n)); x2 <- runif(n); x3 <- runif(n)
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
f2 <- function(x) x+exp(-30*(x-0.5)^2)
y1 <- ifelse(-1.55 + 2*x1 - f1(x2) + u[,1] > 0, 1, 0)
y2 <- ifelse(-0.25 - 1.25*x1 + f2(x2) + u[,2] > 0, 1, 0)
y3 <- ifelse(-0.75 + 0.25*x1 + u[,3] > 0, 1, 0)
dataSim <- data.frame(y1, y2, y3, x1, x2)
f.l <- list(y1 ~ x1 + s(x2),
y2 ~ x1 + s(x2),
y3 ~ x1)
out <- gjrm(f.l, data = dataSim, Model = "T",
margins = c("probit", "probit", "probit"))
out1 <- gjrm(f.l, data = dataSim, Chol = TRUE, Model = "T",
margins = c("probit", "probit", "probit"))
conv.check(out)
summary(out)
plot(out, eq = 1)
plot(out, eq = 2)
AIC(out)
BIC(out)
out <- gjrm(f.l, data = dataSim, Model = "T",
margins = c("probit","logit","cloglog"))
out1 <- gjrm(f.l, data = dataSim, Chol = TRUE, Model = "T",
margins = c("probit","logit","cloglog"))
conv.check(out)
summary(out)
plot(out, eq = 1)
plot(out, eq = 2)
AIC(out)
BIC(out)
f.l <- list(y1 ~ x1 + s(x2),
y2 ~ x1 + s(x2),
y3 ~ x1,
~ 1, ~ 1, ~ 1)
out1 <- gjrm(f.l, data = dataSim, Chol = TRUE, Model = "T",
margins = c("probit", "probit", "probit"))
f.l <- list(y1 ~ x1 + s(x2),
y2 ~ x1 + s(x2),
y3 ~ x1,
~ 1, ~ s(x2), ~ 1)
out2 <- gjrm(f.l, data = dataSim, Chol = TRUE, Model = "T",
margins = c("probit", "probit", "probit"))
f.l <- list(y1 ~ x1 + s(x2),
y2 ~ x1 + s(x2),
y3 ~ x1,
~ x1, ~ s(x2), ~ x1 + s(x2))
out2 <- gjrm(f.l, data = dataSim, Chol = TRUE, Model = "T",
margins = c("probit", "probit", "probit"))
f.l <- list(y1 ~ x1 + s(x2),
y2 ~ x1 + s(x2),
y3 ~ x1,
~ x1, ~ x1, ~ s(x2))
out2 <- gjrm(f.l, data = dataSim, Chol = TRUE, Model = "T",
margins = c("probit", "probit", "probit"))
f.l <- list(y1 ~ x1 + s(x2),
y2 ~ x1 + s(x2),
y3 ~ x1,
~ x1, ~ x1 + x2, ~ s(x2))
out2 <- gjrm(f.l, data = dataSim, Chol = TRUE, Model = "T",
margins = c("probit", "probit", "probit"))
f.l <- list(y1 ~ x1 + s(x2),
y2 ~ x1 + s(x2),
y3 ~ x1,
~ x1 + x2, ~ x1 + x2, ~ x1 + x2)
out2 <- gjrm(f.l, data = dataSim, Chol = TRUE, Model = "T",
margins = c("probit", "probit", "probit"))
jcres1 <- jc.probs(out2, 1, 1, 1, type = "joint", cond = 0,
intervals = TRUE, n.sim = 100)
nw <- data.frame( x1 = 0, x2 = seq(0, 1, length.out = 100) )
jcres2 <- jc.probs(out2, 1, 1, 1, newdata = nw, type = "joint",
cond = 0, intervals = TRUE, n.sim = 100)
#############
## Example 10
#############
## Generate data
## with double sample selection
set.seed(0)
n <- 5000
Sigma <- matrix(c(1, 0.5, 0.4,
0.5, 1, 0.6,
0.4, 0.6, 1 ), 3, 3)
u <- rMVN(n, rep(0,3), Sigma)
f1 <- function(x) cos(pi*2*x) + sin(pi*x)
f2 <- function(x) x+exp(-30*(x-0.5)^2)
x1 <- runif(n)
x2 <- runif(n)
x3 <- runif(n)
x4 <- runif(n)
y1 <- 1 + 1.5*x1 - x2 + 0.8*x3 - f1(x4) + u[, 1] > 0
y2 <- 1 - 2.5*x1 + 1.2*x2 + x3 + u[, 2] > 0
y3 <- 1.58 + 1.5*x1 - f2(x2) + u[, 3] > 0
dataSim <- data.frame(y1, y2, y3, x1, x2, x3, x4)
f.l <- list(y1 ~ x1 + x2 + x3 + s(x4),
y2 ~ x1 + x2 + x3,
y3 ~ x1 + s(x2))
out <- gjrm(f.l, data = dataSim, Model = "TSS",
margins = c("probit", "probit", "probit"))
conv.check(out)
summary(out)
plot(out, eq = 1)
plot(out, eq = 3)
prev(out)
prev(out, type = "univariate")
prev(out, type = "naive")
# }
# NOT RUN {
# }
# NOT RUN {
###################################################
###################################################
###################################################
# JOINT MODELS WITH BINARY AND CONTINUOUS MARGINS #
# WITH SAMPLE SELECTION #
###################################################
###################################################
###################################################
library(GJRM)
######################################################################
## Generate data
## Correlation between the two equations and covariate correlation 0.5
## Sample size 2000
######################################################################
#############
## Example 11
#############
set.seed(0)
n <- 2000
rh <- 0.5
sigmau <- matrix(c(1, rh, rh, 1), 2, 2)
u <- rMVN(n, rep(0,2), sigmau)
sigmac <- matrix(rh, 3, 3); diag(sigmac) <- 1
cov <- rMVN(n, rep(0,3), sigmac)
cov <- pnorm(cov)
bi <- round(cov[,1]); x1 <- cov[,2]; x2 <- cov[,3]
f11 <- function(x) -0.7*(4*x + 2.5*x^2 + 0.7*sin(5*x) + cos(7.5*x))
f12 <- function(x) -0.4*( -0.3 - 1.6*x + sin(5*x))
f21 <- function(x) 0.6*(exp(x) + sin(2.9*x))
ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + u[, 1] > 0
y <- -0.68 - 1.5*bi + f21(x1) + u[, 2]
yo <- y*(ys > 0)
dataSim <- data.frame(ys, yo, bi, x1, x2)
## CLASSIC SAMPLE SELECTION MODEL
## the first equation MUST be the selection equation
resp.check(yo[ys > 0], "N")
out <- gjrm(list(ys ~ bi + x1 + x2,
yo ~ bi + x1),
data = dataSim, Model = "BSS",
margins = c("probit", "N"))
conv.check(out)
post.check(out)
summary(out)
AIC(out)
BIC(out)
## SEMIPARAMETRIC SAMPLE SELECTION MODEL
out <- gjrm(list(ys ~ bi + s(x1) + s(x2),
yo ~ bi + s(x1)),
data = dataSim, Model = "BSS",
margins = c("probit", "N"))
conv.check(out)
post.check(out)
AIC(out)
## compare the two summary outputs
## the second output produces a summary of the results obtained when only
## the outcome equation is fitted, i.e. selection bias is not accounted for
summary(out)
summary(out$gam2)
## estimated smooth function plots
## the red line is the true curve
## the blue line is the naive curve not accounting for selection bias
x1.s <- sort(x1[dataSim$ys>0])
f21.x1 <- f21(x1.s)[order(x1.s)] - mean(f21(x1.s))
plot(out, eq = 2, ylim = c(-1, 0.8)); lines(x1.s, f21.x1, col = "red")
par(new = TRUE)
plot(out$gam2, se = FALSE, lty = 3, lwd = 2, ylim = c(-1, 0.8),
ylab = "", rug = FALSE)
## IMPUTE MISSING VALUES
n.m <- 10
res <- imputeSS(out, n.m)
bet <- NA
for(i in 1:n.m){
dataSim$yo[dataSim$ys == 0] <- res[[i]]
outg <- gamlss(list(yo ~ bi + s(x1)), data = dataSim)
bet[i] <- coef(outg)["bi"]
print(i)
}
mean(bet)
##
## SEMIPARAMETRIC SAMPLE SELECTION MODEL with association
## and dispersion parameters
## depending on covariates as well
eq.mu.1 <- ys ~ bi + s(x1) + s(x2)
eq.mu.2 <- yo ~ bi + s(x1)
eq.sigma2 <- ~ bi
eq.theta <- ~ bi + x1
fl <- list(eq.mu.1, eq.mu.2, eq.sigma2, eq.theta)
out <- gjrm(fl, data = dataSim, Model = "BSS",
margins = c("probit", "N"))
conv.check(out)
post.check(out)
summary(out)
out$sigma2
out$theta
jc.probs(out, 0, 0.3, intervals = TRUE)[1:4,]
outC0 <- gjrm(fl, data = dataSim, BivD = "C0", Model = "BSS",
margins = c("probit", "N"))
conv.check(outC0)
post.check(outC0)
AIC(out, outC0)
BIC(out, outC0)
## IMPUTE MISSING VALUES
n.m <- 10
res <- imputeSS(outC0, n.m)
###############
# or using MICE
###############
library(mice)
ys <- dataSim$ys
dataSim$yo[dataSim$ys == FALSE] <- NA
dataSim <- dataSim[, -1]
imp <- mice(dataSim, m = 1, method = c("copulaSS", "", "", ""),
mice.formula = outC0$mice.formula, margins = outC0$margins,
BivD = outC0$BivD, maxit = 1)
comp.yo <- dataSim$yo
comp.yo[ys == 0] <- imp$imp$yo[[1]]
mean(comp.yo)
#
#
#######################################################
## example using Gumbel copula and normal-gamma margins
#######################################################
#############
## Example 12
#############
set.seed(1)
y <- exp(-0.68 - 1.5*bi + f21(x1) + u[, 2])
yo <- y*(ys > 0)
dataSim <- data.frame(ys, yo, bi, x1, x2)
out <- gjrm(list(ys ~ bi + s(x1) + s(x2),
yo ~ bi + s(x1)),
data = dataSim, BivD = "G0",
margins = c("probit", "GA"),
Model = "BSS")
conv.check(out)
post.check(out)
summary(out)
ATE <- NA
n.m <- 10
res <- imputeSS(out, n.m)
for(i in 1:n.m){
dataSim$yo[dataSim$ys == 0] <- res[[i]]
outg <- gamlss(list(yo ~ bi + s(x1)), margin = "GA", data = dataSim)
out$gamlss <- outg
ATE[i] <- AT(out, nm.end = "bi", type = "univariate")$res[2]
print(i)
}
AT(out, nm.end = "bi")
mean(ATE)
# }
# NOT RUN {
# }
Run the code above in your browser using DataLab