library(survival)
library(prodlim)
library(data.table)
############################
#### Survival settings  ####
#### ATE with Cox model ####
############################
#### generate data ####
n <- 100
set.seed(10)
dtS <- sampleData(n, outcome="survival")
dtS$time <- round(dtS$time,1)
dtS$X1 <- factor(rbinom(n, prob = c(0.3,0.4) , size = 2), labels = paste0("T",0:2))
##### estimate the survival model ####
## Cox proportional hazard model
fit.Cox <- coxph(Surv(time,event)~ X1+X2, data=dtS, y=TRUE, x=TRUE)
## Kaplan Meier - same as copxh(Surv(time,event)~ strata(X1)+strata(X2), ties = "breslow")
fit.KM <- prodlim(Hist(time,event)~ X1+X2, data=dtS)
##### compute the ATE ####
## at times 5, 6, 7, and 8 using X1 as the treatment variable
## standard error computed using the influence function
## confidence intervals / p-values based on asymptotic results
ateFit1a <- ate(fit.Cox, treatment = "X1", times = 5:8, data = dtS)
summary(ateFit1a)
model.tables(ateFit1a, type = "meanRisk")
model.tables(ateFit1a, type = "diffRisk")
model.tables(ateFit1a, type = "ratioRisk")
## relaxing PH assumption using a stratified model
ateFit1a.NPH <- ate(fit.KM, treatment = "X1", times = 5:8, data = dtS,
                    product.limit = FALSE)
model.tables(ateFit1a.NPH, times = 5) ## no PH assumption
model.tables(ateFit1a, times = 5)  ## PH assumption
if (FALSE) {
#### ATE with confidence bands ####
## same as before with in addition the confidence bands / adjusted p-values
## (argument band = TRUE)
ateFit1b <- ate(fit.Cox, treatment = "X1", times = 5:8, data = dtS, band = TRUE)
summary(ateFit1b)
## by default bands/adjuste p-values computed separately for each treatment modality
summary(ateFit1b, band = 1,
        se = FALSE, type = "diffRisk", short = TRUE, quantile = TRUE)
## adjustment over treatment and time using the band argument of confint
summary(ateFit1b, band = 2,
       se = FALSE, type = "diffRisk", short = TRUE, quantile = TRUE)
}
if (FALSE) {
#### ATE with non-parametric bootstrap ####
## confidence intervals / p-values computed using 1000 bootstrap samples
## (argument se = TRUE and B = 1000) 
ateFit1c <- ate(fit.Cox, treatment = "X1", times = 5:8, data = dtS,
                se = TRUE, B = 50, handler = "mclapply")
## NOTE: for real applications 50 bootstrap samples is not enough 
## same but using 2 cpus for generating and analyzing the bootstrap samples
## (parallel computation, argument mc.cores = 2) 
ateFit1d <- ate(fit.Cox, treatment = "X1", times = 5:8, data = dtS, 
                se = TRUE, B = 50, mc.cores = 2)
## manually defining the cluster to be used
## useful when specific packages need to be loaded in each cluster
fit.CoxNL <- coxph(Surv(time,event)~ X1+X2+rcs(X6),data=dtS,y=TRUE,x=TRUE)
cl <- parallel::makeCluster(2)
parallel::clusterEvalQ(cl, library(rms))
ateFit1e <- ate(fit.CoxNL, treatment = "X1", times = 5:8, data = dtS, 
                se = TRUE, B = 50, handler = "foreach", cl = cl)
parallel::stopCluster(cl)
}
################################################
#### Competing risks settings               ####
#### ATE with cause specific Cox regression ####
################################################
#### generate data ####
n <- 500
set.seed(10)
dt <- sampleData(n, outcome="competing.risks")
dt$X1 <- factor(rbinom(n, prob = c(0.2,0.3) , size = 2), labels = paste0("T",0:2))
#### estimate cause specific Cox model ####
fit.CR <-  CSC(Hist(time,event)~ X1+X8,data=dt,cause=1)
#### compute the ATE ####
## at times 1, 5, 10
## using X1 as the treatment variable
ateFit2a <- ate(fit.CR, treatment = "X1", times = c(1,5,10), data = dt, 
                cause = 1, se = TRUE, band = TRUE)
summary(ateFit2a)
data.table::as.data.table(ateFit2a)
#############################################
#### Survival settings without censoring ####
#### ATE with glm                        ####
#############################################
#### generate data ####
n <- 100
dtB <- sampleData(n, outcome="binary")
dtB[, X2 := as.numeric(X2)]
##### estimate a logistic regression model ####
fit.glm <- glm(formula = Y ~ X1+X2, data=dtB, family = "binomial")
#### compute the ATE ####
## using X1 as the treatment variable
## only point estimate (argument se = FALSE)
ateFit1a <- ate(fit.glm, treatment = "X1", data = dtB, se = FALSE)
ateFit1a
if (FALSE) {
## with confidence intervals
ateFit1b <- ate(fit.glm, data = dtB, treatment = "X1",
               times = 5) ## just for having a nice output not used in computations
summary(ateFit1b, short = TRUE)
## using the lava package
library(lava)
ateLava <- estimate(fit.glm, function(p, data){
a <- p["(Intercept)"] ; b <- p["X11"] ; c <- p["X2"] ;
R.X11 <- expit(a + b + c * data[["X2"]])
R.X10 <- expit(a + c * data[["X2"]])
list(risk0=R.X10,risk1=R.X11,riskdiff=R.X11-R.X10)},
average=TRUE)
ateLava
}
############################
#### Survival settings  ####
#### ATE with glm       ####
############################
## see wglm for handling right-censoring with glm
#################################
#### Double robust estimator ####
#################################
if (FALSE) {
## generate data
n <- 500
set.seed(10)
dt <- sampleData(n, outcome="competing.risks")
dt$X1 <- factor(rbinom(n, prob = c(0.4) , size = 1), labels = paste0("T",0:1))
## working models
m.event <-  CSC(Hist(time,event)~ X1+X2+X3+X5+X8,data=dt)
m.censor <-  coxph(Surv(time,event==0)~ X1+X2+X3+X5+X8,data=dt, x = TRUE, y = TRUE)
m.treatment <-  glm(X1~X2+X3+X5+X8,data=dt,family=binomial(link="logit"))
## prediction + average
ateRobust <- ate(event = m.event,
                 treatment = m.treatment,
                 censor = m.censor,
                 data = dt, times = 5:10, 
                 cause = 1)
summary(ateRobust)
## compare various estimators
system.time( ## about 1.5s
ateRobust2 <- ate(event = m.event,
                 treatment = m.treatment,
                 censor = m.censor,
                 estimator = c("GFORMULA","IPTW","AIPTW"),
                 data = dt, times = c(5:10), 
                 cause = 1, se = TRUE)
)
data.table::as.data.table(ateRobust2, type = "meanRisk")
data.table::as.data.table(ateRobust2, type = "diffRisk")
## reduce memory load by computing the integral term
## on only 100 observations at a time (only relevant when iid = FALSE)
ateRobust3 <- ate(event = m.event,
                 treatment = m.treatment,
                 censor = m.censor,
                 data = dt, times = 5:10, 
                 cause = 1, se = FALSE,
                 store = c("size.split" = 100), verbose = 2)
coef(ateRobust3) - coef(ateRobust) ## same
## approximation to speed up calculations
dt$time.round <- round(dt$time/0.5)*0.5 ## round to the nearest half
dt$time.round[dt$time.round==0] <- 0.1 ## ensure strictly positive event times
mRound.event <-  CSC(Hist(time.round,event)~ X1+X2+X3+X5+X8,data=dt)
mRound.censor <-  coxph(Surv(time.round,event==0)~ X1+X2+X3+X5+X8,data=dt, x = TRUE, y = TRUE)
system.time( ## about 0.4s
ateRobustFast <- ate(event = mRound.event, treatment = m.treatment,
                    censor = mRound.censor,
                 estimator = c("GFORMULA","IPTW","AIPTW"), product.limit = FALSE,
                 data = dt, times = c(5:10), cause = 1, se = TRUE)
)
coef(ateRobustFast) - coef(ateRobust) ## inaccuracy
}
Run the code above in your browser using DataLab