set.seed(50)
N=500
# cohort analysis
dat <- data.frame(id = 1:N, time=(tmg <- pmin(.1,rweibull(N, 10, 0.1))),
d=1.0*(tmg<0.1), x1=runif(N), x2=runif(N), z=rbinom(N, 1, 0.5))
expnms=paste0("x", 1:2)
f1 = survival::Surv(time, d)~x1 + x2 + z
(fit1 <- survival::coxph(f1, data = dat))
(obj <- qgcomp.cox.noboot(f1, expnms = expnms, data = dat))
f1s = survival::Surv(time, d)~x1 + x2 + strata(z)
(fit1s <- survival::coxph(f1s, data = dat))
(objs <- qgcomp.cox.noboot(f1s, expnms = expnms, data = dat))
#### now doing a case-cohort analysis
# 1) sampling simple case-cohort data
dat$subcohort = 1:nrow(dat) %in% sort(sample(1:nrow(dat), 100))
caco_dat = dat[dat$subcohort | dat$d,]
dim(caco_dat)
dim(dat)
# getting quantile categories from the subcohort
qdata = quantize(caco_dat[caco_dat$subcohort,], expnms=expnms)
qdata$breaks
# 2) doing simple (unstratified) analysis
f2 = survival::Surv(time, d)~x1 + x2 + z
(obj2 <- qgcomp.cch.noboot(f2, expnms = expnms, breaks = qdata$breaks,
data = caco_dat, subcoh = ~ subcohort, id = ~id, cohort.size=N))
obj2$fit
### doing stratified analysis (if subcohort and/or cases are a stratified sample)
# 1) sampling stratified case-cohort data
sampfracs = c(.25, .75) # z=0 vs. z=1
nco = 100 # total subcohort members
nca = nrow(dat[dat$d==1,])
selected_ids = sort(c(sample(dat[dat$z==0, "id"], round(nco*sampfracs[1])),
sample(dat[dat$z==1, "id"], round(nco*sampfracs[2]))))
selected_cases = sort(c(sample(dat[dat$d==1 & dat$z==0, "id"], round(nca*sampfracs[1])),
sample(dat[dat$d==1 & dat$z==1, "id"], round(nca*sampfracs[1]))))
dat$subcohort = dat$id %in% selected_ids
dat$selectedcases = dat$id %in% selected_cases
caco_dat_strat = dat[dat$subcohort | dat$selectedcases,]
dim(caco_dat_strat)
dim(dat)
# getting quantile categories from the subcohort by differential sampling across strata
subco_strat = caco_dat_strat[caco_dat_strat$subcohort,]
z_stratum_sizes = table(dat$z)
z_stratum_sizes_caco = table(subco_strat$z)
sampweights = z_stratum_sizes/z_stratum_sizes_caco
sampweightsn = sampweights/(min(sampweights))
# now oversample the undersampled into a dataset used to create cutpoints
wtdcutids = data.frame(id=sort(c(sample(subco_strat[subco_strat$z==0,"id"],
sampweightsn[1]*z_stratum_sizes_caco[1], replace=TRUE),
sample(subco_strat[subco_strat$z==1,"id"],
sampweightsn[2]*z_stratum_sizes_caco[2]))))
cutdata = merge(wtdcutids,subco_strat, all.x=TRUE)
qdata_strat = quantize(cutdata, expnms=expnms)
qdata_strat$breaks
f2s = survival::Surv(time, d)~x1 + x2
(obj2s <- qgcomp.cch.noboot(f2s, expnms = expnms, breaks = qdata_strat$breaks,
data = caco_dat_strat, subcoh = ~ subcohort, id = ~id,
stratum=~z,
cohort.size=z_stratum_sizes, method="I.Borgan"))
obj2s$fit
Run the code above in your browser using DataLab