anovaDDP: Bayesian Nonparametric Survival Model

Description

This function fits a Bayesian nonparametric model for non-spatial right censored time-to-event data.

Usage

anovaDDP(formula, data, na.action, prediction=NULL,
         mcmc=list(nburn=3000, nsave=2000, nskip=0, ndisplay=500),
         prior=NULL, state=NULL, scale.designX=TRUE)

Arguments

formula

a formula expression with the response returned by the Surv function in the survival package. It currently only supports right-censoring.

data

a data frame in which to interpret the variables named in the formula argument.

na.action

a missing-data filter function, applied to the model.frame.

prediction

a list giving the information used to obtain conditional inferences. The list includes the following element: xpred giving the npred by p covariates matrix, used for prediction. If prediction=NULL, xpred will be set to be the design matrix used in formula.

mcmc

a list giving the MCMC parameters. The list must include the following elements: nburn an integer giving the number of burn-in scans, nskip an integer giving the thinning interval, nsave an integer giving the total number of scans to be saved, ndisplay an integer giving the number of saved scans to be displayed on screen (the function reports on the screen when every ndisplay iterations have been carried out).

prior

a list giving the prior information. The list includes the following elements: N an integer giving the truncation of the Dirichlet process. See Zhou, Hanson and Zhang (2017) for more detailed hyperprior specifications.

state

a list giving the current value of the parameters. This list is used if the current analysis is the continuation of a previous analysis.

scale.designX

flag to indicate whether the design matrix X will be centered by column means and scaled by column standard deviations, where TRUE indicates yes. The default is TRUE for improving numerical stability. All returned posterior samples are based on the scaled covariates. Note that if we want to specify informative priors for the regression coefficients, these priors should correspond to the scaled predictors when scale.designX=TRUE.

Value

The results include the MCMC chains for the parameters discussed in Zhou, Hanson and Zhang (2017). Use names to find out what they are.

Details

This function fits a Bayesian Nonparametric model (De Iorio et al., 2009) for non-spatial right censored time-to-event data.

References

Zhou, H., Hanson, T., and Knapp, R. (2015). Marginal Bayesian nonparametric model for time to disease arrival of threatened amphibian populations. Biometrics, 71(4): 1101-1110.

De Iorio, M., Johnson, W. O., Mueller, P., and Rosner, G. L. (2009). Bayesian nonparametric nonproportional hazards survival modeling. Biometrics, 65(3): 762-771.

Examples

Run this code

# NOT RUN {
###############################################################
# A simulated data: mixture of two normals
###############################################################
# Note: the workspace is deliberately NOT cleared here, so running this
# example never deletes objects the user already has in their session.
library(survival)
library(spBayesSurv)
library(coda)
## True parameters of the two-component normal mixture for log survival time
# Column k of betaT holds the (intercept, slope) of the k-th component mean.
betaT <- cbind(c(3.5, 0.5), c(2.5, -1))
# Mixture weights, one per component.
wT <- c(0.4, 0.6)
# Component variances, one per component.
sig2T <- c(1^2, 0.5^2)
# Number of simulated subjects.
n <- 100
## The density function for log survival times:
# Evaluates the normal-mixture density of log survival time at each element
# of y, for one subject with covariate value(s) xi.
#   y  : numeric vector of log-times at which to evaluate the density.
#   xi : covariate value(s) of the subject; an intercept 1 is prepended.
#   w  : mixture weights (defaults to the global true weights wT).
# Reads the globals betaT (one column of coefficients per component) and
# sig2T (one variance per component).
# Returns a numeric vector of the same length as y.
fiofy <- function(y, xi, w = wT) {
  Xi <- c(1, xi)
  # One column per mixture component, one row per evaluation point.
  res <- matrix(0, length(y), length(w))
  # seq_along() keeps the loop empty (instead of iterating over c(1, 0))
  # when w has length zero.
  for (k in seq_along(w)) {
    res[, k] <- w[k] * dnorm(y, sum(Xi * betaT[, k]), sqrt(sig2T[k]))
  }
  rowSums(res)
}
# Density on the original time scale: evaluates the log-time density fiofy
# at log(t) and divides by t.
fioft <- function(t, xi, w = wT) {
  log_time_density <- fiofy(log(t), xi, w)
  log_time_density / t
}
## The cumulative distribution function for log survival times:
# Evaluates the normal-mixture CDF of log survival time at each element of y,
# for one subject with covariate value(s) xi.
#   y  : numeric vector of log-times at which to evaluate the CDF.
#   xi : covariate value(s) of the subject; an intercept 1 is prepended.
#   w  : mixture weights (defaults to the global true weights wT).
# Reads the globals betaT (one column of coefficients per component) and
# sig2T (one variance per component).
# Returns a numeric vector of the same length as y.
Fiofy <- function(y, xi, w = wT) {
  Xi <- c(1, xi)
  # One column per mixture component, one row per evaluation point.
  res <- matrix(0, length(y), length(w))
  # seq_along() keeps the loop empty (instead of iterating over c(1, 0))
  # when w has length zero.
  for (k in seq_along(w)) {
    res[, k] <- w[k] * pnorm(y, sum(Xi * betaT[, k]), sqrt(sig2T[k]))
  }
  rowSums(res)
}
# CDF on the original time scale: the log-time CDF Fiofy evaluated at log(t).
Fioft <- function(t, xi, w = wT) {
  log_t <- log(t)
  Fiofy(log_t, xi, w)
}
## Inverse of the log-time CDF Fiofy
# Returns the log-time y at which Fiofy(y, x) equals the probability u, found
# numerically with uniroot. The search starts on [-250, 250] and uniroot may
# extend that interval (extendInt = "yes") if the root is not bracketed.
Finv <- function(u, x) {
  cdf_gap <- function(y) Fiofy(y, x) - u
  solution <- uniroot(cdf_gap, lower = -250, upper = 250,
                      extendInt = "yes", tol = 1e-6)
  solution$root
}

## Covariate: one draw per subject, uniform on (-1.5, 1.5)
x1 <- runif(n, -1.5, 1.5)
X <- cbind(x1)

## True survival times by inverse-CDF sampling:
## Finv maps a uniform draw to a log-time, which is then exponentiated.
u <- runif(n)
tT <- vapply(
  seq_len(n),
  function(i) exp(Finv(u[i], X[i, ])),
  numeric(1)
)

### ----------- right-censored -------------###
## Censoring times, uniform on (20, 200)
Centime <- runif(n, 20, 200)
## Event indicator: 1 = event observed, 0 = right-censored
delta <- as.numeric(tT <= Centime)
sum(delta == 0) / n # censoring rate
rcen <- which(delta == 0)
## Observed time: the censoring time replaces the true time when censored
t_obs <- replace(tT, rcen, Centime[rcen])

## Collect everything in a data frame
d <- data.frame(tobs = t_obs, x1 = x1, delta = delta, tT = tT)
table(d$delta) / n

###############################################################
# Independent DDP: Bayesian Nonparametric Survival Model
###############################################################
# MCMC settings. These are kept small for the example;
# larger nburn, nsave and nskip should be used in practice.
nburn <- 500
nsave <- 500
nskip <- 0
mcmc <- list(nburn = nburn, nsave = nsave, nskip = nskip, ndisplay = 1000)

# Prior: N is the truncation level of the Dirichlet process.
# a0 and b0 are further hyperparameters; see the package reference for
# their meaning.
prior <- list(N = 10, a0 = 2, b0 = 2)

# Fit the ANOVA DDP model to the right-censored data
res1 <- anovaDDP(
  formula = Surv(tobs, delta) ~ x1,
  data = d,
  prior = prior,
  mcmc = mcmc
)

## LPML: sum of the log conditional predictive ordinates
LPML <- sum(log(res1$cpo))
LPML

## Number of non-negligible components
quantile(colSums(res1$w > 0.05))

############################################
## Curves
############################################
# Time grid, equally spaced on the log scale from exp(0) to exp(6).
ygrid <- seq(0, 6.0, length.out = 100)
tgrid <- exp(ygrid)
ngrid <- length(tgrid)
# Covariate values to predict at: x1 = -1 and x1 = 1, one per row.
xpred <- rbind(-1, 1)
estimates <- plot(res1, xpred = xpred, tgrid = tgrid)

## plot
# Two panels side by side; the previous graphics settings are saved so they
# can be restored once both panels are drawn.
op <- par(mfrow = c(1, 2))

# Left panel: true survival curves (solid) for both covariate values.
plot(tgrid, 1 - Fioft(tgrid, xpred[1, ]), type = "l", lwd = 3,
     main = "Survival function", xlab = "time", ylab = "survival")
lines(tgrid, 1 - Fioft(tgrid, xpred[2, ]), type = "l", lwd = 3)
# Fitted curves: column j of each estimate matrix corresponds to row j of
# xpred. Going by the element names, Shat is presumably the point estimate
# (dashed) and Shatlow/Shatup its lower/upper bounds (dotted).
for (j in seq_len(nrow(xpred))) {
  lines(estimates$tgrid, estimates$Shat[, j], lty = 2, lwd = 3)
  lines(estimates$tgrid, estimates$Shatlow[, j], lty = 3, lwd = 1)
  lines(estimates$tgrid, estimates$Shatup[, j], lty = 3, lwd = 1)
}

# Right panel: true densities of log time (solid) for both covariate values.
plot(log(tgrid), fiofy(log(tgrid), xpred[1, ]), type = "l", lwd = 3,
     main = "Density of log time", xlab = "log time", ylab = "density")
lines(log(tgrid), fiofy(log(tgrid), xpred[2, ]), type = "l", lwd = 3)
# Assuming fhat is a density on the time scale, multiplying by t converts it
# to the density of log(t) so it is comparable with fiofy.
for (j in seq_len(nrow(xpred))) {
  lines(log(estimates$tgrid), estimates$fhat[, j] * tgrid, lty = 2, lwd = 3)
  lines(log(estimates$tgrid), estimates$fhatlow[, j] * tgrid, lty = 3, lwd = 1)
  lines(log(estimates$tgrid), estimates$fhatup[, j] * tgrid, lty = 3, lwd = 1)
}

# Restore the graphics settings that were in effect before this example.
par(op)
# }

Run the code above in your browser using DataLab