spCopulaDDP: A Spatial Copula Approach to Fully Bayesian Nonparametric Survival Analysis

Description

This function fits a spatial copula survival model for survival analysis of point-referenced right censored time-to-event data.

Usage

spCopulaDDP(y, delta, x=NULL, s, prediction, prior, mcmc, state, FSA = TRUE, knots,
         data=sys.frame(sys.parent()), na.action=na.fail, work.dir=NULL)

Arguments

an n by 1 vector giving the log survival times.

delta

an n by 1 vector indicating whether it is right censored (=0) or not (=1).

an n by p matrix of covariates without intercept. The default is NULL, indicating no covariates included.

an n by d matrix of UMT coordinates, where d is the dimension of space.

prediction

a list giving the information used to obtain conditional inferences. The list includes the following elements: spred and xpred giving the n by 2 new locations and corr

prior

a list giving the prior information. The list includes the following parameter: N an integer giving the truncation of the Dirichlet process, a0 and b0 giving the hyperpara

mcmc

a list giving the MCMC parameters. The list must include the following elements: nburn an integer giving the number of burn-in scans, nskip an integer giving the thinning interval,

state

a list giving the current value of the parameters. This list is used if the current analysis is the continuation of a previous analysis.

FSA

indicate if the full scale approximation is need. The default is FALSE.

knots

a list giving the knots and block ids when FSA=TRUE. This list includes the following parameter: ss an m by d matrix of UMT coordinates, where d is the dimension of space, blockid

data

data frame.

na.action

a function that indicates what should happen when the data contain NAs. The default action (na.fail) causes spCopulaDDP to print an error message and terminate if there ar

work.dir

working directory.

Value

The results include the MCMC chains for the parameters discussed in Zhou, Hanson and Knapp (2014+). Use names to find out what they are.

Details

This generic function fits a spatial copula survival model (Zhou, Hanson and Knapp, 2014+), for (potentially) point-referenced right-censored data.

References

Zhou, H., Hanson, T. and Knapp, R. (2014+). A Spatial Copula Approach to Fully Bayesian Nonparametric Survival Analysis. In preparation for publication.

Examples

Run this code

###############################################################
# A simulated data: mixture of two normals
###############################################################
rm(list=ls())
library(MASS)
library(Rcpp)
library(RcppArmadillo)
library(coda)
library(survival)
library(spBayesSurv)

## True parameters 
betaT = cbind(c(3.5, 0.5), c(2.5, -1)); 
wT = c(0.4, 0.6); 
sig2T = c(1^2, 0.5^2);
theta1 = 0.98; theta2 = 0.1;

## generate coordinates: 
## npred is the # of locations for prediction
n = 300; npred = 30; ntot = n + npred;
ldist = 100; wdist = 40;
s1 = runif(ntot, 0, wdist); s2 = runif(ntot, 0, ldist);
s = rbind(s1,s2);
#plot(s[1,], s[2,]);
## divide them into blocks
nldist=10; nwdist=4;
nb=nldist*nwdist; nb; # number of blocks;
coor = matrix(0, nb, 4); ## four edges for each block;
tempindex=1; lstep=ldist/nldist; wstep=wdist/nwdist;
for(i in 1:nwdist){
  for(j in 1:nldist){
    coor[tempindex,] = c((i-1)*wstep, i*wstep, (j-1)*lstep, j*lstep );  
    tempindex = tempindex + 1;
  }
}
## Assign block id for each location
blockid = rep(NA,ntot);
for(i in 1:nb){
  blockid[((s1>coor[i,1])*(s1<=coor[i,2])*(s2>coor[i,3])*(s2<=coor[i,4]))==1]=i;
}
## Choose knots S*
nldist=15; nwdist=6;
m=nldist*nwdist; m; # number of knots;
ss = matrix(0, m, 2); 
tempindex=1; lstep=ldist/nldist; wstep=wdist/nwdist;
for(i in 1:nwdist){
  for(j in 1:nldist){
    ss[tempindex,] = c( (i-1)*wstep+wstep/2, (j-1)*lstep+lstep/2);  
    tempindex = tempindex + 1;
  }
}
## Covariance matrix
dnn = .Call("DistMat", s, s, PACKAGE = "spBayesSurv");
corT = theta1*exp(-theta2*dnn)+(1-theta1)*diag(ntot);

## Generate x 
x = runif(ntot,-1.5,1.5);
X = cbind(rep(1,ntot), x);
p = ncol(X); # number of covariates + 1
## Generate transformed log of survival times
z = mvrnorm(1, rep(0, ntot), corT);
## The pdf of Ti:
fi = function(y, xi, w=wT){
  nw = length(w);
  ny = length(y);
  res = matrix(0, ny, nw);
  Xi = c(1,xi);
  for (k in 1:nw){
    res[,k] = w[k]*dnorm(y, sum(Xi*betaT[,k]), sqrt(sig2T[k]) )
  }
  apply(res, 1, sum)
}
## true plot
xx = seq(-2, 7, 0.01)
plot(xx, fi(xx, -1), "l", lwd=2, col=2)
lines(xx, fi(xx, 1), "l", lwd=2, col=3)
## The CDF of Ti:
Fi = function(y, xi, w=wT){
  nw = length(w);
  ny = length(y);
  res = matrix(0, ny, nw);
  Xi = c(1,xi);
  for (k in 1:nw){
    res[,k] = w[k]*pnorm(y, sum(Xi*betaT[,k]), sqrt(sig2T[k]) )
  }
  apply(res, 1, sum)
}
## The inverse for CDF of Ti
Finvsingle = function(u, xi) {
  res = uniroot(function (x) Fi(x, xi)-u, lower=-500, upper=500);
  res$root
}
Finv = function(u, xi) {sapply(u, Finvsingle, xi)};
## Generate log of survival times y
u = pnorm(z);
y = rep(0, ntot);
for (i in 1:ntot){
  y[i] = Finv(u[i], x[i]);
}
#plot(x,y);
yTrue = y;

## Censoring scheme
Centime = runif(ntot, 3.5,5);
Centime = 10000;
delta = (y<=Centime) +0 ;
sum(delta)/ntot;
cen = which(delta==0);
y[cen] = Centime[cen];

## make a data frame
dtotal = data.frame(s1=s1, s2=s2, y=y, x=x, delta=delta, yTrue=yTrue, id=blockid);
## Hold out npred=30 for prediction purpose
predindex = sample(1:ntot, npred);
dpred = dtotal[predindex,];
dtrain = dtotal[-predindex,];

# rename the variables 
d = dtrain; n=nrow(d); n;
s = cbind(d$s1, d$s2);
y = d$y;
x = d$x;
delta =d$delta;

# FSA settings
knots = list(ss=ss, blockid=d$id);

# Prediction settings 
xpred = dpred$x;
s0 = cbind( dpred$s1, dpred$s2 );
prediction = list(spred=s0, xpred=xpred, predid=dpred$id);

###############################################################
# spatial copula DDP 
###############################################################
# MCMC parameters
nburn <- 1000
nsave <- 500
nskip <- 4
ndisplay <- 100
mcmc <- list(nburn=nburn,
             nsave=nsave,
             nskip=nskip,
             ndisplay=ndisplay)

# Prior information
prior = list(N = 10,
             m0 = rep(0,p), 
             S0 = diag(rep(1e5,p), nrow=p, ncol=p), 
             Sig0 = diag(rep(1e5,p), nrow=p, ncol=p), 
             k0 = 7,
             nua = 2, nub = 1, 
             a0 = 1, b0 = 1,
             spS0 = diag(c(4,4)), 
             spadapter = 1,
             theta0 = c(1.0, 1.0, 0.5, 0.2));

# current state values
state <- list(theta=c(0.95, 0.5));

# Fit the model
fitspDDP = spCopulaDDP( y = y,
              delta =delta, 
              x = x, 
              s = s, 
              prediction=prediction, 
              prior=prior, 
              mcmc=mcmc,
              state=state,
              FSA=FALSE,
              knots=knots);
res = fitspDDP; remove(fitspDDP);
# trace plots
par(mfrow = c(3,2))
w.save2 = res$w;
Kindex = which.max(rowMeans(w.save2));
traceplot(mcmc(w.save2[Kindex,]), main="w")
sig2.save2 = res$sigma2;
traceplot(mcmc(sig2.save2[Kindex,]), main="sig2")
beta.save2 = res$beta;
alpha.save2 = res$alpha;
traceplot(mcmc(beta.save2[2,Kindex,]), main="beta")
traceplot(mcmc(alpha.save2), main="alpha")
theta1.save2 = res$theta1;
theta2.save2 = res$theta2
traceplot(mcmc(theta1.save2), main="theta1")
traceplot(mcmc(theta2.save2), main="theta2")

## LPML
LPML2 = sum(log(res$cpo)); LPML2;
## MSPE
mean((dpred$yTrue-apply(res$Ypred, 1, median))^2); 

## plots
par(mfrow = c(2,2));
xnew = c(-1, 1);
xpred = cbind(xnew); 
nxpred = nrow(xpred);
ygrid = seq(0,6.0,0.05); tgrid = exp(ygrid);
ngrid = length(ygrid);
estimates = GetCurves(res, xpred, ygrid, CI=c(0.05, 0.95));
fhat = estimates$fhat; 
Shat = estimates$Shat;
## density in y
plot(ygrid, fi(ygrid, xnew[1]), "l", lwd=2, ylim=c(0, 0.8), 
      xlim=c(0,6), main="density in y")
for(i in 1:nxpred){
  lines(ygrid, fi(ygrid, xnew[i]), lwd=2)
  lines(ygrid, fhat[,i], lty=2, lwd=2, col=4);
}
## survival in y
plot(ygrid, 1-Fi(ygrid, xnew[1]), "l", lwd=2, ylim=c(0, 1), 
      xlim=c(0,6), main="survival in y")
for(i in 1:nxpred){
  lines(ygrid, 1-Fi(ygrid, xnew[i]), lwd=2)
  lines(ygrid, Shat[,i], lty=2, lwd=2, col=4);
}
## density in t
plot(tgrid, fi(ygrid, xnew[1])/tgrid, "l", lwd=2, ylim=c(0, 0.15), 
      xlim=c(0,100), main="density in t")
for(i in 1:nxpred){
  lines(tgrid, fi(ygrid, xnew[i])/tgrid, lwd=2)
  lines(tgrid, fhat[,i]/tgrid, lty=2, lwd=2, col=4);
}
## survival in t
plot(tgrid, 1-Fi(ygrid, xnew[1]), "l", lwd=2, ylim=c(0, 1), 
      xlim=c(0,100), main="survival in t")
for(i in 1:nxpred){
  lines(tgrid, 1-Fi(ygrid, xnew[i]), lwd=2)
  lines(tgrid, Shat[,i], lty=2, lwd=2, col=4);
}

Run the code above in your browser using DataLab