## ================================================================================ ##
## generate a data based on the semi-parametric negative binomial
## mixed-effect AR(1) model.
## Under this model, the response counts follows the negative binomial:
## Y_ij | G_i = g_i ~ NB(r_ij,p_i) where r_ij = exp(X^T beta)/a , p_i =1/(a*g_i+1)
## G_i is from unknown distribution.
## For simulation purpose, we generate the sample of gi from
## the mixture of three gamma distribuions.
## The adjacent repeated measures of the same subjects are correlated
## with correlation structure:
## cov(Y_ij,Y_ij'|G_i=g_i)=d^{j-j'} E(Y_ij')*(a*g_i^2+g_i)
# log(a) = -0.5, log(th)=1.3, logit(delta) = -0.2
# b0 = 0.5, no covariates;
loga = -0.5
logtheta = 1.3
logitd = -0.2
b0 = 0.5
# 80 subjects each with 5 scans
n = 80
sn = 5
## generate a sample of size B from the mixture of three gamma distribution:
p1 <- 0.5
p2 <- 0.3
B <- 1000
sampledG<- c(
rgamma(n=p1*B,scale=1,shape=10),
rgamma(n=p2*B,scale=3,shape=5),
rgamma(n=(1-p1-p2)*B,scale=5,shape=5)
)
## mean is set to 1;
sampledG <- sampledG/mean(sampledG)
logvarG <- log(var(sampledG))
## hist(sampledG)
DT4 = rNBME.R(gdist = "NoN",
n = n, ## the total number of subjectss
sn = sn,
u1 = rep(exp(b0),sn),
u2 = rep(exp(b0),sn),
a = exp(loga),
d = exp(logitd)/(1+exp(logitd)),
othrp = sampledG
)
Vcode=rep(-1:(sn-2),n) # scan number -1, 0, 1, 2, 3
ID = DT4$id
new = Vcode > 0
dt4=data.frame(CEL=DT4$y)
## ================================================================================ ##
## [1] Fit the negative binomial mixed-effect AR(1) model
## where random effects is from the gamma distribution
re.gamma.ar1=mle.ar1.fun(formula=CEL~1,data=dt4,ID=ID,
Vcode=Vcode,
p.ini=c(loga,logtheta,logitd,b0),
## log(a), log(theta), logit(d), b0
model="G",
IPRT=TRUE)
Psum=index.batch(olmeNB=re.gamma.ar1, data=dt4,ID=ID,Vcode=Vcode,
labelnp=new,qfun="sum", IPRT=TRUE,i.se=FALSE)
## [2] Fit the negative binomial mixed-effect AR(1) model
## where random effects is from the log-normal distribution
re.logn.ar1=mle.ar1.fun(formula=CEL~1,data=dt4,ID=ID,
Vcode=Vcode,
p.ini=c(loga,logtheta,logitd,b0),
## log(a), log(theta), logit(d), b0
model="N",
IPRT=TRUE)
\dontrun{
## Requires some time
Psum=index.batch(olmeNB=re.logn.ar1,data=dt4,ID=ID,Vcode=Vcode,
labelnp=new,qfun="sum", IPRT=TRUE)
}
## [3] Fit the negative binomial independent model
## where random effects is from the lognormal distribution
re.logn.ind=mle.fun(formula=CEL~1,data=dt4,ID=ID,
model="N",
p.ini=c(loga,logtheta,b0),
IPRT=TRUE)
Psum=index.batch(olmeNB=re.logn.ind,data=dt4,ID=ID,
labelnp=new,qfun="sum", IPRT=TRUE)
## [4] Fit the semi-parametric negative binomial AR(1) model
## This model is closest to the true model
logvarG = log(var(sampledG))
re.semi.ar1=mle.ar1.non3(formula=CEL~1,data=dt4,ID=ID,
p.ini=c(loga, logvarG, logitd,b0),Vcode=Vcode)
## compute the estimates of the conditional probabilities
## with sum of the new repeated measure as a summary statistics
Psum=index.batch(olmeNB=re.semi.ar1, labelnp=new,data=dt4,ID=ID,Vcode=Vcode,
qfun="sum", IPRT=TRUE,i.se=TRUE)
## compute the estimates of the conditional probabilities
## with max of the new repeated measure as a summary statistics
Pmax=index.batch(olmeNB=re.semi.ar1, labelnp=new,qfun="max",data=dt4,ID=ID,Vcode=Vcode,
IPRT=TRUE,i.se=TRUE)
## Which patient's estimated probabilities
## based on the sum and max statistics disagrees the most?
( IDBigDif <- which(rank(abs(Pmax$condProbSummary[,1]-Psum$condProbSummary[,1]))==80) )
## Show the patient's CEL counts
dt4$CEL[ID==IDBigDif]
## Show the estimated conditional probabilities based on the sum summary statistics
Psum$condProbSummary[IDBigDif,1]
## Show the estimated conditional probabilities based on the max summary statistics
Pmax$condProbSummary[IDBigDif,1]
## [5] Fit the semi-parametric negative binomial independent model
re.semi.ind=mle.a3.fun(formula=CEL~1,data=dt4,ID=ID, p.ini=c(loga, logvarG, b0))
Psum=index.batch(olmeNB=re.semi.ind, labelnp=new,
data=dt4,ID=ID, qfun="sum", IPRT=TRUE,i.se=TRUE)
## ======================================================================== ##
## == Which model performed the best in terms of the estimation of beta0 == ##
## ======================================================================== ##
getpoints <- function(y,estb0,sdb0=NULL,crit=qnorm(0.975))
{
points(estb0,y,col="blue",pch=16)
if (!is.null(sdb0))
{
points(c(estb0-crit*sdb0,estb0+crit*sdb0),rep(y,2),col="red",type="l")
}
}
ordermethod <- c("gamma.ar1","logn.ar1","logn.ind","semi.ar1","semi.ind")
estb0s <- c(
re.gamma.ar1$est[4,1],
re.logn.ar1$est[4,1],
re.logn.ind$est[3,1],
re.semi.ar1$est[4],
re.semi.ind$est[3]
)
## The true beta0 is:
b0
c <- 1.1
plot(0,0,type="n",xlim=c(min(estb0s)-0.5,max(estb0s)*c),ylim=c(0,7),yaxt="n",
main="Simulated from the AR(1) model \n with random effect ~ a semi-parametric distribution")
legend("topright",
legend=ordermethod)
abline(v=b0,lty=3)
## [1] gamma.ar1
sdb0 <- re.gamma.ar1$est[4,2]
getpoints(6,estb0s[1],sdb0)
## [2] logn.ar1
sdb0 <- re.logn.ar1$est[4,2]
getpoints(5,estb0s[2],sdb0)
## [3] logn.ind
sdb0 <- re.logn.ind$est[3,2]
getpoints(4,estb0s[3],sdb0)
## [4] semi.ar1
getpoints(3,estb0s[4])
## [5] semi.ind
getpoints(2,estb0s[5])
Run the code above in your browser using DataLab