# NOT RUN {
# }
# NOT RUN {
# ----------------------------------------------------------------------------------------- ##
# Mother's stress on child illness:
# Investigate whether mother's stress is (Granger) causal on child illness
# 'hrf' model is fit using previous observations of mother's stress to predict
# child's illness at given time point, but not mother's stress at that time point
#
# Predictor variables are classified into "historical" and "concurrent"
#
# A predictor is "historical" if its prior realizations can be used to predict
# the outcome.
#
# A predictor is "concurrent" if its realization at the same timepoint as the outcome
# can be used to predict the outcome at that timepoint
#
# A predictor can be both "concurrent" and "historical", the status of the predictors
# can be set by the 'vh' and 'vc' arguments of 'hrf'.
# (if not set these are automatically determined)
#
# ------------------------------------------------------------------------------------------- ##
data(mscm)
mscm=as.data.frame(na.omit(mscm))
## first 2 weeks
mscm=mscm[mscm$day<=14,]
# -- set concurrent and historical predictors
historical_predictors=match(c("illness","stress"),names(mscm))
concurrent_predictors=which(names(mscm)!="stress")
# logistic regression
ff=htb(x=mscm,id=mscm$id,time=mscm$day,yindx="illness",ntrees=100,lambda=.5,method="freqw",
vh=historical_predictors,vc=concurrent_predictors,cv.fold=10,family="bernoulli")
# cross-validated negative log-likelihood
plot(1:ff$ntrees,ff$cv_error,type="l",col="blue",ylab="",
xlab="iterations",main="Cross-validated deviance")
vi=varimp_htb(ff)
vi
par(mfrow=c(2,2))
for(k in 1:4)
partdep_htb(ff,xindx=rownames(vi)[k])
par(mfrow=c(1,1))
# -- random intercept example --- #
p=5;sigma_e=.5;sigma_a=.5;v=rep(1,p);n=500;pnoise=2
random_intercept=rnorm(n,sd=sigma_a^.5)
random_intercept=as.numeric(matrix(random_intercept,nrow=p,ncol=n,byrow=TRUE))
x=random_intercept+rnorm(n*p,sd=sigma_e^.5)
id=sort(rep(1:n,p))
time<-rep(1:p,n)
znoise=matrix(rnorm(n*p*pnoise),ncol=pnoise)
xx=cbind(time,x,znoise)
# fit historical random forest
ff=htb(x=xx,time=time,id=id,yindx=2,ntrees=100,nsamp=5,lambda=.1,cv.fold=10)
# plot cv-error
plot(1:ff$ntrees,ff$cv_error,type="l",col="blue",xlab="iterations",
ylab="",main="Cross-validated mse")
# plot partial dependence of response on its past
pd=partdep_htb(ff,xindx=2,ngrid=25,subsample=.1)
# variable importance table
varimp_htb(ff,nperm=20)
# ------------------------------------------ #
# Non-longitudinal data examples
# ------------------------------------------ #
# ------------------------------------------ #
# Boston Housing data
# ------------------------------------------ #
library(htree)
library(mlbench)
data(BostonHousing)
dat=as.data.frame(na.omit(BostonHousing))
# omitting arguments 'time' and 'id' assumes rows are iid
h=htb(x=dat,yindx="medv",ntrees=500,cv.fold=10)
# -- plot cross-validated Mean-squared error --- #
plot(1:length(h$cv_error),h$cv_error,type="l",xlab="Boosting iterations",
ylab="MSE",main="Cross-validated Mean-squared error")
# -- iteration with lowest error
iter_hat=order(h$cv_error)[1]
# -- variable importance table
vi=varimp_htb(h,nperm=20)
vi
# -- partial dependence of top 4 predictors with +/-2 S.E.
# (standard errors are based on the
# delete-d jack-knife estimator, using the cross-validation runs)
par(mfrow=c(2,2))
for(k in 1:4)
partdep_htb(h,xindx=rownames(vi)[k])
par(mfrow=c(1,1))
# }
# NOT RUN {
# }
Run the code above in your browser using DataLab