# NOT RUN {
# }
# NOT RUN {
# ----------------------------------------------------------------------------------------- ##
# Mother's stress on child illness:
# Investigate whether mother's stress is (Granger) causal on child illness
# 'hrf' model is fit using previous observations of mother's stress to predict
# child's illness at given time point, but not mother's stress at that time point
#
# Predictor variables are classified into "historical" and "concurrent"
#
# A predictor is "historical" if its prior realizations can be used to predict
# the outcome.
#
# A predictor is "concurrent" if its realization at the same timepoint as the outcome
# can be used to predict the outcome at that timepoint
#
# A predictor can be both "concurrent" and "historical", the status of the predictors
# can be set by the 'vh' and 'vc' arguments of 'hrf'.
# (if not set these are automatically determined)
#
# ------------------------------------------------------------------------------------------- ##
data(mscm)
mscm=as.data.frame(na.omit(mscm))
# -- first two weeks
mscm=mscm[mscm$day<=14,]
# -- set concurrent and historical predictors
historical_predictors=match(c("stress","illness"),names(mscm))
concurrent_predictors=which(names(mscm)!="stress")
# -- fit historical random forest
# (NOTE: response is 0/1 so a regression tree is equivalent
## to a classification tree with Gini-index splitting)
ff=hrf(x=mscm,id=mscm$id,time=mscm$day,yindx="illness",
vh=historical_predictors,vc=concurrent_predictors)
# -- variable importance table
vi=varimp_hrf(ff)
vi
# -- fit historical random forest with 'se=TRUE'
ff=hrf(x=mscm,id=mscm$id,time=mscm$day,yindx="illness",
vh=historical_predictors,vc=concurrent_predictors,se=T,B=50)
# -- partial dependence with top 4 predictors
par(mfrow=c(2,2))
for(k in 1:4)
pd=partdep_hrf(ff,xindx=rownames(vi)[k])
par(mfrow=c(1,1))
# -------------------------------------------- ##
# Simulated data from random intercept model
# -------------------------------------------- ##
## -- simulated data -------------------------------------------------- #
p=5;sigma_e=.5;sigma_a=.5;v=rep(1,p);n=500;pnoise=2
random_intercept=rnorm(n,sd=sigma_a^.5)
random_intercept=as.numeric(matrix(random_intercept,nrow=p,ncol=n,byrow=TRUE))
x=random_intercept+rnorm(n*p,sd=sigma_e^.5)
id=sort(rep(1:n,p))
time<-rep(1:p,n)
znoise=matrix(rnorm(n*p*pnoise),ncol=pnoise)
xx=as.data.frame(cbind(time,x,znoise))
xx$rfact=as.factor(sample(letters,nrow(xx),replace=TRUE))
## --------------------------------------------------------------------- #
# fit historical random forest
ff=hrf(x=xx,time=time,id=id,yindx=2,ntrees=100)
# plot oob-error
plot(1:ff$ntrees,ff$error,type="l",col="blue")
# fit forest with standard error selection
ff=hrf(x=xx,time=time,id=id,yindx=2,ntrees=100,se=TRUE)
# plot partial dependence of response on its past
pd=partdep_hrf(ff,xindx=2)
# variable importance table
varimp_hrf(ff)
# -------------------------------------------- ##
# Examples with non-longitudinal data
# -------------------------------------------- ##
# -------------------- ##
# Boston Housing data
# -------------------- ##
library(mlbench)
library(randomForest)
data(BostonHousing)
dat=as.data.frame(na.omit(BostonHousing))
## omitting arguments time/id assumes rows are iid
h=hrf(x=dat,yindx="medv",ntrees=500)
## randomForest comparison
r=randomForest(medv~.,data=dat)
## plot oob-error for both
plot(1:length(r$mse),r$mse,type="l",ylim=c(min(r$mse,h$error),max(r$mse,h$error)),
main="BostonHousing",xlab="forest size",ylab="out-of-bag mse")
points(1:length(h$error),h$error,type="l",col="blue")
## -- variable importance table
vi=varimp_hrf(h)
vi
## -- partial dependence plots with approximate 95<!-- % C.I -->
h=hrf(x=dat,yindx="medv",ntrees=500,se=TRUE)
par(mfrow=c(2,2))
for(k in 1:4)
pd=partdep_hrf(h,xindx=rownames(vi)[k])
par(mfrow=c(1,1))
# ------------------------------------- ##
# Ionosphere data (classification)
# ---------------------------------- ##
data(Ionosphere)
dat=as.data.frame(na.omit(Ionosphere))
h=hrf(x=dat,yindx="Class",ntrees=500)
r=randomForest(Class~.,data=dat)
plot(1:length(r$err[,1]),r$err[,1],type="l",ylim=c(min(r$err[,1],h$error),max(r$err[,1],h$error)),
ylab="mis-classification rate",xlab="forest size",main="Out-of-bag mis-classification")
points(1:length(h$error),h$error,type="l",col="blue")
# -- variable importance table
vi=varimp_hrf(h)
par(mfrow=c(2,2))
for(k in 1:4)
pd=partdep_hrf(h,xindx=rownames(vi)[k])
par(mfrow=c(1,1))
# }
Run the code above in your browser using DataLab