# NOT RUN {
# Load the mscm data and keep only the complete rows.
# NOTE(review): mscm, hrf(), htb(), varimp_hrf() and varimp_htb() are not
# defined in this file -- presumably the package that provides them must be
# attached before running this; confirm.
data(mscm)
mscm <- as.data.frame(na.omit(mscm))

# -- set concurrent and historical predictors (as column positions):
#    "stress" and "illness" are the historical predictors,
#    every column except "stress" is a concurrent predictor
historical_predictors <- match(c("stress", "illness"), names(mscm))
concurrent_predictors <- which(names(mscm) != "stress")
control <- list(
  vh = historical_predictors,
  vc = concurrent_predictors,
  nodesize = 20
)

## -- fit model (hrf), with "illness" as the response column
ff <- hrf(
  x = mscm,
  id = mscm$id,
  time = mscm$day,
  yindx = "illness",
  control = control
)

# -- variable importance table
vi <- varimp_hrf(ff)
vi

## same with htb
# cvfold is set because varimp_htb needs cross-validation runs.
# NOTE(review): the option is spelled "cvfold" here, while htb() is called
# with a "cv.fold=" argument further down in this file -- confirm which
# spelling the control list expects.
control <- list(
  vh = historical_predictors,
  vc = concurrent_predictors,
  lambda = .1,
  ntrees = 200,
  nsplit = 3,
  family = "bernoulli",
  cvfold = 10
)
ff <- htb(
  x = mscm,
  id = mscm$id,
  time = mscm$day,
  yindx = "illness",
  control = control
)

# -- variable importance table
vi <- varimp_htb(ff)
vi
# ---------------------------------------------------------------------------- #
# Boston Housing data
# Comparison of variable-importance Z-scores (hrf and htb) with the absolute
# coefficient t-statistics from a linear model fitted to the same data
# ---------------------------------------------------------------------------- #
library(mlbench)
data(BostonHousing)
dat <- as.data.frame(na.omit(BostonHousing))

# as.numeric() applied directly to a factor returns the internal level codes,
# not the labelled values, so convert through as.character() first. Keeping
# chas as a single numeric column also means its lm term is named "chas",
# which is what the merge on "var" below relies on.
dat$chas <- as.numeric(as.character(dat$chas))

# -- random forest
h <- hrf(x = dat, yindx = "medv")

# -- tree boosting
hb <- htb(x = dat, yindx = "medv", ntrees = 1000, cv.fold = 10, nsplit = 3)

# -- Comparison of variable importance Z-scores and Z-scores from linear model
vi <- varimp_hrf(h)
vb <- varimp_htb(hb)
dvi <- data.frame(var = as.character(vi$Predictor), Z_hrf = vi$Z)
dvb <- data.frame(var = as.character(vb$Predictor), Z_htb = vb$Z)

# Coefficient table of the linear model. The component and the column are
# spelled out in full instead of relying on `$` partial matching and on the
# column position.
lm_coef <- summary(lm(medv ~ ., data = dat))$coefficients
dlm <- data.frame(
  var = rownames(lm_coef),
  Z_lm = round(abs(lm_coef[, "t value"]), 3)
)

# The intercept is not a predictor: drop it by name, then attach the hrf
# Z-scores (all.x = TRUE keeps every lm term even without a match).
dlm <- dlm[dlm$var != "(Intercept)", ]
dlm <- merge(dlm, dvi, by = "var", all.x = TRUE)

# -- Z-scores of lm, hrf and htb for the predictor variables
merge(dlm, dvb, by = "var", all.x = TRUE)
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab