##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## Example on a public dataset: the burn data
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## The burn data are also available in the KMsurv package
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (FALSE) {
## Load the packages providing the functions and the data used below
library(GPLTR)   ## pltr.glm, best.tree.BIC.AIC, best.tree.bootstrap, bagging.pltr, ...
library(rpart)   ## rpart() for the initial classification tree
data(burn)
## Build the rpart tree with all the variables
rpart.burn <- rpart(D2 ~ Z1 + Z2 + Z3 + Z4 + Z5 + Z6 + Z7 + Z8 + Z9
+ Z10 + Z11, data = burn, method = "class")
plot(rpart.burn, main = 'rpart tree')
text(rpart.burn, xpd = TRUE, cex = .6, use.n = TRUE)
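## (Optional) a minimal check of the fitted tree: printcp() is standard
## rpart tooling and prints the complexity parameter table
printcp(rpart.burn)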
## Fit the PLTR model, adjusting for gender (Z2), using the proposed method
args.rpart <- list(minbucket = 10, maxdepth = 4, cp = 0, maxcompete = 0,
                   maxsurrogate = 0)
family <- "binomial"
X.names <- "Z2"
Y.name <- "D2"
G.names <- c('Z1','Z3','Z4','Z5','Z6','Z7','Z8','Z9','Z10','Z11')
pltr.burn <- pltr.glm(burn, Y.name, X.names, G.names, args.rpart = args.rpart,
family = family, iterMax = 4, iterMin = 3, verbose = FALSE)
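## (Optional) inspect the maximal tree returned by the iterative procedure;
## the 'tree' component is the rpart object that is pruned back below
print(pltr.burn$tree)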
## Prune back the maximal tree using either the BIC or the AIC criterion
pltr.burn_prun <- best.tree.BIC.AIC(xtree = pltr.burn$tree, burn, Y.name,
X.names, family = family)
## plot the BIC selected tree
plot(pltr.burn_prun$tree$BIC, main = 'BIC selected tree')
text(pltr.burn_prun$tree$BIC, xpd = TRUE, cex = .6, col = 'blue')
## Summary of the tree selected by the BIC criterion
summary(pltr.burn_prun$tree$BIC)
## Summary of the final selected pltr model
summary(pltr.burn_prun$fit_glm$BIC)
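## The AIC counterparts can be inspected the same way (assuming, as the
## function name suggests, that the AIC components mirror the BIC ones)
summary(pltr.burn_prun$tree$AIC)
summary(pltr.burn_prun$fit_glm$AIC)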
## Fit the PLTR model, adjusting for gender (Z2), using the parametric
## bootstrap method
## set numWorkers = 1 on a Windows platform
args.parallel <- list(numWorkers = 10)
best_bootstrap <- best.tree.bootstrap(pltr.burn$tree, burn, Y.name, X.names,
G.names, B = 2000, BB = 2000, args.rpart = args.rpart, epsi = 0.008,
iterMax = 6, iterMin = 5, family = family, LEVEL = 0.05, LB = FALSE,
args.parallel = args.parallel, verbose = FALSE)
plot(best_bootstrap$selected_model$tree, main = 'original method')
text(best_bootstrap$selected_model$tree, xpd = TRUE)
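## (Optional) peek at the top-level components of the selected model;
## str() is base R and works on any object
str(best_bootstrap$selected_model, max.level = 1)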
## Bag a set of basic unpruned PLTR predictors
## see ?bagging.pltr for details
Bag.burn <- bagging.pltr(burn, Y.name, X.names, G.names, family,
args.rpart,epsi = 0.01, iterMax = 4, iterMin = 3,
Bag = 10, verbose = FALSE, doprune = FALSE)
## The threshold values used
Bag.burn$CUT
## The set of PLTR models in the bagging procedure
PLTR_BAG.burn <- Bag.burn$Glm_BAG
## The set of trees in the bagging procedure
TREE_BAG.burn <- Bag.burn$Tree_BAG
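## Sanity check: the list should contain Bag = 10 fitted trees
length(TREE_BAG.burn)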
## Use the bagging procedure to predict the response on new data
## see ?predict_bagg.pltr for details
## (note: the 'thresshold' argument keeps the package's own spelling)
Pred_Bag.burn <- predict_bagg.pltr(Bag.burn, Y.name, newdata = burn,
type = "response", thresshold = seq(0, 1, by = 0.1))
## The confusion matrix for each threshold value, using the majority vote
Pred_Bag.burn$CONF1
## The prediction error for each threshold value
Pred_Bag.burn$PRED_ERROR1
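## (Optional) a minimal sketch plotting the error against the threshold
## grid, assuming PRED_ERROR1 is a vector aligned with the grid used above
plot(seq(0, 1, by = 0.1), Pred_Bag.burn$PRED_ERROR1, type = "b",
     xlab = "threshold", ylab = "prediction error")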
## Compute the variable importances using the bagging procedure
Var_Imp_BAG.burn <- VIMPBAG(Bag.burn, burn, Y.name)
## Importance scores using the permutation method, for each threshold value
Var_Imp_BAG.burn$PIS
## Side-by-side barplots of the three proposed importance scores
par(mfrow=c(1,3))
barplot(Var_Imp_BAG.burn$PIS$CUT5, main = 'PIS', horiz = TRUE, las = 1,
cex.names = .8, col = 'lightblue')
barplot(Var_Imp_BAG.burn$DIS, main = 'DIS', horiz = TRUE, las = 1,
cex.names = .8, col = 'grey')
barplot(Var_Imp_BAG.burn$DDIS, main = 'DDIS', horiz = TRUE, las = 1,
cex.names = .8, col = 'purple')
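## restore the default single-panel plotting layout
par(mfrow = c(1, 1))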
}