# NOT RUN {
# }
# NOT RUN {
### Cross Validation Example ####
# Open a PDF graphics device so that every plot produced below is written to
# a single file; the device stays open until dev.off() is called.
pdf(
  file = "CrossValidationExample.pdf",
  width = 8,
  height = 6
)
# Load the stage C prostate cancer data shipped with the rpart package
data(stagec, package = "rpart")

# Prepare the data: drop the event time and treat `eet` as a factor
stagec$pgtime <- NULL
stagec$eet <- as.factor(stagec$eet)

# Keep rows that contain NA when the model frame is built, so the model matrix
# has one row per patient and lines up with stagec$pgstat in the cbind() below.
# NOTE: this is a session-wide option and it stays in force for the code that
# follows; restore it yourself if you need the previous na.action afterwards.
options(na.action = "na.pass")

# Outcome first, then every predictor plus all pairwise interactions
# (`.*.`); `[-1]` drops the intercept column of the model matrix.
stagec_mat <- cbind(
  pgstat = stagec$pgstat,
  as.data.frame(model.matrix(pgstat ~ .*., stagec))[-1]
)

# Interaction columns are named "a:b"; replace the colon with "__".
# Base gsub() with fixed = TRUE is a literal replacement and does not require
# the stringr package to be attached.
fnames <- gsub(":", "__", colnames(stagec_mat), fixed = TRUE)
colnames(stagec_mat) <- fnames
# Impute the missing data
dataCancerImputed <- nearestNeighborImpute(stagec_mat)

# Coerce every column to numeric in place. Assigning a list through `[]`
# keeps the data-frame shape and column names, and avoids both sapply()'s
# input-dependent return type and the 1:ncol() sequence.
# NOTE(review): assumes nearestNeighborImpute() returns a data frame -- confirm.
dataCancerImputed[] <- lapply(dataCancerImputed, as.numeric)
# Cross-validate a Random Forest classifier on the imputed data with
# "pgstat" as the outcome (trainFraction = 0.8, repetitions = 10,
# asFactor = TRUE).
cvRF <- randomCV(
  dataCancerImputed,
  "pgstat",
  randomForest::randomForest,
  trainFraction = 0.8,
  repetitions = 10,
  asFactor = TRUE
)

# Evaluate (and plot) the prediction performance of the Random Forest
# classifier from the medianTest element of the cross-validation result.
RFStats <- predictionStats_binary(
  cvRF$medianTest,
  plotname = "Random Forest",
  cex = 0.9
)
# Cross-validate BSWiMS on the same train/test sets that were used for the
# Random Forest run (cvRF$trainSamplesSets).
# No data set or outcome is passed in this call; presumably randomCV() reuses
# the ones supplied to the preceding call -- confirm in the package docs.
cvBSWiMS <- randomCV(
  fittingFunction = BSWiMS.model,
  trainSampleSets = cvRF$trainSamplesSets
)

# Evaluate (and plot) the prediction performance of the BSWiMS classifier
BSWiMSStats <- predictionStats_binary(
  cvBSWiMS$medianTest,
  plotname = "BSWiMS",
  cex = 0.9
)
# Cross-validate an LDA classifier preceded by a t-student feature filter,
# again on the Random Forest train/test sets so the results are comparable.
cvLDA <- randomCV(
  dataCancerImputed,
  "pgstat",
  MASS::lda,
  trainSampleSets = cvRF$trainSamplesSets,
  featureSelectionFunction = univariate_tstudent,
  featureSelection.control = list(limit = 0.5, thr = 0.975)
)

# Evaluate (and plot) the prediction performance of the LDA classifier
LDAStats <- predictionStats_binary(
  cvLDA$medianTest,
  plotname = "LDA",
  cex = 0.9
)
# Cross-validate a QDA classifier that reuses two things from earlier runs:
# the feature sets selected during the LDA run (cvLDA$selectedFeaturesSet)
# and the Random Forest train/test sets (cvRF$trainSamplesSets).
cvQDA <- randomCV(
  fittingFunction = MASS::qda,
  trainSampleSets = cvRF$trainSamplesSets,
  featureSelectionFunction = cvLDA$selectedFeaturesSet
)

# Evaluate (and plot) the prediction performance of the QDA classifier
QDAStats <- predictionStats_binary(
  cvQDA$medianTest,
  plotname = "QDA",
  cex = 0.9
)
# Create a bar plot with 95% CI that compares the balanced error of the
# classifiers. One row per classifier, in the order RF, BSWiMS, LDA, QDA;
# the list is left unnamed so rbind() adds no row names.
classifier_stats <- list(RFStats, BSWiMSStats, LDAStats, QDAStats)
errorciTable <- do.call(
  rbind,
  lapply(classifier_stats, function(stats) stats$berror)
)

bpCI <- barPlotCiError(
  as.matrix(errorciTable),
  metricname = "Balanced Error",
  thesets = "Classifier Method",
  themethod = c("RF", "BSWiMS", "LDA", "QDA"),
  main = "Balanced Error",
  offsets = c(0.5, 0.15),
  scoreDirection = "<",
  ho = 0.5,
  args.legend = list(bg = "white", x = "topright"),
  col = terrain.colors(4)
)

# Close the PDF device so the file is flushed to disk
dev.off()
# }
# Run the code above in your browser using DataLab