# Fit a classifier on Alon's Colon Cancer Data set using 10 genes, after a
# base-10 logarithmic transformation. The first column of AlonDS is dropped
# before the transformation is applied.
log10genes <- log10(AlonDS[, -1])
ldarule1 <- RFlda(
  log10genes,
  AlonDS$grouping,
  Selmethod = "fixedp",
  maxp = 10
)

# In-sample classification results; the levels of the grouping factor are
# passed as the group codes.
predict(
  ldarule1,
  log10genes,
  grpcodes = levels(AlonDS$grouping)
)$class

# Show the true assignments so they can be compared with the predictions.
cat("Original classes:\n")
print(AlonDS$grouping)
# Estimate error rates by four-fold cross-validation (kfold = 4), using a
# single replication (CVrep = 1).
# Note: in cross-validation analysis it is recommended to set the argument
# 'ldafun' to "classification", which speeds up computations by avoiding
# unnecessary eigen-decompositions.
CrosValRes1 <- DACrossVal(
  log10genes,
  AlonDS$grouping,
  TrainAlg = RFlda,
  Selmethod = "fixedp",
  ldafun = "classification",
  maxp = 10,
  kfold = 4,
  CVrep = 1
)

# Summarise the "Clerr" slice of the three-way cross-validation result.
summary(CrosValRes1[, , "Clerr"])
# Find the best factor model amongst the choices q = 1 or q = 2: q is set to
# "CVq" and the candidate values are supplied through CVqtrials.
ldarule2 <- RFlda(
  log10genes,
  AlonDS$grouping,
  q = "CVq",
  CVqtrials = 1:2,
  Selmethod = "fixedp",
  ldafun = "classification",
  maxp = 10
)

# Report the chosen q. The message is terminated with a newline (the original
# ended with an empty string) so that any following console output starts on
# its own line.
cat("Best error rate estimate found with q =", ldarule2$q, "\n")
# Perform the analysis finding the number of selected genes by the Expanded HC
# scheme (no Selmethod/maxp is supplied here), reusing the q value stored in
# ldarule2.
ldarule3 <- RFlda(log10genes, AlonDS$grouping, q = ldarule2$q)

# Report how many genes were kept. The message is terminated with a newline so
# that the classification output below starts on its own line.
cat("Number of selected genes =", ldarule3$nvkpt, "\n")

# Get classification results; the levels of the grouping factor are passed as
# the group codes.
# (Stray web-page text that had been fused onto this statement was removed; it
# made the script fail to parse.)
predict(
  ldarule3,
  log10genes,
  grpcodes = levels(AlonDS$grouping)
)$class