# \donttest{
library("MSclassifR")
library("MALDIquant")
###############################################################################
## 1. Pre-processing of mass spectra
# Load the example mass spectra and their metadata shipped with MSclassifR
data(list = c("CitrobacterRKIspectra", "CitrobacterRKImetadata"),
     package = "MSclassifR")
# Standard pre-processing of the mass spectra
spectra <- MSclassifR::SignalProcessing(CitrobacterRKIspectra)
# Peak detection on the pre-processed mass spectra
peaks <- MSclassifR::PeakDetection(x = spectra, averageMassSpec = FALSE)
# Intensity matrix: one row per mass spectrum,
# one column per mass-to-charge value
IntMat <- MALDIquant::intensityMatrix(peaks)
rownames(IntMat) <- paste(CitrobacterRKImetadata$Strain_name_spot)
# Replace missing values (NA) in the matrix with 0
IntMat[is.na(IntMat)] <- 0
# Divide each mass spectrum (row) by its maximum intensity value.
# apply() over rows returns the spectra in columns, hence the transposition
# performed in the next step.
IntMat <- apply(IntMat, MARGIN = 1, FUN = function(intensities) {
  intensities / max(intensities)
})
# Transpose back so that mass spectra are in rows for the statistical analysis
X <- t(IntMat)
# Known categories of the mass spectra, used as the classification target
Y <- factor(CitrobacterRKImetadata$Species)
###############################################################################
## 2. Selection of discriminant mass-to-charge values using RFERF
# with candidate subset sizes of 2 to 5 variables (Sizes = 2:5),
# cross-validation (NumberCV = 2),
# up-sampling method and
# trained with the Accuracy metric
a <- MSclassifR::SelectionVar(
  X,
  Y,
  MethodSelection = "RFERF",
  MethodValidation = "cv",
  PreProcessing = c("center", "scale", "nzv", "corr"),
  NumberCV = 2,
  Metric = "Accuracy",
  Sizes = 2:5,
  Sampling = "up"
)
# Keep the shortlisted mass-to-charge values for the model-fitting steps
sel_moz <- a$sel_moz
###############################################################################
## 3. Perform LogReg from shortlisted discriminant mass-to-charge values
# Linear multinomial regression
# (no `kind` argument is passed, so the function's default is used),
# without sampling method
# and trained with the Kappa coefficient metric
model_lm <- MSclassifR::LogReg(
  X = X,
  moz = sel_moz,
  Y = factor(Y),
  number = 2,
  repeats = 2,
  Metric = "Kappa"
)
# Estimated model:
model_lm
# Nonlinear multinomial regression using neural networks (kind = "nnet"),
# with up-sampling method and
# trained with the Kappa coefficient metric
model_nn <- MSclassifR::LogReg(
  X = X,
  moz = sel_moz,
  Y = factor(Y),
  number = 2,
  repeats = 2,
  kind = "nnet",
  Metric = "Kappa",
  Sampling = "up"
)
# Estimated model:
model_nn
# Nonlinear multinomial regression using random forests (kind = "rf"),
# with down-sampling method and
# trained with the Kappa coefficient metric
model_rf <- MSclassifR::LogReg(
  X = X,
  moz = sel_moz,
  Y = factor(Y),
  number = 2,
  repeats = 2,
  kind = "rf",
  Metric = "Kappa",
  Sampling = "down"
)
# Estimated model:
model_rf
# Nonlinear multinomial regression using xgboost (kind = "xgb"),
# with down-sampling method and
# trained with the Kappa coefficient metric
model_xgb <- MSclassifR::LogReg(
  X = X,
  moz = sel_moz,
  Y = factor(Y),
  number = 2,
  repeats = 2,
  kind = "xgb",
  Metric = "Kappa",
  Sampling = "down"
)
# Estimated model:
model_xgb
# Nonlinear multinomial regression using svm (kind = "svm"),
# with down-sampling method and
# trained with the Kappa coefficient metric
model_svm <- MSclassifR::LogReg(
  X = X,
  moz = sel_moz,
  Y = factor(Y),
  number = 2,
  repeats = 2,
  kind = "svm",
  Metric = "Kappa",
  Sampling = "down"
)
# Estimated model:
model_svm
##########
# Of note, step 3 can be run several times
# to find optimal models,
# because the hyperparameter search is random
###############################################################################
## 4. Select the best models in terms of average Kappa and save them for reuse
# Kappa value read from row 1, column 2 of each model's stats_global,
# labelled with the short name of the corresponding model
Kappa_model <- stats::setNames(
  c(
    model_lm$stats_global[1, 2],
    model_nn$stats_global[1, 2],
    model_rf$stats_global[1, 2],
    model_xgb$stats_global[1, 2],
    model_svm$stats_global[1, 2]
  ),
  c("lm", "nn", "rf", "xgb", "svm")
)
# Best model(s) in terms of Kappa (all tied models are shown)
Kappa_model[which(Kappa_model == max(Kappa_model))]
# Save the best model(s) for reuse
# models <- list(model_lm$train_mod, model_nn$train_mod, model_rf$train_mod,
#                model_xgb$train_mod, model_svm$train_mod)
# models_best <- models[which(Kappa_model == max(Kappa_model))]
# for (i in seq_along(models_best)) {
#   saveRDS(models_best[[i]], file = paste0("model_best_", i, ".rds"))
# }
# Load a saved model
# model_best <- readRDS("model_best_1.rds")
###############################################################################
## 5. Try other metrics to select the best model
# Linear multinomial regression
# (no `kind` argument is passed, so the function's default is used),
# with up-sampling method and
# trained with the "AdjRankIndex" metric
model_lm <- MSclassifR::LogReg(
  X = X,
  moz = sel_moz,
  Y = factor(Y),
  number = 2,
  repeats = 3,
  Metric = "AdjRankIndex",
  Sampling = "up"
)
# }
# Run the code above in your browser using DataLab