# \donttest{
library(MSclassifR)
library(MALDIquant)
## 1) Load the bundled Citrobacter example data, run the package's signal
##    processing on the raw spectra, then detect peaks on the result
data(
  list = c("CitrobacterRKIspectra", "CitrobacterRKImetadata"),
  package = "MSclassifR"
)
spectra <- MSclassifR::SignalProcessing(CitrobacterRKIspectra)
peaks <- MSclassifR::PeakDetection(
  x = spectra,
  averageMassSpec = FALSE
)
## 2) Build X and Y (sample-by-peak intensities + labels)
## Option A: use the package helper. `sparse = FALSE` is passed here, so this
## is the non-sparse variant; switch it to TRUE if you prefer a sparse return
## (see ?build_XY_from_peaks).
Y <- factor(CitrobacterRKImetadata$Species)
xy <- MSclassifR::build_XY_from_peaks(
  peaks,
  labels = Y,
  normalize = "max",
  sparse = FALSE
)
X <- xy$X
Y <- xy$Y
## Option B: via MALDIquant::intensityMatrix (as in the original examples)
##IntMat <- MALDIquant::intensityMatrix(peaks)
##rownames(IntMat) <- paste(CitrobacterRKImetadata$Strain_name_spot)
##IntMat[is.na(IntMat)] <- 0
## apply(., 1, f) returns one *column* per spectrum, hence the t() to get
## spectra back into rows:
##IntMat <- t(apply(IntMat, 1, function(x) x / max(x))) # per-spectrum max norm
##X <- IntMat # already spectra in rows, features (m/z) in columns
##Y <- factor(CitrobacterRKImetadata$Species)
## 3) Shortlist discriminant m/z values: "cvp" selection, validated by "cv"
##    with NumberCV = 2, optimising Kappa
a <- MSclassifR::SelectionVar(
  X, Y,
  MethodSelection = "cvp",
  MethodValidation = "cv",
  NumberCV = 2,
  Metric = "Kappa",
  PreProcessing = c("center", "scale", "nzv", "corr")
)
sel_moz <- a[["sel_moz"]]
## 4) Fit four kinds of model ("linear", "nnet", "rf", "svm") on the
##    shortlisted m/z. Every fit shares the same data, resampling settings
##    (number = 2, repeats = 2) and metric, so those live in one wrapper;
##    only `kind` and the optional `Sampling` differ per call.
fit_on_shortlist <- function(kind, ...) {
  MSclassifR::LogReg(
    X = X, moz = sel_moz, Y = Y,
    number = 2, repeats = 2,
    Metric = "Kappa", kind = kind, ...
  )
}
model_lm <- fit_on_shortlist("linear")
model_nn <- fit_on_shortlist("nnet", Sampling = "up")
model_rf <- fit_on_shortlist("rf", Sampling = "down")
model_svm <- fit_on_shortlist("svm", Sampling = "up")
## Collect the `train_mod` element of each fit, in the order fitted above
Models <- lapply(
  list(model_lm, model_nn, model_rf, model_svm),
  function(fit) fit$train_mod
)
## 5) Predict classes for the first five peak lists with all trained models;
##    m/z matching uses a tolerance of 6 Da. The same index vector selects
##    the peak lists and their reference labels so the two stay aligned.
first_five <- seq_len(5L)
prob_cat <- MSclassifR::PredictLogReg(
  peaks = peaks[first_five],
  model = Models,
  moz = sel_moz,
  tolerance = 6,
  Reference = Y[first_five]
)
prob_cat
## 6) Meta-classifier strategy (several RF models + SMOTE + Fisher combine)
##    First shortlist m/z with the "mda" method, scaling the number of trees
##    with the number of columns of X.
a2 <- MSclassifR::SelectionVar(
  X, Y,
  MethodSelection = "mda",
  Ntree = 5 * ncol(X)
)
sel_moz2 <- a2[["sel_moz"]]
## Fit the same random-forest specification four times and keep each
## `train_mod`; the fits are run sequentially, one per list element.
n_meta_models <- 4L
models2 <- lapply(seq_len(n_meta_models), function(i) {
  fit <- MSclassifR::LogReg(
    X = X, moz = sel_moz2, Y = Y,
    number = 5, repeats = 5,
    kind = "rf", Metric = "Kappa",
    Sampling = "smote"
  )
  fit$train_mod
})
## Predict on the full set of peak lists with all four models
prob_cat2 <- MSclassifR::PredictLogReg(
  peaks = peaks,
  model = models2,
  moz = sel_moz2,
  tolerance = 6,
  Reference = Y
)
# }
# Run the code above in your browser using DataLab