# A splitter that only keeps variables with a class-wise mean difference > `d`
#
# Arguments:
#   x     Data frame of features, one row per observation (columns are
#         summarised one at a time, so a data frame is expected).
#   y     Class label of each observation; used to group the fitting rows.
#   fold  Fold definition, passed to `index.fit()` and `index.test()` to
#         obtain the row indices of the fitting and test sets.
#   d     Threshold: a feature is kept when the range (max - min) of its
#         per-class means in the fitting set is strictly greater than `d`.
#
# Returns a list with elements
#   fit       list(x, y) holding the fitting rows and the selected features
#   test      list(x, y) holding the test rows and the same selected features
#   features  logical vector, named by feature, TRUE for each kept feature
my.split <- function(x, y, fold, d=2){
    fit.idx <- index.fit(fold)
    test.idx <- index.test(fold)

    # `drop=TRUE` skips classes that have no observations in the fitting set.
    # Their means would be NaN, which would turn the feature mask into NA and
    # make the column subsetting below fail.
    by.class <- split(x[fit.idx,, drop=FALSE], y[fit.idx], drop=TRUE)

    # Build the features-by-classes matrix explicitly with cbind(). Relying on
    # sapply() simplification yields a plain vector when `x` has one column,
    # which apply() cannot handle.
    class.means <- do.call(cbind, lapply(by.class, function(class.x){
        vapply(class.x, mean, numeric(1), na.rm=TRUE)
    }))

    # Range of the class means per feature. Missing means (e.g. a feature that
    # is entirely NA within one class) are ignored rather than propagated.
    mean.range <- apply(class.means, 1, function(m){
        m <- m[!is.na(m)]
        if (length(m) == 0) NA_real_ else max(m) - min(m)
    })
    # Features whose range could not be computed are never selected
    diff.feats <- !is.na(mean.range) & mean.range > d

    list(
        fit = list(x = x[fit.idx, diff.feats, drop=FALSE],
                   y = y[fit.idx]),
        test = list(x = x[test.idx, diff.feats, drop=FALSE],
                    y = y[test.idx]),
        features = diff.feats)
}
# Plug the splitter into the modeling workflow of an "lda" procedure
proc <- modeling.procedure("lda")
perf <- evaluate.modeling(
    proc,
    x = iris[-5],
    y = iris[["Species"]],
    pre.process = my.split
)

# An end user can choose another `d` by wrapping the splitter in an
# anonymous function; `my.split` itself does not need to be redefined
perf <- evaluate.modeling(
    proc,
    x = iris[-5],
    y = iris[["Species"]],
    pre.process = function(...) {
        my.split(..., d = 1.3)
    }
)
# Run the code above in your browser using DataLab