Learn R Programming

smartdata (version 1.0.3)

feature_selection: Feature selection wrapper

Description

Feature selection wrapper

Usage

feature_selection(dataset, method, class_attr = NULL, exclude = NULL, ...)

Arguments

dataset

Dataset we want to do feature selection on

method

character. Name of the selected feature selection method (e.g. "Boruta", "chi_squared", "cfs"; see Examples)

class_attr

character. Indicates the class attribute or attributes from dataset. Must exist in it.

exclude

character. Vector of attributes to exclude from the feature selection process

...

Further arguments for method

Value

The treated dataset, with the features not chosen by the selected method removed

Examples

Run this code
# NOT RUN {
library("smartdata")
library("rpart")
data(ecoli1, package = "imbalance")
data(HouseVotes84, package = "mlbench")

# Extracted from FSelector::best.first.search documentation
# Score a candidate feature subset by k-fold cross-validated accuracy on iris.
#
# `subset` holds the feature names handed to FSelector::as.simple.formula()
# to build a formula with `Species` as the response. For each fold a tree is
# fitted with rpart() on the training rows and scored on the held-out rows.
# Prints the subset and its mean accuracy, then returns that mean accuracy.
evaluator <- function(subset) {
  k <- 5
  # One uniform draw per row: a row belongs to fold i when its draw lies in
  # [(i - 1) / k, i / k), so fold membership is random on every call.
  splits <- runif(nrow(iris))
  results <- vapply(seq_len(k), function(i) {
    test_idx <- (splits >= (i - 1) / k) & (splits < i / k)
    train_idx <- !test_idx
    test <- iris[test_idx, , drop = FALSE]
    train <- iris[train_idx, , drop = FALSE]
    tree <- rpart(FSelector::as.simple.formula(subset, "Species"), train)
    predicted <- predict(tree, test, type = "class")
    error_rate <- sum(test$Species != predicted) / nrow(test)
    # Accuracy on this fold.
    1 - error_rate
  }, numeric(1))
  print(subset)
  print(mean(results))
  mean(results)
}



# Methods that take the class attribute directly; where num_features is
# given, it is the number of attributes to keep.
super_iris <- feature_selection(iris, "Boruta", class_attr = "Species")
super_iris <- feature_selection(iris, "chi_squared",
                                class_attr = "Species", num_features = 3)
# Pick 3 attributes from the continuous ones
super_ecoli <- feature_selection(ecoli1, "information_gain",
                                 class_attr = "Class", num_features = 3)
super_ecoli <- feature_selection(ecoli1, "gain_ratio",
                                 class_attr = "Class", num_features = 3)
super_ecoli <- feature_selection(ecoli1, "sym_uncertainty",
                                 class_attr = "Class", num_features = 3)
# V1 and V2 are left out of the selection process via `exclude`.
super_votes <- feature_selection(HouseVotes84, "oneR", exclude = c("V1", "V2"),
                                 class_attr = "Class", num_features = 3)
# Result is derived from iris, so it is stored in super_iris rather than
# overwriting the HouseVotes84 result above.
# NOTE(review): `type` is forwarded through `...` to the method; presumably the
# random-forest importance type -- confirm against the method's documentation.
super_iris <- feature_selection(iris, "RF_importance", class_attr = "Species",
                                num_features = 3, type = 2)
# }
# NOT RUN {
# Search-based methods: each call is given `evaluator` (defined above) as
# eval_fun. "Species" is passed in `exclude` instead of class_attr; evaluator
# itself uses Species as the response of the formula it builds.
# NOTE(review): presumably exclude keeps Species out of the candidate subsets
# handed to evaluator -- confirm against the package documentation.
super_iris  <- feature_selection(iris, "best_first_search", exclude = "Species",
                                 eval_fun = evaluator)
super_iris  <- feature_selection(iris, "forward_search", exclude = "Species",
                                 eval_fun = evaluator)
super_iris  <- feature_selection(iris, "backward_search", exclude = "Species",
                                 eval_fun = evaluator)
# }
# NOT RUN {
# These two methods are called with only the class attribute: no num_features
# and no evaluation function are supplied.
super_iris  <- feature_selection(iris, "cfs", class_attr = "Species")
super_iris  <- feature_selection(iris, "consistency", class_attr = "Species")

# }

Run the code above in your browser using DataLab