Learn R Programming

RemixAutoML (version 0.4.2)

H2oIsolationForest: H2oIsolationForest for anomaly detection

Description

H2oIsolationForest trains an H2O isolation forest for anomaly detection and classifies outliers using a quantile-based cutoff.

Usage

H2oIsolationForest(
  data,
  TestData = NULL,
  ColumnNumbers = NULL,
  Threshold = 0.975,
  MaxMem = "28G",
  NThreads = -1,
  NTrees = 100,
  SampleRate = (sqrt(5) - 1)/2
)

Arguments

data

The data.table with the columns you wish to have analyzed

TestData

Data for scoring the trained isolation forest

ColumnNumbers

A vector with the column numbers you wish to analyze

Threshold

Quantile (between 0 and 1) used to determine the cutoff value for classifying observations as outliers

MaxMem

Specify the amount of memory to allocate to H2O. E.g. "28G"

NThreads

Specify the number of threads (e.g. cores * 2)

NTrees

Specify the number of decision trees to build

SampleRate

Specify the row sample rate per tree

Value

A data.table

See Also

Other Unsupervised Learning: AutoKMeans(), GenTSAnomVars(), ResidualOutliers()

Examples

Run this code
# NOT RUN {
# Example: simulate a data.table of correlated features and score it with
# H2oIsolationForest(). Requires the data.table and RemixAutoML packages.

# Create simulated data

# Define correlation strength of features to target
Correl <- 0.85

# Number of rows you want returned
N <- 10000L

# Create data
# Target starts as a uniform draw, x1 is its normal score and x2 is an
# independent uniform draw (x1 and x2 are helper columns, removed below)
data <- data.table::data.table(Target = runif(N))
data[, x1 := qnorm(Target)]
data[, x2 := runif(N)]

# Latent probability shared by every feature: a normal variate correlated
# with x1 at strength Correl, mapped back to (0, 1) with pnorm(). It is
# computed once here rather than re-evaluated for each derived feature.
data[, Latent := pnorm(Correl * x1 + sqrt(1 - Correl^2) * qnorm(x2))]

# Numeric features: transformations of the latent probability
data[, Independent_Variable1 := log(Latent)]
data[, Independent_Variable2 := Latent]
data[, Independent_Variable3 := exp(Latent)]
data[, Independent_Variable4 := exp(exp(Latent))]
data[, Independent_Variable5 := sqrt(Latent)]
data[, Independent_Variable6 := Latent^0.10]
data[, Independent_Variable7 := Latent^0.25]
data[, Independent_Variable8 := Latent^0.75]
data[, Independent_Variable9 := Latent^2]
data[, Independent_Variable10 := Latent^4]

# Replace the numeric Target with a five-level factor obtained by binning
# Independent_Variable2 at 0.20 / 0.40 / 0.60 / 0.80
data[, Target := as.factor(
  data.table::fifelse(Independent_Variable2 < 0.20, "A",
    data.table::fifelse(Independent_Variable2 < 0.40, "B",
      data.table::fifelse(Independent_Variable2 < 0.6, "C",
        data.table::fifelse(Independent_Variable2 < 0.8, "D", "E")))))]

# Categorical feature: same construction with different cut points
# (0.15 / 0.45 / 0.65 / 0.85)
data[, Independent_Variable11 := as.factor(
  data.table::fifelse(Independent_Variable2 < 0.15, "A",
    data.table::fifelse(Independent_Variable2 < 0.45, "B",
      data.table::fifelse(Independent_Variable2 < 0.65, "C",
        data.table::fifelse(Independent_Variable2 < 0.85, "D", "E")))))]

# Drop the helper columns so only Target and the features remain
data.table::set(data, j = c("x1", "x2", "Latent"), value = NULL)

# Run algo
# TestData = NULL:      no separate data set is supplied for scoring
# ColumnNumbers = NULL: no explicit column selection is supplied
# Threshold = 0.95:     quantile used for the outlier cutoff (the documented
#                       default is 0.975)
# MaxMem / NThreads:    memory and thread settings for H2O
# NTrees / SampleRate:  number of trees and row sample rate per tree
Outliers <- H2oIsolationForest(
  data,
  TestData = NULL,
  ColumnNumbers = NULL,
  Threshold = 0.95,
  MaxMem = "28G",
  NThreads = -1,
  NTrees = 100,
  SampleRate = (sqrt(5) - 1) / 2
)
# }

Run the code above in your browser using DataLab