# NOT RUN {
# Example workflow: simulate a data set, fit an isolation forest with
# RemixAutoML::H2OIsolationForest, strip the columns that the fit appended,
# and then score the same data with RemixAutoML::H2OIsolationForestScoring.

# Create simulated data
data <- RemixAutoML::FakeDataGenerator(
  Correlation = 0.70,
  N = 50000,
  ID = 2L,
  FactorCount = 2L,
  AddDate = TRUE,
  ZIP = 0L,
  TimeSeries = FALSE,
  ChainLadderData = FALSE,
  Classification = FALSE,
  MultiClass = FALSE)

# Fit the isolation forest. The first column ("Adrian") is excluded from the
# feature set; every remaining column is passed as a feature.
# ModelID and SavePath are reused verbatim by the scoring call below.
data <- RemixAutoML::H2OIsolationForest(
  data,
  Features = names(data)[2L:ncol(data)],
  IDcols = c("Adrian", "IDcol_1", "IDcol_2"),
  ModelID = "Adrian",
  SavePath = getwd(),
  Threshold = 0.95,
  MaxMem = "28G",
  NThreads = -1,
  NTrees = 100,
  MaxDepth = 8,
  MinRows = 1,
  RowSampleRate = (sqrt(5)-1)/2,
  ColSampleRate = 1,
  ColSampleRatePerLevel = 1,
  ColSampleRatePerTree = 1,
  CategoricalEncoding = "AUTO",
  Debug = TRUE)

# Remove output from data and then score.
# NOTE(review): the hard-coded 17 assumes the simulated data had 16 columns
# before the fit, so that columns 17 onward are the ones added by
# H2OIsolationForest -- confirm if the FakeDataGenerator arguments change.
data[, eval(names(data)[17:ncol(data)]) := NULL]

# Score the data with the model fitted above.
# ModelID must match the ModelID given to H2OIsolationForest; the earlier
# value "TestModel" did not correspond to any model created in this script.
# NOTE(review): presumably the model is located via SavePath and ModelID --
# confirm against the package documentation.
Outliers <- RemixAutoML::H2OIsolationForestScoring(
  data,
  Features = names(data)[2L:ncol(data)],
  IDcols = c("Adrian", "IDcol_1", "IDcol_2"),
  H2OStart = TRUE,
  H2OShutdown = TRUE,
  ModelID = "Adrian",
  SavePath = getwd(),
  Threshold = 0.95,
  MaxMem = "28G",
  NThreads = -1,
  Debug = FALSE)
# }
# Run the code above in your browser using DataLab