
Last chance! 50% off unlimited learning
Sale ends in
H2OAutoencoderScoring for anomaly detection and/or dimensionality reduction
H2OAutoencoderScoring(
data,
Features = NULL,
RemoveFeatures = FALSE,
ModelObject = NULL,
AnomalyDetection = TRUE,
DimensionReduction = TRUE,
ReturnLayer = 4L,
per_feature = TRUE,
NThreads = max(1L, parallel::detectCores() - 2L),
MaxMem = "28G",
H2OStart = TRUE,
H2OShutdown = TRUE,
ModelID = "TestModel",
model_path = NULL
)
The data.table with the columns you wish to have analyzed
NULL Column numbers or column names
Set to TRUE if you want the features you specify in the Features argument to be removed from the data returned
If NULL then the model will be loaded from file. Otherwise, it will use what is supplied
Set to TRUE to run anomaly detection
Set to TRUE to run dimension reduction
Which layer of the NNet to return. Choose from 1-7, with 4 being the layer with the fewest nodes.
Set to TRUE to have per-feature anomaly detection generated. Otherwise, an overall value will be generated.
max(1L, parallel::detectCores()-2L)
"28G"
TRUE to start H2O inside the function
Setting to TRUE will shut down H2O once it is done being used internally.
"TestModel"
If NULL no model will be saved. If a valid path is supplied the model will be saved there
A data.table
Other Feature Engineering:
AutoDataPartition()
,
AutoDiffLagN()
,
AutoHierarchicalFourier()
,
AutoInteraction()
,
AutoLagRollStatsScoring()
,
AutoLagRollStats()
,
AutoTransformationCreate()
,
AutoTransformationScore()
,
AutoWord2VecModeler()
,
AutoWord2VecScoring()
,
ContinuousTimeDataGenerator()
,
CreateCalendarVariables()
,
CreateHolidayVariables()
,
DT_GDL_Feature_Engineering()
,
DifferenceDataReverse()
,
DifferenceData()
,
DummifyDT()
,
H2OAutoencoder()
,
ModelDataPrep()
,
Partial_DT_GDL_Feature_Engineering()
,
TimeSeriesFill()
# NOT RUN {
# Training ----
# Fit the autoencoder on simulated data. The call below passes the working
# directory as model_path and "TestModel" as ModelID.

# Simulate 1000 rows of example data
data <- RemixAutoML::FakeDataGenerator(
  N = 1000L,
  Correlation = 0.70,
  ID = 2L,
  ZIP = 2L,
  FactorCount = 2L,
  AddDate = TRUE,
  AddComment = FALSE,
  TimeSeries = FALSE,
  ChainLadderData = FALSE,
  Classification = FALSE,
  MultiClass = FALSE
)

# Train; the function's return value is assigned back to `data`
data <- RemixAutoML::H2OAutoencoder(

  # Input data and the columns to fit on (every column except the first
  # and the last)
  data = data,
  ValidationData = NULL,
  Features = names(data)[2L:(ncol(data)-1L)],
  RemoveFeatures = TRUE,

  # Which services to run, and at what granularity
  AnomalyDetection = TRUE,
  DimensionReduction = TRUE,
  per_feature = FALSE,

  # Model identifier and model path
  ModelID = "TestModel",
  model_path = getwd(),

  # H2O cluster settings: leave two cores free, but always use at least one
  NThreads = max(1L, parallel::detectCores()-2L),
  MaxMem = "28G",
  H2OStart = TRUE,
  H2OShutdown = TRUE,

  # Network architecture and training hyperparameters
  LayerStructure = NULL,
  ReturnLayer = 4L,
  Activation = "Tanh",
  Epochs = 5L,
  L2 = 0.10,
  ElasticAveraging = TRUE,
  ElasticAveragingMovingRate = 0.90,
  ElasticAveragingRegularization = 0.001
)
############################
# Scoring
############################

# Create simulated data using the same generator settings as the training
# example, so the column layout matches the data the model was fit on
data <- RemixAutoML::FakeDataGenerator(
  Correlation = 0.70,
  N = 1000L,
  ID = 2L,
  FactorCount = 2L,
  AddDate = TRUE,
  AddComment = FALSE,
  ZIP = 2L,
  TimeSeries = FALSE,
  ChainLadderData = FALSE,
  Classification = FALSE,
  MultiClass = FALSE)

# Run algo; the function's return value is assigned back to `data`
data <- RemixAutoML::H2OAutoencoderScoring(

  # Select the service
  AnomalyDetection = TRUE,
  DimensionReduction = TRUE,

  # Data related args
  data = data,
  # Score the same columns the training example fit on (columns 2 through
  # ncol - 1). Using 2L:ncol(data) here would also name the last column,
  # which the training call excluded from its feature set.
  Features = names(data)[2L:(ncol(data) - 1L)],
  RemoveFeatures = TRUE,
  per_feature = FALSE,

  # ModelObject = NULL means the model is loaded from file; ModelID and
  # model_path match the values passed to the training call
  ModelObject = NULL,
  ModelID = "TestModel",
  model_path = getwd(),

  # H2O args: leave two cores free, but always use at least one
  NThreads = max(1L, parallel::detectCores() - 2L),
  MaxMem = "28G",
  H2OStart = TRUE,
  H2OShutdown = TRUE,
  ReturnLayer = 4L)
# }
Run the code above in your browser using DataLab