# NOT RUN {
# Create simulated data
# Define correlation strength of features to target
Correl <- 0.85
# Number of rows you want returned
N <- 10000
# Create data: Adrian is a uniform draw; x1 and x2 are helper columns that
# are removed again once the features have been derived
data <- data.table::data.table(Adrian = runif(N))
data[, x1 := qnorm(Adrian)]
data[, x2 := runif(N)]
# Shared signal that every feature is a transformation of. Computed once in a
# temporary column instead of repeating the same expression for each feature.
data[, Latent := pnorm(Correl * x1 + sqrt(1 - Correl^2) * qnorm(x2))]
# Numeric features: different monotone transformations of the shared signal
data[, Independent_Variable1 := log(Latent)]
data[, Independent_Variable2 := Latent]
data[, Independent_Variable3 := exp(Latent)]
data[, Independent_Variable4 := exp(exp(Latent))]
data[, Independent_Variable5 := sqrt(Latent)]
data[, Independent_Variable6 := Latent^0.10]
data[, Independent_Variable7 := Latent^0.25]
data[, Independent_Variable8 := Latent^0.75]
data[, Independent_Variable9 := Latent^2]
data[, Independent_Variable10 := Latent^4]
# Categorical feature: bucket Independent_Variable2 into levels "A" to "E"
data[, Independent_Variable11 := as.factor(
  data.table::fifelse(Independent_Variable2 < 0.15, "A",
  data.table::fifelse(Independent_Variable2 < 0.45, "B",
  data.table::fifelse(Independent_Variable2 < 0.65, "C",
  data.table::fifelse(Independent_Variable2 < 0.85, "D", "E")))))]
# Drop the helper columns so only Adrian and the features remain
data.table::set(data, j = c("x1", "x2", "Latent"), value = NULL)
# Feature columns: every column except the first one. Negative indexing is
# used because 2L:ncol(data) would count backwards (2, 1) for a one-column
# table.
FeatureNames <- names(data)[-1L]
# Get number of columns for LayerStructure
# NOTE: this reuses the name N and overwrites the row count assigned earlier
N <- length(FeatureNames)
# Run algo
Output <- RemixAutoML::H2oAutoencoder(
  # Select the service
  AnomalyDetection = TRUE,
  DimensionReduction = TRUE,
  # Data related args
  data = data,
  ValidationData = NULL,
  Features = FeatureNames,
  RemoveFeatures = FALSE,
  # H2O args
  # detectCores() can return NA when the core count cannot be determined;
  # na.rm = TRUE makes the lower bound of one thread apply in that case
  NThreads = max(1L, parallel::detectCores() - 2L, na.rm = TRUE),
  MaxMem = "28G",
  H2oShutdown = TRUE,
  ModelID = "TestModel",
  LayerStructure = NULL,
  ReturnLayer = 4L,
  per_feature = TRUE,
  Activation = "Tanh",
  Epochs = 5L,
  L2 = 0.10,
  ElasticAveraging = TRUE,
  ElasticAveragingMovingRate = 0.90,
  ElasticAveragingRegularization = 0.001)
# Inspect output
Data <- Output$Data
Model <- Output$Model
# If ValidationData is not null
ValidationData <- Output$ValidationData
# }
# Run the code above in your browser using DataLab