# NOT RUN {
# Classification Example
# Simulate 1000 rows: a uniform target, its normal quantile (x1), independent
# uniform noise (x2), and a latent score mixing the two with weight Correl.
Correl <- 0.85
aa <- data.table::data.table(target = runif(1000))
aa[, x1 := qnorm(target)]
aa[, x2 := runif(1000)]
# Compute the latent score once; every feature is a transform of it.
aa[, latent := pnorm(Correl * x1 + sqrt(1 - Correl^2) * qnorm(x2))]
aa[, `:=`(
  Independent_Variable1 = log(latent),
  Independent_Variable2 = latent,
  Independent_Variable3 = exp(latent),
  Independent_Variable4 = exp(exp(latent)),
  Independent_Variable5 = sqrt(latent),
  Independent_Variable6 = latent^0.10,
  Independent_Variable7 = latent^0.25,
  Independent_Variable8 = latent^0.75,
  Independent_Variable9 = latent^2,
  Independent_Variable10 = latent^4
)]
# Drop the helper columns so that columns 2:11 are exactly the ten features.
aa[, c("x1", "x2", "latent") := NULL]
# Binarise the target at 0.5 and store it as a factor.
aa[, target := as.factor(ifelse(target > 0.5, 1, 0))]

# Three-row specification table (GBM, DRF, DL), all pointing at the data
# object named "aa", target column 1 and feature columns 2:11.
Construct <- data.table::data.table(
  Targets         = rep("target", 3),
  Distribution    = rep("bernoulli", 3),
  Loss            = c("AUC", "AUC", "CrossEntropy"),
  Quantile        = rep(NA, 3),
  ModelName       = c("GBM", "DRF", "DL"),
  Algorithm       = c("gbm", "randomForest", "deeplearning"),
  dataName        = rep("aa", 3),
  TargetCol       = rep("1", 3),
  FeatureCols     = rep("2:11", 3),
  CreateDate      = rep(Sys.time(), 3),
  GridTune        = rep(FALSE, 3),
  ExportValidData = rep(TRUE, 3),
  ParDep          = rep(2, 3),
  PD_Data         = rep("All", 3),
  ThreshType      = rep("f1", 3),
  FSC             = rep(0.001, 3),
  tpProfit        = rep(NA, 3),
  tnProfit        = rep(NA, 3),
  fpProfit        = rep(NA, 3),
  fnProfit        = rep(NA, 3),
  SaveModel       = rep(FALSE, 3),
  SaveModelType   = c("Mojo", "standard", "mojo"),
  PredsAllData    = rep(TRUE, 3),
  TargetEncoding  = rep(NA, 3),
  SupplyData      = rep(FALSE, 3)
)

# Run the modeler on the specification table.
AutoH2OModeler(
  Construct,
  max_memory        = "28G",
  ratios            = 0.75,
  BL_Trees          = 500,
  nthreads          = 5,
  model_path        = NULL,
  MaxRuntimeSeconds = 3600,
  MaxModels         = 30,
  TrainData         = NULL,
  TestData          = NULL,
  SaveToFile        = FALSE,
  ReturnObjects     = TRUE
)
# Multinomial Example
# Simulate 1000 rows: a uniform target, its normal quantile (x1), independent
# uniform noise (x2), and a latent score mixing the two with weight Correl.
Correl <- 0.85
aa <- data.table::data.table(target = runif(1000))
aa[, x1 := qnorm(target)]
aa[, x2 := runif(1000)]
# Compute the latent score once; every feature is a transform of it.
aa[, latent := pnorm(Correl * x1 + sqrt(1 - Correl^2) * qnorm(x2))]
aa[, `:=`(
  Independent_Variable1 = log(latent),
  Independent_Variable2 = latent,
  Independent_Variable3 = exp(latent),
  Independent_Variable4 = exp(exp(latent)),
  Independent_Variable5 = sqrt(latent),
  Independent_Variable6 = latent^0.10,
  Independent_Variable7 = latent^0.25,
  Independent_Variable8 = latent^0.75,
  Independent_Variable9 = latent^2,
  Independent_Variable10 = latent^4
)]
# Drop the helper columns so that columns 2:11 are exactly the ten features.
aa[, c("x1", "x2", "latent") := NULL]
# Bucket the target into three classes (cut points 0.33 and 0.66) as a factor.
aa[, target := as.factor(
  ifelse(target < 0.33, "A", ifelse(target < 0.66, "B", "C"))
)]

# Three-row specification table (GBM, DRF, DL), all pointing at the data
# object named "aa", target column 1 and feature columns 2:11.
Construct <- data.table::data.table(
  Targets         = rep("target", 3),
  Distribution    = rep("multinomial", 3),
  Loss            = c("auc", "logloss", "accuracy"),
  Quantile        = rep(NA, 3),
  ModelName       = c("GBM", "DRF", "DL"),
  Algorithm       = c("gbm", "randomForest", "deeplearning"),
  dataName        = rep("aa", 3),
  TargetCol       = rep("1", 3),
  FeatureCols     = rep("2:11", 3),
  CreateDate      = rep(Sys.time(), 3),
  GridTune        = rep(FALSE, 3),
  ExportValidData = rep(TRUE, 3),
  ParDep          = rep(NA, 3),
  PD_Data         = rep("All", 3),
  ThreshType      = rep("f1", 3),
  FSC             = rep(0.001, 3),
  tpProfit        = rep(NA, 3),
  tnProfit        = rep(NA, 3),
  fpProfit        = rep(NA, 3),
  fnProfit        = rep(NA, 3),
  SaveModel       = rep(FALSE, 3),
  SaveModelType   = c("Mojo", "standard", "mojo"),
  PredsAllData    = rep(TRUE, 3),
  TargetEncoding  = rep(NA, 3),
  SupplyData      = rep(FALSE, 3)
)

# Run the modeler on the specification table.
AutoH2OModeler(
  Construct,
  max_memory        = "28G",
  ratios            = 0.75,
  BL_Trees          = 500,
  nthreads          = 5,
  model_path        = NULL,
  MaxRuntimeSeconds = 3600,
  MaxModels         = 30,
  TrainData         = NULL,
  TestData          = NULL,
  SaveToFile        = FALSE,
  ReturnObjects     = TRUE
)
# Regression Example
# Simulate 1000 rows: a uniform target, its normal quantile (x1), independent
# uniform noise (x2), and a latent score mixing the two with weight Correl.
Correl <- 0.85
aa <- data.table::data.table(target = runif(1000))
aa[, x1 := qnorm(target)]
aa[, x2 := runif(1000)]
# Compute the latent score once; every feature is a transform of it.
aa[, latent := pnorm(Correl * x1 + sqrt(1 - Correl^2) * qnorm(x2))]
aa[, `:=`(
  Independent_Variable1 = log(latent),
  Independent_Variable2 = latent,
  Independent_Variable3 = exp(latent),
  Independent_Variable4 = exp(exp(latent)),
  Independent_Variable5 = sqrt(latent),
  Independent_Variable6 = latent^0.10,
  Independent_Variable7 = latent^0.25,
  Independent_Variable8 = latent^0.75,
  Independent_Variable9 = latent^2,
  Independent_Variable10 = latent^4
)]
# Drop the helper columns so that columns 2:11 are exactly the ten features.
# The target stays numeric for this example.
aa[, c("x1", "x2", "latent") := NULL]

# Three-row specification table (GBM, DRF, DL), all pointing at the data
# object named "aa", target column 1 and feature columns 2:11.
Construct <- data.table::data.table(
  Targets         = rep("target", 3),
  Distribution    = rep("gaussian", 3),
  Loss            = c("MSE", "MSE", "Quadratic"),
  Quantile        = rep(NA, 3),
  ModelName       = c("GBM", "DRF", "DL"),
  Algorithm       = c("gbm", "randomForest", "deeplearning"),
  dataName        = rep("aa", 3),
  TargetCol       = rep("1", 3),
  FeatureCols     = rep("2:11", 3),
  CreateDate      = rep(Sys.time(), 3),
  GridTune        = rep(FALSE, 3),
  ExportValidData = rep(TRUE, 3),
  ParDep          = rep(2, 3),
  PD_Data         = rep("All", 3),
  ThreshType      = rep("f1", 3),
  FSC             = rep(0.001, 3),
  tpProfit        = rep(NA, 3),
  tnProfit        = rep(NA, 3),
  fpProfit        = rep(NA, 3),
  fnProfit        = rep(NA, 3),
  SaveModel       = rep(FALSE, 3),
  SaveModelType   = c("Mojo", "standard", "mojo"),
  PredsAllData    = rep(TRUE, 3),
  TargetEncoding  = rep(NA, 3),
  SupplyData      = rep(FALSE, 3)
)

# Run the modeler on the specification table.
AutoH2OModeler(
  Construct,
  max_memory        = "28G",
  ratios            = 0.75,
  BL_Trees          = 500,
  nthreads          = 5,
  model_path        = NULL,
  MaxRuntimeSeconds = 3600,
  MaxModels         = 30,
  TrainData         = NULL,
  TestData          = NULL,
  SaveToFile        = FALSE,
  ReturnObjects     = TRUE
)
# Quantile Regression Example
# Simulate 1000 rows: a uniform target, its normal quantile (x1), independent
# uniform noise (x2), and a latent score mixing the two with weight Correl.
Correl <- 0.85
aa <- data.table::data.table(target = runif(1000))
aa[, x1 := qnorm(target)]
aa[, x2 := runif(1000)]
# Compute the latent score once; every feature is a transform of it.
aa[, latent := pnorm(Correl * x1 + sqrt(1 - Correl^2) * qnorm(x2))]
aa[, `:=`(
  Independent_Variable1 = log(latent),
  Independent_Variable2 = latent,
  Independent_Variable3 = exp(latent),
  Independent_Variable4 = exp(exp(latent)),
  Independent_Variable5 = sqrt(latent),
  Independent_Variable6 = latent^0.10,
  Independent_Variable7 = latent^0.25,
  Independent_Variable8 = latent^0.75,
  Independent_Variable9 = latent^2,
  Independent_Variable10 = latent^4
)]
# Drop the helper columns so that columns 2:11 are exactly the ten features.
aa[, c("x1", "x2", "latent") := NULL]

# Two-row specification table (GBM and DL only), both at the 0.75 quantile.
# Every column must have length 2: Targets was previously rep("target", 3),
# which does not match the other columns and makes data.table() fail (or
# recycle with remainder, depending on the data.table version).
Construct <- data.table::data.table(
  Targets         = rep("target", 2),
  Distribution    = rep("quantile", 2),
  Loss            = c("MAE", "Absolute"),
  Quantile        = rep(0.75, 2),
  ModelName       = c("GBM", "DL"),
  Algorithm       = c("gbm", "deeplearning"),
  dataName        = rep("aa", 2),
  TargetCol       = rep("1", 2),
  FeatureCols     = rep("2:11", 2),
  CreateDate      = rep(Sys.time(), 2),
  GridTune        = rep(FALSE, 2),
  ExportValidData = rep(TRUE, 2),
  ParDep          = rep(4, 2),
  PD_Data         = rep("All", 2),
  ThreshType      = rep("f1", 2),
  FSC             = rep(0.001, 2),
  tpProfit        = rep(NA, 2),
  tnProfit        = rep(NA, 2),
  fpProfit        = rep(NA, 2),
  fnProfit        = rep(NA, 2),
  SaveModel       = rep(FALSE, 2),
  SaveModelType   = c("Mojo", "mojo"),
  PredsAllData    = rep(TRUE, 2),
  TargetEncoding  = rep(NA, 2),
  SupplyData      = rep(FALSE, 2)
)

# Run the modeler on the specification table.
AutoH2OModeler(
  Construct,
  max_memory        = "28G",
  ratios            = 0.75,
  BL_Trees          = 500,
  nthreads          = 5,
  model_path        = NULL,
  MaxRuntimeSeconds = 3600,
  MaxModels         = 30,
  TrainData         = NULL,
  TestData          = NULL,
  SaveToFile        = FALSE,
  ReturnObjects     = TRUE
)
# }
# Run the code above in your browser using DataLab