# NOT RUN {
# XGBoost ----

# Directory handed to the model-building and scoring calls below.
# NOTE: this is a machine-specific location. Replace it with a directory that
# exists on your system before running: it is passed through normalizePath()
# and the model call below is run with SaveModelObjects = TRUE.
Path <- "C:/Users/aantico/Documents/Package/GUI_Package"

# Create hurdle data with correlated features
data <- RemixAutoML::FakeDataGenerator(
  Correlation = 0.70,
  N = 25000,
  ID = 3,
  FactorCount = 2L,
  AddDate = TRUE,
  ZIP = 1,
  Classification = FALSE,
  MultiClass = FALSE
)

# Define features: every column of `data` except the target ("Adrian"), the
# three ID columns and "DateTime".
# Base `%in%` is used instead of data.table's `%chin%`: data.table is never
# attached in this script, so `%chin%` would not be found. For character
# vectors both operators return the same logical mask.
NonFeatureCols <- c("Adrian", "IDcol_1", "IDcol_2", "IDcol_3", "DateTime")
Features <- names(data)[!names(data) %in% NonFeatureCols]
# Build hurdle model
# Every argument is passed by name (including `data`) so the call does not
# depend on the position of any formal in the function signature.
Output <- RemixAutoML::AutoXGBoostHurdleModel(

  # Operationalization args
  TreeMethod = "hist",
  TrainOnFull = FALSE,
  PassInGrid = NULL,

  # Metadata args
  # Leave two cores free, but never request fewer than one thread.
  NThreads = max(1L, parallel::detectCores() - 2L),
  ModelID = "ModelTest",
  Paths = normalizePath(Path),
  MetaDataPaths = NULL,
  ReturnModelObjects = TRUE,

  # Data args
  data = data,
  ValidationData = NULL,
  TestData = NULL,
  Buckets = c(0),
  TargetColumnName = "Adrian",
  FeatureColNames = Features,
  IDcols = c("IDcol_1", "IDcol_2", "IDcol_3"),

  # Options
  TransformNumericColumns = NULL,
  SplitRatios = c(0.70, 0.20, 0.10),
  SaveModelObjects = TRUE,
  NumOfParDepPlots = 10L,

  # Grid tuning args
  GridTune = FALSE,
  grid_eval_metric = "accuracy",
  MaxModelsInGrid = 1L,
  BaselineComparison = "default",
  MaxRunsWithoutNewWinner = 10L,
  MaxRunMinutes = 60L,

  # Bandit hyperparameters
  Trees = 100L,
  eta = seq(0.05, 0.40, 0.05),
  max_depth = seq(4L, 16L, 2L),

  # Random hyperparameters
  min_child_weight = seq(1.0, 10.0, 1.0),
  subsample = seq(0.55, 1.0, 0.05),
  colsample_bytree = seq(0.55, 1.0, 0.05)
)
# Score the XGBoost hurdle model.
# `Path` and `ModelID` are the same values supplied to the model-building
# call above; `data` is passed as the data set to score.
HurdleScores <- RemixAutoML::AutoHurdleScoring(
  TestData   = data,
  Path       = Path,
  ModelID    = "ModelTest",
  ModelClass = "xgboost",
  ModelList  = NULL,
  ArgList    = NULL,
  Threshold  = NULL
)
# }
# Run the code above in your browser using DataLab