AutoCARMA_QA
AutoCARMA_QA(
ModelName = "catboost",
FeatureGridTune = FALSE,
MaxMem_ = "28G",
NThreads_ = max(1, parallel::detectCores() - 2),
TreeMethod__ = "hist",
TestRows = "ALL",
DataPath = "C:/Users/Bizon/Documents/GitHub/QA_DataSets",
dataForecastX = "CARMA-WALMART-2GroupVars_FC.csv",
dataX = "OneGroup-Eval-Walmart.csv",
XREGSX = "CARMA-WALMART-2GroupVars-XREGS_2Var.csv",
TargetColumnName_ = "Weekly_Sales",
DateColumnName_ = "Date",
HierarchGroups_ = c("Store", "Dept"),
GroupVariables_ = c("Store", "Dept"),
TimeUnit_ = "week",
TimeGroups_ = c("week", "month", "quarter"),
ZeroPadSeries_ = NULL,
DataTruncate_ = FALSE,
SplitRatios_ = c(1 - 3/143, 3/143),
PartitionType_ = "timeseries",
TrainOnFull_ = FALSE,
FC_Periods_ = 4,
EvalMetric_ = "RMSE",
GridTune_ = FALSE,
GridEvalMetric_ = "mae",
ModelCount_ = 5,
TaskType_ = "GPU",
Timer_ = TRUE,
TargetTransformation_ = TRUE,
Difference_ = TRUE,
CalendarVariables_ = TRUE,
HolidayVariable_ = TRUE,
HolidayLags_ = 1,
HolidayMovingAverages_ = 1:2,
Lags_ = c(1:5),
MA_Periods_ = c(1:5),
SD_Periods_ = c(2:5),
Skew_Periods_ = c(3:5),
Kurt_Periods_ = c(4:5),
Quantile_Periods_ = c(3:5),
Quantiles_Selected_ = c("q5", "q95"),
FourierTerms_ = 4,
TimeTrendVariable_ = TRUE,
NTrees_ = 150,
DebugMode_ = TRUE,
OptionsWarn = 1
)
Choose from 'catboost', 'h2odrf', 'h2ogbm', 'h2oglm', 'h2oautoml', 'xgboost'
Set to TRUE to run only in evaluation mode, as opposed to TrainOnFull mode, which does not return model performance measures
= "28G"
= parallel::detectCores() - 2
= "hist" or "gpu_hist" for xgboost carma
= "ALL" to run all tests (see example for all tests), or a numeric vector with the row numbers from the test list (see example)
In quotes, provide the file path to where your data is stored
= "RawDataXREG.csv" Use quotes. # Be aware that grouped data and using XREGS_ requires that your joining group variables have the same name. MUST SUPPLY VALUE
= "RawDataXREG.csv" Use quotes. # Be aware that grouped data and using XREGS_ requires that your joining group variables have the same name. MUST SUPPLY VALUE
= "XREG.csv" Use quotes. # data.table with ONLY 3 COLUMN TYPES: 1 - GroupVariables_ and DateColumnName_ join-by variables, with join column names and data types matching those in data_; and 2 - features, which need to exist for all historical periods matching data_ along with a sufficient amount of data to cover the forecast period as defined by FC_Periods_. OR supply NULL to arg.
= "Weekly_Sales" # WalmartData target column name.
= "Date" # Name of data_ date column name.
= c("Store","Dept") # NULL otherwise
= c("Store","Dept") #
= "week" # Choices include "1min", "5min", "10min", "15min", "30min", "hour", "day", "week", "month", "quarter", "year"
= c("weeks","months","quarter") # These will tell GDL to build gdl features along the time aggregation dimension
= c('NULL', 'all', 'inner') ZeroPadSeries choose "all", "inner", or NULL. 'Outer' grows missing dates by group to the largest of all groups size. 'Inner' fills in series by using the group level's own max and min values (versus filling all group levels to the max value of the groups level with the widest time gap)
= FALSE # TRUE will truncate all rows where GDL columns produced a -1 (remove all rows where ID < max(rolling stats)). FALSE otherwise.
= c(1 - 10 / 143, 10 / 143) # If you have GroupVariables_ then base it on number of records in a group, like default
= "timeseries" # always time series for this function. Place holder for other time series options down the road.
= FALSE # Set to TRUE to put in Forecast mode. FALSE to put in Evaluation mode. Forecast mode generates forecasts based on a model built using all of data_, and no evaluation metrics are collected when set to TRUE. Evaluation mode will build a forecast for your validation periods, as specified in SplitRatios_, and collect the holdout metrics and other evaluation objects, but will not produce a future forecast beyond the max date of data_.
= 4 # Self explanatory
= "RMSE" # "RMSE" only with catboost 17.5
= FALSE # NEEDS TO BE UPDATED ONCE BANDIT GRID TUNING WORKS.
= "mae" # 'poisson', 'mae', 'mape', 'mse', 'msle', 'kl', 'cs', 'r2'. If metric computation fails then no output is generated in final metric evaluation data.table
= 5 # NEEDS TO BE UPDATED ONCE BANDIT GRID TUNING WORKS.
= "GPU" # Set to "CPU" to train on CPU versus GPU. Must supply a value.
= TRUE # Print out the forecast step the function is currently working on. If it errors on the first run scoring the model then it is likely a very different error than if it has printed "Forecasting 1:"
= TRUE # Set to TRUE to have every available numeric transformation compete for best normalization fit to normal distribution
= TRUE # The I in ARIMA. Works for single series and grouped series a.k.a. panel data.
= TRUE # This TURNS ON procedure to create numeric calendar variables that your TimeUnit_ directs. FALSE otherwise.
= TRUE # This TURNS ON procedure to create a numeric holiday count variable. FALSE otherwise.
= c(1:2) # Supply a numeric vector of lag periods
= c(1:2) # Supply a numeric vector of Moving Average periods
= c(1:5) # Numeric vector of lag periods
= c(1:5) # Numeric vector of moving average periods
= c(2:5) # Numeric vector of rolling standard deviation periods
= c(3:5) # Numeric vector of rolling skewness periods
= c(4:5) # Numeric vector of rolling kurtosis periods
= c(3:5) # Numeric vector of rolling quantile periods
= c("q5","q95") # Select the quantiles you want calculated. "q5", "q10", ..., "q95".
= 2 # (TECHNICALLY FOURIER PAIRS) Hierarchy grouping (full group variable interaction set) is run by default (MAKE INTO OPTION). Uses parallelization to loop through the unique set of all GroupVariables levels and computes Fourier terms as if each group level were a single series; this is done for all groups, in parallel.
= TRUE # Set to TRUE to have a sequence created from 1 to nrow by group or single series
= 150 # Number of trees to have trained. Can be 10000 or more depending on group level size.
= TRUE # When TRUE it will print every comment section header line. When it crashes, you can get a print out of the last N steps that were run, depending on the print max limit.
Set to 1 to print warnings immediately to screen versus after a function finishes; 2 to kill processes if a warning occurs. See options(warn = )