# NOT RUN {
# Two grouping variables plus external regressors (xregs)
# Read the Walmart training data hosted on Dropbox
data <- data.table::fread(
  input = 'https://www.dropbox.com/s/2str3ek4f4cheqi/walmart_train.csv?dl=1')
# Discard rows whose weekly sales are exactly zero
data <- data[Weekly_Sales != 0]
# Keep only Store / Dept combinations with exactly 143 rows (a full series):
# count rows per group, filter on that count, then drop the helper column
data[, Counts := .N, by = c('Store', 'Dept')]
data <- data[Counts == 143]
data[, Counts := NULL]
# Retain only the columns listed in `keep` (IsHoliday is dropped) ----
keep <- c('Store', 'Dept', 'Date', 'Weekly_Sales')
data <- data[, keep, with = FALSE]
# Limit the example to stores 1 and 2
data <- data[Store %in% c(1, 2)]
# Build the xregs table from a copy so that `data` itself is not modified
xregs <- data.table::copy(data)
# Single group key: Store and Dept pasted together, separated by a space
xregs[, GroupVar := paste(Store, Dept, sep = ' ')]
xregs[, `:=`(Store = NULL, Dept = NULL)]
# The regressor column is Weekly_Sales renamed to Other, then jittered
data.table::setnames(xregs, old = 'Weekly_Sales', new = 'Other')
xregs[, Other := jitter(Other, factor = 25)]
# Restrict the modelling data to dates before 2012-09-28
# (xregs was copied before this filter, so it is not truncated here)
data <- data[as.Date(Date) < as.Date('2012-09-28')]
# Vector CARMA testing: add a second target as 75% of Weekly_Sales
data[, Weekly_Profit := 0.75 * Weekly_Sales]
# Fit the vector CARMA model and produce the forecast; every argument is
# passed by name, grouped below by theme
CatBoostResults <- RemixAutoML::AutoCatBoostVectorCARMA(

  # Data arguments ----
  data = data,
  TargetColumnName = c('Weekly_Sales', 'Weekly_Profit'),
  DateColumnName = 'Date',
  HierarchGroups = NULL,
  GroupVariables = c('Store', 'Dept'),
  TimeWeights = 1,
  TimeUnit = 'weeks',
  TimeGroups = c('weeks', 'months'),

  # Production arguments ----
  TaskType = 'GPU',
  NumGPU = 1,
  TrainOnFull = TRUE,
  SplitRatios = c(1 - 10 / 138, 10 / 138),
  PartitionType = 'random',
  FC_Periods = 4,
  Timer = TRUE,
  DebugMode = TRUE,

  # Target transformation arguments ----
  TargetTransformation = TRUE,
  Methods = c(
    'BoxCox', 'Asinh', 'Asin', 'Log',
    'LogPlus1', 'Logit', 'YeoJohnson'
  ),
  Difference = FALSE,
  NonNegativePred = FALSE,
  RoundPreds = FALSE,

  # Calendar and holiday feature arguments ----
  CalendarVariables = c('week', 'month', 'quarter'),
  HolidayVariable = c(
    'USPublicHolidays',
    'EasterGroup',
    'ChristmasGroup', 'OtherEcclesticalFeasts'
  ),
  HolidayLookback = NULL,
  HolidayLags = 1,
  HolidayMovingAverages = 1:2,

  # Time series feature arguments (one entry per TimeGroups value) ----
  Lags = list(
    'weeks' = seq(2L, 10L, 2L),
    'months' = c(1:3)
  ),
  MA_Periods = list(
    'weeks' = seq(2L, 10L, 2L),
    'months' = c(2, 3)
  ),
  SD_Periods = NULL,
  Skew_Periods = NULL,
  Kurt_Periods = NULL,
  Quantile_Periods = NULL,
  Quantiles_Selected = c('q5', 'q95'),

  # Bonus feature arguments ----
  AnomalyDetection = NULL,
  XREGS = xregs,
  FourierTerms = 2,
  TimeTrendVariable = TRUE,
  ZeroPadSeries = NULL,
  DataTruncate = FALSE,

  # Evaluation arguments ----
  NumOfParDepPlots = 100L,
  EvalMetric = 'MultiRMSE',
  EvalMetricValue = 1.5,
  LossFunction = 'MultiRMSE',
  LossFunctionValue = 1.5,

  # Grid tuning arguments ----
  GridTune = FALSE,
  PassInGrid = NULL,
  ModelCount = 5,
  MaxRunsWithoutNewWinner = 50,
  MaxRunMinutes = 60 * 60,

  # ML arguments ----
  NTrees = 1000,
  Depth = 6,
  LearningRate = NULL,
  L2_Leaf_Reg = NULL,
  RandomStrength = 1,
  BorderCount = 254,
  RSM = 1,
  BootStrapType = 'Bayesian',
  GrowPolicy = 'SymmetricTree',
  Langevin = FALSE,
  DiffusionTemperature = 10000,
  ModelSizeReg = 0.5,
  FeatureBorderType = 'GreedyLogSum',
  SamplingUnit = 'Group',
  SubSample = NULL,
  ScoreFunction = 'Cosine',
  MinDataInLeaf = 1
)
# }
# Run the code above in your browser using DataLab