# NOT RUN {
# Single group variable and xregs ----

# Download the Walmart weekly sales training data from Dropbox ----
data <- data.table::fread(
  'https://www.dropbox.com/s/2str3ek4f4cheqi/walmart_train.csv?dl=1')

# Keep only the Store / Dept combinations that have exactly 143 rows.
# Counts is a temporary helper column and is removed after filtering.
data[, Counts := .N, by = c('Store', 'Dept')]
data <- data[Counts == 143]
data[, Counts := NULL]

# Retain the four modeling columns (this drops the IsHoliday column) ----
keep <- c('Store', 'Dept', 'Date', 'Weekly_Sales')
data <- data[, keep, with = FALSE]

# Restrict to Store 1, after which Dept is the only group column left
data <- data[Store == 1]
data[, Store := NULL]

# Build the external regressor table from a copy taken BEFORE the date
# cutoff below: Dept is renamed to GroupVar and Weekly_Sales to Other
xregs <- data.table::copy(data)
data.table::setnames(
  xregs,
  old = c('Dept', 'Weekly_Sales'),
  new = c('GroupVar', 'Other'))

# Training data stops before 2012-09-28; xregs still holds the later dates
data <- data[as.Date(Date) < as.Date('2012-09-28')]

# Overwrite a random subset (uniform draw below 0.25) of sales with zero
# so the example data contains zeros for testing
data[runif(.N) < 0.25, Weekly_Sales := 0]
# Run the hurdle CARMA forecast on the single-group data prepared above.
# The return value of the call is stored in CatBoostResults.
CatBoostResults <- RemixAutoML::AutoCatBoostHurdleCARMA(

  # Data arguments
  data = data,
  TargetColumnName = 'Weekly_Sales',
  DateColumnName = 'Date',
  HierarchGroups = NULL,
  GroupVariables = 'Dept',
  TimeWeights = 1,
  TimeUnit = 'weeks',
  TimeGroups = c('weeks', 'months'),

  # Production arguments
  TrainOnFull = FALSE,
  SplitRatios = c(1 - 20 / 138, 10 / 138, 10 / 138),
  PartitionType = 'random',
  FC_Periods = 4,
  Timer = TRUE,
  DebugMode = TRUE,

  # Target transformation arguments
  TargetTransformation = TRUE,
  Methods = c(
    'BoxCox', 'Asinh', 'Asin', 'Log',
    'LogPlus1', 'Sqrt', 'Logit', 'YeoJohnson'),
  Difference = FALSE,
  NonNegativePred = FALSE,
  RoundPreds = FALSE,

  # Calendar and holiday feature arguments
  CalendarVariables = c('week', 'wom', 'month', 'quarter'),
  HolidayVariable = c(
    'USPublicHolidays',
    'EasterGroup',
    'ChristmasGroup',
    'OtherEcclesticalFeasts'),
  HolidayLookback = NULL,
  HolidayLags = 1,
  HolidayMovingAverages = 1:2,

  # Time series feature arguments (one entry per TimeGroups value)
  Lags = list(
    'weeks' = seq(2L, 10L, 2L),
    'months' = 1:3),
  MA_Periods = list(
    'weeks' = seq(2L, 10L, 2L),
    'months' = c(2, 3)),
  SD_Periods = NULL,
  Skew_Periods = NULL,
  Kurt_Periods = NULL,
  Quantile_Periods = NULL,
  Quantiles_Selected = c('q5', 'q95'),

  # Bonus feature arguments; xregs is the regressor table built above
  AnomalyDetection = NULL,
  XREGS = xregs,
  FourierTerms = 2,
  TimeTrendVariable = TRUE,
  ZeroPadSeries = NULL,
  DataTruncate = FALSE,

  # ML arguments
  NumOfParDepPlots = 100L,
  EvalMetric = 'RMSE',
  GridTune = FALSE,
  PassInGrid = NULL,
  ModelCount = 5,
  TaskType = 'GPU',
  NumGPU = 1,
  MaxRunsWithoutNewWinner = 50,
  MaxRunMinutes = 60 * 60,
  NTrees = 2500,
  L2_Leaf_Reg = 3.0,
  LearningRate = list(
    'classifier' = seq(0.01, 0.25, 0.01),
    'regression' = seq(0.01, 0.25, 0.01)),
  RandomStrength = 1,
  BorderCount = 254,
  BootStrapType = c('Bayesian', 'Bernoulli', 'Poisson', 'MVS', 'No'),
  Depth = 6
)
# Two group variables and xregs ----

# Download the Walmart weekly sales training data from Dropbox ----
data <- data.table::fread(
  'https://www.dropbox.com/s/2str3ek4f4cheqi/walmart_train.csv?dl=1')

# Keep only the Store / Dept combinations that have exactly 143 rows.
# Counts is a temporary helper column and is removed after filtering.
data[, Counts := .N, by = c('Store', 'Dept')]
data <- data[Counts == 143]
data[, Counts := NULL]

# Floor negative sales values at zero
data[, Weekly_Sales := data.table::fifelse(Weekly_Sales < 0, 0, Weekly_Sales)]

# Retain the four modeling columns (this drops the IsHoliday column) ----
keep <- c('Store', 'Dept', 'Date', 'Weekly_Sales')
data <- data[, keep, with = FALSE]

# Restrict to Stores 1 and 2; Store and Dept both stay as group columns
data <- data[Store %in% c(1, 2)]

# Build the external regressor table from a copy taken BEFORE the date
# cutoff below. Store and Dept are pasted (space separated) into a single
# GroupVar key and then removed as separate columns.
xregs <- data.table::copy(data)
xregs[, GroupVar := paste(Store, Dept, sep = ' ')]
xregs[, c('Store', 'Dept') := NULL]

# The regressor is the sales series renamed to Other, with jitter() noise
# added so it is not an exact copy of the target
data.table::setnames(xregs, old = 'Weekly_Sales', new = 'Other')
xregs[, Other := jitter(Other, factor = 25)]

# Training data stops before 2012-09-28; xregs still holds the later dates
data <- data[as.Date(Date) < as.Date('2012-09-28')]

# Overwrite a random subset (uniform draw below 0.25) of sales with zero
# so the example data contains zeros for testing
data[runif(.N) < 0.25, Weekly_Sales := 0]
# Run the hurdle CARMA forecast on the two-group data prepared above.
# The return value of the call is stored in Output.
# Unlike the single-group example, this call sets TrainOnFull = TRUE,
# passes Threshold explicitly, and supplies several ML arguments as lists
# keyed by 'classifier' and 'regression'.
Output <- RemixAutoML::AutoCatBoostHurdleCARMA(

  # Data arguments
  data = data,
  TargetColumnName = 'Weekly_Sales',
  DateColumnName = 'Date',
  HierarchGroups = NULL,
  GroupVariables = c('Store', 'Dept'),
  TimeWeights = 1,
  TimeUnit = 'weeks',
  TimeGroups = c('weeks', 'months'),

  # Production arguments
  TrainOnFull = TRUE,
  SplitRatios = c(1 - 20 / 138, 10 / 138, 10 / 138),
  PartitionType = 'random',
  FC_Periods = 4,
  Timer = TRUE,
  DebugMode = TRUE,

  # Target transformation arguments
  TargetTransformation = TRUE,
  Methods = c(
    'BoxCox', 'Asinh', 'Asin', 'Log',
    'LogPlus1', 'Sqrt', 'Logit', 'YeoJohnson'),
  Difference = FALSE,
  NonNegativePred = FALSE,
  Threshold = NULL,
  RoundPreds = FALSE,

  # Calendar and holiday feature arguments
  CalendarVariables = c('week', 'wom', 'month', 'quarter'),
  HolidayVariable = c(
    'USPublicHolidays',
    'EasterGroup',
    'ChristmasGroup',
    'OtherEcclesticalFeasts'),
  HolidayLookback = NULL,
  HolidayLags = 1,
  HolidayMovingAverages = 1:2,

  # Time series feature arguments (one entry per TimeGroups value)
  Lags = list(
    'weeks' = seq(2L, 10L, 2L),
    'months' = c(1:3)),
  MA_Periods = list(
    'weeks' = seq(2L, 10L, 2L),
    'months' = c(2, 3)),
  SD_Periods = NULL,
  Skew_Periods = NULL,
  Kurt_Periods = NULL,
  Quantile_Periods = NULL,
  Quantiles_Selected = c('q5', 'q95'),

  # Bonus feature arguments; xregs is the regressor table built above
  AnomalyDetection = NULL,
  XREGS = xregs,
  FourierTerms = 2,
  TimeTrendVariable = TRUE,
  ZeroPadSeries = NULL,
  DataTruncate = FALSE,

  # ML arguments
  NumOfParDepPlots = 100L,
  EvalMetric = 'RMSE',
  GridTune = FALSE,
  PassInGrid = NULL,
  ModelCount = 5,
  TaskType = 'GPU',
  NumGPU = 1,
  MaxRunsWithoutNewWinner = 50,
  MaxRunMinutes = 60 * 60,
  NTrees = list('classifier' = 200, 'regression' = 200),
  Depth = list('classifier' = 9, 'regression' = 9),
  LearningRate = NULL,
  L2_Leaf_Reg = NULL,
  RandomStrength = list('classifier' = 1, 'regression' = 1),
  BorderCount = list('classifier' = 254, 'regression' = 254),
  BootStrapType = 'Bayesian'
)
# }
# Run the code above in your browser using DataLab