# NOT RUN {
# Example: encode categorical columns with RemixAutoML::CategoricalEncoding(),
# first in train mode (Scoring = FALSE), then in score mode (Scoring = TRUE).

# Create fake data with 10 categorical (factor) columns
data <- RemixAutoML::FakeDataGenerator(
  Correlation = 0.85,
  N = 1000000,
  ID = 2L,
  ZIP = 0,
  FactorCount = 10L,
  AddDate = FALSE,
  Classification = TRUE,
  MultiClass = FALSE)

# Take your pick of encoding method
Meth <- c(
  "m_estimator",
  "credibility",
  "woe",
  "target_encoding",
  "poly_encode",
  "backward_difference",
  "helmert")

# Index into Meth of the method passed to the function
MethNum <- 1

# Mock test data with the same factor levels.
# Copied BEFORE the train-mode call, because that call overwrites `data`.
test <- data.table::copy(data)

# Run in Train Mode
data <- RemixAutoML::CategoricalEncoding(
  data = data,
  ML_Type = "classification",
  GroupVariables = paste0("Factor_", 1:10),
  TargetVariable = "Adrian",
  Method = Meth[MethNum],
  SavePath = getwd(),
  Scoring = FALSE,
  ReturnFactorLevelList = FALSE,
  SupplyFactorLevelList = NULL,
  KeepOriginalFactors = FALSE)

# View results
print(data)

# Run in Score Mode by pulling in the csv's
# (presumably the files written to SavePath by the train-mode call -- confirm
# against the CategoricalEncoding documentation).
# Score the untouched copy `test`, not `data`: `data` now holds the train-mode
# output, which was produced with KeepOriginalFactors = FALSE.
test <- RemixAutoML::CategoricalEncoding(
  data = test,
  ML_Type = "classification",
  GroupVariables = paste0("Factor_", 1:10),
  TargetVariable = "Adrian",
  Method = Meth[MethNum],
  SavePath = getwd(),
  Scoring = TRUE,
  ImputeValueScoring = 222,
  ReturnFactorLevelList = FALSE,
  SupplyFactorLevelList = NULL,
  KeepOriginalFactors = FALSE)
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab