# NOT RUN {
#########################################
# Feature Engineering for Model Training
#########################################

# Create fake data to run the feature engineering on
data <- RemixAutoML::FakeDataGenerator(
  Correlation = 0.70,
  N = 50000,
  ID = 2L,
  FactorCount = 2L,
  AddDate = TRUE,
  ZIP = 0L,
  TimeSeries = FALSE,
  ChainLadderData = FALSE,
  Classification = FALSE,
  MultiClass = FALSE
)

# Print number of columns before feature engineering
print(ncol(data))

# Store names of numeric and integer cols.
# is.numeric() is already TRUE for integer columns, so one logical mask picks
# every qualifying column exactly once. Appending a separate is.integer()
# lookup would list each integer column twice. vapply() keeps the mask
# type-stable (always logical, even for zero columns).
Cols <- names(data)[vapply(data, is.numeric, logical(1L))]

# Model Training Feature Engineering
# NOTE(review): File is given as a directory here, while the scoring example
# points at "Standardize.Rdata" inside that directory - presumably this call
# writes that file; confirm against the AutoInteraction documentation.
system.time(
  data <- RemixAutoML::AutoInteraction(
    data = data,
    NumericVars = Cols,
    InteractionDepth = 4,
    Center = TRUE,
    Scale = TRUE,
    SkipCols = NULL,
    Scoring = FALSE,
    File = getwd()
  )
)
# Example timing from a previous run:
# user system elapsed
# 0.30 0.11 0.41

# Print number of columns after feature engineering
print(ncol(data))
########################################
# Feature Engineering for Model Scoring
########################################

# Create fake data to run the feature engineering on
data <- RemixAutoML::FakeDataGenerator(
  Correlation = 0.70,
  N = 1000,
  ID = 2L,
  FactorCount = 2L,
  AddDate = TRUE,
  ZIP = 0L,
  TimeSeries = FALSE,
  ChainLadderData = FALSE,
  Classification = FALSE,
  MultiClass = FALSE
)

# Print number of columns before feature engineering
print(ncol(data))

# Reduce to single row to mock a scoring scenario.
# The explicit row index with a trailing comma keeps the first ROW whether
# data is a data.table or a plain data.frame; a bare data[1L] would select
# the first column on a data.frame.
data <- data[1L, ]

# Names of numeric and integer cols.
# is.numeric() is already TRUE for integer columns, so one logical mask picks
# every qualifying column exactly once (no duplicated integer column names).
ScoringNumericVars <- names(data)[vapply(data, is.numeric, logical(1L))]

# Model Scoring Feature Engineering
# NOTE(review): File points at "Standardize.Rdata" in the working directory -
# presumably the file produced by a training run with Center/Scale enabled;
# confirm against the AutoInteraction documentation.
system.time(
  data <- RemixAutoML::AutoInteraction(
    data = data,
    NumericVars = ScoringNumericVars,
    InteractionDepth = 4,
    Center = TRUE,
    Scale = TRUE,
    SkipCols = NULL,
    Scoring = TRUE,
    File = file.path(getwd(), "Standardize.Rdata")
  )
)
# Example timing from a previous run:
# user system elapsed
# 0.19 0.00 0.19

# Print number of columns after feature engineering
print(ncol(data))
# }
# Run the code above in your browser using DataLab