# NOT RUN {
# categorical example
set.seed(23525)
# we set up our raw training and application data
dTrainC <- data.frame(
x = c('a', 'a', 'a', 'b', 'b', NA, NA),
z = c(1, 2, 3, 4, NA, 6, NA),
y = c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE))
dTestC <- data.frame(
x = c('a', 'b', 'c', NA),
z = c(10, 20, 30, NA))
# we perform a vtreat cross frame experiment
# and unpack the results into treatmentsC
# and dTrainCTreated
unpack[
treatmentsC = treatments,
dTrainCTreated = crossFrame
] <- mkCrossFrameCExperiment(
dframe = dTrainC,
varlist = setdiff(colnames(dTrainC), 'y'),
outcomename = 'y',
outcometarget = TRUE,
verbose = FALSE)
# the treatments include a score frame relating new
# derived variables to original columns
treatmentsC$scoreFrame[, c('origName', 'varName', 'code', 'rsq', 'sig', 'extraModelDegrees')] %.>%
print(.)
# the treated frame is a "cross frame" which
# is a transform of the training data built
# as if the treatment were learned on a different
# disjoint training set to avoid nested model
# bias and over-fit.
dTrainCTreated %.>%
head(.) %.>%
print(.)
# Any future application data is prepared with
# the prepare method.
dTestCTreated <- prepare(treatmentsC, dTestC, pruneSig=NULL)
dTestCTreated %.>%
head(.) %.>%
print(.)
# }
Run the code above in your browser using DataCamp Workspace