# NOT RUN {
# recipes must be attached for this example to run in a fresh session: the
# code below calls recipe(), step_dummy_extract(), prep() and bake().
library(recipes)
library(modeldata)
data(tate_text)

# Extract dummy variables from `artist` and `medium`, splitting each value
# on ", ".
dummies <- recipe(~ artist + medium, data = tate_text) %>%
  step_dummy_extract(artist, medium, sep = ", ") %>%
  prep()

# Bake the prepped recipe without supplying new data.
dummy_data <- bake(dummies, new_data = NULL)

# Names of the resulting columns that start with "medium".
dummy_data %>%
  select(starts_with("medium")) %>%
  names()
# Finer-grained splitting: the separator is a regular expression that
# matches ", ", " and " or " on ".
dummies_specific <- recipe(~ medium, data = tate_text) %>%
  step_dummy_extract(medium, sep = "(, )|( and )|( on )") %>%
  prep()

dummy_data_specific <- dummies_specific %>%
  bake(new_data = NULL)

# Names of the resulting columns that start with "medium".
dummy_data_specific %>%
  select(starts_with("medium")) %>%
  names()

# Tidy summaries of step 1 in each of the two prepped recipes.
tidy(dummies, number = 1)
tidy(dummies_specific, number = 1)
# The `pattern` argument takes a regular expression describing the values to
# extract, which helps when a plain separator is not enough. Here it matches
# runs of characters (other than "'" and ",") enclosed in single quotes.
color_examples <- tibble(
  colors = c(
    "['red', 'blue']",
    "['red', 'blue', 'white']",
    "['blue', 'blue', 'blue']"
  )
)

dummies_color <- recipe(~ colors, data = color_examples) %>%
  step_dummy_extract(colors, pattern = "(?<=')[^',]+(?=')") %>%
  prep()

dummies_data_color <- dummies_color %>%
  bake(new_data = NULL)

dummies_data_color
# }
# Run the code above in your browser using DataLab