# NOT RUN {
library(recipes)
library(modeldata)
library(textrecipes)
# Load the example data set used throughout this script.
data(okc_text)

# Specify a recipe over every column of `okc_text`: tokenize `essay0`,
# then apply step_stem() to it with its default settings.
okc_rec <- recipe(~ ., data = okc_text) %>%
  step_tokenize(essay0) %>%
  step_stem(essay0)

# Estimate the recipe on the data it was specified with.
okc_obj <- prep(okc_rec)

# First two rows of the processed `essay0` column.
slice(juice(okc_obj, essay0), 1:2)

# Processed `essay0` value of the second row only.
pull(slice(juice(okc_obj), 2), essay0)

# Summary of step number 2 of the recipe, before and after prep().
tidy(okc_rec, number = 2)
tidy(okc_obj, number = 2)
# Using a custom stemmer. `remove_s` drops the last character of each
# string in `x` when that character is an "s"; other strings are returned
# unchanged.
remove_s <- function(x) {
  gsub(pattern = "s$", replacement = "", x = x)
}
# Rebuild the recipe, this time handing `remove_s` to step_stem() through
# its `custom_stemmer` argument.
okc_rec <- recipe(~ ., data = okc_text) %>%
  step_tokenize(essay0) %>%
  step_stem(essay0, custom_stemmer = remove_s)

# Estimate the updated recipe.
okc_obj <- prep(okc_rec)

# First two rows of the processed `essay0` column.
slice(juice(okc_obj, essay0), 1:2)

# Processed `essay0` value of the second row only.
pull(slice(juice(okc_obj), 2), essay0)
# }
# Run the code above in your browser using DataLab