# Load the movie_review data set and keep its first 100 rows.
data("movie_review")
first_docs <- seq_len(100)
txt <- movie_review[["review"]][first_docs]
ids <- movie_review[["id"]][first_docs]

# Token iterator over the reviews: tolower and word_tokenizer are handed to
# itoken() positionally, and the input is requested in 10 chunks.
it <- itoken(
  txt,
  tolower,
  word_tokenizer,
  chunks_number = 10
)

# Same call again, this time also supplying the review ids through `ids`.
# Note that this overwrites the iterator created just above.
it <- itoken(
  txt,
  tolower,
  word_tokenizer,
  chunks_number = 10,
  ids = ids
)
# Example of stemming tokenizer
# stem_tokenizer <- function(x) {
#   lapply(word_tokenizer(x), SnowballC::wordStem, language = "en")
# }
# Run the code above in your browser using DataLab