# Example input: a character vector holding two short documents.
doc <- c(
  "first second",
  "bla, bla, blaa"
)

# Tokenize every document into words.
word_tokenizer(doc)

# Faster, but far less general alternative: split each document on a
# fixed single whitespace symbol.
regexp_tokenizer(
  doc,
  pattern = stringr::fixed(" ")
)
Run the code above in your browser using DataLab