toy_corpus <- "Once upon a time there was a tiny toy corpus.
It consisted of three sentences. And it lived happily ever after."
flist <- freqlist(toy_corpus, re_token_splitter = "\\W+", as_text = TRUE)
print(flist, n = 1000)
(sel_types <- as_types(c("happily", "lived", "once")))
keep_types(flist, sel_types)
tks <- tokenize(toy_corpus, re_token_splitter = "\\W+")
print(tks, n = 1000)
tks[3:12] # idx is relative to selection
head(tks) # idx is relative to selection
tail(tks) # idx is relative to selection
Run the code above in your browser using DataLab