# NOT RUN {
# Toy corpus: a named list holding two character vectors of tokens.
# Document `a` is the sentence once, starting with a capitalised "The";
# document `b` is the all-lowercase sentence repeated twice.
corpus <- list(
  a = c("The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"),
  b = rep(
    c("the", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"),
    times = 2
  )
)

# Each top-level call below auto-prints its result: first with default
# arguments, then with `ngram = 3`, then with `ngram = c(2, 3)`.
vocab(corpus)
vocab(corpus, ngram = 3)
vocab(corpus, ngram = c(2, 3))

# Pass the default vocabulary and a second, one-document corpus to
# update_vocab(), keep the result, and print it.
extra_corpus <- list(extras = c("apples", "oranges"))
voc <- update_vocab(vocab(corpus), extra_corpus)
voc

# Three prune_vocab() calls on the updated vocabulary, varying only the
# arguments: `max_terms`, `term_count_min`, and `max_terms` with `nbuckets`.
prune_vocab(voc, max_terms = 7)
prune_vocab(voc, term_count_min = 2)
prune_vocab(voc, max_terms = 7, nbuckets = 2)
# }
# Run the code above in your browser using DataLab.