# NOT RUN {
# Example: list the ten highest-weighted TF-IDF terms in the first row of the
# matrix built from the `obama` data, first over all tokens and then over
# tokens tagged "NN" or "NNS" only.

# library() stops with an error when dplyr is unavailable; require() would
# merely return FALSE and let the script fail later at `%>%` / `filter`.
library(dplyr)
data(obama)

# Top words in the first Obama S.O.T.U., using all tokens
tfidf <- cnlp_get_tfidf(obama)
# head() caps the selection at the number of available columns, so a
# vocabulary with fewer than ten terms does not yield NA indices.
vids <- head(order(tfidf[1, ], decreasing = TRUE), 10)
colnames(tfidf)[vids]

# Top words, only using non-proper nouns
tfidf <- cnlp_get_token(obama) %>%
  filter(pos %in% c("NN", "NNS")) %>%
  cnlp_get_tfidf()
vids <- head(order(tfidf[1, ], decreasing = TRUE), 10)
colnames(tfidf)[vids]
# }
# Run the code above in your browser using DataLab