# \donttest{
# Example: fit an unsupervised LDA and a seeded (semisupervised) LDA on movie
# reviews. library() is used rather than require() so that a missing package
# stops the script immediately instead of failing later at the first call.
library(seededlda)
library(quanteda)

# Keep only the first 500 documents of the corpus.
corp <- head(data_corpus_moviereviews, 500)

# Tokenize, dropping punctuation, symbols and numbers.
toks <- tokens(
  corp,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_numbers = TRUE
)

# Build the document-feature matrix, then drop English stopwords and
# one-character features, and trim by term-frequency quantile and by the
# proportion of documents a feature occurs in.
dfmt <- dfm(toks) %>%
  dfm_remove(stopwords("en"), min_nchar = 2) %>%
  dfm_trim(
    min_termfreq = 0.90, termfreq_type = "quantile",
    max_docfreq = 0.1, docfreq_type = "prop"
  )

# unsupervised LDA
# Fit 6 topics on the first 450 documents.
lda <- textmodel_lda(head(dfmt, 450), 6)
terms(lda)
topics(lda)

# Apply the fitted model to the remaining 50 documents (new documents).
lda2 <- textmodel_lda(tail(dfmt, 50), model = lda)
topics(lda2)

# semisupervised LDA
# Each dictionary key names a topic; its values are the seed word patterns.
dict <- dictionary(list(
  people = c("family", "couple", "kids"),
  space = c("alien", "planet", "space"),
  monster = c("monster*", "ghost*", "zombie*"),
  war = c("war", "soldier*", "tanks"),
  crime = c("crime*", "murder", "killer")
))
slda <- textmodel_seededlda(dfmt, dict, residual = TRUE, min_termfreq = 10)
terms(slda)
topics(slda)
# }
# Run the code above in your browser using DataLab