# NOT RUN {
# Example: fit an unsupervised and a seeded (semi-supervised) LDA topic model
# on a sample of movie reviews.
#
# quanteda supplies the corpus/tokens/dfm tooling; seededlda supplies
# textmodel_lda() and textmodel_seededlda(). library() is used rather than
# require() so that a missing package stops the script immediately.
library(quanteda)
library(seededlda)

data("data_corpus_moviereviews", package = "quanteda.textmodels")

# Work on the first 500 reviews only.
corp <- head(data_corpus_moviereviews, 500)

# Tokenize explicitly before building the document-feature matrix, and spell
# the argument name out in full (`remove_numbers`) instead of relying on
# partial matching.
toks <- tokens(corp, remove_numbers = TRUE)

# Build the dfm step by step (no pipe operator needed):
#   1. drop English stopwords and features shorter than two characters;
#   2. keep features at or above the 0.90 term-frequency quantile that occur
#      in at most 10% of the documents.
dfmt <- dfm(toks)
dfmt <- dfm_remove(dfmt, stopwords("en"), min_nchar = 2)
dfmt <- dfm_trim(
  dfmt,
  min_termfreq = 0.90, termfreq_type = "quantile",
  max_docfreq = 0.1, docfreq_type = "prop"
)

# unsupervised LDA with six topics
lda <- textmodel_lda(dfmt, k = 6)
terms(lda)

# semisupervised LDA: each dictionary key names a topic and its values are
# the seed words (glob patterns such as "monster*" are allowed)
dict <- dictionary(list(
  people = c("family", "couple", "kids"),
  space = c("alien*", "planet", "space"),
  monster = c("monster*", "ghost*", "zombie*"),
  war = c("war", "soldier*", "tanks"),
  crime = c("crime*", "murder", "killer")
))

# residual = TRUE requests an extra topic in addition to the seeded ones
slda <- textmodel_seededlda(dfmt, dictionary = dict, residual = TRUE)
terms(slda)
# }
# Run the code above in your browser using DataLab