library(quanteda)
library(wordmap)
# Example: fit a seeded wordmap model on the bundled UN General Debate corpus.
# Assumes quanteda and wordmap are attached; the corpus and the topic
# dictionary are expected to come from wordmap -- confirm if running standalone.

# Split each document into sentences so that every sentence becomes one unit.
corp <- corpus_reshape(data_corpus_ungd2017, to = "sentences")

# Tokenize, dropping punctuation, then remove English stopwords.
# Plain sequential calls are used so the script does not depend on a pipe
# operator being exported by any attached package.
toks <- tokens(corp, remove_punct = TRUE)
toks <- tokens_remove(toks, stopwords("en"))

# Apply the seed dictionary: matched tokens are replaced by dictionary keys.
toks_dict <- tokens_lookup(toks, data_dictionary_topic)

# Build document-feature matrices for the word features and the labels.
dfmt_feat <- dfm(toks)
dfmt_dict <- dfm(toks_dict)

# Fit the wordmap model using the features and the dictionary-derived labels.
map <- textmodel_wordmap(dfmt_feat, dfmt_dict)

# Inspect the fitted model: coefficients, then predictions for the documents.
coef(map)
predict(map)
# Run the code above in your browser using DataLab