# NOT RUN {
data("movie_review")
tokens = movie_review$review %>%
tolower %>%
word_tokenizer
v = create_vocabulary(itoken(tokens)) %>%
prune_vocabulary(term_count_min = 5, doc_proportion_max = 0.5)
corpus = create_corpus(itoken(tokens), vocab_vectorizer(v, skip_grams_window = 5))
dtm = get_dtm(corpus)
tcm = get_tcm(corpus)
glove_model = GloVe$new(word_vectors_size = 50, vocabulary = v, x_max = 10)
wv = glove_model$fit(tcm, n_iter = 10)
rwmd_model = RWMD(wv)
rwmd_dist = dist2(dtm[1:10, ], dtm[1:100, ], method = rwmd_model, norm = 'none')
# }
Run the code above in your browser using DataLab