## Not run:
# library(readr)
# temp <- tempfile()
# download.file('http://mattmahoney.net/dc/text8.zip', temp)
# text8 <- read_lines(unz(temp, "text8"))
# it <- itoken(text8, preprocess_function = identity,
# tokenizer = function(x) strsplit(x, " ", TRUE))
# vocab <- vocabulary(it) %>%
# prune_vocabulary(term_count_min = 5)
#
# it <- itoken(text8, preprocess_function = identity,
# tokenizer = function(x) strsplit(x, " ", TRUE))
#
# tcm <- create_tcm(it, vocab_vectorizer(vocab, grow_dtm = FALSE, skip_grams_window = 5L))
#
# # Optionally, use the following command to set the number of threads manually;
# # by default, glove uses all available CPU cores.
# # RcppParallel::setThreadOptions(numThreads = 8)
# fit <- glove(tcm = tcm, shuffle_seed = 1L, word_vectors_size = 50,
# x_max = 10, learning_rate = 0.2,
# num_iters = 50, grain_size = 1e5,
# max_cost = 100, convergence_threshold = 0.005)
# word_vectors <- fit$word_vectors[[1]] + fit$word_vectors[[2]]
# rownames(word_vectors) <- rownames(tcm)
# qlst <- prepare_analogy_questions('./questions-words.txt', rownames(word_vectors))
# res <- check_analogy_accuracy(questions_lst = qlst, m_word_vectors = word_vectors)
## End(Not run)
# Run the code above in your browser using DataCamp Workspace