## Not run:
# data("movie_review")
#
# # single thread
#
# tokens <- movie_review$review %>% tolower %>% word_tokenizer
# it <- itoken(tokens)
# v <- create_vocabulary(it)
# vectorizer <- vocab_vectorizer(v, grow_dtm = FALSE, skip_grams_window = 3L)
# tcm <- create_tcm(itoken(tokens), vectorizer)
#
# # parallel version
#
# # set to number of cores on your machine
# N_WORKERS <- 1
# splits <- split_into(movie_review$review, N_WORKERS)
# jobs <- lapply(splits, itoken, tolower, word_tokenizer)
# v <- create_vocabulary(jobs)
# vectorizer <- vocab_vectorizer(v, grow_dtm = FALSE, skip_grams_window = 3L)
# jobs <- lapply(splits, itoken, tolower, word_tokenizer)
# doParallel::registerDoParallel(N_WORKERS)
# tcm <- create_tcm(jobs, vectorizer)
# ## End(Not run)
Run the code above in your browser using DataCamp Workspace