# NOT RUN {
# Example: building a term-co-occurrence matrix (TCM) with text2vec,
# first single-threaded, then with parallel workers.
data("movie_review")

## Single-threaded version ----
tokens <- movie_review$review %>% tolower %>% word_tokenizer
it <- itoken(tokens)
# Build the vocabulary from the iterator created above.
# (The original example mistakenly called create_vocabulary(jobs);
# `jobs` is only defined later, in the parallel section.)
v <- create_vocabulary(it)
vectorizer <- vocab_vectorizer(v, grow_dtm = FALSE, skip_grams_window = 3L)
# itoken iterators are consumed once, so pass a fresh one to create_tcm()
tcm <- create_tcm(itoken(tokens), vectorizer)

## Parallel version ----
# Set to the number of cores on your machine
N_WORKERS <- 1
splits <- split_into(movie_review$review, N_WORKERS)
jobs <- lapply(splits, itoken, tolower, word_tokenizer)
v <- create_vocabulary(jobs)
vectorizer <- vocab_vectorizer(v, grow_dtm = FALSE, skip_grams_window = 3L)
# Recreate the iterators: create_vocabulary() exhausted the previous ones
jobs <- lapply(splits, itoken, tolower, word_tokenizer)
doParallel::registerDoParallel(N_WORKERS)
tcm <- create_tcm(jobs, vectorizer)
# }
# Run the code above in your browser using DataLab