library(quanteda)
# note, cr_sample_corpus is too small to produce sensical word vectors
# tokenize
toks <- tokens(cr_sample_corpus)
# construct feature-co-occurrence matrix
toks_fcm <- fcm(toks, context = "window", window = 6,
count = "weighted", weights = 1 / (1:6), tri = FALSE)
# you will generally want to estimate a new (corpus-specific)
# GloVe model, we will use cr_glove_subset instead
# see the Quick Start Guide to see a full example.
# estimate transform
local_transform <- compute_transform(x = toks_fcm,
pre_trained = cr_glove_subset, weighting = 'log')
Run the code above in your browser using DataLab