library(quanteda)
# NOTE(review): tokens_context(), dem(), dem_sample() and the cr_* objects are
# not attached by anything visible in this script -- presumably they come from
# the conText package, which would need to be loaded first; confirm.

# Step 1: tokenize the sample corpus.
toks <- tokens(cr_sample_corpus)

# Step 2: build a tokenized corpus of the contexts surrounding the target
# term (anything matching the "immigr*" pattern).
immig_toks <- tokens_context(
  x = toks,
  pattern = "immigr*",
  window = 6L
)

# Step 3: turn the context tokens into a document-feature matrix.
immig_dfm <- dfm(immig_toks)

# Step 4: construct the document-embedding matrix from the dfm, using the
# pre-trained embeddings and the supplied transformation matrix.
immig_dem <- dem(
  immig_dfm,
  pre_trained = cr_glove_subset,
  transform = TRUE,
  transform_matrix = cr_transform,
  verbose = FALSE
)

# Step 5: draw a random sample (size = 10, with replacement), using the
# "party" variable as the `by` argument, given here by name.
immig_wv_party <- dem_sample(
  immig_dem,
  size = 10,
  replace = TRUE,
  by = "party"
)

# Alternative form of the same call: pass the `by` values directly, taken
# from the docvars slot of the embedding matrix. This reassigns
# immig_wv_party, replacing the sample drawn above.
immig_wv_party <- dem_sample(
  immig_dem,
  size = 10,
  replace = TRUE,
  by = immig_dem@docvars$party
)
# Run the code above in your browser using DataLab