# Example: train paragraph2vec models on the French-language rows of the
# belgium_parliament dataset and extract document and word embeddings.
#
# The whole example runs only when the optional packages are installed.
# requireNamespace() tests availability without the warning that require()
# emits for a missing package. doc2vec is checked as well because
# paragraph2vec() and txt_count_words() are called below and nothing else in
# this snippet loads them.
if (requireNamespace("tokenizers.bpe", quietly = TRUE) &&
    requireNamespace("doc2vec", quietly = TRUE)) {
  library(tokenizers.bpe)
  data(belgium_parliament, package = "tokenizers.bpe")

  # Keep the French rows whose text is non-empty and for which
  # txt_count_words() reports fewer than 1000 words.
  x <- subset(belgium_parliament, language %in% "french")
  x <- subset(x, nchar(text) > 0 & doc2vec::txt_count_words(text) < 1000)

  # Small, fast PV-DM model (15 dimensions, 5 iterations).
  model <- doc2vec::paragraph2vec(x = x, type = "PV-DM", dim = 15, iter = 5)

  # \donttest{
  # Larger PV-DBOW model (100 dimensions, 20 iterations). It replaces `model`,
  # so every embedding extracted below comes from this fit. The \donttest
  # markers are kept from the original documentation example.
  model <- doc2vec::paragraph2vec(x = x, type = "PV-DBOW", dim = 100, iter = 20)
  # }

  # Each call overwrites `embedding`; the calls illustrate the available
  # combinations of `which` and `normalize`.
  embedding <- as.matrix(model, which = "docs")
  embedding <- as.matrix(model, which = "words")
  embedding <- as.matrix(model, which = "docs", normalize = FALSE)
  embedding <- as.matrix(model, which = "words", normalize = FALSE)
} # End of main if statement running only if the required packages are installed
# Run the code above in your browser using DataLab