# Example: train, save, reload and inspect a paragraph2vec model on the
# French-language rows of the belgium_parliament dataset that ships with the
# tokenizers.bpe package.
#
# NOTE(review): paragraph2vec(), txt_count_words(), write.paragraph2vec() and
# read.paragraph2vec() are neither defined nor attached in this script;
# presumably they come from the doc2vec package, which must already be
# attached before running this -- confirm.
#
# The whole example is skipped when the optional tokenizers.bpe package is not
# installed. requireNamespace() is the availability check for an optional
# dependency; library() then attaches it exactly once.
if (requireNamespace("tokenizers.bpe", quietly = TRUE)) {
  library(tokenizers.bpe)

  # Load the dataset and keep only French rows with non-empty text whose
  # txt_count_words() value is below 1000.
  data(belgium_parliament, package = "tokenizers.bpe")
  x <- subset(belgium_parliament, language %in% "french")
  x <- subset(x, nchar(text) > 0 & txt_count_words(text) < 1000)

  # Full-size training runs, one per model type. Each call reassigns `model`,
  # and the small model trained further down replaces both of them.
  # \donttest{
  model <- paragraph2vec(x = x, type = "PV-DM", dim = 100, iter = 20)
  model <- paragraph2vec(x = x, type = "PV-DBOW", dim = 100, iter = 20)
  # }

  # Small model on the first 5 rows only; this is the model that is actually
  # written to disk and read back below.
  # \dontshow{
  model <- paragraph2vec(
    x = head(x, 5),
    type = "PV-DM", dim = 5, iter = 1, min_count = 0
  )
  # }

  # The illustrative file name is immediately replaced by a temporary file
  # path, so nothing is written to the working directory.
  path <- "mymodel.bin"
  # \dontshow{
  path <- tempfile(pattern = "paragraph2vec", fileext = ".bin")
  # }

  # Round-trip the model through a file.
  write.paragraph2vec(model, file = path)
  model <- read.paragraph2vec(file = path)

  # Query the vocabulary and the embedding matrix for docs and then for words;
  # the second assignment of each pair overwrites the first.
  vocab <- summary(model, type = "vocabulary", which = "docs")
  vocab <- summary(model, type = "vocabulary", which = "words")
  embedding <- as.matrix(model, which = "docs")
  embedding <- as.matrix(model, which = "words")

  # Remove the temporary model file.
  # \dontshow{
  file.remove(path)
  # }
} # End of main if statement running only if the required packages are installed
# Run the code above in your browser using DataLab