##
## Example: build a BPEembed encoder from model files stored on disk
##

## Locate the "models" directory inside the installed sentencepiece package
## and derive the paths of the two files the encoder is built from.
model_dir <- system.file("models", package = "sentencepiece")
bpe_model_path <- file.path(model_dir, "nl.wiki.bpe.vs1000.model")
w2v_path <- file.path(model_dir, "nl.wiki.bpe.vs1000.d25.w2v.bin")
bpe_encoder <- BPEembed(bpe_model_path, w2v_path)

## Tokenise two sentences with the sentencepiece model and embed the result
sentences <- c(
  "De eigendomsoverdracht aan de deelstaten is ingewikkeld.",
  "On est d'accord sur le prix de la biere?"
)
embedded <- predict(bpe_encoder, sentences, type = "encode")
str(embedded)
embedded

## Decode using the row names of the first encoded element only
## (presumably the subword pieces of the first sentence -- confirm against
## the BPEembed documentation)
first_rownames <- rownames(embedded[[1]])
predict(bpe_encoder, first_rownames, type = "decode")

## Decode using the row names of every encoded element, passed as a list
rownames_per_text <- lapply(embedded, rownames)
predict(bpe_encoder, rownames_per_text, type = "decode")
## Run the code above in your browser using DataLab