# Example: encode, decode and tokenize text with a BPEembed model.
# Both model files are looked up inside the installed sentencepiece package.

# Paths to the embedding file and to the BPE model file.
embedding <- system.file(
  "models", "nl.wiki.bpe.vs1000.d25.w2v.bin",
  package = "sentencepiece"
)
model <- system.file(
  "models", "nl.wiki.bpe.vs1000.model",
  package = "sentencepiece"
)

# Build the encoder from the model file and the embedding file.
encoder <- BPEembed(model, embedding)

# --- Encode ----
txt <- c(
  "De eigendomsoverdracht aan de deelstaten is ingewikkeld.",
  "On est d'accord sur le prix de la biere?"
)
values <- predict(encoder, txt, type = "encode")
str(values)
values

# --- Decode ----
# Decode from the row names of the first element of `values` only
# (presumably the subword tokens of the first text -- confirm in package docs).
txt <- rownames(values[[1]])
predict(encoder, txt, type = "decode")

# Decode from the row names of every element of `values` at once.
txt <- lapply(values, rownames)
predict(encoder, txt, type = "decode")

# --- Tokenize ----
txt <- c(
  "De eigendomsoverdracht aan de deelstaten is ingewikkeld.",
  "On est d'accord sur le prix de la biere?"
)
# The last argument is deliberately positional: `type` is already taken by
# predict(), so the output form ("subwords" or "ids") travels unnamed.
# NOTE(review): presumably forwarded through `...` to the underlying
# tokenizer -- confirm against the package documentation.
predict(encoder, txt, type = "tokenize", "subwords")
predict(encoder, txt, type = "tokenize", "ids")
# Run the code above in your browser using DataLab