model <- system.file(package = "sentencepiece", "models", "nl-fr-dekamer.model")
model <- sentencepiece_load_model(file = model)
txt <- c("De eigendomsoverdracht aan de deelstaten is ingewikkeld.",
"On est d'accord sur le prix de la biere?")
sentencepiece_encode(model, x = txt, type = "subwords")
sentencepiece_encode(model, x = txt, type = "ids")
## Examples using subword regularisation
model <- system.file(package = "sentencepiece", "models", "nl-fr-dekamer-unigram.model")
model <- sentencepiece_load_model(file = model)
txt <- c("Goed zo",
"On est d'accord")
sentencepiece_encode(model, x = txt, type = "subwords", nbest = 4)
sentencepiece_encode(model, x = txt, type = "ids", nbest = 4)
sentencepiece_encode(model, x = txt, type = "subwords", nbest = 2)
sentencepiece_encode(model, x = txt, type = "ids", nbest = 2)
sentencepiece_encode(model, x = txt, type = "subwords", nbest = 1)
sentencepiece_encode(model, x = txt, type = "ids", nbest = 1)
sentencepiece_encode(model, x = txt, type = "subwords", nbest = 4, alpha = 0.1)
sentencepiece_encode(model, x = txt, type = "ids", nbest = 4, alpha = 0.1)
sentencepiece_encode(model, x = txt, type = "subwords", nbest = -1, alpha = 0.1)
sentencepiece_encode(model, x = txt, type = "ids", nbest = -1, alpha = 0.1)
sentencepiece_encode(model, x = txt, type = "subwords", nbest = -1, alpha = 0)
sentencepiece_encode(model, x = txt, type = "ids", nbest = -1, alpha = 0)
Run the code above in your browser using DataLab