if (FALSE) {
# Load a model from a GGUF file
model <- model_load("path/to/model.gguf")
# Basic tokenization
tokens <- tokenize(model, "Hello, world!")
print(tokens) # e.g., c(15339, 11, 1917, 0)
# Tokenize without special tokens (for model inputs)
raw_tokens <- tokenize(model, "Continue this text", add_special = FALSE)
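# Comparing lengths shows how many special tokens (e.g., a BOS marker)
# were added by default; the exact count depends on the model's tokenizer
with_special <- tokenize(model, "Continue this text")
print(length(with_special) - length(raw_tokens))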
# Tokenize multiple texts
batch_tokens <- tokenize(model, c("First text", "Second text"))
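# The batch result is expected to hold one token sequence per input text;
# the exact return type (e.g., a list of integer vectors) is an assumption
# here, so inspect the structure to confirm
str(batch_tokens)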
# Check how many tokens a specific phrase uses
question_tokens <- tokenize(model, "What is AI?")
print(length(question_tokens)) # Number of tokens in the phrase
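# Token counts are useful for checking a prompt against a context budget;
# the 4096-token limit below is illustrative, not read from the model
prompt_tokens <- tokenize(model, "Summarise this document in one sentence.")
if (length(prompt_tokens) > 4096) {
  warning("Prompt exceeds the assumed 4096-token context window")
}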
}