library(flashlighttext)
# Example: build a lexicon trie with flashlighttext.

# Language model object handed to build_trie() below.
lm <- ZeroLM$new()

# Lexicon: each word maps to a list of spellings, where a spelling is a
# character vector of tokens ending in the "|" separator token.
# The last entry has no spellings. R rejects a zero-length argument name
# (`list("" = ...)` fails with "attempt to use zero-length variable name"),
# so it is written as "<unk>".
# NOTE(review): "<unk>" is assumed to be the intended unknown-word
# placeholder -- confirm against the package documentation.
lexicon <- list(
  "hello" = list(c("h", "e", "l", "l", "o", "|")),
  "world" = list(c("w", "o", "r", "l", "d", "|")),
  "<unk>" = list()
)

# Word-level dictionary derived from the lexicon.
word_dict <- create_word_dict(lexicon)

# Token-level dictionary: every distinct token used in the spellings above,
# plus the same placeholder entry used in the lexicon.
token_dict <- Dictionary$new(
  c("h", "e", "l", "o", "|", "w", "r", "d", "<unk>")
)
# Extra token entry appended after construction.
token_dict$add_entry("<1>")

# Build the trie; the separator index is looked up from the token
# dictionary rather than hard-coded.
trie <- build_trie(
  lm = lm,
  token_dict = token_dict,
  lexicon = lexicon,
  word_dict = word_dict,
  separator_idx = token_dict$get_index("|")
)
# Run the code above in your browser using DataLab