# Example 1: segmentation with a custom dictionary.
# The dictionary is built from the small set of example texts below and
# then passed as the second argument to the segmentation call.
texts <- c(
  "this is science",
  "science is #fascinatingthing",
  "this is a scientific approach",
  "science is everywhere",
  "the beauty of science"
)
udict <- unigram_dictionary(texts)
unigram_sequence_segmentation("thisisscience", udict)

# Example 2: segmentation with the built-in dictionary
# (English, only lowercase); no dictionary argument is supplied.
unigram_sequence_segmentation("thisisscience")
unigram_sequence_segmentation("thisisscience2024")
unigram_sequence_segmentation(
  "thisisscience2024",
  simplify = FALSE,
  omit_zero = FALSE
)
# Run the code above in your browser using DataLab