# Example: finding and marking frequent sign combinations (n-grams) in a text.
#
# NOTE(review): ngram_frequencies() and mark_ngrams() are not base R; they are
# presumably provided by the sumer package, which must be attached before this
# script is run -- confirm.

# Load the example text of "Enki and the World Order".
# mustWork = TRUE makes a missing file (or missing package) fail right here
# with a clear error instead of passing "" on to readLines().
path <- system.file(
  "extdata", "enki_and_the_world_order.txt",
  package = "sumer", mustWork = TRUE
)
text <- readLines(path, encoding = "UTF-8")

# Show the first lines; head() does not pad with NA when fewer than 10 exist.
cat(head(text, 10), sep = "\n")

# Find combinations that appear at least 6 times in the text
freq <- ngram_frequencies(text, min_freq = 6)
head(freq, 10)

# Mark these combinations in the text
text_marked <- mark_ngrams(text, freq)
cat(head(text_marked, 10), sep = "\n")

# You can enter transliterated text
x <- "kij2-sig unu2 gal d-re-e-ne-ka me-te-ac im-mi-ib-jal2"
mark_ngrams(x, freq)

# Find all occurrences of a pattern in the annotated text.
# The search term is run through mark_ngrams() as well, so that it is written
# the same way as the annotated lines it is compared against.
term <- "IGI.DIB.TU"
(pattern <- mark_ngrams(term, freq))

# fixed = TRUE: match the pattern literally, not as a regular expression.
result <- text_marked[grepl(pattern, text_marked, fixed = TRUE)]
cat(result, sep = "\n")
# Run the code above in your browser using DataLab