# NOT RUN {
# Walk-through of the core polmineR workflow on the REUTERS demo corpus:
# the same calls are applied first to the whole corpus, then to a partition,
# followed by full-text display and a term-document matrix.

# Attach the package explicitly so the script also runs outside the package's
# own example runner, where polmineR is not already on the search path.
library(polmineR)
use("polmineR") # activate demo corpora included in the package

# Core methods applied to corpus ----
count("REUTERS", query = "oil")
count("REUTERS", query = c("oil", "barrel"))
# cqp = TRUE: the query string is passed as CQP syntax rather than a literal.
count("REUTERS", query = '"Saudi" "Arab.*"', breakdown = TRUE, cqp = TRUE)
dispersion("REUTERS", query = "oil", s_attribute = "id")
kwic("REUTERS", query = "oil")
cooccurrences("REUTERS", query = "oil")

# Core methods applied to partition ----
kuwait <- partition("REUTERS", places = "kuwait", regex = TRUE)
count(kuwait, query = "oil")
dispersion(kuwait, query = "oil", s_attribute = "id")
kwic(kuwait, query = "oil", meta = "id")
cooccurrences(kuwait, query = "oil")

# Go back to full text ----
p <- partition("REUTERS", id = 127)
# Full-text display is only useful in an interactive session; skip it when the
# script is run in batch mode (e.g. R CMD check, Rscript).
if (interactive()) read(p)
h <- html(p)
h_highlighted <- highlight(h, highlight = list(yellow = "oil"))
if (interactive()) h_highlighted

# Generate term document matrix ----
pb <- partition_bundle("REUTERS", s_attribute = "id")
cnt <- count(pb, p_attribute = "word")
tdm <- as.TermDocumentMatrix(cnt, col = "count")
# }
# Run the code above in your browser using DataLab