if (FALSE) {
  # Example usage of corpusStats(); guarded by `if (FALSE)` so that it is
  # never executed automatically (the calls below query a KorAP server).
  kco <- KorAPConnection()

  # Get statistics for entire corpus (returns S4 object)
  stats <- corpusStats(kco)
  stats@tokens # Access number of tokens

  # Get statistics for newspaper texts from 2017 (as data frame)
  df <- corpusStats(kco, "pubDate in 2017 & textType=/Zeitung.*/", as.df = TRUE)
  df$documents # Access number of documents

  # Compare corpus sizes across years.
  # vapply() is used instead of sapply() so the result is always a numeric
  # vector with one entry per year, even if `years` were empty.
  # NOTE(review): assumes the `tokens` slot holds a single numeric value.
  years <- 2015:2020
  sizes <- vapply(
    years,
    function(y) corpusStats(kco, paste("pubDate in", y))@tokens,
    numeric(1)
  )
  names(sizes) <- years # label each token count with its year
}
# Run the code above in your browser using DataLab