Count n-grams, either of words, or of characters.
ngrams(.Object, ...)
# S4 method for partition
ngrams(.Object, n = 2, pAttribute = "word",
char = NULL, progress = FALSE, ...)
# S4 method for partitionBundle
ngrams(.Object, n = 2, char = NULL,
pAttribute = "word", mc = FALSE, progress = FALSE, ...)
.Object: object of class partition (or partitionBundle)
...: further parameters
n: number of tokens/characters per n-gram
pAttribute: the p-attribute(s) to use (more than one may be supplied)
char: if NULL, tokens will be counted; otherwise characters are counted, keeping only those provided by a character vector
progress: logical
mc: logical, whether to use multicore; passed into the call to blapply (see the respective documentation)
# NOT RUN {
# Example: counting n-grams on a partition of the GERMAPARLMINI corpus.
# NOTE(review): use("polmineR") is presumably what makes GERMAPARLMINI
# available to the session -- confirm against ?use.
use("polmineR")
P <- partition("GERMAPARLMINI", date = "2009-10-27")

# Simple case: bigrams over the "word" p-attribute, counting tokens
# (char = NULL) rather than characters.
ngramObject <- ngrams(P, n = 2, pAttribute = "word", char = NULL)

# A more complex scenario: get the most frequent ADJA/NN combinations.
# Two p-attributes are requested so that the per-position columns
# "1_pos" and "2_pos" can be used for filtering.
ngramObject <- ngrams(P, n = 2, pAttribute = c("word", "pos"), char = NULL)
ngramObject2 <- subset(
  ngramObject,
  ngramObject[["1_pos"]] == "ADJA" & ngramObject[["2_pos"]] == "NN"
)

# Drop both part-of-speech columns by reference in a single call.
# `with = FALSE` must not be combined with `:=` (deprecated since
# data.table 1.9.4); a character vector on the left-hand side is enough.
ngramObject2@stat[, c("1_pos", "2_pos") := NULL]

ngramObject3 <- sort(ngramObject2, by = "count")
head(ngramObject3)
# }
Run the code above in your browser using DataLab