dfm(x, ...)
## S3 method for class 'character':
dfm(x, verbose = TRUE, toLower = TRUE,
removeNumbers = TRUE, removePunct = TRUE, removeSeparators = TRUE,
removeTwitter = FALSE, stem = FALSE, ignoredFeatures = NULL,
keptFeatures = NULL, matrixType = c("sparse", "dense"),
language = "english", thesaurus = NULL, dictionary = NULL,
valuetype = c("glob", "regex", "fixed"), dictionary_regex = FALSE, ...)
## S3 method for class 'tokenizedTexts':
dfm(x, verbose = TRUE, toLower = TRUE,
stem = FALSE, ignoredFeatures = NULL, keptFeatures = NULL,
matrixType = c("sparse", "dense"), language = "english",
thesaurus = NULL, dictionary = NULL, valuetype = c("glob", "regex",
"fixed"), dictionary_regex = FALSE, ...)
## S3 method for class 'corpus':
dfm(x, verbose = TRUE, groups = NULL, ...)
is.dfm(x)
as.dfm(x)
\code{ngrams} and \code{concatenator} for tokenizing multi-token sequences
TRUE
if \code{FALSE}, preserve \code{#} and \code{@} characters, see \code{tokenize}
if \code{TRUE}, stem words