# Full dictionary (the special category is excluded).
lma_dict()

# The standard 7 LSM categories, selected by index.
lma_dict(1:7)

# A handful of named categories, without regular expressions.
lma_dict(neg, ppron, aux, as.regex = FALSE)

# The special category, requested explicitly.
lma_dict(special)

# Ask for a function instead of a dictionary, then apply it to some words.
is_ppron <- lma_dict(ppron, as.function = TRUE)
is_ppron(c("i", "am", "you", "were"))

in_lsmcat <- lma_dict(1:7, as.function = TRUE)
in_lsmcat(c("a", "frog", "for", "me"))

# Stopword filter: keep only the document-term matrix columns whose
# terms the returned function does not flag.
is_stopword <- lma_dict(as.function = TRUE)
dtm <- lma_dtm("Most of these words might not be all that relevant.")
dtm[, !is_stopword(colnames(dtm))]

# Special-character replacement: gsub is supplied as the function.
replace_special <- lma_dict(special, as.function = gsub)
replace_special(c(
  "\u201Ccurly quotes\u201D", "na\u00EFve", "typographer\u2019s apostrophe",
  "en\u2013dash", "em\u2014dash"
))
# Run the code above in your browser using DataLab