weight(x, ...)
## S3 method for class 'dfm':
weight(x, type = c("frequency", "relFreq", "relMaxFreq",
"logFreq", "tfidf"), smoothing = 0, normalize = TRUE, verbose = TRUE,
...)
tf(x)
tfidf(x)
smoother(x, smoothing)
weighting(object)
## S3 method for class 'dfm':
weighting(object)
smoothing: use smoothing=0 for no smoothing.
normalize: if TRUE (default) then normalize the dfm by relative
term frequency prior to computing tfidf.
verbose: if TRUE, output status messages.
weighting
returns a character object describing the type of weighting applied to the dfm.
tf
is a shortcut for weight(x, "relFreq")
tfidf
is a shortcut for weight(x, "tfidf")
smoother(x, smoothing)
is a shortcut for weight(x, "frequency", smoothing)
weighting
queries (but cannot set) the weighting applied to the dfm.
# Build a document-feature matrix from the inaugural-address corpus.
dtm <- dfm(inaugCorpus)

# Hand-rolled scaling: divide every row by that row's largest value.
# NOTE(review): presumably illustrates what type = "relMaxFreq" does -- confirm.
x <- apply(dtm, 1, function(tf) tf / max(tf))
topfeatures(dtm)

# Default weighting (first entry of `type` in the usage signature).
normDtm <- weight(dtm)
topfeatures(normDtm)

maxTfDtm <- weight(dtm, type = "relMaxFreq")
topfeatures(maxTfDtm)

logTfDtm <- weight(dtm, type = "logFreq")
topfeatures(logTfDtm)

tfidfDtm <- weight(dtm, type = "tfidf")
topfeatures(tfidfDtm)

# combine these methods for more complex weightings, e.g. as in Section 6.4 of
# Introduction to Information Retrieval
logTfDtm <- weight(dtm, type = "logFreq")
wfidfDtm <- weight(logTfDtm, type = "tfidf", normalize = FALSE)

# Exercise every weighting type on a small dfm and show the result.
testdfm <- dfm(inaugTexts[1:5])
head(testdfm)
for (w in c("frequency", "relFreq", "relMaxFreq", "logFreq", "tfidf")) {
  testw <- weight(testdfm, w)
  cat("\n\n=== weight() TEST for:", w, "; class:", class(testw), "\n")
  # Values are not auto-printed inside a for loop, so print explicitly;
  # otherwise only the cat() header above would ever be shown.
  print(head(testw))
}
Run the code above in your browser using DataLab