weight(x, ...)
## S3 method for class 'dfm':
weight(x, type = c("frequency", "relFreq", "relMaxFreq",
"logFreq", "tfidf"), smooth = 0, normalize = TRUE, verbose = TRUE, ...)
tf(x)
tfidf(x)
smoother(x, smooth)
weighting(object)
## S3 method for class 'dfm':
weighting(object)
smooth: use smooth=0 for no smoothing.
normalize: if TRUE (default) then normalize the dfm by relative term frequency prior to computing tfidf.
verbose: if TRUE, output status messages.
weighting returns a character object describing the type of weighting applied to the dfm.
tf is a shortcut for weight(x, "relFreq").
tfidf is a shortcut for weight(x, "tfidf").
smoother is a shortcut for weight(x, "frequency", smooth).
weighting queries (but cannot set) the weighting applied to the dfm.
# Build a dfm from the subset of inaugCorpus with Year > 1980.
dtm <- dfm(subset(inaugCorpus, Year > 1980), verbose = FALSE)

# Manual computation: divide every row of the dfm by that row's maximum.
x <- apply(dtm, 1, function(row_counts) {
  row_counts / max(row_counts)
})

# Top features of the unweighted dfm, then of each weighted variant.
topfeatures(dtm)

# weight() called with its default `type`.
normDtm <- weight(dtm)
topfeatures(normDtm)

maxTfDtm <- weight(dtm, type = "relMaxFreq")
topfeatures(maxTfDtm)

logTfDtm <- weight(dtm, type = "logFreq")
topfeatures(logTfDtm)

tfidfDtm <- weight(dtm, type = "tfidf")
topfeatures(tfidfDtm)

# Combine these methods for more complex weightings, e.g. as in Section 6.4 of
# Introduction to Information Retrieval: apply "logFreq" first, then "tfidf"
# with normalize = FALSE.
logTfDtm <- weight(dtm, type = "logFreq")
wfidfDtm <- weight(logTfDtm, type = "tfidf", normalize = FALSE)

# Apply every weighting type to a small dfm built from the first five
# elements of inaugTexts, printing the class and first five columns each time.
testdfm <- dfm(inaugTexts[1:5], verbose = FALSE)
print(testdfm[, 1:5])
for (w in c("frequency", "relFreq", "relMaxFreq", "logFreq", "tfidf")) {
  testw <- weight(testdfm, w)
  cat("\nweight test for:", w, "; class:", class(testw), "\n")
  print(testw[, 1:5])
}
Run the code above in your browser using DataLab