dtm <- dfm(inaugCorpus)
x <- apply(dtm, 1, function(tf) tf/max(tf))
topfeatures(dtm)
normDtm <- weight(dtm, "relFreq")
topfeatures(normDtm)
maxTfDtm <- weight(dtm, type="relMaxFreq")
topfeatures(maxTfDtm)
logTfDtm <- weight(dtm, type="logFreq")
topfeatures(logTfDtm)
tfidfDtm <- weight(dtm, type="tfidf")
topfeatures(tfidfDtm)
# combine these methods for more complex weightings, e.g. as in Section 6.4 of
# Introduction to Information Retrieval
head(logTfDtm <- weight(dtm, type="logFreq"))
head(tfidf(logTfDtm, normalize = FALSE))
testdfm <- dfm(inaugTexts[1:5], verbose = FALSE)
for (w in c("frequency", "relFreq", "relMaxFreq", "logFreq", "tfidf")) {
testw <- weight(testdfm, w)
cat("\n\n=== weight() TEST for:", w, "; class:", class(testw), "\n")
head(testw)
}
Run the code above in your browser using DataLab