# Visualize term and document weights.
## Term weights
# Names of the term-weighting schemes to compare.
term_weights <- c("binary", "log", "sqrt", "count", "amplify")
# Run lma_weight() once per scheme on the raw counts 1 through 20; sapply()
# gathers the results and labels them with the scheme names.
# NOTE(review): the third positional argument (FALSE) presumably switches off
# normalization -- confirm against the lma_weight() documentation.
Weighted <- sapply(
  term_weights,
  function(scheme) lma_weight(1:20, scheme, FALSE)
)
# Plot each scheme's output against the raw count, but only when splot loads.
# The object name `Weighted` and the formula are left exactly as written since
# splot() receives the formula unevaluated.
if (require(splot)) {
  splot(Weighted ~ 1:20, labx = "Raw Count", lines = "co")
}
## Document weights
# Names of the document-weighting schemes to compare, one family per line.
# The order is significant: the plotting section assigns its category labels
# to these entries by position.
doc_weights <- c(
  "df", "dflog", "dfmax", "dfmlog",
  "idf", "ridf",
  "normal",
  "dpois", "ppois",
  "entropy"
)
# Build a 20 x 20 test matrix whose upper triangle (diagonal included) is
# filled and whose lower triangle is zero, then return what lma_weight() gives
# for it with `doc.only = TRUE`.
#
# Column j of the matrix has j filled cells (rows 1 to j).
#
# Arguments:
#   w      Weighting scheme, passed as the second argument of lma_weight().
#   value  What to put in the filled cells:
#            - numeric: used as-is in the assignment;
#            - "inverted": each filled cell of column j is 21 - j;
#            - any other non-numeric value (e.g. "sequence"): each filled
#              cell of column j is j.
#
# Returns the value of lma_weight(m, w, FALSE, doc.only = TRUE).
weight_range <- function(w, value = 1) {
  sizes <- 1:20
  fill <- if (is.numeric(value)) {
    value
  } else if (value == "inverted") {
    # rep(x, times) with a times vector repeats x[j] exactly sizes[j] times,
    # matching the column-by-column order of the upper triangle.
    rep(21 - sizes, sizes)
  } else {
    rep(sizes, sizes)
  }
  m <- diag(20)
  m[upper.tri(m, TRUE)] <- fill
  lma_weight(m, w, FALSE, doc.only = TRUE)
}
# Draw three comparison plots of the document-weighting schemes. The whole
# section is skipped when require(splot) returns FALSE.
if (require(splot)) {
# One label per entry of doc_weights, in the same order: 4 "df", 2 "idf",
# 1 "normal", 2 "poisson", 1 "entropy" (10 labels for the 10 schemes).
category <- rep(c("df", "idf", "normal", "poisson", "entropy"), c(4, 2, 1, 2, 1))
# Settings shared by all three plots; each call receives them via `options`.
op <- list(
laby = "Relative (Scaled) Weight", labx = "Document Frequency",
leg = "outside", lines = "connected", mv.scale = TRUE, note = FALSE
)
# Plot 1: weight_range() with its default value = 1, so every filled cell of
# the test matrix is 1. One result per scheme is plotted against 1:20.
# NOTE(review): `colorby = list(category, grade = TRUE)` presumably colors the
# schemes by their category label -- confirm against the splot documentation.
splot(
sapply(doc_weights, weight_range) ~ 1:20,
options = op, title = "Same Term, Varying Document Frequencies",
sud = "All term frequencies are 1.",
colorby = list(category, grade = TRUE)
)
# Plot 2: value = "sequence" takes weight_range()'s non-"inverted" string
# branch, so the filled cells of column j all hold j (the count of filled
# cells in that column).
splot(
sapply(doc_weights, weight_range, value = "sequence") ~ 1:20,
options = op, title = "Term as Document Frequencies",
sud = "Non-zero terms are the number of non-zero terms.",
colorby = list(category, grade = TRUE)
)
# Plot 3: value = "inverted" makes the filled cells of column j hold 21 - j,
# i.e. the number of zero cells in that column plus one.
splot(
sapply(doc_weights, weight_range, value = "inverted") ~ 1:20,
options = op, title = "Term Opposite of Document Frequencies",
sud = "Non-zero terms are the number of zero terms + 1.",
colorby = list(category, grade = TRUE)
)
}
# Run the code above in your browser using DataLab