# Example data: two categories, each holding three documents of words ----
df <- data.frame(
  category = rep(c("A", "B"), each = 6),
  doc_id = rep(c("d1", "d2", "d3"), times = 4),
  word = c(
    "apple", "banana", "apple", "banana", "cherry", "apple",
    "dog", "cat", "dog", "mouse", "cat", "dog"
  ),
  stringsAsFactors = FALSE
)

# Grouped: supply a grouping column alongside the document and term columns
result <- bind_tf_idf_dt(
  df,
  group_col = "category",
  doc_col = "doc_id",
  term_col = "word"
)
result

# Ungrouped: keep a single category and omit the grouping column
df %>%
  filter_dt(category == "A") %>%
  bind_tf_idf_dt(doc_col = "doc_id", term_col = "word")

# Pre-counted input, ungrouped: n_col names the count column
df %>%
  filter_dt(category == "A") %>%
  count_dt() %>%
  bind_tf_idf_dt(doc_col = "doc_id", term_col = "word", n_col = "n")

# Pre-counted input, grouped by category
df %>%
  count_dt() %>%
  bind_tf_idf_dt(
    group_col = "category",
    doc_col = "doc_id",
    term_col = "word",
    n_col = "n"
  )
# Run the code above in your browser using DataLab