textshape (version 1.6.0)

tidy_dtm: Convert a DocumentTermMatrix/TermDocumentMatrix into Tidy Form

Description

Converts non-zero elements of a DocumentTermMatrix/TermDocumentMatrix into a tidy data set.

Usage

tidy_dtm(x, ...)

tidy_tdm(x, ...)

Arguments

x: A DocumentTermMatrix/TermDocumentMatrix.

...: ignored.

Value

Returns a tidied data.frame.

Examples

Run this code
# NOT RUN {
# Minimal example: load the `simple_dtm` example data set by name, then
# tidy it. The result is auto-printed at top level.
data("simple_dtm")

tidy_dtm(simple_dtm)

# }
# NOT RUN {
# Extended example: tidy a document-term matrix built from debate dialogue,
# then plot the ten most frequent terms for each `time` value.
# NOTE: this installs/loads packages and therefore needs network access.

# requireNamespace() only checks availability; it does not attach pacman.
# Every pacman call below is namespace-qualified, so attaching is unnecessary.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
pacman::p_load_current_gh("trinker/gofastr")
pacman::p_load(tidyverse, magrittr, ggstance)

# Document ids are built as "<time>_<tot>".
# (presumably `presidential_debates_2012` and `q_dtm()` come from gofastr --
# confirm against that package.)
my_dtm <- with(
    presidential_debates_2012,
    q_dtm(dialogue, paste(time, tot, sep = "_"))
)

# Split the document id back into its parts: one digit before the underscore,
# then two dot-separated runs of digits. The captured pieces are converted
# from character to numeric.
term_counts <- tidy_dtm(my_dtm) %>%
    tidyr::extract(
        col = doc,
        into = c("time", "turn", "sentence"),
        regex = "(\\d)_(\\d+)\\.(\\d+)"
    ) %>%
    mutate(
        time = as.numeric(time),
        turn = as.numeric(turn),
        sentence = as.numeric(sentence)
    ) %>%
    as_tibble()  # replaces the deprecated dplyr::tbl_df()

# Show the tidied per-sentence counts before aggregating.
print(term_counts)

# Total each term's count within a `time`, then keep the ten largest per
# `time`. arrange() sorts the whole table (it ignores grouping by default)
# while slice() works per group, so each group keeps its top rows by `n`.
top_terms <- term_counts %>%
    group_by(time, term) %>%
    summarize(n = sum(n)) %>%
    group_by(time) %>%
    arrange(desc(n)) %>%
    slice(1:10) %>%
    ungroup() %>%
    mutate(
        # Suffix each term with its `time` so factor levels are unique across
        # facets; reversing the levels puts the largest count at the top of
        # each panel.
        term = factor(
            paste(term, time, sep = "__"),
            levels = rev(paste(term, time, sep = "__"))
        )
    )

# Horizontal bars, one panel per `time`; the label function strips the
# "__<time>" suffix so only the term itself is shown on the axis.
ggplot(top_terms, aes(x = n, y = term)) +
    geom_barh(stat = "identity") +
    facet_wrap(~time, ncol = 2, scales = "free_y") +
    scale_y_discrete(labels = function(x) gsub("__.+$", "", x))
# }

Run the code above in your browser using DataCamp Workspace