Some visualizations and algorithms require text to be broken into chunks of
ordered words. `chunker` breaks text, optionally by grouping variables, into
equal chunks. The chunk size can be specified by giving either the number of
words to be in each chunk or the number of chunks.
chunker(text.var, grouping.var = NULL, n.words, n.chunks, as.string = TRUE,
rm.unequal = FALSE)
text.var: The text variable.
grouping.var: The grouping variables. The default, `NULL`, generates one word
list for all text. Also takes a single grouping variable or a list of 1 or
more grouping variables.
n.words: An integer specifying the number of words in each chunk (either n.chunks or n.words must be specified).
n.chunks: An integer specifying the number of chunks (either n.chunks or n.words must be specified).
as.string: logical. If `TRUE`, the chunks are returned as a single string. If
`FALSE`, the chunks are returned as a vector of single words.
rm.unequal: logical. If `TRUE`, final chunks that are unequal in length to the
other chunks are removed.
Returns a list of text chunks.
# NOT RUN {
## Basic usage: chunk the `state` text taken from `DATA`.
## Break all of the text into 10 chunks.
with(DATA, chunker(state, n.chunks = 10))
## Break all of the text into chunks of 10 words each.
with(DATA, chunker(state, n.words = 10))
## Return each chunk as a vector of single words instead of one string.
with(DATA, chunker(state, n.chunks = 10, as.string=FALSE))
## Remove final chunks whose length differs from the other chunks.
with(DATA, chunker(state, n.chunks = 10, rm.unequal=TRUE))
## Chunk by a single grouping variable.
with(DATA, chunker(state, person, n.chunks = 10))
## Chunk by a list of grouping variables.
with(DATA, chunker(state, list(sex, adult), n.words = 10))
## Grouped, fixed word count, with unequal final chunks removed.
with(DATA, chunker(state, person, n.words = 10, rm.unequal=TRUE))
## Bigger data
## Same calls on the `dialogue` text of `hamlet`, grouped by `person`.
with(hamlet, chunker(dialogue, person, n.chunks = 10))
with(hamlet, chunker(dialogue, person, n.words = 300))
# }
# NOT RUN {
## with polarity hedonometrics
## ggtitle(), xlab() and ylab() are ggplot2 functions and grid.arrange() comes
## from gridExtra; attach both once, up front, so the example is self-contained
## when run in a fresh session.
library(ggplot2)
library(gridExtra)

## Chunk the Obama/Romney dialogue into 300-word segments, grouped by person
## and time.
dat <- with(
  pres_debates2012[pres_debates2012$person %in% qcv(OBAMA, ROMNEY), ],
  chunker(dialogue, list(person, time), n.words = 300)
)

## Turn the list of chunks into a two-column data frame, reverse the column
## order, and (presumably) split the combined "person&time" column back into
## its parts -- verify against the colsplit2df() documentation.
dat2 <- colsplit2df(list2df(dat, "dialogue", "person&time")[, 2:1])

## Drop the second column and split the remaining rows by `time`.
dat3 <- split(dat2[, -2], dat2$time)
ltruncdf(dat3, 10, 50)

## Polarity of each chunk, by person, computed separately for each `time`.
poldat <- lapply(dat3, function(x) {
  with(x, polarity(dialogue, person, constrain = TRUE))
})

## One cumulative plot per element of `poldat`. Titles and axis labels are
## supplied in parallel; a NULL entry leaves that label off the plot.
m <- lapply(poldat, function(x) plot(cumulative(x)))
m <- Map(
  function(w, x, y, z) {
    w + ggtitle(x) + xlab(y) + ylab(z)
  },
  m,
  paste("Debate", 1:3),
  list(NULL, NULL, "Duration (300 Word Segment)"),
  list(NULL, "Cumulative Average Polarity", NULL)
)

do.call(grid.arrange, m)

## By person
## For each debate, split the polarity scores by person and build one
## cumulative polarity plot per person, labelled on the y axis with the debate.
poldat2 <- Map(function(x, x2) {
  scores <- with(counts(x), split(polarity, person))
  setNames(lapply(scores, function(y) {
    ## Wrap the raw scores in the list structure, with the "constrained"
    ## attribute, that the unexported plot method is called with here.
    y <- list(cumulative_average_polarity = y)
    attributes(y)[["constrained"]] <- TRUE
    qdap:::plot.cumulative_polarity(y) + xlab(NULL) + ylab(x2)
  }), names(scores))
}, poldat, paste("Debate", 1:3))

## Remove the y-axis label from the second plot of every debate.
poldat2 <- lapply(poldat2, function(x) {
  x[[2]] <- x[[2]] + ylab(NULL)
  x
})

## Title only the plots of the first debate with the speaker names.
poldat2[[1]] <- Map(
  function(x, y) {
    x + ggtitle(y)
  },
  poldat2[[1]],
  qcv(Obama, Romney)
)

## Flatten one level so every plot is passed to grid.arrange() as its own
## argument.
do.call(grid.arrange, unlist(poldat2, recursive = FALSE))
# }
Run the code above in your browser using DataLab