word_count: Word Counts

Description

word_count - Transcript apply word counts.

character_count - Transcript apply character counts.

character_table - Computes a table of character counts by grouping . variable(s).

Usage

word_count(
  text.var,
  byrow = TRUE,
  missing = NA,
  digit.remove = TRUE,
  names = FALSE
)
wc(text.var, byrow = TRUE, missing = NA, digit.remove = TRUE, names = FALSE)
character_count(
  text.var,
  byrow = TRUE,
  missing = NA,
  apostrophe.remove = TRUE,
  digit.remove = TRUE,
  count.space = FALSE
)
character_table(
  text.var,
  grouping.var = NULL,
  percent = TRUE,
  prop.by.row = TRUE,
  zero.replace = 0,
  digits = 2,
  ...
)
char_table(
  text.var,
  grouping.var = NULL,
  percent = TRUE,
  prop.by.row = TRUE,
  zero.replace = 0,
  digits = 2,
  ...
)

Value

word_count - returns a word count by row or total.

character_count - returns a character count by row or total.

character_table - returns a list: dataframe of character counts by grouping variable.

raw: Dataframe of the frequency of characters by grouping variable.
prop: Dataframe of the proportion of characters by grouping variable.
rnp: Dataframe of the frequency and proportions of characters by grouping variable.
percent: The value of percent used for plotting purposes.
zero.replace: The value of zero.replace used for plotting purposes.

Arguments

text.var: The text variable
byrow: logical. If TRUE counts by row, if FALSE counts all words.
missing: Value to insert for missing values (empty cells).
digit.remove: logical. If TRUE removes digits before counting words.
names: logical. If TRUE the sentences are given as the names of the counts.
apostrophe.remove: logical. If TRUE apostrophes will be counted in the character count.
count.space: logical. If TRUE spaces are counted as characters.
grouping.var: The grouping variables. Default NULL generates one word list for all text. Also takes a single grouping variable or a list of 1 or more grouping variables.
percent: logical. If TRUE output given as percent. If FALSE the output is proportion.
prop.by.row: logical. If TRUE applies proportional to the row. If FALSE applies by column.
zero.replace: Value to replace 0 values with.
digits: Integer; number of decimal places to round when printing.
...: Other arguments passed to prop.

Examples

Run this code

if (FALSE) {
## WORD COUNT
word_count(DATA$state)
wc(DATA$state)
word_count(DATA$state, names = TRUE)
word_count(DATA$state, byrow=FALSE, names = TRUE)
sum(word_count(DATA$state))

sapply(split(raj$dialogue, raj$person), wc, FALSE) %>%
    sort(decreasing=TRUE) %>% 
    list2df("wordcount", "person") %>%
    `[`(, 2:1)

## PLOT WORD COUNTS
raj2 <- raj
raj2$scaled <- unlist(tapply(wc(raj$dialogue), raj2$act, scale))
raj2$scaled2 <- unlist(tapply(wc(raj$dialogue), raj2$act, scale, scale = FALSE))
raj2$ID <- factor(unlist(tapply(raj2$act, raj2$act, seq_along)))

ggplot(raj2, aes(x = ID, y = scaled, fill =person)) +
    geom_bar(stat="identity") +
    facet_grid(act~.) + 
    ylab("Scaled") + xlab("Turn of Talk") +
    guides(fill = guide_legend(nrow = 5, byrow = TRUE)) +
    theme(legend.position="bottom") +
    ggtitle("Scaled and Centered")


ggplot(raj2, aes(x = ID, y = scaled2, fill =person)) +
    geom_bar(stat="identity") +
    facet_grid(act~.) + 
    ylab("Scaled") + xlab("Turn of Talk") +
    guides(fill = guide_legend(nrow = 5, byrow = TRUE)) +
    theme(legend.position="bottom") +
    ggtitle("Mean Difference")
  
    
raj$wc <- wc(raj$dialogue)
raj$cum.wc <- unlist(with(raj, tapply(wc, act, cumsum)))
raj$turn <- unlist(with(raj, tapply(act, act, seq_along)))
ggplot(raj, aes(y=cum.wc, x=turn)) + 
    geom_step(direction = "hv") + 
    facet_wrap(~act)
        
## CHARACTER COUNTS
character_count(DATA$state)
character_count(DATA$state, byrow=FALSE)
sum(character_count(DATA$state))

## CHARACTER TABLE
x <- character_table(DATA$state, DATA$person)
plot(x)
plot(x, label = TRUE)
plot(x, label = TRUE, text.color = "red")
plot(x, label = TRUE, lab.digits = 1, zero.replace = "PP7")

scores(x)
counts(x)
proportions(x)

plot(scores(x))
plot(counts(x))
plot(proportions(x))

## combine columns
colcomb2class(x, list(vowels = c("a", "e", "i", "o", "u")))

## char_table(DATA$state, DATA$person)
## char_table(DATA$state, DATA$person, percent = TRUE)
## character_table(DATA$state, list(DATA$sex, DATA$adult))

library(ggplot2);library(reshape2)
dat <- character_table(DATA$state, list(DATA$sex, DATA$adult))
dat2 <- colsplit2df(melt(counts(dat)), keep.orig = TRUE)
head(dat2, 15)

ggplot(data = dat2, aes(y = variable, x = value, colour=sex)) +
    facet_grid(adult~.) +
    geom_line(size=1, aes(group =variable), colour = "black") +
    geom_point()

ggplot(data = dat2, aes(x = variable, y = value)) +
    geom_bar(aes(fill = variable), stat = "identity") +
    facet_grid(sex ~ adult, margins = TRUE) +
    theme(legend.position="none")
}

Run the code above in your browser using DataLab

Description

Usage

Value

Arguments

See Also

Examples