word_position: Word Position

Description

Find counts of the positioning of words within a sentence.

Usage

word_position(text.var, match.terms, digits = 2, percent = TRUE,
  zero.replace = 0, ...)

Arguments

text.var

The text variable.

match.terms

A character vector of quoted terms to find the positions of.

digits

Integer; number of decimal places to round when printing.

percent

logical. If TRUE output given as percent. If FALSE the output is proportion.

zero.replace

Value to replace 0 values with.

…

Currently ignored.

Value

Returns a list, of class "word_position", of data frames and information regarding word positions:

raw

raw word position counts in long format (may be more useful for plotting)

count

integer word position counts

prop

proportional word position counts; proportional to each total word uses

rnp

a character combination data frame of count and proportional

zero_replace

value to replace zeros with; mostly internal use

percent

The value of percent used for plotting purposes.

digits

integer value of number of digits to display; mostly internal use

Examples

Run this code

# NOT RUN {
position <- with(DATA, word_position(sent_detect(state), Top25Words))
position
lview(position)
plot(position)
scores(position)
preprocessed(position)
counts(position)
proportions(position)
plot(proportions(position))

stopwords <- unique(c(contractions[[1]], Top200Words))
topwords <- freq_terms(pres_debates2012[["dialogue"]], top = 40, 
    at.least = 4, stopwords = stopwords)[[1]]
word_position(pres_debates2012[["dialogue"]], topwords)
plot(word_position(pres_debates2012[["dialogue"]], topwords), FALSE)
plot(word_position(pres_debates2012[["dialogue"]], topwords), TRUE, scale=FALSE)

wordlist <- c("tax", "health", "rich", "america", "truth", "money", "cost", 
    "governnor", "president", "we", "job", "i", "you", "because", 
    "our", "years")

word_position(pres_debates2012[["dialogue"]], wordlist)

## BY VARIABLES
library(gridExtra)
pres_deb_by_time <- with(pres_debates2012, split(dialogue, time))
out1 <-lapply(pres_deb_by_time, word_position, wordlist)
do.call("grid.arrange", c(lapply(out1, plot), ncol=1))

pres_deb_by_person <- with(pres_debates2012, split(dialogue, person))
out2 <-lapply(pres_deb_by_person, word_position, wordlist)
plots <- lapply(names(out2), function(x) plot(out2[[x]], scale=FALSE) + 
    ggtitle(x))
do.call("grid.arrange", c(plots, ncol=2))

## As a histogram
## theme taken from: http://jonlefcheck.net/2013/03/11/black-theme-for-ggplot2-2/
theme_black <- function(base_size=12,base_family="") {
  theme_grey(base_size=base_size,base_family=base_family) %+replace%
    theme(
      # Specify axis options
      axis.line=element_blank(), 
      axis.text.x=element_text(size=base_size*0.8,color="grey55",
                               lineheight=0.9,vjust=1), 
      axis.text.y=element_text(size=base_size*0.8,color="grey55",
                               lineheight=0.9,hjust=1), 
      axis.ticks=element_line(color="grey55",size = 0.2), 
      axis.title.x=element_text(size=base_size,color="grey55",vjust=1), 
      axis.title.y=element_text(size=base_size,color="grey55",angle=90,
                                vjust=0.5), 
      axis.ticks.length=unit(0.3,"lines"), 
      axis.ticks.margin=unit(0.5,"lines"),
      # Specify legend options
      legend.background=element_rect(color=NA,fill="black"), 
      legend.key=element_rect(color="grey55", fill="black"), 
      legend.key.size=unit(1.2,"lines"), 
      legend.key.height=NULL, 
      legend.key.width=NULL,     
      legend.text=element_text(size=base_size*0.8,color="grey55"), 
      legend.title=element_text(size=base_size*0.8,face="bold",hjust=0,
                                color="grey55"), 
      legend.position="right", 
      legend.text.align=NULL, 
      legend.title.align=NULL, 
      legend.direction="vertical", 
      legend.box=NULL,
      # Specify panel options
      panel.background=element_rect(fill="black",color = NA), 
      panel.border=element_rect(fill=NA,color="grey55"), 
      panel.grid.major=element_blank(), 
      panel.grid.minor=element_blank(), 
      panel.spacing=unit(0.25,"lines"),  
      # Specify facetting options
      strip.background=element_rect(fill="grey30",color="grey10"), 
      strip.text.x=element_text(size=base_size*0.8,color="grey55"), 
      strip.text.y=element_text(size=base_size*0.8,color="grey55",
                                angle=-90), 
      # Specify plot options
      plot.background=element_rect(color="black",fill="black"), 
      plot.title=element_text(size=base_size*1.2,color="grey55"), 
      plot.margin=unit(c(1,1,0.5,0.5),"lines")
    )
}

out3 <- list_df2df(lapply(out2[1:2], preprocessed), "Person")
out3 %>% ggplot(aes(x=position)) + 
    geom_histogram(binwidth = 1, fill="white") +
    facet_grid(Person~word) +
    theme_black() + ylab("Count") + xlab("Position")

## MOVE TO THE MICRO THROUGH QUALITATIVE ANALYSIS
locs <- unlist(setNames(lapply(wordlist, function(x){
     sapply(c("ROMNEY", "OBAMA"), function(y){
         which(pres_debates2012[["person"]] ==y & grepl(x, pres_debates2012[["dialogue"]]))
     })
}), wordlist), recursive=FALSE)

fdl <- reports::folder(pres_context)
Map(function(x, y){
    if (identical(integer(0), x)) return(NULL)
    z <- with(pres_debates2012, trans_context(dialogue, person, inds=x, n.before=1))
    z[["text"]] <- gsub(beg2char(y, "."), 
        paste0("[[", beg2char(y, "."), "]]"), z[["text"]])
    print(z, file=file.path(fdl, sprintf("%s.doc", y)))
}, locs, names(locs))
# }

Run the code above in your browser using DataLab