# NOT RUN {
## Pick four random entries from `lexicon::profanity_alvarez` to splice
## into the example text.
bad_words <- sample(lexicon::profanity_alvarez, size = 4)

## Five example elements; the third one is a missing value.
raw_text <- c(
  sprintf(
    "do you like this %s? It is %s. But I hate really bad dogs",
    bad_words[1], bad_words[2]
  ),
  "I am the best friend.",
  NA,
  sprintf("I %s hate this %s", bad_words[3], bad_words[4]),
  "Do you really like it? I'm not happy"
)

## Passing a plain character vector works, but it is not the preferred
## route: the cost of sentence boundary disambiguation is paid again
## every time `profanity` is run.
profanity(raw_text)
profanity_by(raw_text)

## Preferred route: split into sentences once and reuse that object.
sentences <- get_sentences(raw_text)
profanity_by(sentences)
get_sentences(profanity_by(sentences))
(by_element <- profanity_by(sentences))

## Name the extracted sentences by the rounded `ave_profanity` values.
rounded_ave <- round(by_element[["ave_profanity"]], digits = 3)
stats::setNames(get_sentences(profanity_by(sentences)), rounded_ave)
library(data.table)

## Split the Deflategate data into sentences once, then score it.
deflategate_sentences <- get_sentences(crowdflower_deflategate)
deflategate_scores <- profanity_by(deflategate_sentences)

## Show the original rows whose `ave_profanity` is above zero.
flagged_ids <- deflategate_scores[ave_profanity > 0, ]$element_id
crowdflower_deflategate[flagged_ids, ]

## The sentences for those same elements, named by the rounded
## `ave_profanity` value.
is_vulgar <- deflategate_scores[["ave_profanity"]] > 0
stats::setNames(
  get_sentences(deflategate_scores)[is_vulgar],
  round(deflategate_scores[["ave_profanity"]][is_vulgar], digits = 3)
)
## Add two grouping columns in a single `:=` call:
##   source    - 'retweet' when the text starts with "RT", otherwise 'OP'
##   belichick - TRUE when the text matches the pattern below, ignoring case
tagged <- data.table(crowdflower_deflategate)[
  ,
  `:=`(
    source = ifelse(grepl("^RT", text), "retweet", "OP"),
    belichick = grepl(
      "\\bb[A-Za-z]+l[A-Za-z]*ch", text,
      ignore.case = TRUE
    )
  )
][]

## Score profanity grouped by both new columns and plot the result.
grouped_scores <- with(
  tagged,
  profanity_by(text, by = list(source, belichick))
)
plot(grouped_scores)
# }
# Run the code above in your browser using DataLab