# NOT RUN {
# Example walk-through: score profanity on a small hand-built character
# vector, then on a larger data set.
# NOTE(review): profanity(), get_sentences() and crowdflower_deflategate are
# not defined in this script -- presumably supplied by an attached package;
# confirm before running standalone.

# Draw four random entries from the profanity lexicon to embed in the sample
# sentences below (no seed is set here, so the words differ between runs).
bw <- sample(lexicon::profanity_alvarez, size = 4)

# Five inputs: two sentences with lexicon words spliced in, two plain
# sentences, and one missing value.
mytext <- c(
  sprintf(
    "do you like this %s? It is %s. But I hate really bad dogs",
    bw[1], bw[2]
  ),
  "I am the best friend.",
  NA,
  sprintf("I %s hate this %s", bw[3], bw[4]),
  "Do you really like it? I'm not happy"
)

# Passing the raw character vector works, but it is not the preferred route:
# sentence boundary disambiguation is then redone on every `profanity` call.
profanity(mytext)

# Preferred route: split into sentences once, then reuse the result so the
# splitting cost is not paid again.
mytext2 <- get_sentences(mytext)
profanity(mytext2)
plot(profanity(mytext2))

# Same workflow on the crowdflower_deflategate data.
brady <- get_sentences(crowdflower_deflategate)
brady_swears <- profanity(brady)
brady_swears

# Distribution of the profanity column, the count of rows scoring above zero,
# and the distribution restricted to those rows.
hist(brady_swears$profanity)
sum(brady_swears$profanity > 0)
hist(brady_swears$profanity[brady_swears$profanity > 0])
# }
# Run the code above in your browser using DataCamp Workspace