# NOT RUN {
## Draw four random profane words from the Alvarez profanity lexicon
bw <- sample(unique(tolower(lexicon::profanity_alvarez)), size = 4)
## Build a small demo vector: two profane strings, one clean string,
## a missing value, and a clean question
mytext <- c(
  sprintf('do you like this %s? It is %s. But I hate really bad dogs', bw[1], bw[2]),
  'I am the best friend.',
  NA,
  sprintf('I %s hate this %s', bw[3], bw[4]),
  "Do you really like it? I'm not happy"
)
## `profanity()` accepts a raw character vector, but then it must pay the
## cost of sentence boundary disambiguation on every single call
print(profanity(mytext))
## Preferred: split into sentences once up front, then score as many
## times as needed without re-parsing
mytext2 <- get_sentences(mytext)
print(profanity(mytext2))
plot(profanity(mytext2))
## Score profanity over the deflategate comments, sentence by sentence
brady <- get_sentences(crowdflower_deflategate)
brady_swears <- profanity(brady)
print(brady_swears)
## Distribution of profanity proportion across all comments
hist(brady_swears$profanity)
## How many comments contain any profanity at all?
is_profane <- brady_swears$profanity > 0
sum(is_profane)
## Distribution of proportions restricted to the profane comments
hist(brady_swears$profanity[is_profane])
## Combine the package's example data sets and split them into sentences.
## BUG FIX: the original called get_sentences(crowdflower_deflategate),
## leaving `combo` unused — the combined data is what should be split here.
combo <- combine_data()
combo_sentences <- get_sentences(combo)
## Score against the racist-terms lexicon instead of the default list
racist <- profanity(combo_sentences, profanity_list = lexicon::profanity_racist)
## Show the text of the sentences that were flagged
combo_sentences[racist$profanity > 0, ]$text
## List which lexicon terms actually matched in those flagged sentences
extract_profanity_terms(
    combo_sentences[racist$profanity > 0, ]$text,
    profanity_list = lexicon::profanity_racist
)
## Rescore after removing false-positive terms (jerry, que, illegal)
## from the racist-terms lexicon
library(textclean)
trimmed_list <- textclean::drop_element_fixed(
  lexicon::profanity_racist,
  c('jerry', 'illegal', 'que')
)
racist2 <- profanity(combo_sentences, profanity_list = trimmed_list)
## Flagged sentences under the trimmed lexicon
combo_sentences[racist2$profanity > 0, ]$text
# }
# Run the code above in your browser using DataLab