# NOT RUN {
## A small character vector of example text to score.
mytext <- c(
'do you like it? But I hate really bad dogs',
'I am the best friend.',
"Do you really like it? I'm not a fan",
"It's like a tree."
)
## `sentiment()` works directly on a character vector, but this is not the
## preferred method: the cost of sentence boundary disambiguation is then
## paid on every call to `sentiment()`. For small batches the loss is minimal.
# }
# NOT RUN {
sentiment(mytext)
# }
# NOT RUN {
## Preferred method: split into sentences once with `get_sentences()` so the
## sentence boundary disambiguation cost is paid only a single time.
mytext <- get_sentences(mytext)
sentiment(mytext)
## `question.weight = 0` presumably zeroes out question sentences -- see ?sentiment
sentiment(mytext, question.weight = 0)
## Sam-I-Am example: normalize the hyphenated name before sentence splitting.
sam_dat <- get_sentences(gsub("Sam-I-am", "Sam I am", sam_i_am))
(sam <- sentiment(sam_dat))
plot(sam)
## NOTE(review): `scale_range`/`low_pass_size` appear to rescale and smooth the
## plotted sentiment curve -- confirm against the plot method's documentation.
plot(sam, scale_range = TRUE, low_pass_size = 5)
plot(sam, scale_range = TRUE, low_pass_size = 10)
# }
# NOT RUN {
## Legacy transform functions from syuzhet can be passed to the plot method.
plot(sam, transformation.function = syuzhet::get_transformed_values)
plot(sam, transformation.function = syuzhet::get_transformed_values,
scale_range = TRUE, low_pass_size = 5)
# }
# NOT RUN {
## A valence shifter ("not") occurring far before the polarized word
## ("handsome"). `n.before = Inf` presumably widens the look-back window for
## shifters to the entire sentence -- see ?sentiment to confirm.
y <- get_sentences(
"He was not the sort of man that one would describe as especially handsome."
)
sentiment(y)
sentiment(y, n.before=Inf)
# }
# NOT RUN {
## Categorize the polarity (tidyverse vs. data.table):
library(dplyr)
## tidyverse: nest case_when() inside factor() rather than piping one into the
## other -- same resulting column, just a different shape of the expression.
sentiment(mytext) %>%
    as_tibble() %>%
    mutate(
        category = factor(
            case_when(
                sentiment < 0  ~ 'Negative',
                sentiment == 0 ~ 'Neutral',
                sentiment > 0  ~ 'Positive'
            ),
            levels = c('Negative', 'Neutral', 'Positive')
        )
    )
library(data.table)
## data.table: assign first, add the factor column by reference with `:=`,
## then print via `dt[]` (an empty subset forces printing after `:=`).
dt <- sentiment(mytext)
dt[, category := factor(
    fcase(
        sentiment < 0,  'Negative',
        sentiment == 0, 'Neutral',
        sentiment > 0,  'Positive'
    ),
    levels = c('Negative', 'Neutral', 'Positive')
)]
dt[]
# }
# NOT RUN {
## A data.frame input: score a single text column, or the whole data.frame
## after its text has been sentence-split with `get_sentences()`.
person_1_text <- paste0(
    "Mr. Brown is nasty! He says hello. i give him rage. i will ",
    "go at 5 p. m. eastern time. Angry thought in between!go there"
)
person_2_text <- "One more thought for the road! I am going now. Good day and good riddance."
dat <- data.frame(
    w = c('Person 1', 'Person 2'),
    x = c(person_1_text, person_2_text),
    y = state.name[c(32, 38)],
    z = c(.456, .124),
    stringsAsFactors = FALSE
)
## ...sentence-split just the text vector...
sentiment(get_sentences(dat$x))
## ...or the entire data.frame.
sentiment(get_sentences(dat))
# }
# NOT RUN {
## Tidy approach: sentence-split inside a dplyr pipeline, then use magrittr's
## exposition pipe (%$%) to expose the new column to `sentiment()`.
library(dplyr)
library(magrittr)
hu_liu_cannon_reviews %>%
mutate(review_split = get_sentences(text)) %$%
sentiment(review_split)
# }
# NOT RUN {
## Emojis
# }
# NOT RUN {
## Load R twitter data shipped with the textclean package
x <- read.delim(system.file("docs/r_tweets.txt", package = "textclean"),
stringsAsFactors = FALSE)
x
library(dplyr); library(magrittr)
## There are 2 approaches
## Approach 1: Replace emojis with their word equivalents before scoring
x %>%
mutate(Tweet = replace_emoji(Tweet)) %$%
sentiment(Tweet)
## Approach 2: Replace emojis with identifier tokens and score against a
## polarity table extended with emoji sentiment values
combined_emoji <- update_polarity_table(
lexicon::hash_sentiment_jockers_rinker,
x = lexicon::hash_sentiment_emojis
)
x %>%
mutate(Tweet = replace_emoji_identifier(Tweet)) %$%
sentiment(Tweet, polarity_dt = combined_emoji)
## Use With Non-ASCII
## Warning: sentimentr has not been tested with languages other than English.
## The example below is how one might use sentimentr if you believe the
## language you are working with is similar enough in grammar for
## sentimentr to be viable (likely Germanic languages)
## english_sents <- c(
##     "I hate bad people.",
##     "I like yummy cookie.",
##     "I don't love you anymore; sorry."
## )
## Roughly equivalent to the above English (escaped so this file stays ASCII)
danish_sents <- stringi::stri_unescape_unicode(c(
"Jeg hader d\\u00e5rlige mennesker.",
"Jeg kan godt lide l\\u00e6kker is.",
"Jeg elsker dig ikke mere; undskyld."
))
danish_sents
## Polarity terms
polterms <- stringi::stri_unescape_unicode(
c('hader', 'd\\u00e5rlige', 'undskyld', 'l\\u00e6kker', 'kan godt', 'elsker')
)
## Make polarity_dt: three negative (-1) and three positive (+1) terms
danish_polarity <- as_key(data.frame(
x = stringi::stri_unescape_unicode(polterms),
y = c(-1, -1, -1, 1, 1, 1)
))
## Make valence_shifters_dt: 'ikke' ("not") with y = "1", which presumably
## marks it as a negator in sentimentr's valence shifter coding -- see ?as_key
danish_valence_shifters <- as_key(
data.frame(x='ikke', y="1"),
sentiment = FALSE,
comparison = NULL
)
## Use \p{L} in `retention_regex` so non-ASCII letters survive text cleaning
sentiment(
danish_sents,
polarity_dt = danish_polarity,
valence_shifters_dt = danish_valence_shifters,
retention_regex = "\\d:\\d|\\d\\s|[^\\p{L}',;: ]"
)
## A way to test if you need [:alpha:] vs \p{L} in `retention_regex`:
## 1. Does it wreck some of the non-ascii characters by default?
sentimentr:::make_sentence_df2(danish_sents)
## 2. Does this?
sentimentr:::make_sentence_df2(danish_sents, "\\d:\\d|\\d\\s|[^\\p{L}',;: ]")
## If you answer yes to #1 but no to #2 you likely want \p{L}
# }
# Run the code above in your browser using DataLab