# example text: three short passages used as input documents below
text <- c(
  paste0(
    "Oh, what youth was! What I had and gave away. ",
    "What I took and spent and saw. What I lost. And now? Ruin."
  ),
  paste0(
    "God, are you so bored?! You just want what's gone from us all? ",
    "I miss the you that was too. I love that you."
  ),
  paste0(
    "Tomorrow! Tomorrow--nay, even tonight--you wait, as I am about to change. ",
    "Soon I will off to revert. Please wait."
  )
)
# make a document-term matrix with pre-specified terms only
# (return.dtm = TRUE gives per-term counts rather than category scores;
# terms here are matched literally, including punctuation)
lma_patcat(text, c("bored?!", "i lo", ". "), return.dtm = TRUE)
# get counts of sets of letters (each set of patterns forms one category)
lma_patcat(text, list(c("a", "b", "c"), c("d", "e", "f")))
# same thing with regular expressions (fixed = FALSE treats patterns as regex)
lma_patcat(text, list("[abc]", "[def]"), fixed = FALSE)
# match only words (boundary = TRUE; presumably restricts matches to whole
# terms rather than substrings -- see lma_patcat's boundary argument)
lma_patcat(text, list("i"), boundary = TRUE)
# match only words via a regex word boundary, ignoring punctuation
lma_patcat(
text, c("you", "tomorrow", "was"),
fixed = FALSE,
boundary = "\\b", return.dtm = TRUE
)
# not run by default (wrapped in if (FALSE)): requires internet access
# and a local dictionary directory
if (FALSE) {
# read in the temporal orientation lexicon from the World Well-Being Project
tempori <- read.csv(paste0(
"https://raw.githubusercontent.com/wwbp/lexica/master/",
"temporal_orientation/temporal_orientation_lexicon.csv"
))
lma_patcat(text, tempori)
# or use the standardized version (read.dic loads it from a local directory)
tempori_std <- read.dic("wwbp_prospection", dir = "~/Dictionaries")
lma_patcat(text, tempori_std)
## get scores on the same scale by adjusting the standardized values
## (rescale weights from percent back to each category's original maximum)
tempori_std[, -1] <- tempori_std[, -1] / 100 *
select.dict("wwbp_prospection")$selected[, "original_max"]
# reorder columns to match the original lexicon's category order
lma_patcat(text, tempori_std)[, unique(tempori$category)]
}
# Run the code above in your browser using DataLab