# NOT RUN {
## Example: rank the sentences of a job offer by relevance with textrank.
## The joboffer data, textrank() and textrank_candidates_lsh() used below come
## from the textrank package; attach it so the script runs on its own.
library(textrank)

## Load the example data and look at the first rows
data(joboffer)
head(joboffer)

## Unique (sentence_id, sentence) pairs, printed as one run of text
sentences <- unique(joboffer[, c("sentence_id", "sentence")])
cat(sentences$sentence)

## Keep the lemmas of nouns and adjectives, together with their sentence id
terminology <- subset(
  joboffer,
  upos %in% c("NOUN", "ADJ"),
  select = c("sentence_id", "lemma")
)
head(terminology)

## Textrank for finding the most relevant sentences
tr <- textrank(data = sentences, terminology = terminology)
summary(tr, n = 2)
summary(tr, n = 5, keep.sentence.order = TRUE)

## Using minhash to reduce sentence combinations - relevant if you have a lot
## of sentences. textreuse is attached here rather than at the top because
## only this section needs it (minhash_generator() comes from textreuse).
library(textreuse)
minhash <- minhash_generator(n = 1000, seed = 123456789)
candidates <- textrank_candidates_lsh(
  x = terminology$lemma,
  sentence_id = terminology$sentence_id,
  minhashFUN = minhash,
  bands = 500
)
tr <- textrank(
  data = sentences,
  terminology = terminology,
  textrank_candidates = candidates
)
summary(tr, n = 2)

## You can also reduce the number of sentence combinations by sampling
tr <- textrank(data = sentences, terminology = terminology, max = 100)
summary(tr, n = 2)
# }
# Run the code above in your browser using DataLab