# \dontshow{
# Only run the example when the optional plotting/NLP packages are installed.
# requireNamespace() checks availability without attaching anything and without
# emitting a warning when a package is missing; the library() calls below do
# the actual attaching.
if (requireNamespace("Rgraphviz", quietly = TRUE) &&
    requireNamespace("udpipe", quietly = TRUE) &&
    requireNamespace("graph", quietly = TRUE)) {
# }
## Construct document/frequency/matrix
# NOTE(review): textplot_correlation_lines() and
# textplot_correlation_lines_attrs() are not provided by the packages attached
# here; presumably they come from the package this example belongs to
# (textplot) -- attach it first when running this script standalone.
library(graph)
library(Rgraphviz)
library(udpipe)
data(brussels_reviews_anno, package = "udpipe")
# Document ids that are left out of the analysis
exclude <- c(
  32337682L, 27210436L, 26820445L, 37658826L, 33661134L, 48756422L,
  23454554L, 30461127L, 23292176L, 32850277L, 30566303L, 21595142L,
  20441279L, 38097066L, 28651065L, 29011387L, 37316020L, 22135291L,
  40169379L, 38627667L, 29470172L, 24071827L, 40478869L, 36825304L,
  21597085L, 21427658L, 7890178L, 32322472L, 39874379L, 32581310L,
  43865675L, 31586937L, 32454912L, 34861703L, 31403168L, 35997324L,
  29002317L, 33546304L, 47677695L
)
dtm <- brussels_reviews_anno
dtm <- subset(dtm, !doc_id %in% exclude)
# Keep tokens tagged "NN" from reviews with language "nl" that have a lemma
dtm <- subset(dtm, xpos %in% c("NN") & language == "nl" & !is.na(lemma))
# Count lemma occurrences per document and cast the counts to a
# document/term matrix
dtm <- document_term_frequencies(dtm, document = "doc_id", term = "lemma")
dtm <- document_term_matrix(dtm)
# Reduce the vocabulary: minimum term frequency of 5, then the top 500 terms
# according to tf-idf
dtm <- dtm_remove_lowfreq(dtm, minfreq = 5)
dtm <- dtm_remove_tfidf(dtm, top = 500)
## Plot top 25 correlations, having at least a correlation of 0.01
textplot_correlation_lines(dtm, top_n = 25, threshold = 0.01)
## Plot top 25 correlations
textplot_correlation_lines(dtm, top_n = 25, label = TRUE, lwd = 5)
## Plot top 25 correlations and highlight some terms
textplot_correlation_lines(
  dtm,
  top_n = 25, label = TRUE, lwd = 5,
  terms_highlight = c("prijs", "privacy"),
  main = "Top correlations in topic xyz"
)
## Plot top 25 correlations and highlight + increase some terms
textplot_correlation_lines(
  dtm,
  top_n = 25, label = TRUE, lwd = 5,
  terms_highlight = c(prijs = 0.8, privacy = 0.1),
  col.highlight = "red"
)
## Plot correlations between specific terms
# Restrict the plot to the 100 terms with the largest column sums
w <- dtm_colsums(dtm)
w <- head(sort(w, decreasing = TRUE), 100)
textplot_correlation_lines(dtm, terms = names(w), top_n = 20, label = TRUE)
## Plot top 20 correlations with customised node and edge attributes
attrs <- textplot_correlation_lines_attrs()
attrs$node$shape <- "rectangle"
attrs$edge$color <- "steelblue"
textplot_correlation_lines(
  dtm,
  top_n = 20, label = TRUE,
  attrs = attrs
)
# \dontshow{
}
# End of main if statement running only if the required packages are installed
# }
# Run the code above in your browser using DataLab