Learn R Programming

textplot (version 0.2.2)

plot.BTM: Plot function for a BTM object

Description

Plot biterms as a clustered graph. The graph is constructed by assigning each word to a topic; within each topic, the biterm frequencies between that topic's words are shown.

Usage

# S3 method for BTM
plot(
  x,
  biterms = terms(x, type = "biterms")$biterms,
  top_n = 7,
  which,
  labels = seq_len(x$K),
  title = "Biterm topic model",
  subtitle = list(),
  ...
)

Value

an object of class ggplot

Arguments

x

an object of class BTM with a biterm topic model

biterms

a data.frame with columns term1, term2 and topic, containing all biterms and the topic each biterm was assigned to. Defaults to the biterms used to construct the model.

top_n

integer limiting the display to the top_n terms of each topic. Defaults to 7.

which

integer vector of topic numbers; only these topics are displayed. See the examples.

labels

a character vector of topic names. It should have the same length as the number of topics in the model (x$K). Defaults to the topic numbers, seq_len(x$K).

title

character string with the title to use in the plot

subtitle

character string with the subtitle to use in the plot

...

not used

See Also

BTM, textplot_bitermclusters.default

Examples

Run this code
# \dontshow{
# Run the examples only when every optional package is installed.
# requireNamespace() checks availability without attaching anything; the
# packages are attached explicitly with library() further down.
if (requireNamespace("igraph", quietly = TRUE) &&
    requireNamespace("BTM", quietly = TRUE) &&
    requireNamespace("ggraph", quietly = TRUE) &&
    requireNamespace("ggforce", quietly = TRUE) &&
    requireNamespace("concaveman", quietly = TRUE) &&
    requireNamespace("data.table", quietly = TRUE) &&
    requireNamespace("udpipe", quietly = TRUE)) {
# }
# Attach textplot so that plot() on a BTM object dispatches to plot.BTM
# when this snippet is run on its own.
library(textplot)
library(igraph)
library(BTM)
library(ggraph)
library(ggforce)
library(concaveman)

# Example biterm topic model shipped with textplot
data(example_btm, package = "textplot")

model <- example_btm
# \donttest{
# Limit each topic to its 3 top terms
plot(model, title = "BTM model", top_n = 3)
# Same plot, passing the topic numbers explicitly as labels
plot(model, title = "BTM model", top_n = 3, labels = seq_len(model$K))
# Display only a subset of the topics
plot(model, title = "BTM model", which = 7:15)
plot(model, title = "BTM model", subtitle = "First 5 topics",
     which = 1:5, top_n = 10)
plot(model, title = "Biterm topic model", subtitle = "First 8 topics",
     which = 1:8, top_n = 7)
# }

# Human-readable topic names: 20 labels, one per topic
# (assumes example_btm has 20 topics -- confirm with model$K).
topiclabels <- c("Garbage",
  "Data Mining", "Gradient descent", "API's",
  "Random Forests", "Stat models", "Text Mining / NLP",
  "GLM / GAM / Bayesian", "Machine learning", "Variable selection",
  "Regularisation techniques", "Optimisation", "Fuzzy logic",
  "Classification/Regression trees", "Text frequencies",
  "Neural / Deep learning", "Variable selection",
  "Text file handling", "Text matching", "Topic modelling")
# labels covers all topics, while which selects the ones to draw
plot(model, title = "Biterm topic model", subtitle = "some topics",
     top_n = 7,
     which = c(3, 4, 5, 6, 7, 9, 12, 16, 20),
     labels = topiclabels)

# \donttest{
# BTM is already attached above; only the extra packages are needed here.
library(data.table)
library(udpipe)
## Annotate text with parts of speech tags
data("brussels_reviews", package = "udpipe")
anno <- subset(brussels_reviews, language %in% "nl")
anno <- data.frame(doc_id = anno$id, text = anno$feedback, stringsAsFactors = FALSE)
# NOTE(review): passing "dutch" presumably makes udpipe fetch the Dutch
# model when it is not available locally -- confirm in the udpipe docs.
anno <- udpipe(anno, "dutch", trace = 10)
## Get cooccurrences of nouns / adjectives and proper nouns
biterms <- as.data.table(anno)
biterms <- biterms[, cooccurrence(x = lemma,
                                  relevant = upos %in% c("NOUN", "PROPN", "ADJ"),
                                  skipgram = 2),
                     by = list(doc_id)]
## Build the BTM model
# Fixed seed so the fitted model is reproducible
set.seed(123456)
x <- subset(anno, upos %in% c("NOUN", "PROPN", "ADJ"))
x <- x[, c("doc_id", "lemma")]
model <- BTM(x, k = 5, beta = 0.01, iter = 2000, background = TRUE,
             biterms = biterms, trace = 100)
# Plot with all defaults: the biterms are taken from the model itself
plot(model)
# }

# \dontshow{
}
# End of main if statement running only if the required packages are installed
# }

Run the code above in your browser using DataLab