plot.BTM: Plot function for a BTM object

Description

Plot biterms as a clustered graph. The graph is constructed by assigning each word to a topic; within each topic's cluster of words, the biterm frequencies are shown.

Usage

# S3 method for BTM
plot(
  x,
  biterms = terms(x, type = "biterms")$biterms,
  top_n = 7,
  which,
  labels = seq_len(x$K),
  title = "Biterm topic model",
  subtitle = list(),
  ...
)

Value

an object of class ggplot

Arguments

x: an object of class BTM with a biterm topic model
biterms: a data.frame with columns term1, term2, topic with all biterms and the topic these were assigned to. Defaults to the biterms used to construct the model.
top_n: integer giving the maximum number of terms to display for each topic. Defaults to 7.
which: integer vector of topic numbers; only these topics are displayed. See the examples.
labels: a vector of topic names, one per topic, so its length should equal the number of topics in the model. Defaults to the topic numbers, seq_len(x$K).
title: character string with the title to use in the plot
subtitle: character string with the subtitle to use in the plot
...: not used

Examples

Run this code

# \dontshow{
# Run the examples only when every package they rely on is installed.
# requireNamespace() only checks availability (it does not attach anything);
# the library() calls below do the attaching. textplot is part of the check
# because the example data set is loaded from it.
if (all(vapply(
  c("igraph", "BTM", "ggraph", "ggforce", "concaveman",
    "data.table", "udpipe", "textplot"),
  requireNamespace, logical(1), quietly = TRUE
))) {
# }
library(igraph)
library(BTM)
library(ggraph)
library(ggforce)
library(concaveman)
# Attach textplot as well, so that plot() on a BTM object dispatches to the
# method documented on this page when the script is run on its own.
library(textplot)
data(example_btm, package = 'textplot')

model <- example_btm
# \donttest{
## Limit the number of terms shown per topic, keep the default topic labels
plot(model, title = "BTM model", top_n = 3)
plot(model, title = "BTM model", top_n = 3, labels = seq_len(model$K))
## Show only a subset of the topics
plot(model, title = "BTM model", which = 7:15)
plot(model, title = "BTM model", subtitle = "First 5 topics",
     which = 1:5, top_n = 10)
plot(model, title = "Biterm topic model", subtitle = "First 8 topics",
     which = 1:8, top_n = 7)
# }

## Use descriptive topic names instead of topic numbers.
## One label per topic of the model; `which` then selects the topics to show.
topiclabels <- c("Garbage",
  "Data Mining", "Gradient descent", "API's",
  "Random Forests", "Stat models", "Text Mining / NLP",
  "GLM / GAM / Bayesian", "Machine learning", "Variable selection",
  "Regularisation techniques", "Optimisation", "Fuzzy logic",
  "Classification/Regression trees", "Text frequencies",
  "Neural / Deep learning", "Variable selection",
  "Text file handling", "Text matching", "Topic modelling")
plot(model, title = "Biterm topic model", subtitle = "some topics",
     top_n = 7,
     which = c(3, 4, 5, 6, 7, 9, 12, 16, 20),
     labels = topiclabels)

# \donttest{
## Build a BTM model from scratch and plot it (BTM is already attached above)
library(data.table)
library(udpipe)
## Annotate text with parts of speech tags
data("brussels_reviews", package = "udpipe")
anno <- subset(brussels_reviews, language %in% "nl")
anno <- data.frame(doc_id = anno$id, text = anno$feedback, stringsAsFactors = FALSE)
anno <- udpipe(anno, "dutch", trace = 10)
## Get cooccurrences of nouns / adjectives and proper nouns
biterms <- as.data.table(anno)
biterms <- biterms[, cooccurrence(x = lemma,
                                  relevant = upos %in% c("NOUN", "PROPN", "ADJ"),
                                  skipgram = 2),
                     by = list(doc_id)]
## Build the BTM model
## Fix the random seed so that the fitted model is reproducible
set.seed(123456)
x <- subset(anno, upos %in% c("NOUN", "PROPN", "ADJ"))
x <- x[, c("doc_id", "lemma")]
model <- BTM(x, k = 5, beta = 0.01, iter = 2000, background = TRUE,
             biterms = biterms, trace = 100)
## All arguments of the plot method at their defaults
plot(model)
# }

# \dontshow{
}
# End of main if statement running only if the required packages are installed
# }

Run the code above in your browser using DataLab

Description

Usage

Value

Arguments

See Also

Examples