# =====================
# surface co-occurrence
# =====================
# Build a text object from a short sentence, then compute surface
# co-occurrence counts with span "2R". Judging by the expected output below,
# "2R" pairs each node (x) with the two tokens that follow it, and types are
# lower-cased with punctuation dropped.
x <- corp_text("A man, a plan, a canal -- Panama!")
y <- corp_surface(x, span = "2R")
# Expected output: one row per (node x, collocate y) pair.
# NOTE(review): H and M look like hit / miss counts (e.g. for node "a":
# 3 occurrences * 2 span slots = 6 = H + M on every "a" row) -- confirm
# against the package documentation.
corp_get_counts(y)
##         x      y H M
##  1:     a      a 2 4
##  2:     a  canal 1 5
##  3:     a    man 1 5
##  4:     a panama 1 5
##  5:     a   plan 1 5
##  6: canal panama 1 0
##  7:   man      a 1 1
##  8:   man   plan 1 1
##  9:  plan      a 1 1
## 10:  plan  canal 1 1
# Filter on nodes: only the listed types are kept on the node (x) side.
# No `collocates` argument is given, so the collocate (y) side is unfiltered.
y <- corp_surface(
  x,
  span = "2R",
  nodes = c("canal", "man", "plan")
)
# Expected output: the rows whose node is "a" are gone.
corp_get_counts(y)
##        x      y H M
## 1: canal panama 1 0
## 2:   man      a 1 1
## 3:   man   plan 1 1
## 4:  plan      a 1 1
## 5:  plan  canal 1 1
# Filter on nodes and collocates: keep only pairs whose node (x) is one of
# `nodes` AND whose collocate (y) is one of `collocates`.
y <- corp_surface(
  x,
  span = "2R",
  nodes = c("canal", "man", "plan"),
  collocates = c("panama", "a")
)
# Expected output: only pairs ending in "panama" or "a" remain.
corp_get_counts(y)
##        x      y H M
## 1: canal panama 1 0
## 2:   man      a 1 1
## 3:  plan      a 1 1
# Co-occurrence barrier: supply a custom tokenisation in which NA rows
# separate two stretches of the text.
# `start` / `end` look like 1-based inclusive character offsets into the
# sentence (e.g. "man" = 3..5) -- TODO confirm against the package docs.
# Two NA rows sit between "plan" and the third "a", as many as the span
# width, so per the expected output below no pair is counted across the gap.
tokens_with_barrier <- data.frame(
  type = c(
    "a", "man", "a", "plan",
    NA_character_, NA_character_,
    "a", "canal", "panama"
  ),
  start = c(1L, 3L, 8L, 10L, NA_integer_, NA_integer_, 16L, 18L, 27L),
  end = c(1L, 5L, 8L, 13L, NA_integer_, NA_integer_, 16L, 22L, 32L),
  stringsAsFactors = FALSE
)
x <- corp_text(
  "A man, a plan, a canal -- Panama!",
  tokens = tokens_with_barrier
)
y <- corp_surface(x, span = "2R")
# Expected output: "plan" no longer appears as a node, and the "a"/"a"
# count drops from 2 to 1 compared with the default tokenisation.
corp_get_counts(y)
##        x      y H M
## 1:     a      a 1 4
## 2:     a  canal 1 4
## 3:     a    man 1 4
## 4:     a panama 1 4
## 5:     a   plan 1 4
## 6: canal panama 1 0
## 7:   man      a 1 1
## 8:   man   plan 1 1
# Run the code above in your browser using DataLab