## Example on simulated data: 1000 rows whose doc_id is drawn (with
## replacement) from 1..10 and then sorted, so rows of one document are
## contiguous
x <- data.frame(doc_id = sort(sample.int(n = 10, size = 1000, replace = TRUE)))
## Attach a randomly drawn part-of-speech label to every row
x$pos <- sample(c("Art", "N", "Prep", "V", "Adv", "Adj", "Conj",
                  "Punc", "Num", "Pron", "Int", "Misc"),
                size = nrow(x), replace = TRUE)
## Enrich x with attributes built from the 'pos' column, grouped by doc_id.
## NOTE(review): from = -1 / to = 1 presumably select the previous and next
## row, and ngram_max = 3 the longest n-gram -- confirm against the
## crf_cbind_attributes documentation
x <- crf_cbind_attributes(x, terms = "pos", by = "doc_id",
                          from = -1, to = 1, ngram_max = 3)
head(x)
# \donttest{
## Example on some real data
x <- ner_download_modeldata("conll2002-nl")
## The download needs network access. Only build the attributes when a
## data.frame actually came back, so a failed download does not turn into
## an unrelated error inside crf_cbind_attributes
if (inherits(x, "data.frame")) {
  x <- crf_cbind_attributes(x, terms = c("token", "pos"),
                            by = c("doc_id", "sentence_id"),
                            ngram_max = 3, sep = "|")
}
# }
# Run the code above in your browser using DataLab