# Toy example: both the predicted and the gold labels are drawn at random
# from LETTERS, then scored against each other over the full LETTERS label set.
pred <- sample(x = LETTERS, size = 1000, replace = TRUE)
gold <- sample(x = LETTERS, size = 1000, replace = TRUE)
crf_evaluation(
  pred = pred,
  obs = gold,
  labels = LETTERS
)
# \donttest{
# Fetch the "conll2002-nl" model data and bind attribute columns derived from
# the token and pos terms, using doc_id and sentence_id as the `by` keys.
x <- ner_download_modeldata("conll2002-nl")
x <- crf_cbind_attributes(
  x,
  terms = c("token", "pos"),
  by = c("doc_id", "sentence_id")
)

# Split rows into a training and a test set based on the `data` column.
crf_train <- subset(x, data == "ned.train")
crf_test <- subset(x, data == "testa")

# Every column whose name matches "token" or "pos" is passed to the model.
attributes <- colnames(x)[grepl("token|pos", colnames(x))]

model <- crf(
  y = crf_train$label,
  x = crf_train[, attributes],
  group = crf_train$doc_id,
  method = "lbfgs"
)

## Use the model to score on existing tokenised data
scores <- predict(
  model,
  newdata = crf_test[, attributes],
  group = crf_test$doc_id
)

# Evaluate the predictions: first without, then with an explicit label set.
crf_evaluation(pred = scores$label, obs = crf_test$label)
crf_evaluation(
  pred = scores$label,
  obs = crf_test$label,
  labels = c(
    "O",
    "B-ORG", "I-ORG", "B-PER", "I-PER",
    "B-LOC", "I-LOC", "B-MISC", "I-MISC"
  )
)
# Collapse the B-/I- prefixed tags into plain entity types and evaluate on
# those. udpipe is an optional dependency here, so test for it with
# requireNamespace() and call txt_recode() through its namespace rather than
# attaching the package to the search path.
if (requireNamespace("udpipe", quietly = TRUE)) {
  # One shared mapping, so predictions and observations are recoded identically.
  bio_tags <- c("B-ORG", "I-ORG", "B-PER", "I-PER",
                "B-LOC", "I-LOC", "B-MISC", "I-MISC")
  entity_types <- c("ORG", "ORG", "PER", "PER",
                    "LOC", "LOC", "MISC", "MISC")
  pred <- udpipe::txt_recode(scores$label, from = bio_tags, to = entity_types)
  obs <- udpipe::txt_recode(crf_test$label, from = bio_tags, to = entity_types)
  crf_evaluation(pred = pred, obs = obs,
                 labels = c("ORG", "LOC", "PER", "MISC", "O"))
} # End of main if statement running only if the required packages are installed
# }
# Run the code above in your browser using DataLab