## Toy example: evaluate two independently drawn random label sequences
## (1000 letters each) against one another, using LETTERS as the label set.
pred <- sample(x = LETTERS, size = 1000, replace = TRUE)
gold <- sample(x = LETTERS, size = 1000, replace = TRUE)
crf_evaluation(pred = pred, obs = gold, labels = LETTERS)
# \donttest{
## Fetch the "conll2002-nl" model data and add attribute columns built from
## the token and pos columns, computed within doc_id / sentence_id.
x <- crf_cbind_attributes(
  ner_download_modeldata("conll2002-nl"),
  terms = c("token", "pos"),
  by = c("doc_id", "sentence_id")
)

## Split into training and test rows based on the data column.
crf_train <- subset(x, data == "ned.train")
crf_test <- subset(x, data == "testa")

## Every column whose name contains "token" or "pos" is used as an attribute.
attributes <- colnames(x)[grepl("token|pos", colnames(x))]

## Fit the CRF on the training rows, grouped by doc_id.
model <- crf(
  y = crf_train$label,
  x = crf_train[, attributes],
  group = crf_train$doc_id,
  method = "lbfgs"
)
             
## Apply the trained model to the already tokenised test rows
scores <- predict(
  model,
  newdata = crf_test[, attributes],
  group = crf_test$doc_id
)

## Evaluate once without an explicit label set ...
crf_evaluation(pred = scores$label, obs = crf_test$label)

## ... and once with the labels spelled out explicitly.
crf_evaluation(
  pred = scores$label,
  obs = crf_test$label,
  labels = c(
    "O",
    "B-ORG", "I-ORG",
    "B-PER", "I-PER",
    "B-LOC", "I-LOC",
    "B-MISC", "I-MISC"
  )
)
             
## Optional: evaluate with the B-/I- prefixed labels collapsed to ORG / PER /
## LOC / MISC. This part only runs when the udpipe package is installed.
## requireNamespace() checks availability without attaching the package to the
## search path, so txt_recode is called with an explicit udpipe:: prefix.
if (requireNamespace("udpipe", quietly = TRUE)) {
  ## One shared mapping, applied identically to predictions and observations;
  ## the two vectors are position-aligned (from[i] is recoded to to[i]).
  bio_labels <- c("B-ORG", "I-ORG", "B-PER", "I-PER",
                  "B-LOC", "I-LOC", "B-MISC", "I-MISC")
  entity_labels <- c("ORG", "ORG", "PER", "PER",
                     "LOC", "LOC", "MISC", "MISC")

  pred <- udpipe::txt_recode(scores$label,
                             from = bio_labels, to = entity_labels)
  obs <- udpipe::txt_recode(crf_test$label,
                            from = bio_labels, to = entity_labels)

  crf_evaluation(pred = pred, obs = obs,
                 labels = c("ORG", "LOC", "PER", "MISC", "O"))
} # End of the block that runs only when udpipe is installed
# }
# Run the code above in your browser using DataLab