require("NLP")
s <- as.String(paste("Stanford University is located in California.",
                     "It is a great university."))
s
## Annotators: ssplit, tokenize:
if (ping_nlp_client() == "pong") {
p <- StanfordCoreNLP_Pipeline(NULL)
a <- p(s)
a
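## Annotations are spans, so they can be used to subscript the string
## directly via NLP's `[` method for String objects.  A quick sketch,
## assuming the tokens are annotated with type "word" (the NLP
## convention; only "sentence" is used explicitly in this example):
s[subset(a, type == "sentence")]
s[subset(a, type == "word")]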
## Annotators: ssplit, tokenize, pos, lemma (default):
p <- StanfordCoreNLP_Pipeline()
a <- p(s)
a
## Equivalently:
annotate(s, p)
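## The default pipeline adds POS tags and lemmas to the word tokens.
## A sketch of pulling these from the token features, assuming the
## feature names "POS" and "lemma" (not shown in this example):
w <- subset(a, type == "word")
sapply(w$features, `[[`, "POS")
sapply(w$features, `[[`, "lemma")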
## Annotators: ssplit, tokenize, parse:
p <- StanfordCoreNLP_Pipeline("parse")
a <- p(s)
a
## Respective formatted parse trees using Penn Treebank notation
## (see <https://catalog.ldc.upenn.edu/docs/LDC95T7/cl93.html>):
ptexts <- sapply(subset(a, type == "sentence")$features, `[[`, "parse")
ptexts
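## Assuming the formatted parse strings contain the usual line breaks
## and indentation, writeLines() shows a tree in its original layout:
writeLines(ptexts[[1L]])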
## Read into NLP Tree objects.
ptrees <- lapply(ptexts, Tree_parse)
ptrees
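## The terminal tokens can be recovered from the trees with a small
## recursive helper (tree_leaves() is hypothetical, not part of NLP;
## Tree objects store subtrees in $children, with character leaves at
## the bottom):
tree_leaves <- function(x) {
    if (!inherits(x, "Tree")) return(x)
    unlist(lapply(x$children, tree_leaves))
}
lapply(ptrees, tree_leaves)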
## Basic dependencies:
depends <- lapply(subset(a, type == "sentence")$features, `[[`,
                  "basic-dependencies")
depends
## Note that the non-zero ids (gid for governor and did for dependent)
## refer to word token positions within the respective sentences, not to
## the ids of these tokens in the annotation.  The two can easily be
## matched using the sentence constituents features:
lapply(subset(a, type == "sentence")$features, `[[`, "constituents")
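## A sketch of such a matching for the first sentence: index the
## sentence constituents by the non-zero word positions (gid/did are the
## columns described above; the gtid/dtid names added here are ours):
sf <- subset(a, type == "sentence")$features
const1 <- sf[[1L]]$constituents
dep1 <- as.data.frame(depends[[1L]])
dep1$gtid <- const1[ifelse(dep1$gid > 0, dep1$gid, NA)]
dep1$dtid <- const1[ifelse(dep1$did > 0, dep1$did, NA)]
dep1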
## (Similarly for sentence ids used in dcoref document features.)
## Note also that the dependencies are returned as a data frame
## inheriting from class "Stanford_typed_dependencies" which has print
## and format methods for obtaining the usual formatting.
depends[[1L]]
## Use as.data.frame() to strip this class:
as.data.frame(depends[[1L]])
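## Once stripped, ordinary data frame operations apply, e.g. selecting
## the subject relation (assuming a 'type' column holding the dependency
## labels, as in the printed output above):
df <- as.data.frame(depends[[1L]])
df[df$type == "nsubj", ]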
}