# Load the XML package so the examples below run as a standalone script.
library(XML)

# Parse the bundled sample document, keeping internal nodes for the
# XPath queries that follow.
doc <- xmlTreeParse(
  system.file("exampleData", "tagnames.xml", package = "XML"),
  useInternalNodes = TRUE
)

# <b> nodes below <doc> that have a status attribute.
getNodeSet(doc, "/doc//b[@status]")
# <b> nodes below <doc> whose status attribute equals 'foo'.
getNodeSet(doc, "/doc//b[@status='foo']")

# Collect the status attribute of every <a> node that has one.
# The XPath predicate guarantees the attribute is present, so each call
# yields a single string and vapply() can enforce a character result.
els <- getNodeSet(doc, "/doc//a[@status]")
vapply(els, function(el) xmlGetAttr(el, "status"), character(1))
# Using a namespace
f <- system.file("exampleData", "SOAPNamespaces.xml", package = "XML")
# Spell out useInternalNodes in full rather than relying on partial
# argument matching ("useInternal").
z <- xmlTreeParse(f, useInternalNodes = TRUE)
# Map the prefix "a" (used only inside the XPath expressions) to the
# SOAP envelope namespace URI.
soap_ns <- c("a" = "http://schemas.xmlsoap.org/soap/envelope/")
# Absolute path from the root element to the Body.
getNodeSet(z, "/a:Envelope/a:Body", soap_ns)
# The same element type located anywhere in the document.
getNodeSet(z, "//a:Body", soap_ns)
# Get two items back with namespaces
f <- system.file("exampleData", "gnumeric.xml", package = "XML")
# Spell out useInternalNodes in full rather than relying on partial
# argument matching ("useInternal").
z <- xmlTreeParse(f, useInternalNodes = TRUE)
# Both steps of the path are qualified with the gmr prefix, which is
# bound here to the gnumeric namespace URI.
getNodeSet(z, "//gmr:Item/gmr:name", c(gmr = "http://www.gnome.org/gnumeric/v2"))
#####
# European Central Bank (ECB) exchange rate data
# Data is available from "http://www.ecb.int/stats/eurofxref/eurofxref-hist.xml"
# or locally.
uri <- system.file("exampleData", "eurofxref-hist.xml.gz", package = "XML")
doc <- xmlTreeParse(uri, useInternalNodes = TRUE)
# The default namespace for all elements is given by
namespaces <- c(ns = "http://www.ecb.int/vocabulary/2002-08-01/eurofxref")
# Get the data for Slovenian currency for all time periods.
# Find all the nodes of the form <Cube currency="SIT"...>
slovenia <- getNodeSet(doc, "//ns:Cube[@currency='SIT']", namespaces)
# Now we have a list of such nodes, loop over them
# and get the rate attribute. vapply() enforces one string per node.
rates <- as.numeric(vapply(slovenia, xmlGetAttr, character(1), "rate"))
# Now put the date on each element.
# Take the time attribute from the parent of each matched node instead of
# running a separate query for every <Cube time=".." ...> node: a separate
# query returns one entry per date, which would not line up with the rates
# if some date had no SIT entry.
# NOTE(review): assumes each currency <Cube> is a direct child of the
# dated <Cube time=".."> node, as in the ECB feed -- confirm.
names(rates) <- vapply(
  slovenia,
  function(node) xmlGetAttr(xmlParent(node), "time"),
  character(1)
)
# Or we could turn these into dates with strptime()
strptime(names(rates), "%Y-%m-%d")
# Using xpathApply, we can do
rates <- xpathApply(doc, "//ns:Cube[@currency='SIT']", xmlGetAttr, "rate", namespaces = namespaces)
rates <- as.numeric(unlist(rates))
# Using an expression rather than a function and ...
# (the result is left as the list returned by xpathApply)
rates <- xpathApply(doc, "//ns:Cube[@currency='SIT']", quote(xmlGetAttr(x, "rate")), namespaces = namespaces)
#
# Prefixed versus default namespaces in XPath queries.
uri <- system.file("exampleData", "namespaces.xml", package = "XML")
d <- xmlTreeParse(uri, useInternalNodes = TRUE)
# An element in a prefixed namespace: bind the prefix to its URI.
getNodeSet(d, "//c:c", namespaces = c(c = "http://www.c.org"))
# The following, perhaps unexpectedly but correctly, returns an empty
# result with no matches: the path uses no prefix, and the namespace
# URI is supplied without a prefix name.
getNodeSet(d, "//defaultNs", namespaces = "http://www.omegahat.org")
# If we instead create our own prefix for the evaluation of the XPath
# expression and use that prefix in the expression, things work as
# one might hope.
getNodeSet(d, "//dummy:defaultNs", namespaces = c(dummy = "http://www.omegahat.org"))
# Since the default value for the namespaces argument is the default
# namespace of the document with the prefix 'd', we can simply write
getNodeSet(d, "//d:defaultNs")
# and the syntactic sugar for the same query is
d["//d:defaultNs"]
# Run the code above in your browser using DataLab