# NOT RUN {
# Input form 1: the path to an XML file on disk
x <- system.file("examples/elsevier_1.xml", package = "pubchunks")
# request one section per call ...
pub_chunks(x, sections = "title")
pub_chunks(x, sections = "authors")
pub_chunks(x, sections = "acknowledgments")
pub_chunks(x, sections = "refs")
# ... or several sections in a single call
pub_chunks(x, sections = c("title", "refs"))
# }
# NOT RUN {
# Input form 2: the XML held in a single character string
# (`x` is the example file path assigned earlier in these examples)
xml_string <- paste0(readLines(x), collapse = "")
pub_chunks(xml_string, "title")
# Input form 3: an XML document already parsed with the xml2 package.
# The string built above is reused instead of reading the file a second time.
xml_doc <- xml2::read_xml(xml_string)
pub_chunks(xml_doc, "title")
# Hindawi: individual sections, the bare reference list, then a combination
hindawi_xml <- system.file("examples/hindawi_1.xml", package = "pubchunks")
pub_chunks(hindawi_xml, sections = "abstract")
pub_chunks(hindawi_xml, sections = "authors")
pub_chunks(hindawi_xml, sections = "aff")
pub_chunks(hindawi_xml, sections = "title")
pub_chunks(hindawi_xml, sections = "refs")[["refs"]]
pub_chunks(hindawi_xml, sections = c("abstract", "title", "authors", "refs"))
# Pensoft: individual sections, the bare reference list, then a combination
pensoft_xml <- system.file("examples/pensoft_1.xml", package = "pubchunks")
pub_chunks(pensoft_xml, sections = "abstract")
pub_chunks(pensoft_xml, sections = "aff")
pub_chunks(pensoft_xml, sections = "title")
pub_chunks(pensoft_xml, sections = "refs")[["refs"]]
pub_chunks(pensoft_xml, sections = c("abstract", "title", "authors", "refs"))
# PeerJ: individual sections, the bare reference list, then a combination
peerj_xml <- system.file("examples/peerj_1.xml", package = "pubchunks")
pub_chunks(peerj_xml, sections = "abstract")
pub_chunks(peerj_xml, sections = "authors")
pub_chunks(peerj_xml, sections = "aff")
pub_chunks(peerj_xml, sections = "title")
pub_chunks(peerj_xml, sections = "refs")[["refs"]]
pub_chunks(peerj_xml, sections = c("abstract", "title", "authors", "refs"))
# Frontiers
x <- system.file("examples/frontiers_1.xml", package = "pubchunks")
pub_chunks(x, "authors")
pub_chunks(x, "aff")
# `$refs` pulls just the reference element out of the returned object
pub_chunks(x, "refs")$refs
# each section is requested once ("abstract" is not repeated)
pub_chunks(x, c("doi", "abstract", "title", "authors", "refs"))
# eLife: authors, affiliations, the bare reference list, then a combination
elife_xml <- system.file("examples/elife_1.xml", package = "pubchunks")
pub_chunks(elife_xml, sections = "authors")
pub_chunks(elife_xml, sections = "aff")
pub_chunks(elife_xml, sections = "refs")[["refs"]]
pub_chunks(elife_xml, sections = c("doi", "title", "authors", "refs"))
# F1000Research: title, affiliations, the bare reference list, then a
# combination that also includes keywords
f1000_xml <- system.file("examples/f1000research_3.xml", package = "pubchunks")
pub_chunks(f1000_xml, sections = "title")
pub_chunks(f1000_xml, sections = "aff")
pub_chunks(f1000_xml, sections = "refs")[["refs"]]
pub_chunks(
  f1000_xml,
  sections = c("doi", "title", "authors", "keywords", "refs")
)
# Copernicus: a combined request first, then affiliations and the bare
# reference list
copernicus_xml <- system.file(
  "examples/copernicus_1.xml",
  package = "pubchunks"
)
pub_chunks(
  copernicus_xml,
  sections = c("doi", "abstract", "title", "authors", "refs")
)
pub_chunks(copernicus_xml, sections = "aff")
pub_chunks(copernicus_xml, sections = "refs")[["refs"]]
# MDPI
# Uses the mdpi_2.xml sample. "examples/mdpi_1.xml" appears to be a second
# MDPI sample that could be swapped in the same way (confirm it ships with
# the package).
x <- system.file("examples/mdpi_2.xml", package = "pubchunks")
pub_chunks(x, "title")
pub_chunks(x, "aff")
pub_chunks(x, "refs")$refs
# Request many sections at once, then inspect each element of the result
vv <- pub_chunks(
  x,
  c("doi", "title", "authors", "keywords", "refs", "abstract", "categories")
)
vv$doi
vv$title
vv$authors
vv$keywords
vv$refs
vv$abstract
vv$categories
# Many inputs at once: pass a list of file paths in a single call
many_paths <- list(
  system.file("examples/frontiers_1.xml", package = "pubchunks"),
  system.file("examples/elife_1.xml", package = "pubchunks"),
  system.file("examples/f1000research_1.xml", package = "pubchunks")
)
pub_chunks(many_paths, sections = c("doi", "title", "authors", "refs"))
# TODO: document how non-XML files/content are handled, e.g.:
# pub_chunks('foo bar')
# PubMed brief XML files (abstract only)
pubmed_brief <- system.file(
  "examples/pubmed_brief_1.xml",
  package = "pubchunks"
)
pub_chunks(pubmed_brief, sections = "title")
# PubMed full XML files
pubmed_full <- system.file(
  "examples/pubmed_full_1.xml",
  package = "pubchunks"
)
pub_chunks(pubmed_full, sections = "title")
# using output of fulltext::ft_get()
# fulltext is an optional dependency, so this part only runs when it is
# installed. Its functions are called with an explicit `fulltext::` prefix
# throughout instead of attaching the package, which leaves the search path
# unchanged.
if (requireNamespace("fulltext", quietly = TRUE)) {
  # single
  x <- fulltext::ft_get("10.7554/eLife.03032")
  pub_chunks(fulltext::ft_collect(x), sections = "authors")
  # many
  dois <- c(
    "10.1371/journal.pone.0086169",
    "10.1371/journal.pone.0155491",
    "10.7554/eLife.03032"
  )
  x <- fulltext::ft_get(dois)
  pub_chunks(fulltext::ft_collect(x), sections = "authors")
  # as.ft_data() function
  x <- fulltext::ft_collect(fulltext::as.ft_data())
  names(x)
  x$cached
  pub_chunks(x, "title")
  # nested call instead of `%>%`, so no pipe operator needs to be in scope
  pub_tabularize(pub_chunks(x, "title"))
}
# }
# Run the code above in your browser using DataLab