chunks
makes it easy to extract sections of an article. You
can extract just authors across all articles, or all references sections, or
the complete text of each article. Then you can pass the output downstream for
visualization and analysis.
chunks(x, what = "all")
tabularize(x)
An object of class ft_data, the output from a call to ft_get
What to get, can be one or more in a vector or list. See Details.
A list of output, one for each thing requested
Options for the what
parameter:
front - Publisher, journal and article metadata elements
body - Body of the article
back - Back of the article, acknowledgments, author contributions, references
title - Article title
doi - Article DOI
categories - Publisher's categories, if any
authors - Authors
keywords - Keywords
abstract - Article abstract
executive_summary - Article executive summary
refs - References
refs_dois - References DOIs - if available
publisher - Publisher name
journal_meta - Journal metadata
article_meta - Article metadata
acknowledgments - Acknowledgments
permissions - Article permissions
history - Dates, received, published, accepted, etc.
Note that we currently only support PLOS, eLife, and Entrez; more to come.
# NOT RUN {
# Examples for chunks() and tabularize(). Every call below goes through
# ft_get(), searchplos() or ft_search(), so the statements are meant to be run
# in order: `x` and `res` are reassigned several times along the way.
# NOTE(review): %>% is used without an explicit library() call for it in this
# script - presumably the pipe is made available by the package; confirm.

# Get a single article from PLOS by DOI and extract only its authors
x <- ft_get('10.1371/journal.pone.0086169', from='plos')
chunks(x, what="authors")
# Use rplos to search PLOS for the ids of 5 full-text research articles.
# The outer parentheses print the assigned `dois` value.
library("rplos")
(dois <- searchplos(q="*:*", fl='id',
fq=list('doc_type:full',"article_type:\"research article\""), limit=5)$data$id)
x <- ft_get(dois, from="plos")
# Extract one section at a time from the articles in `x`
x %>% chunks("front")
x %>% chunks("body")
x %>% chunks("back")
x %>% chunks("history")
# Several sections can be requested at once by passing a character vector
x %>% chunks(c("doi","history")) %>% tabularize()
x %>% chunks("authors")
x %>% chunks(c("doi","categories"))
x %>% chunks("all")
x %>% chunks("publisher")
x %>% chunks("acknowledgments")
x %>% chunks("permissions")
x %>% chunks("journal_meta")
x %>% chunks("article_meta")
# Coerce list output to a data.frame, where possible
# (same search and ft_get() call as above)
(dois <- searchplos(q="*:*", fl='id',
fq=list('doc_type:full',"article_type:\"research article\""), limit=5)$data$id)
x <- ft_get(dois, from="plos")
x %>% chunks("publisher") %>% tabularize()
x %>% chunks("refs") %>% tabularize()
x %>% chunks(c("doi","publisher")) %>% tabularize()
x %>% chunks(c("doi","publisher","permissions")) %>% tabularize()
# Same idea with three DOIs requested from Entrez instead of PLOS
x <- ft_get(c("10.3389/fnagi.2014.00130",'10.1155/2014/249309','10.1155/2014/162024'),
from='entrez')
x %>% chunks("doi") %>% tabularize()
x %>% chunks("authors") %>% tabularize()
x %>% chunks(c("doi","publisher","permissions")) %>% tabularize()
x %>% chunks("history") %>% tabularize()
# Keywords for a single article requested from Entrez
x <- ft_get('10.3389/fnagi.2014.00130', from='entrez')
x %>% chunks("keywords")
# Piping workflow
# The ids from the PLOS search result are piped into ft_get(), and its output
# is piped into chunks()
opts <- list(fq=list('doc_type:full',"article_type:\"research article\""))
ft_search(query='ecology', from='plos', plosopts = opts)$plos$data$id %>%
ft_get(from = "plos") %>%
chunks("publisher")
# Via entrez
res <- ft_get(c("10.3389/fnagi.2014.00130",'10.1155/2014/249309','10.1155/2014/162024'),
from='entrez')
chunks(res, what="abstract")
chunks(res, what="title")
chunks(res, what="keywords")
chunks(res, what="publisher")
# Search Entrez (outer parentheses print the result), then pass the DOIs from
# the search result to ft_get(); `res` is overwritten here
(res <- ft_search(query='ecology', from='entrez'))
ft_get(res$entrez$data$doi, from='entrez') %>% chunks("title")
# Only the first four DOIs from the search result are used below
ft_get(res$entrez$data$doi[1:4], from='entrez') %>% chunks("acknowledgments")
ft_get(res$entrez$data$doi[1:4], from='entrez') %>% chunks(c('title','keywords'))
# From eLife
x <- ft_get(c('10.7554/eLife.04251', '10.7554/eLife.04986'), from='elife')
x %>% chunks("abstract")
x %>% chunks("publisher")
x %>% chunks("journal_meta")
x %>% chunks("acknowledgments")
x %>% chunks("refs_dois")
# Abstract and executive summary requested together
x %>% chunks(c("abstract", "executive_summary"))
# }
Run the code above in your browser using DataLab