cr_ft_text(url, type = "xml", path = "~/.crossref", overwrite = TRUE,
  read = TRUE, verbose = TRUE, cache = TRUE, ...)

cr_ft_plain(url, path = "~/.crossref", overwrite = TRUE, read = TRUE,
  verbose = TRUE, ...)

cr_ft_xml(url, path = "~/.crossref", overwrite = TRUE, read = TRUE,
  verbose = TRUE, ...)

cr_ft_pdf(url, path = "~/.crossref", overwrite = TRUE, read = TRUE,
  cache = FALSE, verbose = TRUE, ...)
By default, files are stored under ~/.crossref/, and further named arguments in ... are passed on to GET. cr_ft_text, cr_ft_pdf, cr_ft_xml, and cr_ft_plain are not vectorized.

Note that some links returned will not in fact lead you to full text content as you would understandably think and expect. That is, if you use the filter parameter with e.g. cr_works and filter to only full text content, some links may actually give back only metadata for an article. Elsevier is perhaps the worst offender, for one because they have a lot of entries in Crossref TDM, but most of the links that are apparently full text are not in fact full text, but only metadata.
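Because the functions are not vectorized, fetching several DOIs means looping over them yourself. The snippet below is only a sketch (it is not a function of this package) showing one way to do that with lapply and tryCatch, skipping DOIs whose links fail or turn out to be metadata-only; the limit value and variable names are illustrative.

# Sketch only: loop over DOIs one at a time, since cr_ft_links()/cr_ft_text()
# are not vectorized; tolerate links that fail or only return metadata.
res <- cr_works(filter = c(has_full_text = TRUE), limit = 3)
texts <- lapply(res$data$DOI, function(doi) {
  tryCatch({
    links <- cr_ft_links(doi, "all")
    cr_ft_text(links, "xml")
  }, error = function(e) NULL)
})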
# pdf link
cr_ft_links(doi = "10.5555/515151", "pdf")
# xml and plain text links
out <- cr_works(filter=c(has_full_text = TRUE))
dois <- out$data$DOI
cr_ft_links(dois[2], "xml")
cr_ft_links(dois[1], "plain")
cr_ft_links(dois[1], "all")
# No links
cr_ft_links(cr_r(1), "xml")
# get full text
## pensoft
out <- cr_members(2258, filter=c(has_full_text = TRUE), works = TRUE)
(links <- cr_ft_links(out$data$DOI[1], "all"))
### xml
cr_ft_text(links, 'xml')
### pdf
cr_ft_text(links, "pdf", read=FALSE)
cr_ft_text(links, "pdf")
### another pensoft e.g.
links <- cr_ft_links("10.3897/phytokeys.42.7604", "all")
pdf_read <- cr_ft_text(url = links, type = "pdf", read=FALSE, verbose = FALSE)
pdf <- cr_ft_text(links, "pdf", verbose = FALSE)
## hindawi
out <- cr_members(98, filter=c(has_full_text = TRUE), works = TRUE)
(links <- cr_ft_links(out$data$DOI[1], "all"))
### xml
cr_ft_text(links, 'xml')
### pdf
cr_ft_text(links, "pdf", read=FALSE)
cr_ft_text(links, "pdf")
## search for works with full text, and with CC-BY 3.0 license
### you can see available licenses with cr_licenses() function
out <- cr_works(filter = list(has_full_text = TRUE,
  license_url = "http://creativecommons.org/licenses/by/3.0/"))
(links <- cr_ft_links(out$data$DOI[10], "all"))
cr_ft_text(links, 'xml')
## You can use cr_ft_xml, cr_ft_plain, and cr_ft_pdf to go directly to that format
licenseurl <- "http://creativecommons.org/licenses/by/3.0/"
out <- cr_works(filter = list(has_full_text = TRUE, license_url = licenseurl))
(links <- cr_ft_links(out$data$DOI[10], "all"))
cr_ft_xml(links)
cr_ft_pdf(links)
# Caching, for PDFs
out <- cr_members(2258, filter=c(has_full_text = TRUE), works = TRUE)
(links <- cr_ft_links(out$data$DOI[10], "all"))
cr_ft_text(links, type = "pdf", cache=FALSE)
system.time( cacheyes <- cr_ft_text(links, type = "pdf", cache=TRUE) )
system.time( cacheyes <- cr_ft_text(links, type = "pdf", cache=TRUE) ) # second time is faster
system.time( cacheno <- cr_ft_text(links, type = "pdf", cache=FALSE) )
identical(cacheyes, cacheno)
###################### Things to stay away from
## elife
#### Stay away from eLife for now, they aren't setting content types right, etc.
## elsevier - they don't actually give full text, ha ha, jokes on us!
## requires extra authentication, which we may include later on
# out <- cr_members(78, filter=c(has_full_text = TRUE), works = TRUE)
# links <- cr_ft_links(out$data$DOI[1], "all")
# cr_ft_text(links, 'xml') # notice how this is just metadata
### elsevier articles that are open access
#### one license is for open access articles, but none with full text available
# cr_works(filter=list(license_url="http://www.elsevier.com/open-access/userlicense/1.0/",
# has_full_text=TRUE))
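#### The extra authentication mentioned above is not handled by this package yet.
#### The commented sketch below is only a guess at how a Crossref TDM click-through
#### token might be supplied, assuming the extra args in `...` are passed through to
#### GET; the env var name and whether Elsevier accepts this header are assumptions.
# token <- Sys.getenv("CROSSREF_TDM_KEY")  # hypothetical env var holding your token
# cr_ft_text(links, "xml",
#   httr::add_headers(`CR-Clickthrough-Client-Token` = token))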