# Fetch the full-text XML for a single article, identified by its DOI.
# NOTE(review): plos_fulltext() and searchplos() are presumably provided by
# the rplos package, which must already be attached -- confirm.
plos_fulltext(doi = "10.1371/journal.pone.0086169")

# Several DOIs can be requested in one call.
plos_fulltext(
  c(
    "10.1371/journal.pone.0086169",
    "10.1371/journal.pbio.1001845"
  )
)

# Search for up to 20 full-text documents, keep their ids, and download them.
dois <- searchplos(q = "*:*", fq = "doc_type:full", limit = 20)$id
out <- plos_fulltext(dois)

# Subset the result by DOI name or by position.
out["10.1371/journal.pone.0013747"]
out[1:2]
# Extract text from the XML strings: parse the second and third articles and
# return the text value of every <abstract> node found in each.
library("XML")
lapply(out[2:3], function(article_xml) {
  xpathApply(xmlParse(article_xml), "//abstract", xmlValue)
})
# Make a text corpus
library("tm")
# Reduce every article to a single string holding the text of its <body>
# node(s), so that VectorSource() is handed a plain character vector (one
# element per article) rather than a list of xpathApply() result lists.
out_parsed <- vapply(out, function(article_xml) {
  body_text <- unlist(xpathApply(xmlParse(article_xml), "//body", xmlValue))
  # paste() yields "" when nothing matched and joins multiple matches
  # with a single space.
  paste(body_text, collapse = " ")
}, character(1))
tmcorpus <- Corpus(VectorSource(out_parsed))
# Build the document-term matrix; the outer parentheses also print it.
(dtm <- DocumentTermMatrix(tmcorpus))
# List the terms whose frequency is at least 50.
findFreqTerms(dtm, lowfreq = 50)