# Twitter json
mytf1 <- textfile("http://www.kenbenoit.net/files/tweets.json")
summary(corpus(mytf1), 5)
# generic json - needs a textField specifier
mytf2 <- textfile("http://www.kenbenoit.net/files/sotu.json",
textField = "text")
summary(corpus(mytf2))
# text file
mytf3 <- textfile("https://www.gutenberg.org/cache/epub/2701/pg2701.txt", cache = FALSE)
summary(corpus(mytf3))
# XML data
mytf6 <- textfile("http://www.kenbenoit.net/files/plant_catalog.xml",
textField = "COMMON")
summary(corpus(mytf6))
# csv file
write.csv(data.frame(inaugSpeech = texts(inaugCorpus), docvars(inaugCorpus)),
file = "/tmp/inaugTexts.csv", row.names = FALSE)
mytf7 <- textfile("/tmp/inaugTexts.csv", textField = "inaugSpeech")
summary(corpus(mytf7))
# vector of full filenames for a recursive structure
textfile(list.files(path = "~/Desktop/texts", pattern = "\\.txt$",
full.names = TRUE, recursive = TRUE))
Run the code above in your browser using DataLab