textfile(file, textField, cache = FALSE, docvarsfrom = c("filenames"),
dvsep = "_", docvarnames = NULL, ...)
## S3 method for class 'character,index,ANY,missing,missing,missing':
textfile(file,
textField, cache = FALSE, docvarsfrom = c("filenames"), dvsep = "_",
docvarnames = NULL, ...)
## S3 method for class 'character,missing,ANY,missing,missing,missing':
textfile(file,
textField, cache = FALSE, docvarsfrom = c("filenames"), dvsep = "_",
docvarnames = NULL, ...)
## S3 method for class 'character,missing,ANY,character,ANY,ANY':
textfile(file,
textField = NULL, cache = FALSE, docvarsfrom = c("headers"),
dvsep = "_", docvarnames = NULL, ...)
.csv and .json.
If TRUE, write the object to a temporary file and store the temporary
filename in the corpusSource-class object definition. If FALSE, return the
data in the object. Caching the fil[e] […]
textfile
inputs are filenames and the elements of the filenames are document
variables, separated by a delimiter (dvsep). This allows easy assignment
of […] docvarsfrom="filenames" is used.
[…] docvars, if docvarsfrom is specified. If this argument is not used,
default docvar names will be used (docvar1, docvar2, ...).
[…] textfile is
called.
# Twitter json
mytf1 <- textfile("http://www.kenbenoit.net/files/tweets.json")
summary(corpus(mytf1), 5)
# generic json - needs a textField specifier
mytf2 <- textfile("http://www.kenbenoit.net/files/sotu.json",
textField = "text")
summary(corpus(mytf2))
# text file
mytf3 <- textfile(unzip(system.file("extdata", "pg2701.txt.zip", package = "quanteda")))
summary(corpus(mytf3))
# XML data
mytf6 <- textfile("http://www.kenbenoit.net/files/plant_catalog.xml",
textField = "COMMON")
summary(corpus(mytf6))
# csv file
write.csv(data.frame(inaugSpeech = texts(inaugCorpus), docvars(inaugCorpus)),
file = "/tmp/inaugTexts.csv", row.names = FALSE)
mytf7 <- textfile("/tmp/inaugTexts.csv", textField = "inaugSpeech")
summary(corpus(mytf7))
# vector of full filenames for a recursive structure
textfile(list.files(path = "~/Desktop/texts", pattern = "\\.txt$",
full.names = TRUE, recursive = TRUE))
Run the code above in your browser using DataLab