#
# Create a corpus from the texts in inaugTexts (the result is not assigned)
corpus(inaugTexts)

# Create a corpus from ukimmigTexts, attaching a document variable `party`
# taken from the names of the texts and declaring the input encoding
ukimmigCorpus <- corpus(
  ukimmigTexts,
  docvars = data.frame(party = names(ukimmigTexts)),
  enc = "UTF-8"
)
# Import texts from a CSV file; the fifth column of this file is the text field
mytexts <- textfile(
  "http://www.kenbenoit.net/files/text_example.csv",
  textField = 5
)
str(mytexts)
mycorp <- corpus(mytexts)

# Same import, but selecting the text field by the column name "Title"
mycorp2 <- corpus(
  textfile(
    "http://www.kenbenoit.net/files/text_example.csv",
    textField = "Title"
  )
)

# Compare the texts and the document variables of the two corpora
identical(texts(mycorp), texts(mycorp2))
identical(docvars(mycorp), docvars(mycorp2))
#
## Import a tm VCorpus (this section runs only when tm can be loaded)
# require() is used here because it attaches tm, which the unqualified
# data(crude) call below relies on to find the data set.
if (require(tm)) {
  # load in a tm example VCorpus
  data(crude)
  mytmCorpus <- corpus(crude)
  summary(mytmCorpus, showmeta = TRUE)
}
# Run the code above in your browser using DataLab