# NOT RUN {
# Set up a scratch directory containing three small text documents
# (one term per line).
td <- tempfile()
dir.create(td)
writeLines(c("dog", "cat", "mouse"), con = file.path(td, "D1"))
writeLines(c("hamster", "mouse", "sushi"), con = file.path(td, "D2"))
writeLines(c("dog", "monster", "monster"), con = file.path(td, "D3"))

# Build a document-term matrix from every file in the directory.
textmatrix(td)

# Same input, this time passing the German stopword list so those
# terms are filtered out.
data(stopwords_de)
textmatrix(td, stopwords = stopwords_de)

# Same input, restricted to a controlled vocabulary.
voc <- c("dog", "mouse")
textmatrix(td, vocabulary = voc, minWordLength = 1)

# Remove the scratch directory and everything inside it.
unlink(td, recursive = TRUE)
# }
# Run the code above in your browser using DataLab