# Example: shrinking the result of dfm() with trim(), first by frequency
# thresholds and then by sampling, inspecting the result after each step.

# Build `dtm` from inaugCorpus and show its dimensions as a baseline.
dtm <- dfm(inaugCorpus)
dim(dtm)
# Threshold-based trim; compare dim(dtmReduced) with dim(dtm) above.
dtmReduced <- trim(dtm, minCount = 10, minDoc = 2) # only words occurring >=10 times and in >=2 docs
dim(dtmReduced)
# decreasing = FALSE asks for the non-decreasing ordering -- presumably the
# least frequent of the retained words are listed first (TODO confirm).
topfeatures(dtmReduced, decreasing = FALSE)
# Sampling-based trim. NOTE(review): no seed is set here, so if the sample is
# drawn randomly the selected words may differ between runs -- confirm.
dtmSampled <- trim(dtm, minCount = 20, nsample = 50) # sample 50 words over 20 count
dtmSampled # 57 x 50 words
topfeatures(dtmSampled)
# Run the code above in your browser using DataLab