# NOT RUN {
# }
# NOT RUN {
# Word-frequency bar plot, word cloud, and comparison cloud built from the
# `ipo` texts. `ipo` is not defined in this script and must already be loaded.
# NOTE(review): per the comments below, documents 1-3 are presumably the
# Facebook, Google, and LinkedIn texts -- confirm against the data source.
library(tm)
library(wordcloud)

# Pre-process data ----
corp <- Corpus(VectorSource(ipo), readerControl = list(language = "en"))
corp <- tm_map(corp, removePunctuation)
# Wrap the base function so the documents remain valid corpus documents;
# a bare `tolower` can leave the corpus unusable for TermDocumentMatrix().
corp <- tm_map(corp, content_transformer(tolower))
corp <- tm_map(corp, removeNumbers)
corp <- tm_map(corp, removeWords, stopwords())

f <- corp[1] # facebook
g <- corp[2] # google
l <- corp[3] # linkedin

# Term frequencies for the first document ----
tmat <- TermDocumentMatrix(f)
m <- as.matrix(tmat)
freq <- rowSums(m)
words <- rownames(m)
words.ord <- sort(freq, decreasing = TRUE)

# head() avoids NA padding when the document has fewer than 15 distinct terms.
barplot(head(words.ord, 15), las = 2)
# `colors` is the actual argument name; `col` only worked by partial matching.
wordcloud(words, freq, min.freq = 100, colors = "blue")

# Compare the first two documents ----
# Subsetting the corpus selects the same two documents as c(f, g) without
# depending on a c() method being available for the corpus class.
tmat <- TermDocumentMatrix(corp[1:2])
m <- as.matrix(tmat)
# Column names become the group labels drawn by comparison.cloud().
colnames(m) <- c("facebook", "google")
comparison.cloud(m, max.words = 100)
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab