# NOT RUN {
# Document similarities: build a dfm from the inaugural addresses given
# after 1980, dropping punctuation and English stopwords.
dfmat <- dfm(
  corpus_subset(data_corpus_inaugural, Year > 1980),
  remove = stopwords("english"),
  remove_punct = TRUE
)

# Cosine similarity between all pairs of documents; the outer parentheses
# print the result as well as assigning it.
(tstat1 <- textstat_simil(dfmat, margin = "documents", method = "cosine"))
as.matrix(tstat1)
as.list(tstat1)

# Similarities restricted to specific documents. Calls that do not pass
# `method` fall back to the function's default method.
textstat_simil(dfmat, margin = "documents", selection = "2017-Trump")
textstat_simil(
  dfmat,
  margin = "documents",
  method = "cosine",
  selection = "2017-Trump"
)
textstat_simil(
  dfmat,
  margin = "documents",
  selection = c("2009-Obama", "2013-Obama")
)

# Term (feature) similarities for a handful of selected terms.
tstat2 <- textstat_simil(
  dfmat,
  margin = "features",
  method = "cosine",
  selection = c("fair", "health", "terror")
)
head(as.matrix(tstat2), 10)
as.list(tstat2, n = 8)
# Build a dfm from the inaugural addresses given after 1990, with English
# stopwords and punctuation removed and the remaining terms stemmed.
dfmat <- dfm(
  corpus_subset(data_corpus_inaugural, Year > 1990),
  remove_punct = TRUE,
  stem = TRUE,
  remove = stopwords("english")
)

# Distances between all pairs of documents; the outer parentheses print
# the result as well as assigning it.
(tstat1 <- textstat_dist(dfmat, margin = "documents"))
as.matrix(tstat1)

# Distances restricted to specific documents. The document names are
# passed positionally as the second argument, exactly as before.
textstat_dist(dfmat, "2017-Trump", margin = "documents")
(tstat2 <- textstat_dist(
  dfmat,
  c("2009-Obama", "2013-Obama"),
  margin = "documents"
))
as.list(tstat2)
# }
# Run the code above in your browser using DataLab