# Bring the example data sets into the workspace:
# word embeddings first, then the example text
data("ft_wv_sample")
data("jfk_speech")

# Minimal cleaning in one pass: lower-case every sentence, then replace
# each run of punctuation characters with a single space
jfk_speech$sentence <- gsub(
  "[[:punct:]]+", " ",
  tolower(jfk_speech$sentence)
)

# Build the document-term matrix from the `sentence` column, using
# `sentence_id` as the document identifier
dtm <- dtm_builder(jfk_speech, text = sentence, doc_id = sentence_id)

# Document similarity from the same DTM under four different methods.
# These two calls use the DTM alone ...
dsm_prj <- doc_similarity(dtm, method = "projection")
dsm_cos <- doc_similarity(dtm, method = "cosine")

# ... while these two are also given the word embeddings through `wv`
dsm_wmd <- doc_similarity(dtm, method = "wmd", wv = ft_wv_sample)
dsm_cen <- doc_similarity(dtm, method = "centroid", wv = ft_wv_sample)
# Run the code above in your browser using DataLab