# NOT RUN {
# Example: fit a topic model with lda::lda.collapsed.gibbs.sampler on a small
# random subset of a document-term matrix (DTM), then reformat the raw sampler
# output with FormatRawLdaOutput().
# NOTE: Dtm2Docs() and FormatRawLdaOutput() are called unqualified, so the
# package that provides them must already be attached.

# Load a pre-formatted DTM
data(nih_sample_dtm)

# Get a random sample of 20 documents (rows of the DTM).
# seq_len() is used rather than 1:nrow() so the index vector stays well-formed
# even when the matrix has zero rows. No seed is set, so the sampled rows
# differ from run to run.
dtm <- nih_sample_dtm[sample(seq_len(nrow(nih_sample_dtm)), 20), ]

# Re-create a character vector of documents from the DTM
lex <- Dtm2Docs(dtm)

# Format for input to lda::lda.collapsed.gibbs.sampler, using the DTM's
# column names as the vocabulary
lex <- lda::lexicalize(lex, vocab = colnames(dtm))

# Fit the model from lda::lda.collapsed.gibbs.sampler
lda <- lda::lda.collapsed.gibbs.sampler(
  documents = lex,
  K = 5,
  vocab = colnames(dtm),
  num.iterations = 200,
  alpha = 0.1,
  eta = 0.05
)

# Format the result to get phi and theta matrices; the DTM's row names are
# passed as the document names
lda <- FormatRawLdaOutput(
  lda_result = lda,
  docnames = rownames(dtm),
  smooth = TRUE
)
# }
# Run the code above in your browser using DataLab