## Build a small two-document tCorpus with sentence splitting switched on.
## NOTE(review): the input here is a character vector; `doc_column` looks like
## an option meant for data.frame input -- confirm it has any effect here.
tc <- create_tcorpus(
  c(
    "First text first sentence. First text first sentence.",
    "Second text first sentence"
  ),
  doc_column = "id",
  split_sentences = TRUE
)

## Preprocess the 'token' column into a new 'feature' column, with stopword
## removal and stemming enabled.
tc$preprocess("token", "feature", remove_stopwords = TRUE, use_stemming = TRUE)
tc$tokens

## Default output form: a regular sparse matrix (Matrix package).
m <- get_dtm(tc, "feature")
class(m)
m

## Other output forms can be requested through `form`: 'quanteda_dfm' for
## quanteda or 'tm_dtm' for tm.
# \donttest{
m <- get_dtm(tc, "feature", form = "quanteda_dfm")
class(m)
m
# }

## Use sentences rather than documents as the rows of the DTM.
m <- get_dtm(tc, "feature", context_level = "sentence")
nrow(m)

## Apply a weighting scheme to the DTM.
m <- get_dtm(tc, "feature", weight = "norm_tfidf")
## Run the code above in your browser using DataLab