# NOT RUN {
# Example: turn the "crude" sample texts shipped with the tm package into a
# one-token-per-row table and pass it to encode().
library(tm)
library(tidytext)

# Locate the sample directory inside the installed tm package and read it
# into a VCorpus using the plain-text Reuters-21578 XML reader.
reut21578 <- system.file("texts", "crude", package = "tm")
reuters.tm <- VCorpus(
  DirSource(reut21578),
  list(reader = readReut21578XMLasPlain)
)

# Convert the corpus into a table with one row per document.
reuters.tibble <- tidy(reuters.tm)

# Collapse each row's "topics_cat" and "places" entry into a single
# "|"-separated string. The function must paste its own argument `x` (the
# current row's entry), not the whole column; otherwise every row receives
# the same concatenation of all rows' values.
# vapply() with character(1) guarantees a character vector result, even for
# zero-row input.
reuters.tibble[["topics_cat"]] <- vapply(
  reuters.tibble[["topics_cat"]],
  function(x) paste(x, collapse = "|"),
  character(1)
)
reuters.tibble[["places"]] <- vapply(
  reuters.tibble[["places"]],
  function(x) paste(x, collapse = "|"),
  character(1)
)

# Split the "text" column into tokens stored in a new "word" column,
# keeping the original letter case.
reuters.tidy <- unnest_tokens(
  reuters.tibble,
  output = "word", input = "text", to_lower = FALSE
)

# NOTE(review): encode() is not defined or attached in this snippet;
# presumably it comes from the package this example documents -- confirm
# that package is loaded before running.
encode(
  reuters.tidy,
  name = "reuters2",
  sAttributes = c("language", "places")
)
# }
# Run the code above in your browser using DataLab