# NOT RUN {
## Example from 13.1 of _An Introduction to Information Retrieval_
##
## Builds a six-document toy corpus, resamples it to 50 documents, fits a
## logistic-regression text model on the `train` docvar, and prints the
## fitted model, its summary, its coefficients and its predictions.

# Six short documents; `train` holds the class label for each one.
# d6 is deliberately left unlabelled (NA).
corp <- quanteda::corpus(
  c(
    d1 = "Chinese Beijing Chinese",
    d2 = "Chinese Chinese Shanghai",
    d3 = "Chinese Macao",
    d4 = "Tokyo Japan Chinese",
    d5 = "London England Chinese",
    d6 = "Chinese Chinese Chinese Tokyo Japan"
  ),
  docvars = data.frame(
    train = factor(c("Y", "Y", "Y", "N", "N", NA))
  )
)

# Tokenise explicitly before building the document-feature matrix:
# calling dfm() directly on a corpus is deprecated since quanteda 3.0 and
# is no longer supported in current releases. tolower = FALSE keeps the
# original capitalisation of the features.
toks <- quanteda::tokens(corp)
dfmat <- quanteda::dfm(toks, tolower = FALSE)

## simulate bigger sample as classification on small samples is problematic
set.seed(1)  # fixed seed so the resampled documents are reproducible
dfmat <- quanteda::dfm_sample(dfmat, 50, replace = TRUE)

## train model
# The outer parentheses print the fitted model as well as assigning it.
# The function is namespace-qualified, like the quanteda calls above, so the
# example does not rely on quanteda.textmodels being attached.
(tmod1 <- quanteda.textmodels::textmodel_lr(
  dfmat,
  quanteda::docvars(dfmat, "train")
))
summary(tmod1)
coef(tmod1)

## predict probability and classes
predict(tmod1, type = "prob")
predict(tmod1)
# }
# Run the code above in your browser using DataLab