# Example: generating and evaluating sentiment dictionaries.
#
# Packages are attached up front so the script also runs in a fresh session.
# SentimentAnalysis is the package whose functions are called below
# (generateDictionary(), compareToResponse(), plotSentimentResponse());
# tm is needed for the weightTf weighting function used further down.
library(SentimentAnalysis)
library(tm)

# Create a vector of strings and one numeric response value per document
documents <- c(
  "This is a good thing!",
  "This is a very good thing!",
  "This is okay.",
  "This is a bad thing.",
  "This is a very bad thing."
)
response <- c(1, 0.5, 0, -0.5, -1)

# Generate dictionary with LASSO regularization
dictionary <- generateDictionary(documents, response)

# Show dictionary
dictionary
summary(dictionary)
plot(dictionary)

# Compute in-sample performance
sentiment <- predict(dictionary, documents)
compareToResponse(sentiment, response)
plotSentimentResponse(sentiment, response)

# Generate new dictionary with tf weighting instead of tf-idf
dictionary <- generateDictionary(documents, response, weighting = weightTf)
sentiment <- predict(dictionary, documents)
compareToResponse(sentiment, response)

# Use instead lambda.min from the LASSO estimation
dictionary <- generateDictionary(documents, response, s = "lambda.min")
sentiment <- predict(dictionary, documents)
compareToResponse(sentiment, response)

# Generate dictionary without LASSO intercept
dictionary <- generateDictionary(documents, response, intercept = FALSE)
# Show the intercept stored in the resulting dictionary
dictionary$intercept
## Not run: ------------------------------------
# imdb <- loadImdb()
#
# # Generate Dictionary
# dictionary_imdb <- generateDictionary(imdb$Corpus, imdb$Rating, family="poisson")
# summary(dictionary_imdb)
#
# compareDictionaries(dictionary_imdb,
# loadDictionaryGI())
#
# # Show estimated coefficients with Kernel Density Estimation (KDE)
# plot(dictionary_imdb)
# plot(dictionary_imdb) + xlim(c(-0.1, 0.1))
#
# # Compute in-sample performance
# pred_sentiment <- predict(dictionary_imdb, imdb$Corpus)
# compareToResponse(pred_sentiment, imdb$Rating)
#
# # Test a different sparsity parameter
# dictionary_imdb <- generateDictionary(imdb$Corpus, imdb$Rating, family="poisson", sparsity=0.99)
# summary(dictionary_imdb)
# pred_sentiment <- predict(dictionary_imdb, imdb$Corpus)
# compareToResponse(pred_sentiment, imdb$Rating)
## ---------------------------------------------
# Run the code above in your browser using DataLab