if (FALSE) {
  # Usage examples for aggregate_rsyntax(). Everything lives inside an
  # `if (FALSE)` guard, so nothing here is evaluated when the file is sourced.

  # Start from a copy of the tc_sotu_udpipe object and call its
  # udpipe_clauses() method (presumably this adds the "clause" annotation
  # that the calls below aggregate over -- confirm in the package docs).
  tc <- tc_sotu_udpipe$copy()
  tc$udpipe_clauses()

  # Aggregate the "clause" annotation with txt = TRUE and preview the result.
  subject_verb_predicate <- aggregate_rsyntax(tc, "clause", txt = TRUE)
  head(subject_verb_predicate)

  ## Custom aggregation functions can be supplied through agg_label().

  ## Number of tokens in the predicate.
  aggregate_rsyntax(
    tc, "clause",
    agg_label("predicate", n = length(token_id))
  )

  ## Same count, but with txt restricted to the subject label.
  aggregate_rsyntax(
    tc, "clause",
    txt = "subject",
    agg_label("predicate", n = length(token_id))
  )

  ## Example application: sentiment scores for specific subjects.

  # Step 1: code subjects into the "who" column using queries.
  tc$code_features(
    column = "who",
    query = c(
      "I# I~s ",
      "we# we americans "
    )
  )

  # Step 2: build a sentiment dictionary in which the "negative" and
  # "neg_positive" codes score -1 and every other code scores 1, then
  # apply it to the tokens.
  dict <- melt_quanteda_dict(quanteda::data_dictionary_LSD2015)
  dict$sentiment <- ifelse(
    dict$code %in% c("negative", "neg_positive"),
    -1,
    1
  )
  tc$code_dictionary(dict)

  # Step 3: per clause, take the first non-missing `who` value found in the
  # subject and the mean sentiment of the predicate.
  sent <- aggregate_rsyntax(
    tc, "clause",
    txt = "predicate",
    agg_label("subject", subject = na.omit(who)[1]),
    agg_label("predicate", sentiment = mean(sentiment, na.rm = TRUE))
  )
  head(sent)

  # Mean sentiment and number of rows for each subject.
  sent[
    ,
    list(sentiment = mean(sentiment, na.rm = TRUE), n = .N),
    by = "subject"
  ]
}
# Run the code above in your browser using DataLab