culling(input.table, culling.level = 0)
colnames(input.table)
See also: delete.stop.words, stylo.pronouns
# Toy example: build a small matrix of word frequencies
# (note that these counts are entirely fictional).
t1 <- c(2, 1, 0, 2, 9, 1, 0, 0, 2, 0)
t2 <- c(1, 0, 4, 2, 1, 0, 3, 0, 1, 3)
t3 <- c(5, 2, 2, 0, 6, 0, 1, 0, 0, 0)
t4 <- c(1, 4, 1, 0, 0, 0, 0, 3, 0, 1)
my.data.table <- rbind(t1, t2, t3, t4)
# Label both dimensions at once: rows are the samples,
# columns are the variables (e.g. words).
dimnames(my.data.table) <- list(
  c("text1", "text2", "text3", "text4"),
  c("the", "of", "in", "she", "me", "you", "them", "if", "they", "he")
)
# Show the resulting table.
print(my.data.table)
# Keep only the words that appear in at least 50% of the samples.
culling(my.data.table, culling.level = 50)
Run the code above in your browser using DataLab