# NOT RUN {
# Example data: a factor with an explicit level order, a numeric column and
# a character column (kept as character via stringsAsFactors = FALSE).
my_DF <- data.frame(
  var1 = factor(
    rep(c("low", "medium", "high"), each = 4),
    levels = c("low", "medium", "high")
  ),
  var2 = c(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 3, 2),
  var3 = rep(c("bbb", "aaa", "bbb"), times = 4),
  stringsAsFactors = FALSE
)

# Basic use: sum 'var2' per unique combination of 'var3' and 'var1'
# (the grouping works just like with the function count()).
aggro(my_DF, c("var3", "var1"), "var2", sum)
# Things to note:
# * Column names are passed as quoted strings (single or double quotes).
# * Functions are passed as bare names (e.g. sum): no quotes, no parentheses.
# * The output is ordered; in principle alphanumerically, except that when a
#   'group.by' column is an ordered factor, the factor order is followed.

# Up to this point the results are the same as with aggregate():
aggregate(var2 ~ var3 + var1, data = my_DF, FUN = sum)

# It gets more interesting/useful when the results are split, or when NAs
# are involved; in both cases aggro() deviates from aggregate().

# With split.by. Non-factor columns can also be used for 'split.by':
aggro(my_DF, group.by = "var1", num.column = "var2", FUN = sum,
      split.by = "var3")

# With NAs. For the 'group.by' variable, NAs are treated as 'factor'.
# When there are NAs in the 'split.by' column, an extra NA column is
# returned, specifying the counts of the NAs.
my_DF_w_NA <- my_DF  # same data as above, but now with some NAs added
my_DF_w_NA$var1[1] <- NA
my_DF_w_NA$var2[c(6, 10)] <- NA
my_DF_w_NA$var3[10] <- NA
aggro(my_DF_w_NA, c("var1", "var3"), "var2", sum)
# Compare with:
aggregate(var2 ~ var1 + var3, data = my_DF_w_NA, FUN = sum)

# And indeed, with a split.by (after adding one more NA to 'var3'):
my_DF_w_NA$var3[8] <- NA
aggro(my_DF_w_NA, group.by = "var1", num.column = "var2", FUN = sum,
      split.by = "var3")
# }
# Run the code above in your browser using DataLab