# The firs example takes the whole matrix average, while the second takes
# every row average
(mn_mat <- apply_matrix(student_results, mean))
(mn_row <- apply_row(student_results, mean))
# More than one function can be provided. It's a good idea in this case to
# name them
(mn_col <- apply_column(student_results, avr=mean, med=median))
# the dfl/dfw versions returns nice tibbles - if the functions return values
# of the same length.
(mn_l <- apply_column_dfl(student_results, avr=mean, med=median))
(mn_w <- apply_column_dfw(student_results, avr=mean, med=median))
# There is no difference between the two versions for length-1 vector results.
# hese will differ, however
(rg_l <- apply_column_dfl(student_results, rg=range))
(rg_w <- apply_column_dfw(student_results, rg=range))
# More complex examples can be used, by using pronouns and data annotation
(vals <- apply_column(student_results, avr=mean, avr_trim=mean(.j, trim=.05),
reg=lm(.j ~ teacher)))
# You can wrap complex function results, such as for lm, into a list, to use
# the dfl/dfr version
(vals_tidy <- apply_column_dfw(student_results, avr=mean, avr_trim=mean(.j, trim=.05),
reg=list(lm(.j ~ teacher))))
# You can provide complex expressions by using formulas
(r <- apply_column(student_results,
res= ~ {
log_score <- log(.j)
p <- predict(lm(log_score ~ teacher + class))
.j - exp(p)
}))
# the .data pronoun can be useful to use names stored in variables
fn <- function(nm) {
if (!is.character(nm) && length(nm) != 1) stop("this example won't work")
apply_column(student_results, lm(.j ~ .data[[nm]]))
}
fn("teacher")
# You can use variables that are outside the scope of the matrixset object.
# You don't need to do anything special if that variable is not named as an
# annotation
pass_grade <- 0.5
(passed <- apply_row_dfw(student_results, pass = ~ .i >= pass_grade))
# use .env if shares an annotation name
previous_year_score <- 0.5
(passed <- apply_row_dfw(student_results, pass = ~ .i >= .env$previous_year_score))
# Grouping structure makes looping easy. Look at the output format
cl_prof_gr <- row_group_by(student_results, class, teacher)
(gr_summ <- apply_column(cl_prof_gr, avr=mean, med=median))
(gr_summ_tidy <- apply_column_dfw(cl_prof_gr, avr=mean, med=median))
# to showcase how we can play with format
(gr_summ_tidy_long <- apply_column_dfl(cl_prof_gr, summ = ~ c(avr=mean(.j), med=median(.j))))
# It is even possible to combine groupings
cl_prof_program_gr <- column_group_by(cl_prof_gr, program)
(mat_summ <- apply_matrix(cl_prof_program_gr, avr = mean, med = median, rg = range))
# it doesn' make much sense, but this is to showcase format
(summ_gr <- apply_matrix(cl_prof_program_gr, avr = mean, med = median, rg = range))
(summ_gr_long <- apply_column_dfl(cl_prof_program_gr,
ct = ~ c(avr = mean(.j), med = median(.j)),
rg = range))
(summ_gr_wide <- apply_column_dfw(cl_prof_program_gr,
ct = c(avr = mean(.j), med = median(.j)),
rg = range))
# This is an example where you may want to use the .force_name argument
(apply_matrix_dfl(column_group_by(student_results, program), FC = colMeans(.m)))
(apply_matrix_dfl(column_group_by(student_results, program), FC = colMeans(.m),
.force_name = TRUE))
Run the code above in your browser using DataLab