Last chance! 50% off unlimited learning
Sale ends in
cro_mean
, cro_sum
, cro_median
calculate
mean/sum/median by groups. NA's are always omitted.
cro_mean_sd_n
calculates mean, standard deviation and N
simultaneously. Mainly intended for usage with significance_means.
cro_pearson
, cro_spearman
calculate correlation of
first variable in each data.frame in cell_vars
with other variables.
NA's are removed pairwise.
cro_fun
, cro_fun_df
return table with custom summary
statistics defined by fun
argument. NA's treatment depends on your
fun
behavior. To use weight you should have formal weight
argument in fun
and some logic for its processing inside. Several
functions with weight support are provided - see w_mean.
cro_fun
applies fun
on each variable in cell_vars
separately, cro_fun_df
gives to fun
each data.frame in
cell_vars
as a whole. So cro_fun(iris[, -5], iris$Species, fun =
mean)
gives the same result as cro_fun_df(iris[, -5], iris$Species,
fun = colMeans)
. For cro_fun_df
names of cell_vars
will be
converted to labels, if they are available, before fun
is applied.
Generally it is recommended that fun
always return an object of the
same form. Row names/vector names of fun
result will appear in the row
labels of the table and column names/names of list will appear in the column
labels. If your fun
returns data.frame/matrix/list with element named
'row_labels' then this element will be used as row labels. And it will have
precedence over rownames.
calc_cro_*
are the same as above but evaluate their arguments
in the context of the first argument data
.
combine_functions
is an auxiliary function for combining several
functions into one function for usage with cro_fun
/cro_fun_df
.
Names of arguments will be used as statistic labels. By default, results of
each function are combined with c. But you can provide your own method
function with method
argument. It will be applied as in the expression
do.call(method, list_of_functions_results)
. A particularly useful method
is list
. When it is used, statistic labels will appear in the column
labels. See examples. Also you may be interested in data.frame
,
rbind
, cbind
methods.
cro_fun(
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL,
fun,
...,
unsafe = FALSE
)
cro_fun_df(
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL,
fun,
...,
unsafe = FALSE
)
cro_mean(
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL
)
cro_mean_sd_n(
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL,
weighted_valid_n = FALSE,
labels = NULL
)
cro_sum(
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL
)
cro_median(
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL
)
cro_pearson(
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL
)
cro_spearman(
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL
)
calc_cro_fun(
data,
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL,
fun,
...,
unsafe = FALSE
)
calc_cro_fun_df(
data,
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL,
fun,
...,
unsafe = FALSE
)
calc_cro_mean(
data,
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL
)
calc_cro_mean_sd_n(
data,
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL,
weighted_valid_n = FALSE,
labels = NULL
)
calc_cro_sum(
data,
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL
)
calc_cro_median(
data,
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL
)
calc_cro_pearson(
data,
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL
)
calc_cro_spearman(
data,
cell_vars,
col_vars = total(),
row_vars = total(label = ""),
weight = NULL,
subgroup = NULL
)
combine_functions(..., method = c)
vector/data.frame/list. Variables on which summary function will be computed.
numeric vector. Optional case weights. Cases with NA, negative or zero weights are removed before calculations.
logical vector. You can specify subgroup on which table will be computed.
custom summary function. Generally it is recommended that
fun
will always return object of the same form. Rownames/vector
names of fun
result will appear in the row labels of the table and
column names/names of list will appear in the column labels. To use weight
you should have formal weight
argument in fun
and some logic
for its processing inside. For cro_fun_df
fun
will receive
data.table with all names converted to variable labels
(if labels exist). So it is not recommended to rely on the original variable
names in your fun
.
further arguments for fun
in
cro_fun
/cro_fun_df
or functions for combine_functions
.
Ignored in cro_fun
/cro_fun_df
if unsafe
is TRUE.
logical/character. If not FALSE then fun
will be
evaluated as is. It can lead to a significant increase in performance.
But there are some limitations. For cro_fun
it means that your
function fun
should return a vector. If the length of this vector is
greater than one then you should provide, in the unsafe
argument, a vector
of unique labels for each element of this vector. There will be no attempts
to automatically make labels for the results of fun
. For
cro_fun_df
your function should return vector or list/data.frame
(optionally with 'row_labels' element - statistic labels). If unsafe
is TRUE or not logical then further arguments (...
) for fun
will be ignored.
logical. Should we show weighted valid N in
cro_mean_sd_n
? By default it is FALSE.
character vector of length 3. Labels for mean, standard
deviation and valid N in cro_mean_sd_n
.
data.frame in which context all other arguments will be evaluated
(for calc_cro_*
).
function which will combine results of multiple functions in
combine_functions
. It will be applied as in the expression
do.call(method, list_of_functions_results)
. By default it is
c
.
object of class 'etable'. Basically it's a data.frame but class is needed for custom methods.
# NOT RUN {
# Example data: the built-in 'mtcars' data set.
data(mtcars)
# Attach variable labels to the columns. For 'vs' and 'am' a second,
# named-vector entry additionally supplies value labels for codes 0 and 1.
mtcars = apply_labels(mtcars,
mpg = "Miles/(US) gallon",
cyl = "Number of cylinders",
disp = "Displacement (cu.in.)",
hp = "Gross horsepower",
drat = "Rear axle ratio",
wt = "Weight (1000 lbs)",
qsec = "1/4 mile time",
vs = "Engine",
vs = c("V-engine" = 0,
"Straight engine" = 1),
am = "Transmission",
am = c("Automatic" = 0,
"Manual"=1),
gear = "Number of forward gears",
carb = "Number of carburetors"
)
# Simple example - there is a special shortcut for it - 'cro_mean'.
# 'mean' is applied to each variable in the list separately; columns are
# the total plus 'am', rows are split by 'vs'. 'calculate' evaluates the
# 'cro_fun' call in the context of 'mtcars', so bare column names can be used.
calculate(mtcars, cro_fun(list(mpg, disp, hp, wt, qsec),
col_vars = list(total(), am),
row_vars = vs,
fun = mean)
)
# The same result: 'calc_cro_fun' takes the data as its first argument and
# evaluates the remaining arguments in its context.
calc_cro_fun(mtcars, list(mpg, disp, hp, wt, qsec),
col_vars = list(total(), am),
row_vars = vs,
fun = mean
)
# The same example with 'subgroup': the table is computed only on cases
# where the logical condition vs == 0 holds.
calculate(mtcars, cro_fun(list(mpg, disp, hp, wt, qsec),
col_vars = list(total(), am),
row_vars = vs,
subgroup = vs == 0,
fun = mean)
)
# 'combine_functions' usage: several summary functions are merged into one
# 'fun'; the argument names ('Mean', 'Std. dev.', 'Valid N') are used as
# statistic labels. Results are combined with the default method 'c'.
calculate(mtcars, cro_fun(list(mpg, disp, hp, wt, qsec),
col_vars = list(total(), am),
row_vars = vs,
fun = combine_functions(Mean = mean,
'Std. dev.' = sd,
'Valid N' = valid_n)
))
# 'combine_functions' usage - statistic labels in columns.
# With method = list the statistic labels appear in the column labels.
calculate(mtcars, cro_fun(list(mpg, disp, hp, wt, qsec),
col_vars = list(total(), am),
row_vars = vs,
fun = combine_functions(Mean = mean,
'Std. dev.' = sd,
'Valid N' = valid_n,
method = list
)
))
# 'summary' function: a 'fun' that returns a named vector; the names of its
# result are used as row labels of the table.
calculate(mtcars, cro_fun(list(mpg, disp, hp, wt, qsec),
col_vars = list(total(), am),
row_vars = list(total(), vs),
fun = summary
))
# Comparison of 'cro_fun' and 'cro_fun_df'.
# 'cro_fun' applies 'fun' to each variable of the sheet separately ...
calculate(mtcars, cro_fun(
sheet(mpg, disp, hp, wt, qsec),
col_vars = am,
fun = mean
)
)
# ... while 'cro_fun_df' passes the sheet to 'fun' as a whole, so 'colMeans'
# gives the same result as 'mean' above.
calculate(mtcars, cro_fun_df(
sheet(mpg, disp, hp, wt, qsec),
col_vars = am,
fun = colMeans
)
)
# Usage for 'cro_fun_df' which is not possible for 'cro_fun':
# linear regression by groups (one model per category of 'am').
calculate(mtcars, cro_fun_df(
sheet(mpg, disp, hp, wt, qsec),
col_vars = am,
fun = function(x){
# 'x' holds all cell variables at once. Build a formula with the first
# column as response and all remaining columns as predictors.
frm = reformulate(".", response = names(x)[1])
model = lm(frm, data = x)
# Return the coefficient estimates side by side with the bounds returned
# by confint().
sheet(
'Coef. estimate' = coef(model),
confint(model)
)
}
))
# }
Run the code above in your browser using DataLab