# NOT RUN {
# a node generator is something an expert can
# write and part-time R users can use.
# Build a pipeline node that fits one linear model per group.
#
# Arguments:
#   .          the incoming node/pipeline stage; passed through unchanged as
#              the first argument of rq_df_funciton_node().
#   group_col  name of the column whose values define the groups.
#   xvar       right-hand side of the model formula, as a string.
#   yvar       left-hand side of the model formula, as a string.
#
# Value:
#   Whatever rq_df_funciton_node() returns for the supplied function,
#   column list and display text.
#
# The function handed to rq_df_funciton_node() splits its data frame by
# group_col, fits lm(yvar ~ xvar) on every piece, and stacks the coefficient
# tables (plus a "Variable" column and the group label) with
# data.table::rbindlist().
grouped_regression_node <- function(., group_col = "group", xvar = "x", yvar = "y") {
  # Evaluate the promise now so the closures below see a fixed value.
  force(group_col)
  model_spec <- paste(yvar, "~", xvar)

  # Fit the model on the rows of a single group and return its coefficient
  # table as a plain data frame tagged with the group label.
  fit_one_group <- function(group_rows) {
    fit <- lm(as.formula(model_spec), data = group_rows)
    coef_table <- as.data.frame(summary(fit)$coefficients)
    # Term names live in the row names; move them into a regular column.
    coef_table$Variable <- rownames(coef_table)
    rownames(coef_table) <- NULL
    # Every row of this piece shares one group value, so take the first.
    coef_table[[group_col]] <- group_rows[[group_col]][[1]]
    coef_table
  }

  # Data-frame level implementation; `nd` is accepted but not used here.
  regress_by_group <- function(df, nd = NULL) {
    pieces <- split(df, df[[group_col]])
    data.table::rbindlist(lapply(pieces, fit_one_group))
  }

  columns_produced <- c(
    "Variable", "Estimate", "Std. Error", "t value", "Pr(>|t|)", group_col
  )
  node_label <- paste0(yvar, "~", xvar, " grouped by ", group_col)

  rq_df_funciton_node(
    .,
    regress_by_group,
    columns_produced = columns_produced,
    display_form = node_label
  )
}
# Worked example: build a random data set, wrap the grouped regression in a
# pipeline, print the pipeline, then apply it to the data.

# Fix the RNG seed so the random draws below are reproducible.
set.seed(3265)
# 1000 rows: two independent standard-normal columns and a group label drawn
# with replacement from the letters "a" through "e".
d <- data.frame(x = rnorm(1000),
y = rnorm(1000),
group = sample(letters[1:5], 1000, replace = TRUE),
stringsAsFactors = FALSE)
# Start a pipeline from d and append the grouped regression node with its
# default column names ("group", "x", "y").
# NOTE(review): local_td() and %.>% are not defined in this file; presumably
# they come from rquery / wrapr -- confirm the packages are attached.
rquery_pipeline <- local_td(d) %.>%
grouped_regression_node(.)
# Print the formatted text form of the pipeline.
cat(format(rquery_pipeline))
# Pipe the data frame into the pipeline; the result is auto-printed at top level.
d %.>% rquery_pipeline
# }
# Run the code above in your browser using DataLab