DeclareDesign (version 0.12.0)

declare_estimator: Declare estimator

Description

Declares an estimator which generates estimates and associated statistics.

Usage

declare_estimator(..., handler = estimator_handler,
  label = "estimator")

declare_estimators(..., handler = estimator_handler, label = "estimator")

tidy_estimator(estimator_function)

model_handler(data, ..., model = estimatr::difference_in_means, term = FALSE)

estimator_handler(data, ..., model = estimatr::difference_in_means, term = FALSE, estimand = NULL, label)

Arguments

...

arguments to be captured, and later passed to the handler

handler

a tidy-in, tidy-out function

label

a string describing the step

estimator_function

A function that takes a data.frame as an argument and returns a data.frame with the estimates, summary statistics (i.e., standard error, p-value, and confidence interval) and a label.

data

a data.frame

model

A model function, e.g. lm or glm. By default, the model is the difference_in_means function from the estimatr package.

term

Symbols or a literal character vector of terms that represent quantities of interest, e.g. Z. If FALSE, return the first non-intercept term; if TRUE, return all terms. To escape non-standard evaluation, use !!.

estimand

a declare_estimand step object, or a character label, or a list of either

Value

A function that accepts a data.frame as an argument and returns a data.frame containing the value of the estimator and associated statistics.

Custom Estimators

estimator_function implementations should be tidy (accept and return a data.frame).

model implementations should at a minimum provide S3 methods for summary and confint.

Details

tidy_estimator takes an untidy estimation function, and returns a tidy handler which accepts standard labeling options.

The intent here is to factor out the estimator/estimand labeling so that it can be reused by other model handlers.

Examples

Run this code
# NOT RUN {
# Declare the estimand: the mean of Y_Z_1 - Y_Z_0, labelled "ATE".
# The estimators below link to it either by that label or by passing the
# `my_estimand` step object itself.
my_estimand <- declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0))

# Declare an estimator with the default handler. No `model` is supplied, so
# the default model (`difference_in_means` from the `estimatr` package) is
# used, and because `term` is left at its default the first non-intercept
# term is returned as the estimate. The estimand is linked by its label.
my_estimator_dim <- declare_estimator(Y ~ Z, estimand = "ATE", label = "DIM")

# Same formula and estimand, but fitted with the `lm` function from base R
my_estimator_lm <- declare_estimator(Y ~ Z, estimand = "ATE",
  model = lm, label = "LM")

# Use `lm_robust` (linear regression with robust standard errors) from the
# `estimatr` package
my_estimator_lm_rob <- declare_estimator(
  Y ~ Z,
  estimand = "ATE",
  model = lm_robust,
  label = "LM_Robust"
)

# Set `term` when the estimate of interest is not the first non-intercept
# variable: here X comes first in the formula, but Z is the term of interest.
# The estimand is linked by passing the step object rather than its label.
my_estimator_lm_rob_x <- declare_estimator(
  Y ~ X + Z,
  estimand = my_estimand,
  term = "Z",
  model = lm_robust
)

# Use `glm` from base R. `family` is not an argument of the handler itself;
# presumably it is captured via `...` and forwarded to `glm` -- TODO confirm.
my_estimator_glm <- declare_estimator(
  Y ~ X + Z,
  family = "gaussian",
  estimand = my_estimand,
  term = "Z",
  model = glm
)

# A probit: `glm` with a binomial family and probit link.
# No estimand is linked to this estimator.
estimator_probit <- declare_estimator(
  Y ~ Z,
  model = glm,
  family = binomial(link = "probit"),
  term = "Z"
)

# Declare estimator using a custom handler

# Custom estimator: the sample mean of the outcome `Y`.
# It takes a single `data` argument (a data.frame) and returns a one-row
# data.frame with an `estimate` column. Wrap it with `tidy_estimator` to add
# the estimator/estimand labeling.
my_estimator_function <- function(data) {
  mean_outcome <- with(data, mean(Y))
  data.frame(estimate = mean_outcome)
}

# `tidy_estimator` turns the untidy estimation function into a tidy handler
# that accepts the standard labeling options; the estimand is linked by
# passing the `my_estimand` step object.
my_estimator_custom <- declare_estimator(
  handler = tidy_estimator(my_estimator_function),
  estimand = my_estimand
)

# Customize labeling

# Custom estimator that does its own labeling: besides the mean of `Y` it
# returns the `estimator_label` and `estimand_label` columns itself, plus the
# number of rows in `data`. Labels are kept as character strings, not factors.
my_estimator_function <- function(data) {
  mean_outcome <- with(data, mean(Y))
  n_obs <- nrow(data)

  data.frame(
    estimator_label = "foo",
    estimand_label = "bar",
    estimate = mean_outcome,
    n = n_obs,
    stringsAsFactors = FALSE
  )
}

# The function already returns its own `estimator_label` and `estimand_label`
# columns, so it is passed as the handler directly, without `tidy_estimator`.
my_estimator_custom2 <- declare_estimator(handler = my_estimator_function)


# ----------
# 1. Setting up a design
# ----------

# First, set up the rest of a design.
# Fix the RNG seed so the random draws below are reproducible.
set.seed(42)

# Steps are chained with `+`, ending with the difference-in-means estimator:
#   - population: N = 100 with covariates X (rnorm), W (rexp, rate 1) and a
#     noise term (rnorm)
#   - potential outcomes: Y = .25 * Z + noise
#   - estimand: the mean of Y_Z_1 - Y_Z_0, labelled "ATE"
#   - assignment with m = 50 (presumably 50 units assigned to treatment --
#     TODO confirm against the assignment documentation)
#   - declare_reveal() with its defaults (presumably reveals the observed Y
#     from the potential outcomes and Z -- TODO confirm)
design_def <-
  declare_population(N = 100, X = rnorm(N), W = rexp(N, 1), noise = rnorm(N)) +
  declare_potential_outcomes(Y ~ .25 * Z + noise) +
  declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0)) +
  declare_assignment(m = 50) +
  declare_reveal() +
  my_estimator_dim

# Draw the estimates for the declared design
draw_estimates(design_def)

# A declared estimator is itself a function of a data.frame, so it can also
# be applied directly to data drawn from the design
dat <- draw_data(design_def)
my_estimator_dim(dat)

# ----------
# 2. Using existing estimators
# ----------

# Each call starts again from `design_def` and replaces the
# difference-in-means estimator step with a different declared estimator;
# `design` is overwritten each time.

# lm_robust
design <- replace_step(design_def, my_estimator_dim, my_estimator_lm_rob)
draw_estimates(design)

# lm
design <- replace_step(design_def, my_estimator_dim, my_estimator_lm)
draw_estimates(design)

# glm (gaussian family, term "Z")
design <- replace_step(design_def, my_estimator_dim, my_estimator_glm)
draw_estimates(design)

# ----------
# 3. Using custom estimators
# ----------

# Replace the difference-in-means step with the custom estimator that was
# wrapped by `tidy_estimator`
design <- replace_step(design_def, my_estimator_dim, my_estimator_custom)

draw_estimates(design)

# The column names returned by a custom estimator should match the names
# used in your diagnosands when diagnosing a design. This estimator returns
# the median of `Y` in a column named `med`.
my_median <- function(data) {
  outcome_median <- median(data$Y)
  data.frame(med = outcome_median)
}

# Wrap the median function with `tidy_estimator` and link it to the estimand
my_estimator_median <- declare_estimator(
  handler = tidy_estimator(my_median),
  estimand = my_estimand
)

# Replace the difference-in-means step with the median estimator
design <- replace_step(design_def, my_estimator_dim, my_estimator_median)

draw_estimates(design)

# Custom diagnosand: the mean difference between the estimator's `med`
# column and `estimand`. `keep_defaults = FALSE` presumably drops the
# package's default diagnosands -- TODO confirm.
my_diagnosand <- declare_diagnosands(med_to_estimand = mean(med - estimand),
  keep_defaults = FALSE)

# }
# NOT RUN {
# Diagnose the design with the custom diagnosand, using only 5 simulations
# to keep the example fast. `bootstrap_sims = FALSE` presumably skips the
# bootstrap -- TODO confirm.
diagnose_design(design, diagnosands = my_diagnosand, sims = 5,
  bootstrap_sims = FALSE)
# }
# NOT RUN {
# ----------
# 4. Multiple estimators per estimand
# ----------

# Insert the lm estimator after the difference-in-means estimator, so the
# design holds two estimators that both target the "ATE" estimand
design_two <- insert_step(design_def,  my_estimator_lm,
  after = my_estimator_dim)

draw_estimates(design_two)

# }
# NOT RUN {
# Diagnose the two-estimator design; no `diagnosands` argument is passed
# here, and only 5 simulations are run to keep the example fast
diagnose_design(design_two, sims = 5, bootstrap_sims = FALSE)
# }

Run the code above in your browser using DataLab