# Toy data: one row per simulated service record.
# Every column length is derived from sample_size, so changing it keeps the
# columns in step (the client-id column used to hard-code `each = 10`).
sample_size <- 30
n_clnt <- 3 # number of distinct toy clients
df <- data.frame(
  # Consecutive runs of client ids (1, ..., 1, 2, ..., 2, 3, ..., 3);
  # length.out trims the last run when sample_size is not a multiple of n_clnt.
  clnt_id = rep(seq_len(n_clnt),
    each = ceiling(sample_size / n_clnt),
    length.out = sample_size
  ),
  # Random service dates drawn (with replacement) from January 2020.
  service_dt = sample(seq(as.Date("2020-01-01"), as.Date("2020-01-31"), by = 1),
    size = sample_size, replace = TRUE
  ),
  # Primary code column: never missing.
  diagx = sample(letters, size = sample_size, replace = TRUE),
  # Secondary code columns: NA is included in the pool, so they may be missing.
  diagx_1 = sample(c(NA, letters), size = sample_size, replace = TRUE),
  diagx_2 = sample(c(NA, letters), size = sample_size, replace = TRUE)
)
# Make df a database table as well, so the same toy data is available both as
# a local data frame (df) and as a database-backed table (db). Both are passed
# as data sources when the definition is executed.
# NOTE(review): presumably tbl_memdb() copies df into an in-memory database and
# names the table after the argument expression -- confirm in the dbplyr docs.
db <- dbplyr::tbl_memdb(df)
# Use build_def() to make a toy definition.
# Each element of fn_args is given either once (then it is applied to all
# sources) or once per source (presumably in the same order as the source
# labels -- confirm against the build_def() documentation).
# NOTE(review): check that `src_lab` is the full parameter name of build_def();
# if the parameter is actually `src_labs`, this call relies on partial argument
# matching and the name should be spelled out.
sud_def <- build_def("SUD", # label of the definition; usually a disease name
src_lab = c("src1", "src2"), # labels for the data sources, e.g., hospitalization, ED visits; the same labels name the data passed to execute_def()
# def_fn: the function that filters the data with some criteria
def_fn = define_case,
fn_args = list(
vars = starts_with("diagx"),
match = "start", # length 1, so "start" will be applied to all sources
vals = list(c("304"), c("305")),
clnt_id = "clnt_id", # list()/c() can be omitted for a single element
# c() can be used in place of list()
# if the argument only takes one value for each source
n_per_clnt = c(2, 3)
)
)
# The definition object can be stored for later re-use, e.g.:
# saveRDS(sud_def, file = some_path)

# Execute the definition, pairing each source label with its data:
# "src1" gets the local data frame, "src2" gets the database table.
sud_def %>%
  execute_def(
    with_data = list(src1 = df, src2 = db),
    force_proceed = TRUE
  )
# Run the code above in your browser using DataLab