# make toy data
size <- 30
n <- 10
df1 <- data.frame(
id = sample(1:n, size = size, replace = TRUE),
service_dt = sample(seq(as.Date("2020-01-01"), as.Date("2022-01-31"), by = 1),
size = size
)
) %>%
dplyr::mutate(year = lubridate::year(service_dt))
df2 <- data.frame(
id = rep(1:n, size / n), year = rep(2020:2022, each = n),
status_1 = sample(0:1, size = size, replace = TRUE),
status_2 = sample(0:1, size = size, replace = TRUE)
)
df3 <- data.frame(id = 1:n, sex = sample(c("F", "M"), size = n, replace = TRUE))
# simple joins
# note that for left_join(df1, df2), boths keys have to be used,
# otherwise, error as the relation would not be one-to-one
fetch_var(df1,
keys = c(id, year),
linkage = list(
df2 ~ starts_with("s"), # match both keys without '|'
df3 ~ sex | id
) # match by id only; otherwise failed because df3 has no year
)
# example if some y is remote
# make df2 as database table
db2 <- dbplyr::tbl_memdb(df2)
fetch_var(df1,
keys = c(id, year),
linkage = list(
db2 ~ starts_with("s"),
df3 ~ sex | id
),
copy = TRUE # pass to left_join for forced collection of remote table
)
Run the code above in your browser using DataLab