# See README for a more verbose version
library(tibble)
male_penguins <- tribble(
~name, ~species, ~island, ~flipper_length_mm, ~body_mass_g,
"Giordan", "Gentoo", "Biscoe", 222L, 5250L,
"Lynden", "Adelie", "Torgersen", 190L, 3900L,
"Reiner", "Adelie", "Dream", 185L, 3650L
)
female_penguins <- tribble(
~name, ~species, ~island, ~flipper_length_mm, ~body_mass_g,
"Alonda", "Gentoo", "Biscoe", 211, 4500L,
"Ola", "Adelie", "Dream", 190, 3600L,
"Mishayla", "Gentoo", "Biscoe", 215, 4750L,
)
# apply different checks
power_inner_join(
male_penguins[c("species", "island")],
female_penguins[c("species", "island")],
check = check_specs(implicit_keys = "ignore", duplicate_keys_right = "inform")
)
df1 <- tibble(id = 1:3, value = c(10, NA, 30))
df2 <- tibble(id = 2:4, value = c(22, 32, 42))
# handle conflicted columns when joining
power_left_join(df1, df2, by = "id", conflict = `+`)
# the most frequent use case is to coalesce
power_left_join(df1, df2, by = "id", conflict = coalesce_xy)
power_left_join(df1, df2, by = "id", conflict = coalesce_yx)
# the conflict function is applied colwise by default!
power_left_join(df1, df2, by = "id", conflict = ~ sum(.x, .y, na.rm = TRUE))
# apply conflict function rowwise
power_left_join(df1, df2, by = "id", conflict = rw ~ sum(.x, .y, na.rm = TRUE))
# subset columns without repeating keys
power_inner_join(
male_penguins %>% select_keys_and(name),
female_penguins %>% select_keys_and(female_name = name),
by = c("species", "island")
)
# semi join
power_inner_join(
male_penguins,
female_penguins %>% select_keys_and(),
by = c("species", "island")
)
# agregate without repeating keys
power_left_join(
male_penguins %>% summarize_by_keys(male_weight = mean(body_mass_g)),
female_penguins %>% summarize_by_keys(female_weight = mean(body_mass_g)),
by = c("species", "island")
)
# pack auxiliary colums without repeating keys
power_left_join(
male_penguins %>% pack_along_keys(name = "m"),
female_penguins %>% pack_along_keys(name = "f"),
by = c("species", "island")
)
# fuzzy join
power_inner_join(
male_penguins %>% select_keys_and(male_name = name),
female_penguins %>% select_keys_and(female_name = name),
by = c(~.x$flipper_length_mm < .y$flipper_length_mm, ~.x$body_mass_g > .y$body_mass_g)
)
# fuzzy + equi join
power_inner_join(
male_penguins %>% select_keys_and(male_name = name),
female_penguins %>% select_keys_and(female_name = name),
by = c("island", ~.x$flipper_length_mm > .y$flipper_length_mm)
)
# define new column without repeating computation
power_inner_join(
male_penguins %>% select_keys_and(male_name = name),
female_penguins %>% select_keys_and(female_name = name),
by = ~ (mass_ratio <- .y$body_mass_g / .x$body_mass_g) > 1.2
)
power_inner_join(
male_penguins %>% select_keys_and(male_name = name),
female_penguins %>% select_keys_and(female_name = name),
by = ~ (mass_ratio <- .y$body_mass_g / .x$body_mass_g) > 1.2,
keep = "none"
)
# fill unmatched values
df1 <- tibble(id = 1:3)
df2 <- tibble(id = 1:2, value2 = c(2, NA), value3 = c(NA, 3))
power_left_join(df1, df2, by = "id", fill = 0)
power_left_join(df1, df2, by = "id", fill = list(value2 = 0))
# join recursively
df1 <- tibble(id = 1, a = "foo")
df2 <- tibble(id = 1, b = "bar")
df3 <- tibble(id = 1, c = "baz")
power_left_join(list(df1, df2, df3), by = "id")
power_left_join(df1, list(df2, df3), by = "id")
Run the code above in your browser using DataLab