# Example: Hamming joins between a list of names and a corrupted copy of it.
# The whole example is skipped when the optional 'babynames' package is not
# installed.
if (requireNamespace("babynames", quietly = TRUE)) {
  # Up to 500 of the distinct names in babynames, lower-cased.
  # head() returns the same values as `[1:500]` when at least 500 names exist,
  # but never pads the result with NA when there are fewer.
  baby_names <- data.frame(
    name = head(tolower(unique(babynames::babynames$name)), 500L)
  )

  # Corrupted copy: every lowercase vowel (and "y") is replaced with "x".
  baby_names_mispelled <- data.frame(
    name_mispelled = gsub("[aeiouy]", "x", baby_names$name)
  )

  # NOTE(review): `threshold`, `n_bands` and `band_width` are presumably the
  # distance cutoff and the hashing/banding parameters of the join -- confirm
  # against the package documentation.

  # Only the value of the LAST expression in a top-level `if` block is
  # auto-printed, so each join result is printed explicitly; otherwise the
  # inner-join result would be computed and silently discarded.
  print(
    hamming_inner_join(
      baby_names,
      baby_names_mispelled,
      by = c("name" = "name_mispelled"),
      threshold = 3,
      n_bands = 150,
      band_width = 10,
      clean = FALSE
    )
  )

  print(
    hamming_left_join(
      baby_names,
      baby_names_mispelled,
      by = c("name" = "name_mispelled"),
      threshold = 3,
      n_bands = 150,
      band_width = 10
    )
  )
}
# Run the code above in your browser using DataLab