# Toy input with repeated keys: ids 1, 2 and 4 each appear twice, id 3 once.
# The two id-1 rows are identical; the id-2 and id-4 pairs differ only by NA
# cells (value/tag for id 2, tag for id 4).
df <- data.frame(
  id    = c(1, 1, 2, 2, 3, 4, 4),
  value = c(10, 10, NA, 20, 5, 3, 3),
  tag   = c("A", "A", NA, "B", "C", "X", NA),
  stringsAsFactors = FALSE
)

# Call pickmax() keyed on the "id" column. diff_cutoff is passed explicitly
# as 0.5 (50%), which this example describes as the default value.
res <- pickmax(df, key_col = "id", diff_cutoff = 0.5)

# Inspect the three components of the result, one after another.
print(res$duplicates_df)  # the duplicates that were flagged
print(res$split_df)       # records that got split per diff_cutoff
print(res$coalesced_df)   # the final cleaned dataset
# Run the code above in your browser using DataLab