# Example: train an xgboost classifier on record-pair comparison vectors and
# pass it to custom_rec_lin_model(). The whole example is skipped when the
# optional xgboost package is not installed.
if (requireNamespace("xgboost", quietly = TRUE)) {
  # First data set: ten name/surname records.
  df_1 <- data.frame(
    name = c(
      "James", "Emma", "William", "Olivia", "Thomas",
      "Sophie", "Harry", "Amelia", "George", "Isabella"
    ),
    surname = c(
      "Smith", "Johnson", "Brown", "Taylor", "Wilson",
      "Davis", "Clark", "Harris", "Lewis", "Walker"
    )
  )

  # Second data set: row 1 repeats df_1 exactly, rows 2-4 are misspelled
  # variants of the corresponding df_1 rows, and rows 5-10 are different people.
  df_2 <- data.frame(
    name = c(
      "James", "Ema", "Wimliam", "Olivia", "Charlotte",
      "Henry", "Lucy", "Edward", "Alice", "Jack"
    ),
    surname = c(
      "Smith", "Johnson", "Bron", "Tailor", "Moore",
      "Evans", "Hall", "Wright", "Green", "King"
    )
  )

  # Known matching pairs used as training labels.
  # NOTE(review): presumably "a" and "b" are row indices into A and B -- confirm
  # against the comparison_vectors() documentation.
  matches <- data.frame(a = 1:4, b = 1:4)

  # One comparison function per linkage variable.
  comparators <- list(
    name = jarowinkler_complement(),
    surname = jarowinkler_complement()
  )

  vectors <- comparison_vectors(
    A = df_1,
    B = df_2,
    variables = c("name", "surname"),
    comparators = comparators,
    matches = matches
  )

  # Fit a binary classifier: the gamma_* columns of Omega are the features and
  # the match column is the response.
  model_xgb <- xgboost::xgboost(
    x = as.matrix(vectors$Omega[, c("gamma_name", "gamma_surname")]),
    y = factor(vectors$Omega$match),
    objective = "binary:logistic",
    eval_metric = "logloss",
    nrounds = 100,
    verbosity = 0
  )

  # Combine the fitted model with the comparison vectors it was trained on.
  custom_xgb_model <- custom_rec_lin_model(model_xgb, vectors)
  custom_xgb_model
}
# Run the code above in your browser using DataLab