library(palmerpenguins)
library(rsample)
# Data preparation ----
# Keep only the rows in which all three categorical variables and the
# response (body_mass_g) are non-missing.
complete_vars <- c("species", "island", "sex", "body_mass_g")
penguins_complete <- penguins[complete.cases(penguins[complete_vars]), ]

# Categorical predictors and the numeric response come from the same filtered
# rows, so position i in `response` corresponds to row i of `penguins_cat`.
penguins_cat <- penguins_complete[c("species", "island", "sex")]
response <- penguins_complete[["body_mass_g"]]

# Training/test split ----
# Seed the RNG first so the split below is reproducible.
set.seed(123)
penguins_split <- initial_split(data = penguins_cat, prop = 0.8)
tr_penguins <- training(penguins_split)
ts_penguins <- testing(penguins_split)

# Split the response the same way: `in_id` is used as the positions of the
# training rows, and negative indexing selects every remaining row.
response_tr <- response[penguins_split$in_id]
response_ts <- response[-penguins_split$in_id]
# cdist() examples ----
# NOTE(review): cdist() is neither defined nor loaded in the visible script;
# presumably it is provided by an already-attached package -- confirm.

# Basic usage: training data only, every other argument left at its default
result <- cdist(tr_penguins)

# Training data plus a validation set
val_result <- cdist(
  x = tr_penguins,
  validate_x = ts_penguins,
  method = "tot_var_dist"
)

# Same call, additionally requesting commensurability
val_result_COMM <- cdist(
  x = tr_penguins,
  validate_x = ts_penguins,
  method = "tot_var_dist",
  commensurable = TRUE
)

# Supervised method: the training response is passed alongside the data
sup_result <- cdist(
  x = tr_penguins,
  response = response_tr,
  method = "supervised"
)

# Supervised method with a validation set
sup_val_result <- cdist(
  x = tr_penguins,
  validate_x = ts_penguins,
  response = response_tr,
  method = "supervised"
)

# Commensurable distances with custom weights (one weight per column)
comm_result <- cdist(
  tr_penguins,
  commensurable = TRUE,
  weights = c(2, 1, 1)
)

# A different method for each of the three variables
multi_method <- cdist(
  tr_penguins,
  method = c("matching", "goodall_3", "tot_var_dist")
)
# Run the code above in your browser using DataLab