library(manydist)
library(palmerpenguins)
library(rsample)
# Keep only the penguin rows that have no missing values in any column
pengmix <- palmerpenguins::penguins[
  complete.cases(palmerpenguins::penguins),
]

# Reproducible 80/20 split into training and test sets
set.seed(123)
pengmix_split <- initial_split(data = pengmix, prop = 0.8)
tr_pengmix <- training(x = pengmix_split)
ts_pengmix <- testing(x = pengmix_split)
# Example 1: all other arguments left at their defaults; the test rows are
# supplied as validation data
dist_matrix <- mdist(
  x = tr_pengmix,
  validate_x = ts_pengmix
)
# Example 2: "gower" preset, with validation data and commensurable = TRUE
dist_gower <- mdist(
  x = tr_pengmix,
  validate_x = ts_pengmix,
  preset = "gower",
  commensurable = TRUE
)
# Example 3: "euclidean_onehot" preset, with validation data
dist_onehot <- mdist(
  x = tr_pengmix,
  validate_x = ts_pengmix,
  preset = "euclidean_onehot"
)
# Example 4: "custom" preset with explicitly chosen continuous and categorical
# distances, commensurable = TRUE, and "std" scaling
dist_custom <- mdist(
  x = tr_pengmix,
  validate_x = ts_pengmix,
  preset = "custom",
  distance_cont = "manhattan",
  distance_cat = "matching",
  commensurable = TRUE,
  scaling = "std"
)
# Example 5: Euclidean continuous distance with "pc_scores" scaling and a
# threshold of 0.85
dist_pca <- mdist(
  x = tr_pengmix,
  validate_x = ts_pengmix,
  distance_cont = "euclidean",
  scaling = "pc_scores",
  threshold = 0.85
)
# Example 6: restrict both data sets to the categorical columns only
cat_vars <- c("species", "island", "sex")
dist_cat <- mdist(
  x = tr_pengmix[, cat_vars],
  validate_x = ts_pengmix[, cat_vars],
  distance_cat = "tot_var_dist"
)
# Example 7: restrict both data sets to the numeric measurement columns only
num_vars <- c(
  "bill_length_mm",
  "bill_depth_mm",
  "flipper_length_mm",
  "body_mass_g"
)
dist_cont <- mdist(
  x = tr_pengmix[, num_vars],
  validate_x = ts_pengmix[, num_vars],
  distance_cont = "manhattan",
  scaling = "std"
)
# Example 8: "supervised" categorical distance, passing the training-set
# body mass column as the response
response_tr <- tr_pengmix[["body_mass_g"]]
dist_sup <- mdist(
  x = tr_pengmix,
  validate_x = ts_pengmix,
  response = response_tr,
  distance_cat = "supervised"
)
# Run the code above in your browser using DataLab