# Load the example data (palmerpenguins) and resampling tools (rsample).
# NOTE(review): ndist(), used throughout this script, is exported by
# neither package loaded here -- confirm which package provides it and
# add the corresponding library() call.
library(palmerpenguins)
library(rsample)

# Keep only the four continuous measurement columns, then drop every
# row that contains at least one missing value.
cont_vars <- c("bill_length_mm", "bill_depth_mm",
               "flipper_length_mm", "body_mass_g")
penguins_cont <- palmerpenguins::penguins[, cont_vars]
penguins_cont <- penguins_cont[complete.cases(penguins_cont), ]
# --- Distances on the full data set ---------------------------------
# Each call below reassigns dist_matrix, so only the last result is
# kept; the calls are independent demonstrations of ndist() options.

# Default settings.
dist_matrix <- ndist(penguins_cont)

# Standardize each variable and make the per-variable contributions
# commensurable.
dist_matrix <- ndist(penguins_cont, scaling = "std",
                     commensurable = TRUE)

# PCA-based dimensionality reduction: use principal-component scores
# retaining 95% of the variance.
dist_matrix <- ndist(penguins_cont, scaling = "pc_scores",
                     threshold = 0.95)

# Mahalanobis distance.
dist_matrix <- ndist(penguins_cont, method = "mahalanobis")

# Weighted Euclidean distance; one weight per column, in order:
# bill length, bill depth, flipper length, body mass.
dist_matrix <- ndist(penguins_cont, method = "euclidean",
                     weights = c(1, 0.5, 2, 1))
# --- Training/test split (for the validation examples below) --------
set.seed(123)  # make the random split reproducible

# 80% of the rows go to training, the remaining 20% to testing.
penguins_split <- initial_split(penguins_cont, prop = 0.8)
tr_penguins <- training(penguins_split)
ts_penguins <- testing(penguins_split)
# --- Distances involving a validation (test) set --------------------

# Training data only, default settings.
dist_matrix <- ndist(tr_penguins)

# Distances between test rows (validate_x) and training rows (x).
val_dist_matrix <- ndist(x = tr_penguins, validate_x = ts_penguins,
                         method = "euclidean")

# Manhattan distance with standardization.
# NOTE(review): presumably the scaling parameters are estimated on x
# and then applied to validate_x -- confirm in the package docs.
val_dist_matrix_std <- ndist(x = tr_penguins, validate_x = ts_penguins,
                             method = "manhattan", scaling = "std")

# First two principal-component scores, commensurable contributions.
val_dist_matrix_pca <- ndist(x = tr_penguins, validate_x = ts_penguins,
                             scaling = "pc_scores", ncomp = 2,
                             commensurable = TRUE)

# Robust scaling combined with per-variable weights (one weight per
# column, same order as the data).
val_dist_matrix_robust <- ndist(x = tr_penguins,
                                validate_x = ts_penguins,
                                scaling = "robust",
                                weights = c(1, 0.5, 2, 1))

# Mahalanobis distance between test and training rows.
val_dist_matrix_mahal <- ndist(x = tr_penguins,
                               validate_x = ts_penguins,
                               method = "mahalanobis")
# (Scraped web-page footer, kept as a comment so the file parses:)
# Run the code above in your browser using DataLab