# \donttest{
# Example 1: Basic multinomial JEDI-MWoE optimization
set.seed(42)
n_obs <- 1500
# Simulate customer segments with 3 risk categories
segments <- c("Premium", "Standard", "Basic", "Economy")
# Class probabilities: 0=LowRisk, 1=MediumRisk, 2=HighRisk
risk_probs <- list(
Premium = c(0.80, 0.15, 0.05), # Mostly LowRisk
Standard = c(0.40, 0.40, 0.20), # Balanced
Basic = c(0.15, 0.35, 0.50), # Mostly HighRisk
Economy = c(0.05, 0.20, 0.75) # Almost all HighRisk
)
cat_feature <- sample(segments, n_obs,
replace = TRUE,
prob = c(0.25, 0.35, 0.25, 0.15)
)
# Generate multinomial target (classes 0, 1, 2)
multinom_target <- sapply(cat_feature, function(segment) {
probs <- risk_probs[[segment]]
sample(0:2, 1, prob = probs)
})
# Apply JEDI-MWoE algorithm
result_mwoe <- ob_categorical_jedi_mwoe(
cat_feature,
multinom_target,
min_bins = 2,
max_bins = 3
)
# Display results
cat("Number of classes:", result_mwoe$n_classes, "\n")
cat("Number of bins:", length(result_mwoe$bin), "\n")
cat("Converged:", result_mwoe$converged, "\n")
cat("Iterations:", result_mwoe$iterations, "\n\n")
# Show bin details
for (i in seq_along(result_mwoe$bin)) {
cat(sprintf("Bin %d (%s):\n", i, result_mwoe$bin[i]))
cat(" Total count:", result_mwoe$count[i], "\n")
cat(" Class counts:", result_mwoe$class_counts[i, ], "\n")
cat(" Class rates:", round(result_mwoe$class_rates[i, ], 3), "\n")
# Show WoE and IV for each class
for (class in 0:(result_mwoe$n_classes - 1)) {
cat(sprintf(
" Class %d: WoE=%.3f, IV=%.4f\n",
class,
result_mwoe$woe[i, class + 1], # R is 1-indexed
result_mwoe$iv[i, class + 1]
))
}
cat("\n")
}
# Show total IV per class
cat("Total IV per class:\n")
for (class in 0:(result_mwoe$n_classes - 1)) {
cat(sprintf(" Class %d: %.4f\n", class, result_mwoe$total_iv[class + 1]))
}
# Example 2: High-cardinality multinomial problem
set.seed(123)
n_obs_hc <- 2000
# Simulate product categories with 4 classes
products <- paste0("Product_", LETTERS[1:15])
cat_feature_hc <- sample(products, n_obs_hc, replace = TRUE)
# Generate 4-class target
multinom_target_hc <- sample(0:3, n_obs_hc,
replace = TRUE,
prob = c(0.3, 0.25, 0.25, 0.2)
)
result_mwoe_hc <- ob_categorical_jedi_mwoe(
cat_feature_hc,
multinom_target_hc,
min_bins = 3,
max_bins = 6,
max_n_prebins = 15,
bin_cutoff = 0.03
)
cat("\nHigh-cardinality example:\n")
cat("Original categories:", length(unique(cat_feature_hc)), "\n")
cat("Final bins:", length(result_mwoe_hc$bin), "\n")
cat("Classes:", result_mwoe_hc$n_classes, "\n")
cat("Converged:", result_mwoe_hc$converged, "\n\n")
# Show merged categories
for (i in seq_along(result_mwoe_hc$bin)) {
n_merged <- length(strsplit(result_mwoe_hc$bin[i], "%;%")[[1]])
if (n_merged > 1) {
cat(sprintf("Bin %d: %d categories merged\n", i, n_merged))
}
}
# Example 3: Laplace smoothing demonstration
set.seed(789)
n_obs_smooth <- 500
# Small sample with sparse categories
categories <- c("A", "B", "C", "D", "E")
cat_feature_smooth <- sample(categories, n_obs_smooth,
replace = TRUE,
prob = c(0.3, 0.25, 0.2, 0.15, 0.1)
)
# Generate 3-class target with class imbalance
multinom_target_smooth <- sample(0:2, n_obs_smooth,
replace = TRUE,
prob = c(0.6, 0.3, 0.1)
) # Class 0 dominant
result_mwoe_smooth <- ob_categorical_jedi_mwoe(
cat_feature_smooth,
multinom_target_smooth,
min_bins = 2,
max_bins = 4,
bin_cutoff = 0.02
)
cat("\nLaplace smoothing demonstration:\n")
cat("Sample size:", n_obs_smooth, "\n")
cat("Classes:", result_mwoe_smooth$n_classes, "\n")
cat("Event distribution:", table(multinom_target_smooth), "\n\n")
# Show how smoothing prevents extreme values
for (i in seq_along(result_mwoe_smooth$bin)) {
cat(sprintf("Bin %d (%s):\n", i, result_mwoe_smooth$bin[i]))
cat(" Counts per class:", result_mwoe_smooth$class_counts[i, ], "\n")
cat(" WoE values:", round(result_mwoe_smooth$woe[i, ], 3), "\n")
cat(" Note: Extreme WoE values prevented by Laplace smoothing\n\n")
}
# Example 4: Class-wise monotonicity
set.seed(456)
n_obs_mono <- 1200
# Feature with predictable class patterns
education <- c("PhD", "Master", "Bachelor", "College", "HighSchool")
# Each education level has a preferred class
preferred_classes <- c(2, 1, 0, 1, 2) # PhD→High(2), Bachelor→Low(0), etc.
cat_feature_mono <- sample(education, n_obs_mono, replace = TRUE)
# Generate target with preferred class bias
multinom_target_mono <- sapply(cat_feature_mono, function(edu) {
pref_class <- preferred_classes[which(education == edu)]
# Create probability vector with preference
probs <- rep(0.1, 3) # Base probability
probs[pref_class + 1] <- 0.8 # Preferred class gets high probability
sample(0:2, 1, prob = probs / sum(probs))
})
result_mwoe_mono <- ob_categorical_jedi_mwoe(
cat_feature_mono,
multinom_target_mono,
min_bins = 3,
max_bins = 5
)
cat("Class-wise monotonicity example:\n")
cat("Education levels:", length(education), "\n")
cat("Final bins:", length(result_mwoe_mono$bin), "\n")
cat("Iterations:", result_mwoe_mono$iterations, "\n\n")
# Check monotonicity for each class
for (class in 0:(result_mwoe_mono$n_classes - 1)) {
woe_series <- result_mwoe_mono$woe[, class + 1]
diffs <- diff(woe_series)
is_mono <- all(diffs >= -1e-6) || all(diffs <= 1e-6)
cat(sprintf("Class %d WoE monotonic: %s\n", class, is_mono))
cat(sprintf(" WoE series: %s\n", paste(round(woe_series, 3), collapse = ", ")))
}
# Example 5: Missing value handling
set.seed(321)
cat_feature_na <- cat_feature
na_indices <- sample(n_obs, 75) # 5% missing
cat_feature_na[na_indices] <- NA
result_mwoe_na <- ob_categorical_jedi_mwoe(
cat_feature_na,
multinom_target,
min_bins = 2,
max_bins = 3
)
# Locate missing value bin
missing_bin_idx <- grep("N/A", result_mwoe_na$bin)
if (length(missing_bin_idx) > 0) {
cat("\nMissing value handling:\n")
cat("Missing value bin:", result_mwoe_na$bin[missing_bin_idx], "\n")
cat("Missing value count:", result_mwoe_na$count[missing_bin_idx], "\n")
cat(
"Class distribution in missing bin:",
result_mwoe_na$class_counts[missing_bin_idx, ], "\n"
)
# Show class rates for missing bin
for (class in 0:(result_mwoe_na$n_classes - 1)) {
cat(sprintf(
" Class %d rate: %.3f\n", class,
result_mwoe_na$class_rates[missing_bin_idx, class + 1]
))
}
}
# Example 6: Convergence behavior
set.seed(555)
n_obs_conv <- 1000
departments <- c("Sales", "IT", "HR", "Finance", "Operations")
cat_feature_conv <- sample(departments, n_obs_conv, replace = TRUE)
multinom_target_conv <- sample(0:2, n_obs_conv, replace = TRUE)
# Test different convergence thresholds
thresholds <- c(1e-3, 1e-6, 1e-9)
for (thresh in thresholds) {
result_conv <- ob_categorical_jedi_mwoe(
cat_feature_conv,
multinom_target_conv,
min_bins = 2,
max_bins = 4,
convergence_threshold = thresh,
max_iterations = 100
)
cat(sprintf("\nThreshold %.0e:\n", thresh))
cat(" Final bins:", length(result_conv$bin), "\n")
cat(" Converged:", result_conv$converged, "\n")
cat(" Iterations:", result_conv$iterations, "\n")
# Show total IV for each class
cat(" Total IV per class:")
for (class in 0:(result_conv$n_classes - 1)) {
cat(sprintf(" %.4f", result_conv$total_iv[class + 1]))
}
cat("\n")
}
# }
Run the code above in your browser using DataLab