# \donttest{
# Example 1: Basic usage with monotonic WoE enforcement
set.seed(123)
n_obs <- 1000
# Simulate education levels with increasing default risk
education <- c("High School", "Associate", "Bachelor", "Master", "PhD")
default_probs <- c(0.20, 0.15, 0.10, 0.06, 0.03)
cat_feature <- sample(education, n_obs,
replace = TRUE,
prob = c(0.30, 0.25, 0.25, 0.15, 0.05)
)
bin_target <- sapply(cat_feature, function(x) {
rbinom(1, 1, default_probs[which(education == x)])
})
# Apply DP binning with ascending monotonicity
result_dp <- ob_categorical_dp(
cat_feature,
bin_target,
min_bins = 2,
max_bins = 4,
monotonic_trend = "ascending"
)
# Display results
print(data.frame(
Bin = result_dp$bin,
WoE = round(result_dp$woe, 3),
IV = round(result_dp$iv, 4),
Count = result_dp$count,
EventRate = round(result_dp$event_rate, 3)
))
cat("Total IV:", round(result_dp$total_iv, 4), "\n")
cat("Converged:", result_dp$converged, "\n")
# Example 2: Comparing monotonicity constraints
result_dp_asc <- ob_categorical_dp(
cat_feature, bin_target,
max_bins = 3,
monotonic_trend = "ascending"
)
result_dp_none <- ob_categorical_dp(
cat_feature, bin_target,
max_bins = 3,
monotonic_trend = "none"
)
cat("\nWith monotonicity:\n")
cat(" Bins:", length(result_dp_asc$bin), "\n")
cat(" Total IV:", round(result_dp_asc$total_iv, 4), "\n")
cat("\nWithout monotonicity:\n")
cat(" Bins:", length(result_dp_none$bin), "\n")
cat(" Total IV:", round(result_dp_none$total_iv, 4), "\n")
# Example 3: High cardinality with pre-binning
set.seed(456)
n_obs_large <- 5000
# Simulate customer segments (high cardinality)
segments <- paste0("Segment_", LETTERS[1:20])
segment_probs <- runif(20, 0.01, 0.20)
cat_feature_hc <- sample(segments, n_obs_large, replace = TRUE)
bin_target_hc <- rbinom(
n_obs_large, 1,
segment_probs[match(cat_feature_hc, segments)]
)
result_dp_hc <- ob_categorical_dp(
cat_feature_hc,
bin_target_hc,
min_bins = 3,
max_bins = 5,
bin_cutoff = 0.03,
max_n_prebins = 10
)
cat("\nHigh cardinality example:\n")
cat(" Original categories:", length(unique(cat_feature_hc)), "\n")
cat(" Final bins:", length(result_dp_hc$bin), "\n")
cat(" Execution time:", result_dp_hc$execution_time_ms, "ms\n")
# Example 4: Handling missing values
set.seed(789)
cat_feature_na <- cat_feature
cat_feature_na[sample(n_obs, 50)] <- NA # Introduce 5% missing
result_dp_na <- ob_categorical_dp(
cat_feature_na,
bin_target,
min_bins = 2,
max_bins = 4
)
# Check if NA was treated as a category
na_bin <- grep("NA", result_dp_na$bin, value = TRUE)
if (length(na_bin) > 0) {
cat("\nNA handling:\n")
cat(" Bin containing NA:", na_bin, "\n")
}
# }
Run the code above in your browser using DataLab