# \donttest{
# Example 1: Basic usage with synthetic credit data
set.seed(42)
n <- 1000

# Occupation categories; default_probs[i] is the simulated default
# probability for occupations[i].
occupations <- c(
  "Engineer", "Doctor", "Teacher", "Sales",
  "Manager", "Clerk", "Other"
)
default_probs <- c(0.05, 0.03, 0.08, 0.15, 0.07, 0.12, 0.20)

# Draw one occupation per observation with unequal category weights.
feature <- sample(occupations, n,
  replace = TRUE,
  prob = c(0.15, 0.10, 0.20, 0.18, 0.12, 0.15, 0.10)
)

# Draw one Bernoulli default flag per observation. rbinom() is vectorised
# over `prob`, so match() looks up every observation's probability in one
# pass instead of one which() + rbinom() call per row. The result is a plain
# unnamed integer vector; sapply() over a character vector would have
# attached the occupation labels as names.
event_prob <- default_probs[match(feature, occupations)]
target <- rbinom(n, size = 1, prob = event_prob)
# Run the binning with the L2 divergence, asking for between 2 and 4 bins
result <- ob_categorical_dmiv(
  feature,
  target,
  divergence_method = "l2",
  min_bins = 2,
  max_bins = 4
)

# Tabulate the per-bin fields of the result: label, WoE (3 decimals), count,
# and the event rate computed as count_pos / count (3 decimals)
print(
  data.frame(
    bin = result[["bin"]],
    woe = round(result[["woe"]], digits = 3),
    count = result[["count"]],
    event_rate = round(result[["count_pos"]] / result[["count"]], digits = 3)
  )
)
# Example 2: Comparing divergence methods
# Same data and bin cap (max_bins = 4); only divergence_method differs.
result_js <- ob_categorical_dmiv(
  feature,
  target,
  max_bins = 4,
  divergence_method = "js"
)
result_kl <- ob_categorical_dmiv(
  feature,
  target,
  max_bins = 4,
  divergence_method = "kl"
)

# Report how many bins each run produced
cat("Jensen-Shannon bins:", length(result_js[["bin"]]), "\n")
cat("Kullback-Leibler bins:", length(result_kl[["bin"]]), "\n")
# Example 3: High cardinality feature with pre-binning
set.seed(123)

# 50 distinct labels ZIP_001 ... ZIP_050, sampled with replacement 2000 times
postal_codes <- sprintf("ZIP_%03d", 1:50)
feature_high_card <- sample(postal_codes, size = 2000, replace = TRUE)

# Binary target drawn independently of the feature (event probability 0.1)
target_high_card <- rbinom(n = 2000, size = 1, prob = 0.1)

# Bin with max_n_prebins = 15 and max_bins = 5
result_prebin <- ob_categorical_dmiv(
  feature_high_card,
  target_high_card,
  max_bins = 5,
  max_n_prebins = 15
)

# Report the final bin count and the result's `converged` field
cat("Final bins after pre-binning:", length(result_prebin[["bin"]]), "\n")
cat("Algorithm converged:", result_prebin[["converged"]], "\n")
# }
# Run the code above in your browser using DataLab