# \donttest{
# Example 1: Basic IV optimization with Bayesian smoothing
# Simulates a six-level industry feature whose default probability differs by
# level, bins it with ob_categorical_ivb(), and prints the per-bin summary.
set.seed(42)
n_obs <- 1200
# Simulate industry sectors with varying default risk.
# default_rates[i] is the default probability used for industries[i].
industries <- c(
  "Technology", "Healthcare", "Finance", "Manufacturing",
  "Retail", "Energy"
)
default_rates <- c(0.03, 0.05, 0.08, 0.12, 0.18, 0.25)
cat_feature <- sample(industries, n_obs,
  replace = TRUE,
  prob = c(0.20, 0.18, 0.22, 0.18, 0.12, 0.10)
)
# Draw one Bernoulli outcome per observation in a single vectorized call.
# match() maps every observation to the position of its industry, so each
# draw uses that industry's default rate. Unlike sapply() over a character
# vector, this yields a plain integer vector without element names.
bin_target <- rbinom(
  n_obs,
  size = 1,
  prob = default_rates[match(cat_feature, industries)]
)
# Apply IVB optimization, requesting between 3 and 4 bins
result_ivb <- ob_categorical_ivb(
  cat_feature,
  bin_target,
  min_bins = 3,
  max_bins = 4
)
# Display results: one row per bin; EventRate is count_pos divided by count
print(data.frame(
  Bin = result_ivb$bin,
  WoE = round(result_ivb$woe, 3),
  IV = round(result_ivb$iv, 4),
  Count = result_ivb$count,
  EventRate = round(result_ivb$count_pos / result_ivb$count, 3)
))
# Overall summary fields reported by the fit
cat("\nTotal IV (maximized):", round(result_ivb$total_iv, 4), "\n")
cat("Converged:", result_ivb$converged, "\n")
cat("Iterations:", result_ivb$iterations, "\n")
# Example 2: Comparing IV optimization with other methods
# Runs three binning functions on the same simulated data, each with the same
# 2-3 bin limits, and prints the total IV reported by each.
set.seed(123)
n_obs_comp <- 1500
regions <- c("North", "South", "East", "West", "Central")
cat_feature_comp <- sample(regions, size = n_obs_comp, replace = TRUE)
# The target is drawn with a constant 15% event probability, independently of
# the region feature.
bin_target_comp <- rbinom(n_obs_comp, size = 1, prob = 0.15)

# IVB (IV-optimized)
result_ivb_comp <- ob_categorical_ivb(
  cat_feature_comp,
  bin_target_comp,
  min_bins = 2,
  max_bins = 3
)

# GMB (greedy approximation)
result_gmb_comp <- ob_categorical_gmb(
  cat_feature_comp,
  bin_target_comp,
  min_bins = 2,
  max_bins = 3
)

# DP (general optimization)
result_dp_comp <- ob_categorical_dp(
  cat_feature_comp,
  bin_target_comp,
  min_bins = 2,
  max_bins = 3
)

# Print one labelled line holding a fit's total IV rounded to 4 decimals.
report_total_iv <- function(label, fit) {
  cat(label, round(fit$total_iv, 4), "\n")
}

cat("\nMethod comparison:\n")
report_total_iv(" IVB total IV:", result_ivb_comp)
report_total_iv(" GMB total IV:", result_gmb_comp)
report_total_iv(" DP total IV:", result_dp_comp)
cat("\nIVB typically achieves highest IV due to explicit optimization\n")
# }
# Run the code above in your browser using DataLab