# \donttest{
# Simulate overdispersed credit scoring data with noise
set.seed(2024)
n <- 10000
# Create feature with multiple regimes and noise
feature <- c(
rnorm(3000, mean = 580, sd = 70), # High-risk cluster
rnorm(4000, mean = 680, sd = 50), # Medium-risk cluster
rnorm(2000, mean = 740, sd = 40), # Low-risk cluster
runif(1000, min = 500, max = 800) # Noise (uniform distribution)
)
target <- c(
rbinom(3000, 1, 0.30), # 30% default rate
rbinom(4000, 1, 0.12), # 12% default rate
rbinom(2000, 1, 0.04), # 4% default rate
rbinom(1000, 1, 0.15) # Noisy segment
)
# Apply MDLP with default parameters
result <- ob_numerical_mdlp(
feature = feature,
target = target,
min_bins = 3,
max_bins = 5,
bin_cutoff = 0.05,
max_n_prebins = 20
)
# Inspect results
print(result$bin)
print(data.frame(
Bin = result$bin,
WoE = round(result$woe, 4),
IV = round(result$iv, 4),
Count = result$count
))
cat(sprintf("\nTotal IV: %.4f\n", result$total_iv))
cat(sprintf("Converged: %s\n", result$converged))
cat(sprintf("Iterations: %d\n", result$iterations))
# Verify monotonicity
is_monotonic <- all(diff(result$woe) >= -1e-10)
cat(sprintf("WoE Monotonic: %s\n", is_monotonic))
# Compare with different Laplace smoothing
result_nosmooth <- ob_numerical_mdlp(
feature = feature,
target = target,
laplace_smoothing = 0.0 # No smoothing (risky for rare bins)
)
result_highsmooth <- ob_numerical_mdlp(
feature = feature,
target = target,
laplace_smoothing = 2.0 # Higher regularization
)
# Compare WoE stability
data.frame(
Bin = seq_along(result$woe),
WoE_default = result$woe,
WoE_no_smooth = result_nosmooth$woe,
WoE_high_smooth = result_highsmooth$woe
)
# Visualize binning structure
oldpar <- par(mfrow = c(1, 2))
# WoE plot
plot(result$woe,
type = "b", col = "blue", pch = 19,
xlab = "Bin", ylab = "WoE",
main = "Weight of Evidence by Bin"
)
grid()
# IV contribution plot
barplot(result$iv,
names.arg = seq_along(result$iv),
col = "steelblue", border = "white",
xlab = "Bin", ylab = "IV Contribution",
main = sprintf("Total IV = %.4f", result$total_iv)
)
grid()
par(oldpar)
# }
Run the code above in your browser using DataLab