# \donttest{
# Example 1: Basic usage with Fisher's Exact Test
set.seed(42)
n_obs <- 800
# Simulate customer segments with different risk profiles
segments <- c("Premium", "Standard", "Basic", "Budget", "Economy")
risk_rates <- c(0.05, 0.10, 0.15, 0.22, 0.30)
cat_feature <- sample(segments, n_obs,
replace = TRUE,
prob = c(0.15, 0.25, 0.30, 0.20, 0.10)
)
bin_target <- sapply(cat_feature, function(x) {
rbinom(1, 1, risk_rates[which(segments == x)])
})
# Apply Fisher's Exact Test binning
result_fetb <- ob_categorical_fetb(
cat_feature,
bin_target,
min_bins = 2,
max_bins = 4
)
# Display results
print(data.frame(
Bin = result_fetb$bin,
WoE = round(result_fetb$woe, 3),
IV = round(result_fetb$iv, 4),
Count = result_fetb$count,
EventRate = round(result_fetb$count_pos / result_fetb$count, 3)
))
cat("\nAlgorithm converged:", result_fetb$converged, "\n")
cat("Iterations performed:", result_fetb$iterations, "\n")
# Example 2: Comparing with ChiMerge method
result_cm <- ob_categorical_cm(
cat_feature,
bin_target,
min_bins = 2,
max_bins = 4
)
cat("\nFisher's Exact Test:\n")
cat(" Final bins:", length(result_fetb$bin), "\n")
cat(" Total IV:", round(sum(result_fetb$iv), 4), "\n")
cat("\nChiMerge:\n")
cat(" Final bins:", length(result_cm$bin), "\n")
cat(" Total IV:", round(sum(result_cm$iv), 4), "\n")
# Example 3: Small sample size (Fisher's advantage)
set.seed(123)
n_obs_small <- 150
# Small sample with sparse categories
occupation <- c(
"Doctor", "Lawyer", "Teacher", "Engineer",
"Sales", "Manager"
)
cat_feature_small <- sample(occupation, n_obs_small,
replace = TRUE,
prob = c(0.10, 0.10, 0.20, 0.25, 0.20, 0.15)
)
bin_target_small <- rbinom(n_obs_small, 1, 0.12)
result_fetb_small <- ob_categorical_fetb(
cat_feature_small,
bin_target_small,
min_bins = 2,
max_bins = 3,
bin_cutoff = 0.03 # Allow smaller bins for small sample
)
cat("\nSmall sample binning:\n")
cat(" Observations:", n_obs_small, "\n")
cat(" Original categories:", length(unique(cat_feature_small)), "\n")
cat(" Final bins:", length(result_fetb_small$bin), "\n")
cat(" Converged:", result_fetb_small$converged, "\n")
# Example 4: High cardinality with rare categories
set.seed(789)
n_obs_hc <- 2000
# Simulate product codes (high cardinality)
product_codes <- paste0("PROD_", sprintf("%03d", 1:30))
cat_feature_hc <- sample(product_codes, n_obs_hc,
replace = TRUE,
prob = c(
rep(0.05, 10), rep(0.02, 10),
rep(0.01, 10)
)
)
bin_target_hc <- rbinom(n_obs_hc, 1, 0.08)
result_fetb_hc <- ob_categorical_fetb(
cat_feature_hc,
bin_target_hc,
min_bins = 3,
max_bins = 6,
bin_cutoff = 0.02,
max_n_prebins = 15
)
cat("\nHigh cardinality example:\n")
cat(" Original categories:", length(unique(cat_feature_hc)), "\n")
cat(" Final bins:", length(result_fetb_hc$bin), "\n")
cat(" Iterations:", result_fetb_hc$iterations, "\n")
# Check for rare category merging
for (i in seq_along(result_fetb_hc$bin)) {
n_merged <- length(strsplit(result_fetb_hc$bin[i], "%;%")[[1]])
if (n_merged > 1) {
cat(" Bin", i, "contains", n_merged, "merged categories\n")
}
}
# Example 5: Missing value handling
set.seed(456)
cat_feature_na <- cat_feature
na_indices <- sample(n_obs, 40) # 5% missing
cat_feature_na[na_indices] <- NA
result_fetb_na <- ob_categorical_fetb(
cat_feature_na,
bin_target,
min_bins = 2,
max_bins = 4
)
# Check NA treatment
na_bin_idx <- grep("NA", result_fetb_na$bin)
if (length(na_bin_idx) > 0) {
cat("\nMissing value handling:\n")
cat(" NA bin:", result_fetb_na$bin[na_bin_idx], "\n")
cat(" NA count:", result_fetb_na$count[na_bin_idx], "\n")
cat(" NA WoE:", round(result_fetb_na$woe[na_bin_idx], 3), "\n")
}
# }
Run the code above in your browser using DataLab