# Example 1: Basic usage with synthetic data
# Fix the RNG seed so the simulated data below is reproducible.
set.seed(123)
n <- 1000
categories <- c("A", "B", "C", "D", "E", "F", "G", "H")
# Sampling weight of each category, in the same order as `categories`.
category_weights <- c(
  0.2, 0.15, 0.15,
  0.1, 0.1, 0.1,
  0.1, 0.1
)
feature <- sample(categories, n, replace = TRUE, prob = category_weights)
# Create target with some association to categories
probs <- c(0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85) # increasing probability
# Both lookup vectors are indexed by category position, so they must line up
# with `categories`; fail early if one of them is edited out of sync.
stopifnot(
  length(category_weights) == length(categories),
  length(probs) == length(categories)
)
# Map every observation to the success probability of its category, then draw
# one 0/1 outcome per observation in a single vectorized call.
target <- rbinom(length(feature), 1, probs[match(feature, categories)])
# Bin the categorical feature against the binary target with default settings.
result <- ob_categorical_cm(feature, target)

# Show only the per-bin fields of interest from the returned object.
summary_fields <- c("bin", "woe", "iv", "count")
print(result[summary_fields])

# View metadata
total_iv_rounded <- round(result$metadata$total_iv, 3)
print(paste("Total IV:", total_iv_rounded))
convergence_flag <- result$converged
print(paste("Algorithm converged:", convergence_flag))

# Example 2: Using Chi2 algorithm for more conservative binning
result_chi2 <- ob_categorical_cm(
  feature,
  target,
  use_chi2_algorithm = TRUE,
  max_bins = 6
)

# Compare number of bins
n_bins_standard <- result$metadata$n_bins
n_bins_chi2 <- result_chi2$metadata$n_bins
cat("Standard ChiMerge bins:", n_bins_standard, "\n")
cat("Chi2 algorithm bins:", n_bins_chi2, "\n")
# Run the code above in your browser using DataLab