# \donttest{
# =============================================================================
# Example 1: Basic Usage with Mixed Feature Types
# =============================================================================
# Fix the RNG seed so the simulated data set is reproducible
set.seed(42)
n <- 2000
# Labels and sampling weights for the two categorical features
education_levels <- c("High School", "Bachelor", "Master", "PhD")
education_probs <- c(0.35, 0.40, 0.20, 0.05)
employment_levels <- c("Employed", "Self-Employed", "Unemployed", "Retired")
employment_probs <- c(0.60, 0.20, 0.10, 0.10)
# Simulated credit scoring data. Columns are drawn in the order listed, so
# the random stream (and therefore every value) is fixed by the seed above.
df <- data.frame(
  # Numerical features
  age = pmax(18, pmin(80, rnorm(n, mean = 45, sd = 15))), # clamped to [18, 80]
  income = exp(rnorm(n, mean = 10, sd = 0.8)), # log-normal
  debt_ratio = rbeta(n, shape1 = 2, shape2 = 5),
  credit_history_months = rpois(n, lambda = 60),
  # Categorical features
  education = sample(
    education_levels,
    size = n, replace = TRUE, prob = education_probs
  ),
  employment = sample(
    employment_levels,
    size = n, replace = TRUE, prob = employment_probs
  ),
  # Binary target: constant 15% event rate, drawn independently of the
  # features above
  target = rbinom(n, size = 1, prob = 0.15)
)
# Bin every feature in `df`, leaving the choice of algorithm to obwoe()
result <- obwoe(df, target = "target")
print(result)
# Per-feature summary table
print(result$summary)
# Pull out the binning of a single feature and show a compact, rounded view
age_bins <- result$results$age
age_table <- data.frame(
  bin = age_bins$bin,
  woe = round(age_bins$woe, digits = 3),
  iv = round(age_bins$iv, digits = 4),
  count = age_bins$count
)
print(age_table)
# =============================================================================
# Example 2: Using a Specific Algorithm
# =============================================================================
# Restrict the run to two numerical features and request the entropy-based
# MDLP algorithm, allowing between 3 and 6 bins per feature
numeric_features <- c("age", "income")
result_mdlp <- obwoe(
  df,
  target = "target",
  feature = numeric_features,
  algorithm = "mdlp",
  min_bins = 3,
  max_bins = 6
)
cat("\nMDLP Results:\n")
mdlp_summary <- result_mdlp$summary
print(mdlp_summary)
# =============================================================================
# Example 3: Custom Control Parameters
# =============================================================================
# Build a control object that overrides selected tuning parameters
ctrl <- control.obwoe(
  # Minimum 2% per bin
  bin_cutoff = 0.02,
  # Allow more initial bins
  max_n_prebins = 30,
  convergence_threshold = 1e-8
)
# Bin a single feature with the JEDI algorithm under those settings
result_custom <- obwoe(
  df,
  target = "target",
  feature = "debt_ratio",
  algorithm = "jedi",
  control = ctrl
)
cat("\nCustom JEDI Result:\n")
debt_ratio_bins <- result_custom$results$debt_ratio$bin
print(debt_ratio_bins)
# =============================================================================
# Example 4: Comparing Multiple Algorithms
# =============================================================================
# Candidate algorithms to compare on a single feature
algorithms <- c("jedi", "mdlp", "ewb", "mob")
# Compute the total IV of 'income' under each algorithm.
# vapply() (instead of sapply()) guarantees a named numeric vector with exactly
# one value per algorithm, whatever the individual calls return.
iv_comparison <- vapply(algorithms, function(algo) {
  tryCatch(
    {
      res <- obwoe(df, target = "target", feature = "income", algorithm = algo)
      res$summary$total_iv
    },
    error = function(e) {
      # Best effort: report why the algorithm failed, then record a missing IV
      message("Algorithm '", algo, "' failed: ", conditionMessage(e))
      NA_real_
    }
  )
}, numeric(1))
cat("\nAlgorithm Comparison (IV for 'income'):\n")
# na.last = TRUE keeps algorithms without an IV visible (as NA, listed last);
# sort() would otherwise drop them silently from the comparison
print(sort(iv_comparison, decreasing = TRUE, na.last = TRUE))
# =============================================================================
# Example 5: Feature Selection Based on IV
# =============================================================================
# Process all features and keep those with IV >= 0.02 that were binned
# without an error
result_all <- obwoe(df, target = "target")
iv_summary <- result_all$summary
# which() discards NA comparisons, so a feature with a missing IV or error
# flag is excluded instead of producing an all-NA row in the result
keep_rows <- which(iv_summary$total_iv >= 0.02 & !iv_summary$error)
strong_features <- iv_summary[keep_rows, c("feature", "total_iv", "n_bins")]
# Strongest features first
strong_features <- strong_features[order(-strong_features$total_iv), ]
cat("\nFeatures with IV >= 0.02 (predictive):\n")
print(strong_features)
# =============================================================================
# Example 6: Handling Algorithm Compatibility
# =============================================================================
# Request MDLP for every column of `df`, categorical ones included.
# MDLP only works for numerical features, so it will fail for the
# categorical ones.
result_mixed <- obwoe(df, target = "target", algorithm = "mdlp")
# Show the per-feature type and error flag
cat("\nCompatibility check:\n")
compat_cols <- c("feature", "type", "error")
print(result_mixed$summary[, compat_cols])
# }
# Run the code above in your browser using DataLab