# Load the germancredit dataset into the workspace.
data(germancredit)

# Example I
# Bin two variables of the germancredit dataset with the tree method.
bins2_tree <- woebin(
  germancredit,
  y = "creditability",
  x = c("credit.amount", "housing"),
  method = "tree"
)
# Show the resulting bins.
bins2_tree
if (FALSE) {
  # The examples below are wrapped in `if (FALSE)` so they are not executed
  # automatically; run them by hand as needed.

  # using chimerge method
  bins2_chi <- woebin(
    germancredit,
    y = "creditability",
    x = c("credit.amount", "housing"),
    method = "chimerge"
  )

  # binning in equal freq/width (only supports numerical variables)
  numeric_cols <- c(
    "duration.in.month", "credit.amount",
    "installment.rate.in.percentage.of.disposable.income",
    "present.residence.since", "age.in.years",
    "number.of.existing.credits.at.this.bank",
    "number.of.people.being.liable.to.provide.maintenance.for"
  )
  bins_freq <- woebin(
    germancredit, y = "creditability", x = numeric_cols, method = "freq"
  )
  bins_width <- woebin(
    germancredit, y = "creditability", x = numeric_cols, method = "width"
  )

  # y can be NULL if there is no label column in the dataset
  bins_freq_noy <- woebin(germancredit, y = NULL, x = numeric_cols)

  # Example II
  # setting of stop_limit
  # stop_limit = 0.1 (by default)
  bins_x1 <- woebin(
    germancredit, y = "creditability", x = "foreign.worker", stop_limit = 0.1
  )
  # stop_limit = 'N', each x value is a bin
  bins_x1_N <- woebin(
    germancredit, y = "creditability", x = "foreign.worker", stop_limit = "N"
  )

  # Example III
  # binning of the germancredit dataset
  bins_germ <- woebin(germancredit, y = "creditability")
  # converting bins_germ into a data frame
  # bins_germ_df = data.table::rbindlist(bins_germ)

  # Example IV
  # customizing the breakpoints of binning
  library(data.table)
  # Append 10 rows that carry only a (randomly drawn) label; with fill = TRUE
  # every other column of those rows is NA, so the predictors contain
  # missing values.
  # as.data.table() returns a copy, so germancredit itself is left untouched
  # (setDT() would convert it to a data.table by reference).
  dat <- rbind(
    as.data.table(germancredit),
    data.table(creditability = sample(c("good", "bad"), 10, replace = TRUE)),
    fill = TRUE
  )

  breaks_list <- list(
    age.in.years = c(26, 35, 37, "Inf%,%missing"),
    housing = c("own", "for free%,%rent")
  )
  special_values <- list(
    credit.amount = c(2600, 9960, "6850%,%missing"),
    purpose = c("education", "others%,%missing")
  )

  bins_cus_brk <- woebin(
    dat,
    y = "creditability",
    x = c("age.in.years", "credit.amount", "housing", "purpose"),
    breaks_list = breaks_list,
    special_values = special_values
  )

  # Example V
  # save breaks_list as an R file
  bins2 <- woebin(
    germancredit,
    y = "creditability",
    x = c("credit.amount", "housing"),
    save_as = "breaks_list"
  )

  # Example VI
  # setting bin closed on the right; options() returns the previous value,
  # which is kept so the caller's setting can be restored afterwards
  old_opts <- options(scorecard.bin_close_right = TRUE)
  binsRight <- woebin(germancredit, y = "creditability", x = "age.in.years")
  # explicit print(): a bare name in the middle of a braced block is not
  # auto-printed
  print(binsRight)
  # restore the previous setting (bins closed on the left, i.e.
  # scorecard.bin_close_right = FALSE, is the default setting)
  options(old_opts)
}
# Run the code above in your browser using DataLab