suppressMessages(library(PDtoolkit))
data(loans)

# Prepare the Purpose risk factor: any code that is not exactly two
# characters long gets a leading "0"; two-character codes stay untouched.
loans$Purpose <- ifelse(
  nchar(loans$Purpose) != 2,
  paste0("0", loans$Purpose),
  loans$Purpose
)

# Deliberately set the first six Purpose values to NA so that the examples
# below can demonstrate the functions' features on missing values.
loans[["Purpose"]][seq_len(6)] <- NA
# Run the binning procedure on Purpose against Creditability.
# No special-case values are declared (sc = NA, sc.merge = "none"),
# max.groups is left as NA and force.trend is "modalities".
res <- cat.bin(
  x = loans$Purpose,
  y = loans$Creditability,
  sc = NA,
  sc.merge = "none",
  min.pct.obs = 0.05,
  min.avg.rate = 0.05,
  max.groups = NA,
  force.trend = "modalities"
)

# Display the first element of the result.
res[[1]]

# Cross-tabulate the original Purpose values against the new risk factor
# (second element of the result); useNA = "always" keeps the NA level visible.
table(loans[["Purpose"]], res[[2]], useNA = "always")
# Same procedure again, this time with max.groups = 4 and
# force.trend = "dr"; every other argument is as in the previous run.
res <- cat.bin(
  x = loans$Purpose,
  y = loans$Creditability,
  sc = NA,
  sc.merge = "none",
  min.pct.obs = 0.05,
  min.avg.rate = 0.05,
  max.groups = 4,
  force.trend = "dr"
)

# Display the first element of the result.
res[[1]]

# Cross-tabulate the original Purpose values against the new risk factor
# (second element of the result); useNA = "always" keeps the NA level visible.
table(loans[["Purpose"]], res[[2]], useNA = "always")
# Example: shrinking the number of groups for a numeric risk factor.

# Copy the existing numeric risk factor into a new column called maturity.
loans[["maturity"]] <- loans[["Duration of Credit (month)"]]

# Deliberately set the first ten maturity values to NA so that the example
# can demonstrate the functions' features on missing values.
loans[["maturity"]][seq_len(10)] <- NA

# Categorize maturity with the MAPA algorithm from the monobin package
# (cum.bin, called with g = 50) and keep the second element of its result
# as the binned risk factor.
loans$maturity.bin <- cum.bin(
  x = loans$maturity,
  y = loans$Creditability,
  g = 50
)[[2]]
table(loans$maturity.bin)

# Run the binning procedure on the pre-binned factor to reduce the number of
# bins from the previous step: "SC" is passed as the special-case value with
# sc.merge = "closest", and max.groups is set to 5.
res <- cat.bin(
  x = loans$maturity.bin,
  y = loans$Creditability,
  sc = "SC",
  sc.merge = "closest",
  min.pct.obs = 0.05,
  min.avg.rate = 0.01,
  max.groups = 5,
  force.trend = "modalities"
)

# Display the first element of the result.
res[[1]]

# Cross-tabulate the pre-binned factor against the new risk factor
# (second element of the result); useNA = "always" keeps the NA level visible.
table(loans[["maturity.bin"]], res[[2]], useNA = "always")
# Run the code above in your browser using DataLab