# Attach monobin, which this script uses for iso.bin() and data(gcd).
# dplyr is attached explicitly as well: the stage-by-stage example further
# down calls %>%, group_by(), summarise(), mutate() and n() directly, so the
# script should not rely on another package putting them on the search path.
suppressMessages(library(monobin))
suppressMessages(library(dplyr))
data(gcd)

# Bin age against the binary target qual using the default settings.
age.bin <- iso.bin(x = gcd$age, y = gcd$qual)
age.bin[[1]]        # first element of the returned list
table(age.bin[[2]]) # frequency table of the second element

# force increasing trend
iso.bin(x = gcd$age, y = gcd$qual, force.trend = "i")[[1]]
# Stage-by-stage example ----

# Inputs
x <- gcd$age          # risk factor
y <- gcd$qual         # binary dependent variable
min.pct.obs <- 0.05   # minimum percentage of observations per bin
min.avg.rate <- 0.01  # minimum percentage of defaults per bin

# Stage 1: isotonic regression
# Pair the inputs and sort the rows by the risk factor.
db <- data.frame(x, y)
db <- db[order(db[["x"]]), ]

# Sign of the Spearman correlation between target and risk factor; the target
# is multiplied by it before being passed to isoreg().
cc.sign <- sign(
  cor(db[["y"]], db[["x"]], method = "spearman", use = "complete.obs")
)
iso.r <- isoreg(x = db[["x"]], y = cc.sign * db[["y"]])
db[["y.hat"]] <- iso.r[["yf"]]

# One summary row per distinct fitted value; each fitted value is a bin.
db.s0 <- summarise(
  group_by(db, bin = y.hat),
  no = n(),
  y.sum = sum(y),
  y.avg = mean(y),
  x.avg = mean(x),
  x.min = min(x),
  x.max = max(x)
)
db.s0
# Stage 2: merging based on minimum percentage of observations
db.s1 <- db.s0

# Minimum number of observations per bin: min.pct.obs of the sample size,
# but never fewer than 30.
thr.no <- ceiling(max(30, nrow(db) * min.pct.obs))
thr.no # threshold for minimum number of observations per bin

repeat {
  # A single remaining bin cannot be merged any further.
  if (nrow(db.s1) == 1) {
    break
  }
  # Take the counts as a plain vector. db.s1 comes out of summarise(), so
  # db.s1[, "no"] would be a one-column table rather than a vector.
  values <- db.s1$no
  if (all(values >= thr.no)) {
    break
  }
  # Position of the first bin that is below the threshold.
  gap <- min(which(values < thr.no))
  if (gap == nrow(db.s1)) {
    # Last bin: give it the label of the previous bin.
    db.s1$bin[gap] <- db.s1$bin[gap - 1]
  } else {
    # Otherwise give it the label of the next bin.
    db.s1$bin[gap] <- db.s1$bin[gap + 1]
  }
  # Re-aggregate by the updated labels. The weighted means are computed in
  # mutate() first because they need the per-row counts, which summarise()
  # then replaces with their sum.
  db.s1 <- db.s1 %>%
    group_by(bin) %>%
    mutate(
      y.avg = weighted.mean(y.avg, no),
      x.avg = weighted.mean(x.avg, no)
    ) %>%
    summarise(
      no = sum(no),
      y.sum = sum(y.sum),
      y.avg = unique(y.avg),
      x.avg = unique(x.avg),
      x.min = min(x.min),
      x.max = max(x.max)
    )
}
db.s1
# Stage 3: merging based on minimum percentage of bad cases
db.s2 <- db.s1

# Minimum number of bad cases per bin: min.avg.rate of the sample size,
# but at least 1.
thr.nb <- ceiling(max(1, nrow(db) * min.avg.rate))
thr.nb # threshold for minimum number of bad cases per bin

# Check whether every bin already holds at least thr.nb bad cases; if this
# prints TRUE there is no need for further merging. The comparison is
# inclusive (>=) so that it agrees with the stage 2 stopping rule, where a bin
# sitting exactly on the minimum is accepted.
all(db.s2$y.sum >= thr.nb)

# final result
db.s2

# result of the iso.bin function (formatting and certain metrics have been added)
iso.bin(x = gcd$age, y = gcd$qual)[[1]]
# Run the code above in your browser using DataLab