suppressMessages(library(PDtoolkit))
data(loans)
#identify numeric risk factors
num.rf <- sapply(loans, is.numeric)
num.rf <- names(num.rf)[!names(num.rf)%in%"Creditability" & num.rf]
#discretized numeric risk factors using ndr.bin from monobin package
loans[, num.rf] <- sapply(num.rf, function(x)
ndr.bin(x = loans[, x], y = loans[, "Creditability"])[[2]])
str(loans)
#create risk factor priority groups
rf.all <- names(loans)[-1]
set.seed(591)
rf.pg <- data.frame(rf = rf.all, group = sample(1:3, length(rf.all), rep = TRUE))
head(rf.pg)
#bring AUC for each risk factor in order to sort them within groups
bva <- bivariate(db = loans, target = "Creditability")[[1]]
rf.auc <- unique(bva[, c("rf", "auc")])
rf.pg <- merge(rf.pg, rf.auc, by = "rf", all.x = TRUE)
#prioritized risk factors
rf.pg <- rf.pg[order(rf.pg$group, rf.pg$auc), ]
rf.pg <- rf.pg[order(rf.pg$group), ]
rf.pg
res <- stepRPC(start.model = Creditability ~ 1,
risk.profile = rf.pg,
p.value = 0.05,
coding = "WoE",
db = loans)
summary(res$model)$coefficients
res$steps
head(res$dev.db)
Run the code above in your browser using DataLab