suppressMessages(library(PDtoolkit))
library(rpart)
data(loans)
#clustering using common spearman metric
#first we need to categorize numeric risk factors
num.rf <- sapply(loans, is.numeric)
num.rf <- names(num.rf)[!names(num.rf)%in%"Creditability" & num.rf]
loans[, num.rf] <- sapply(num.rf, function(x)
sts.bin(x = loans[, x], y = loans[, "Creditability"])[[2]])
#replace woe in order to convert to all numeric factors
loans.woe <- replace.woe(db = loans, target = "Creditability")[[1]]
cr <- rf.clustering(db = loans.woe[, -which(names(loans.woe)%in%"Creditability")],
metric = "common spearman",
k = NA)
cr
#select one risk factor per cluster with min distance to centorid
cr %>% group_by(clusters) %>%
slice(which.min(dist.to.centroid))
Run the code above in your browser using DataLab