
gains_table
Creates a data frame showing the distribution of total, good, and bad counts, the bad rate, and the approval rate by score bin. Scores can be binned using either equal-width or equal-frequency intervals.
gains_table(score, label, bin_num = 10, bin_type = "freq",
positive = "bad|1", ...)
A list of credit scores for the actual and expected data samples. For example, score = list(actual = scoreA, expect = scoreE).
A list of label values for the actual and expected data samples. For example, label = list(actual = labelA, expect = labelE).
Integer, the number of score bins. Default is 10. If it is 'max', then individual scores are used as bins.
How the scores are binned: by equal frequency or by equal width. Accepted values are 'freq' and 'width'. Default is 'freq'.
Value of positive class, default is "bad|1".
Additional parameters.
A data frame
# NOT RUN {
# Data preparation ------
# Load the germancredit data set.
data("germancredit")
# Screen variables by missing rate, IV, and identical value rate.
dt_f <- var_filter(germancredit, "creditability")
# Break the filtered data into train and test sets, and pull out the
# creditability column of each set as its label vector.
dt_list <- split_df(dt_f, "creditability")
label_list <- lapply(dt_list, `[[`, "creditability")

# WOE binning ------
bins <- woebin(dt_list$train, "creditability")
# Convert both the train and test sets into WOE values.
dt_woe_list <- lapply(dt_list, function(dat) woebin_ply(dat, bins))

# GLM ------
m1 <- glm(
  creditability ~ .,
  family = binomial(),
  data = dt_woe_list$train
)
# vif(m1, merge_coef = TRUE)
# Select a formula-based model by AIC, then refit it from the stored call.
m_step <- step(m1, direction = "both", trace = FALSE)
m2 <- eval(m_step$call)
# vif(m2, merge_coef = TRUE)

# Predicted probability for each data set.
pred_list <- lapply(
  dt_woe_list,
  function(dat) predict(m2, type = "response", dat)
)

# Scorecard ------
card <- scorecard(bins, m2)
# Credit score, only_total_score = TRUE
score_list <- lapply(dt_list, function(dat) scorecard_ply(dat, card))
# Credit score, only_total_score = FALSE
score_list2 <- lapply(
  dt_list,
  function(dat) scorecard_ply(dat, card, only_total_score = FALSE)
)
###### perf_eva examples ######
# Example I: a single data set
## Using the predicted probability
perf_eva(
  pred = pred_list$train,
  label = label_list$train,
  title = "train"
)
## Using the predicted score
# perf_eva(pred = score_list$train, label = label_list$train, title = "train")

# Example II: multiple data sets
## Using the predicted probability
perf_eva(
  pred = pred_list,
  label = label_list
)
## Using the predicted score
# perf_eva(score_list, label_list)
###### perf_psi examples ######
# Example I: total-score PSI only
psi1 <- perf_psi(score = score_list, label = label_list)
psi1$psi # PSI data frame
psi1$pic # plot of the score distribution

# Example II: both total and variable PSI
# Pass score_list2 (built above with only_total_score = FALSE) rather than
# score_list; otherwise this call just repeats Example I. Presumably
# score_list2 holds the per-variable score columns as well as the total.
psi2 <- perf_psi(score = score_list2, label = label_list)
# psi2$psi # PSI data frame
# psi2$pic # plot of the score distribution
###### gains_table examples ######
# Example I: the score and label inputs can each be a list or a vector
gains_table(
  score = score_list$train,
  label = label_list$train
)
gains_table(
  score = score_list,
  label = label_list
)

# Example II: choose the number of bins and the binning type
gains_table(
  score = score_list,
  label = label_list,
  bin_num = 20
)
gains_table(
  score = score_list,
  label = label_list,
  bin_type = "width"
)
# }
# NOT RUN {
# }
Run the code above in your browser using DataLab