# Simple examples ----
# consistency() compares model confidence on training vs. evaluation data;
# values near 0 (or slightly negative) indicate a transferable model, while
# strongly negative values indicate overfitting. Expected results in comments.
consistency(conf_train = 0.93, conf_eval = 0.21)
# -0.72 - hardly consistent/transferable model
consistency(conf_train = 0.43, conf_eval = 0.35)
# -0.08 - consistent/transferable model, although not so confident
consistency(conf_train = 0.87, conf_eval = 0.71)
# -0.16 - a consistent/transferable model that is confident as well
consistency(conf_train = 0.67, conf_eval = 0.78)
# 0.11 - positive value might be an artifact
consistency(conf_train = 0.67, conf_eval = NA_real_)
# NA
# Real-life case ----
# Build a synthetic dataset of 1000 rows: the first 500 observations are
# absences (FALSE), the last 500 are presences (TRUE). Predictions overlap
# deliberately (absences in [0, 0.7], presences in [0.3, 1]) so the model is
# imperfect. Within each half, the second 250 rows are flagged as the
# evaluation subset.
set.seed(12345)
observations <- rep(c(FALSE, TRUE), each = 500)
predictions <- c(
  runif(n = 500, min = 0, max = 0.7),
  runif(n = 500, min = 0.3, max = 1)
)
dataset <- data.frame(
  observations = observations,
  predictions = predictions,
  evaluation_mask = rep(rep(c(FALSE, TRUE), each = 250), times = 2)
)
# Derive the probability thresholds from the WHOLE dataset, then measure
# confidence separately on the training rows (evaluation_mask == FALSE) and
# the evaluation rows (evaluation_mask == TRUE) using those shared thresholds.
thresholds_whole <- thresholds(
  observations = dataset$observations,
  predictions = dataset$predictions
)
# Confidence on the training subset; expected value: 0.602
confidence_training <- confidence(
  observations = dataset$observations[!dataset$evaluation_mask],
  predictions = dataset$predictions[!dataset$evaluation_mask],
  thresholds = thresholds_whole
)
# Confidence on the evaluation subset; expected value: 0.520
confidence_evaluation <- confidence(
  observations = dataset$observations[dataset$evaluation_mask],
  predictions = dataset$predictions[dataset$evaluation_mask],
  thresholds = thresholds_whole
)
consistency(conf_train = confidence_training,
            conf_eval = confidence_evaluation)
# -0.083 - consistent/transferable model
# Wrong parameterization ----
# Both calls are expected to produce a warning; try() keeps the example
# script running if the package escalates these to errors.
try(consistency(conf_train = 1.3, conf_eval = 0.5))        # warning
try(consistency(conf_train = 0.6, conf_eval = c(0.4, 0.5))) # warning
# (Documentation-page footer, commented out so the script parses:)
# Run the code above in your browser using DataLab