# Fix the RNG state so the simulated predictions below are reproducible.
set.seed(12345)

# Example 1: logical observations, with the 'thresholds' and 'type'
# parameters left at their defaults.
# 500 absences (FALSE) followed by 500 presences (TRUE).
observations_1000_logical <- rep(x = c(FALSE, TRUE), each = 500)
# Absences get predictions drawn from [0, 0.7], presences from [0.3, 1].
predictions_1000 <- c(
  runif(n = 500, min = 0, max = 0.7),
  runif(n = 500, min = 0.3, max = 1)
)
confidence(
  observations = observations_1000_logical,
  predictions = predictions_1000
) # 0.561
# Example 2: integer observations with the default 'thresholds' parameter,
# evaluated with both the 'positive' and the 'neutral' confidence type.
# 3000 zeros followed by 1000 ones.
observations_4000_integer <- rep(x = c(0L, 1L), times = c(3000, 1000))
# Zeros get predictions drawn from [0, 0.8], ones from [0.2, 0.9].
predictions_4000 <- c(
  runif(n = 3000, min = 0, max = 0.8),
  runif(n = 1000, min = 0.2, max = 0.9)
)
confidence(
  observations = observations_4000_integer,
  predictions = predictions_4000,
  type = "positive"
) # 0.691
confidence(
  observations = observations_4000_integer,
  predictions = predictions_4000,
  type = "neutral"
) # 0.778
# Example 3: supplying previously selected thresholds instead of the default.
strict_thresholds <- c(0.1, 0.9)
permissive_thresholds <- c(0.4, 0.5)
# 10th and 90th percentile of the predictions that belong to observed ones.
predictions_of_ones <- predictions_4000[observations_4000_integer == 1]
percentile_thresholds <- quantile(x = predictions_of_ones, probs = c(0.1, 0.9))

confidence(
  observations = observations_4000_integer,
  predictions = predictions_4000,
  thresholds = strict_thresholds,
  type = "neutral"
) # 0
confidence(
  observations = observations_4000_integer,
  predictions = predictions_4000,
  thresholds = permissive_thresholds,
  type = "neutral"
) # 0.836
confidence(
  observations = observations_4000_integer,
  predictions = predictions_4000,
  thresholds = percentile_thresholds,
  type = "neutral"
) # 0.2
# Example 4: real-life case. The thresholds are calculated from the whole
# dataset, while confidence is calculated from the evaluation subset only.
dataset <- data.frame(
  observations = observations_4000_integer,
  predictions = predictions_4000,
  # Alternating runs of 250 rows: FALSE (not evaluated), then TRUE (evaluated),
  # spelled out for all 4000 rows rather than left to implicit recycling.
  evaluation_mask = rep(x = c(FALSE, TRUE), each = 250, times = 8)
)
thresholds_whole <- thresholds(
  observations = dataset$observations,
  predictions = dataset$predictions
)
is_evaluated <- dataset$evaluation_mask
# The outer parentheses make the assignment print its value.
(confidence_evaluation <- confidence(
  observations = dataset$observations[is_evaluated],
  predictions = dataset$predictions[is_evaluated],
  thresholds = thresholds_whole
)) # 0.671
# Example 5: wrong parameterization. Each call is wrapped in try() so that the
# script keeps running; the trailing comment notes the expected outcome.

# 'type' given as "pos":
try(
  confidence(
    observations = observations_1000_logical,
    predictions = predictions_1000,
    type = "pos"
  )
) # error

# One of the thresholds is missing:
try(
  confidence(
    observations = observations_1000_logical,
    predictions = predictions_1000,
    thresholds = c(0.2, NA_real_)
  )
) # warning

# One of the thresholds is negative:
try(
  confidence(
    observations = observations_1000_logical,
    predictions = predictions_1000,
    thresholds = c(-0.4, 0.85)
  )
) # warning

# The first threshold is larger than the second:
try(
  confidence(
    observations = observations_1000_logical,
    predictions = predictions_1000,
    thresholds = c(0.6, 0.3)
  )
) # warning

# 1000 observations paired with 4000 predictions:
try(
  confidence(
    observations = observations_1000_logical,
    predictions = predictions_4000
  )
) # error
# Example 6: numeric (double) observations and predictions that partly fall
# outside the [0, 1] interval.
set.seed(12345)
# 3000 zeros followed by 1000 ones, stored as doubles.
observations_4000_numeric <- rep(x = c(0, 1), times = c(3000, 1000))
# Zeros get predictions from [-0.3, 0.4], ones from [0.6, 1.5].
predictions_4000_strange <- c(
  runif(n = 3000, min = -0.3, max = 0.4),
  runif(n = 1000, min = 0.6, max = 1.5)
)
try(
  confidence(
    observations = observations_4000_numeric,
    predictions = predictions_4000_strange,
    thresholds = c(0.2, 0.7)
  )
) # multiple warnings

# Keep only the predictions inside [0, 1] and pass the observations as
# integers.
mask_of_normal_predictions <-
  predictions_4000_strange >= 0 & predictions_4000_strange <= 1
confidence(
  observations = as.integer(observations_4000_numeric)[mask_of_normal_predictions],
  predictions = predictions_4000_strange[mask_of_normal_predictions],
  thresholds = c(0.2, 0.7)
) # OK
# Run the code above in your browser using DataLab