# ===== DIAGNOSIS MODE (recommended) =====
# Spatial example: hand BORG the coordinates and let it build valid folds.
set.seed(42)  # fix the RNG so the simulated data below are reproducible
# 200 points drawn uniformly over a 100 x 100 square, paired with a
# standard-normal response.
spatial_data <- data.frame(
  x = runif(200, min = 0, max = 100),
  y = runif(200, min = 0, max = 100),
  response = rnorm(200)
)
result <- borg(
  spatial_data,
  coords = c("x", "y"),
  target = "response"
)
result$diagnosis
result$folds[[1]]  # train/test indices of the first fold
# Clustered example: 20 sites with 10 observations each. Every site gets one
# shared normal offset, and each observation adds its own noise (sd = 0.5).
clustered_data <- data.frame(
  site = rep(seq_len(20), each = 10),
  value = rep(rnorm(20), each = 10) + rnorm(200, sd = 0.5)
)
result <- borg(
  clustered_data,
  groups = "site",
  target = "value"
)
result$diagnosis@recommended_cv  # "group_fold"
# Temporal example: 200 consecutive daily dates starting 2020-01-01, with a
# random-walk value (cumulative sum of standard-normal steps).
temporal_data <- data.frame(
  date = seq(
    as.Date("2020-01-01"),
    by = "day",
    length.out = 200
  ),
  value = cumsum(rnorm(200))
)
result <- borg(
  temporal_data,
  time = "date",
  target = "value"
)
# \donttest{
# Get rsample-compatible output for tidymodels. rsample is an optional
# dependency, so only run this example when the package is installed;
# otherwise skip it rather than failing the whole script.
if (requireNamespace("rsample", quietly = TRUE)) {
  result <- borg(spatial_data, coords = c("x", "y"), output = "rsample")
}
# }
# ===== VALIDATION MODE =====
# Check a split that already exists: rows 1-70 train, rows 71-100 test.
data <- data.frame(
  x = seq_len(100),
  y = rnorm(100)
)
borg(data, train_idx = seq_len(70), test_idx = 71:100)
# Same idea with a grouping constraint: 10 patients, 10 rows each, and a
# 50/50 split by row index.
data[["patient"]] <- rep(seq_len(10), each = 10)
borg(
  data,
  train_idx = seq_len(50),
  test_idx = 51:100,
  groups = "patient"
)
# Run the code above in your browser using DataLab