library(dplyr)
library(tibble)
# Simulate example data ----
# Two uniform predictors and a log-normal response whose log-scale mean is
# x1 + x2 (sdlog = 0.5). The seed is fixed so the draws are reproducible.
set.seed(123)
x1 <- runif(n = 1000)
x2 <- runif(n = 1000)
y <- rlnorm(n = 1000, meanlog = x1 + x2, sdlog = 0.5)
df <- tibble(x1 = x1, x2 = x2, y = y)

# Split by row position: rows 1-500 for training, rows 501-750 for
# calibration, and rows 751-1000 for testing
df_train <- slice(df, 1:500)
df_cal <- slice(df, 501:750)
df_test <- slice(df, 751:1000)
# Fit a linear model for log(y) on the training rows
mod <- lm(formula = log(y) ~ x1 + x2, data = df_train)

# Generate predictions on the original scale of y: predict() works on the
# log scale (the model's response is log(y)), so the result is passed
# through exp()
predict_response <- function(model, new_data) {
  exp(predict(object = model, newdata = new_data))
}
pred_cal <- predict_response(mod, df_cal)
pred_test <- predict_response(mod, df_test)
# Estimate log-normal prediction intervals from calibration data.
# sdlog is the root-mean-square difference between the logged calibration
# predictions and the logged observed outcomes; meanlog is the logged test
# prediction.
# NOTE(review): pinterval_parametric() is not defined or attached anywhere in
# this file -- presumably its package is loaded beforehand; confirm.
log_resid_cal <- log(pred_cal) - log(df_cal[["y"]])
log_resid_sd <- sqrt(mean(log_resid_cal^2))
pinterval_parametric(
  dist = "lnorm",
  pred = pred_test,
  pars = list(sdlog = log_resid_sd, meanlog = log(pred_test))
)
# Alternatively, skip `pars` and hand over the calibration predictions
# together with their observed outcomes, so that the log-normal parameters
# are estimated from the calibration data directly
pinterval_parametric(
  dist = "lnorm",
  pred = pred_test,
  calib_truth = df_cal$y,
  calib = pred_cal
)
# Use the normal distribution with direct parameter input.
# sd is the root-mean-square error of the calibration predictions on the
# original scale of y; mean is the test prediction itself.
norm_resid_cal <- pred_cal - df_cal[["y"]]
norm_sd <- sqrt(mean(norm_resid_cal^2))
pinterval_parametric(
  dist = "norm",
  pred = pred_test,
  pars = list(sd = norm_sd, mean = pred_test)
)
# Use the gamma distribution; no `pars` are given, so the parameters are
# estimated from the calibration predictions and their observed outcomes
pinterval_parametric(
  dist = "gamma",
  pred = pred_test,
  calib_truth = df_cal$y,
  calib = pred_cal
)
# Run the code above in your browser using DataLab