if (FALSE) {
# =============================================================================
# Basic Multi-Series Time Series Modeling
# =============================================================================
# Simulate three Poisson-observed series that share seasonal dynamics but
# follow independent AR(1) latent trends
set.seed(0)
sim_data <- sim_mvgam(
  T = 80,
  n_series = 3,
  mu = 2,
  trend_model = AR(p = 1),
  prop_missing = 0.1,
  prop_trend = 0.6
)

# Summary plots for one series, then for every series at once
plot_mvgam_series(data = sim_data$data_train, series = 1)
plot_mvgam_series(data = sim_data$data_train, series = "all")

# Specify a Stan model in which all series share a cyclic seasonal smooth
# while each series keeps its own AR1 temporal process. The non-centred
# parameterisation usually samples more efficiently. With run_model = FALSE
# the model is prepared but not fitted, so the generated objects can be
# inspected first.
ts_model <- mvgam(
  formula = y ~ s(season, bs = "cc", k = 6),
  data = sim_data$data_train,
  trend_model = AR(),
  family = poisson(),
  noncentred = TRUE,
  run_model = FALSE
)

# Inspect the generated Stan code and the data objects passed to Stan
stancode(ts_model)
stan_data <- standata(ts_model)
str(stan_data)

# Now fit the model for real
ts_model <- mvgam(
  formula = y ~ s(season, bs = "cc", k = 6),
  data = sim_data$data_train,
  trend_model = AR(),
  family = poisson(),
  noncentred = TRUE,
  chains = 2,
  silent = 2
)

# Posterior summary
summary(ts_model)

# Hindcast distributions for the latent trend and for the response
trend_hindcast <- hindcast(ts_model, type = "trend")
plot(trend_hindcast)
response_hindcast <- hindcast(ts_model, type = "response")
plot(response_hindcast)

# Residual diagnostics
plot(ts_model, type = "residuals", series = 1)
model_resids <- residuals(ts_model)
str(model_resids)

# augment() attaches fitted values and residuals to the training data
augment(ts_model)

# Forecast ahead using the covariate information held in data_test
model_fc <- forecast(ts_model, newdata = sim_data$data_test)
str(model_fc)
fc_stats <- summary(model_fc)
head(fc_stats, 12)
plot(model_fc)

# Visualise the seasonal smooth: the function itself, its first
# derivatives, its partial residuals and posterior realisations
plot(ts_model, type = "smooths")
plot(ts_model, type = "smooths", derivatives = TRUE)
plot(ts_model, type = "smooths", residuals = TRUE)
plot(ts_model, type = "smooths", realisations = TRUE)

# Conditional response predictions via marginaleffects
conditional_effects(ts_model)
plot_predictions(ts_model, condition = "season", points = 0.5)

# Posterior predictive checks via bayesplot
pp_check(ts_model)

# Observation-model beta coefficient draws as a data.frame
beta_posterior <- as.data.frame(ts_model, variable = "betas")
head(beta_posterior)
str(beta_posterior)

# Approximate leave-one-out fit; temporarily force single-core
# computation, then restore the user's mc.cores option
old_mc_cores <- getOption("mc.cores")
options(mc.cores = 1)
loo(ts_model)
options(mc.cores = old_mc_cores)
# =============================================================================
# Vector Autoregressive (VAR) Models
# =============================================================================
# Model the portal time series with a latent order-1 Vector Autoregression
var_model <- mvgam(
  formula = captures ~ -1,
  trend_formula = ~ trend,
  trend_model = VAR(cor = TRUE),
  family = poisson(),
  data = portal_data,
  chains = 2,
  silent = 2
)

# Histograms of the autoregressive coefficient distributions; dir = "v"
# arranges the facets to match the layout of the A matrix
mcmc_plot(
  var_model,
  variable = "A",
  regex = TRUE,
  type = "hist",
  facet_args = list(dir = "v")
)

# The same display for the process error variance-covariance matrix
mcmc_plot(
  var_model,
  variable = "Sigma",
  regex = TRUE,
  type = "hist",
  facet_args = list(dir = "v")
)

# Generalized Impulse Response Functions over a 12-step horizon
impulse_responses <- irf(var_model, h = 12, cumulative = FALSE)

# Plot a couple of them
plot(impulse_responses, series = 1)
plot(impulse_responses, series = 2)

# Forecast error variance decompositions; plot median contributions
error_decomps <- fevd(var_model, h = 12)
plot(error_decomps)
# =============================================================================
# Dynamic Factor Models
# =============================================================================
# Capture the temporal dynamics of the four rodent species with two
# latent random-walk dynamic factors
factor_model <- mvgam(
  captures ~ series,
  trend_model = RW(),
  use_lv = TRUE,
  n_lv = 2,
  data = portal_data,
  chains = 2,
  silent = 2
)

# Inspect the estimated latent factors
plot(factor_model, type = "factors")

# Hindcast distributions for each of the four series
hindcasts <- hindcast(factor_model)
for (s in seq_len(4)) {
  plot(hindcasts, series = s)
}

# Temporal correlations among series, derived from the factor loadings
factor_cors <- residual_cor(factor_model)
names(factor_cors)
factor_cors$cor

# Correlation-matrix plot showing only correlations whose credible
# intervals exclude zero (all others are displayed as zero)
plot(factor_cors, cluster = TRUE)
# =============================================================================
# Shared Latent Trends with Custom Trend Mapping
# =============================================================================
# A trend_map lets several observed series load on the same latent process
sim_out <- sim_mvgam(n_series = 3)
train_data <- sim_out$data_train

# Only two latent trends are specified: series 1 and 2 share the first,
# while series 3 gets a unique latent trend of its own
series_trend_map <- data.frame(
  series = unique(train_data$series),
  trend = c(1, 1, 2)
)

# Fit the model using AR1 latent trends
mapped_model <- mvgam(
  formula = y ~ s(season, bs = "cc", k = 6),
  trend_map = series_trend_map,
  trend_model = AR(),
  data = train_data,
  return_model_data = TRUE,
  chains = 2,
  silent = 2
)

# The loading matrix is supplied to the model as the 'Z' data element
mapped_model$model_data$Z

# Series 1 and 2 share an identical latent trend; series 3 differs
plot(residual_cor(mapped_model))
for (s in 1:3) {
  plot(mapped_model, type = "trend", series = s)
}
# =============================================================================
# Time-Varying (Dynamic) Coefficients
# =============================================================================
# Demonstrate dynamic() coefficients by simulating a temperature effect
# that evolves as a Gaussian random walk with a slight downward drift
set.seed(123)
N <- 200
# Random walk for the coefficient: start at 0.4, then add increments with
# mean -0.0025 and sd 0.05. cumsum() over a single rnorm(N - 1) call
# consumes exactly the same RNG stream as the equivalent scalar loop
# (which also allocated a logical vector via vector(length = N)), so the
# simulated values are identical but the code is vectorized.
beta_temp <- cumsum(c(0.4, rnorm(N - 1, mean = -0.0025, sd = 0.05)))
plot(beta_temp)

# Simulate the 'temp' covariate and noisy Gaussian observations whose
# mean depends on the time-varying coefficient
temp <- rnorm(N, sd = 1)
out <- rnorm(N,
  mean = 4 + beta_temp * temp,
  sd = 0.5
)

# Gather everything into a data.frame and split into training / testing.
# Named 'full_data' rather than 'data' so base::data() is not masked.
full_data <- data.frame(out, temp, time = seq_along(temp))
data_train <- full_data[1:180, ]
data_test <- full_data[181:200, ]

# Fit the model via the dynamic() wrapper, which sets up a time-varying
# smooth for the temperature effect
dyn_model <- mvgam(
  formula = out ~ dynamic(
    temp,
    scale = FALSE,
    k = 40
  ),
  family = gaussian(),
  data = data_train,
  newdata = data_test,
  chains = 2,
  silent = 2
)

# Inspect the summary, the estimated smooth and the forecast
summary(dyn_model)
plot(dyn_model, type = "smooths")
dyn_fc <- forecast(dyn_model, newdata = data_test)
plot(dyn_fc)

# Propagating the smooth over the full time range shows how the
# coefficient is expected to keep evolving; overlay the simulated truth
plot_mvgam_smooth(dyn_model, smooth = 1, newdata = full_data)
abline(v = 180, lty = "dashed", lwd = 2)
points(beta_temp, pch = 16)
# =============================================================================
# Working with Offset Terms
# =============================================================================
# Simulate count data in which the series have different mean levels,
# then model them with an offset in the linear predictor
set.seed(100)
count_sim <- sim_mvgam(
  prop_trend = 0,
  mu = c(0, 2, 2),
  seasonality = "hierarchical"
)

# Attach a series-specific offset to both data partitions
count_sim$data_train$offset <- 0.5 * as.numeric(count_sim$data_train$series)
count_sim$data_test$offset <- 0.5 * as.numeric(count_sim$data_test$series)

# The model combines the offset with hierarchical seasonal smooths
offset_model <- mvgam(
  formula = y ~ offset(offset) +
    s(series, bs = "re") +
    s(season, bs = "cc") +
    s(season, by = series, m = 1, k = 5),
  data = count_sim$data_train,
  chains = 2,
  silent = 2
)

# The Stan code shows how the offset enters the linear predictor (eta)
stancode(offset_model)

# Because of the offset, forecasts for the first two series differ
# in magnitude
offset_fc <- forecast(offset_model, newdata = count_sim$data_test)
plot(offset_fc, series = 1, ylim = c(0, 75))
plot(offset_fc, series = 2, ylim = c(0, 75))

# Altering the test-set offset shifts the forecasts accordingly
count_sim$data_test$offset <- count_sim$data_test$offset - 2
offset_fc <- forecast(offset_model, newdata = count_sim$data_test)
plot(offset_fc)

# Fixing the offset to a common value for every series makes the
# resulting Relative Risks comparable
count_sim$data_test$offset <- rep(1, NROW(count_sim$data_test))
rr_preds <- predict(
  offset_model,
  type = "link",
  newdata = count_sim$data_test,
  summary = FALSE
)
s1_idx <- which(count_sim$data_test$series == "series_1")
s2_idx <- which(count_sim$data_test$series == "series_2")

# Overlay 50 posterior draws of Relative Risk for the first two series,
# side by side; restore the single-panel layout afterwards
layout(matrix(1:2, ncol = 2))
plot(
  rr_preds[1, s1_idx],
  type = "l",
  col = "grey75",
  ylim = range(rr_preds),
  ylab = "Series1 Relative Risk",
  xlab = "Time"
)
for (draw in 2:50) {
  lines(rr_preds[draw, s1_idx], col = "grey75")
}
plot(
  rr_preds[1, s2_idx],
  type = "l",
  col = "darkred",
  ylim = range(rr_preds),
  ylab = "Series2 Relative Risk",
  xlab = "Time"
)
for (draw in 2:50) {
  lines(rr_preds[draw, s2_idx], col = "darkred")
}
layout(1)
# =============================================================================
# Binomial Family Models
# =============================================================================
# Binomial observations require cbind(successes, trials) on the LHS of
# the formula. Simulate two series of Binomial trials whose detection
# probabilities depend on a shared covariate 'x'.
# Note: 20:25 is already a vector, so the redundant c() wrapper is dropped
trials <- sample(20:25, 50, replace = TRUE)
x <- rnorm(50)
detprob1 <- plogis(-0.5 + 0.9 * x)
detprob2 <- plogis(-0.1 - 0.7 * x)
dat <- rbind(
  data.frame(
    y = rbinom(n = 50, size = trials, prob = detprob1),
    time = 1:50,
    series = "series1",
    x = x,
    ntrials = trials
  ),
  data.frame(
    y = rbinom(n = 50, size = trials, prob = detprob2),
    time = 1:50,
    series = "series2",
    x = x,
    ntrials = trials
  )
)
# Convert series to a factor and order rows by time within series
dat <- dplyr::mutate(dat, series = as.factor(series))
dat <- dplyr::arrange(dat, time, series)
plot_mvgam_series(data = dat, series = "all")

# Fit with the binomial() family; the cbind() wrapper supplies both the
# observed counts and the number of trials
mod <- mvgam(
  formula = cbind(y, ntrials) ~ series + s(x, by = series),
  family = binomial(),
  data = dat,
  chains = 2,
  silent = 2
)
summary(mod)

# Posterior predictive checks, grouped by series
pp_check(mod,
  type = "bars_grouped",
  group = "series", ndraws = 50
)
pp_check(mod,
  type = "ecdf_overlay_grouped",
  group = "series", ndraws = 50
)
conditional_effects(mod, type = "link")

# To view predictions on the probability scale, fix ntrials = 1
# inside datagrid()
plot_predictions(
  mod,
  by = c("x", "series"),
  newdata = datagrid(
    x = runif(100, -2, 2),
    series = unique,
    ntrials = 1
  ),
  type = "expected"
)

# Not needed for general use; cleans up connections for automated testing
closeAllConnections()
}
# Run the code above in your browser using DataLab