#################################################################################################
#################################################################################################
#################################################################################################
# \dontshow{
## Set up for R markdown for crayon and cli output if user has packages installed
if(requireNamespace("fansi", quietly = TRUE) &
requireNamespace("crayon", quietly = TRUE) &
requireNamespace("cli", quietly = TRUE))
{
options(crayon.enabled = TRUE)
ansi_aware_handler <- function(x, options)
{
paste0(
"",
fansi::sgr_to_html(x = x, warn = FALSE, term.cap = "256"),
""
)
}
old_hooks <- fansi::set_knit_hooks(knitr::knit_hooks,
which = c("output", "message", "error", "warning"))
knitr::knit_hooks$set(
output = ansi_aware_handler,
message = ansi_aware_handler,
warning = ansi_aware_handler,
error = ansi_aware_handler
)
}
# }
## Modelling Examples
# For a more thorough example of the workflow of this package, please see vignette
# DImulti_workflow using the following code:
# \donttest{
vignette("DImulti_workflow")
# }
## The simMVRM dataset
#
# This simulated dataset contains multiple ecosystem functions (k=3) and multiple time points
# (t=2). The dataset was
# simulated using unstructured Sigma matrices.
# The true values can be found in the help file for the data, ?simMVRM
data(simMVRM)
head(simMVRM)
# We will start the analysis with a call to the package's main function, DImulti().
# We begin the call by specifying the column indices holding the initial species proportions
# (p_i) through 'prop' and the columns which hold the ecosystem response values through 'y'.
# Since our data is multivariate, we include the 'eco_func' parameter, specifying "na" as our
# data is in a wide format (multiple columns in 'y') and "un" to fit an unstructured Sigma
# for our ecosystem functions.
# The data also contains repeated measures, so we include the 'time' parameter, specifying "time"
# as the column containing the time point indicator for each row and "ar1" to fit an AR(1)
# autocorrelation structure for our time points.
# Next, we specify that the experimental unit identifier is in column 1 through 'unit_IDs'. We
# indicate that we do not want to estimate the non-linear parameter theta, but do not provide
# any values, opting for the default value of 1.
# We specify a full pairwise (FULL) interaction structure through 'DImodel' and estimate the
# model using maximum likelihood (ML) as we may compare models.
# Finally, we provide the data object simMVRM through 'data'.
simModel_FULL <- DImulti(prop = 2:5, y = 6:8, eco_func = c("na", "un"), time = c("time", "ar1"),
unit_IDs = 1, estimate_theta = FALSE, DImodel = "FULL", method = "ML",
data = simMVRM)
# simModel_FULL is an object of custom class DImulti, which can be used with a number of S3
# methods and any method compatible with gls objects. We use summary() to examine the model fit,
# including fixed effect coefficients and the variance covariance matrix Sigma.
print(simModel_FULL)
# From this summary, we can see that there are many coefficients, a number of which are not
# statistically significant at an
# alpha level of 0.05, therefore this model may not be ideal for our data. We refit the model
# using an average interaction structure instead as it reduces the number of interaction terms
# to 1 per ecosystem function and time point.
simModel_AV <- DImulti(prop = 2:5, y = 6:8, eco_func = c("na", "un"), time = c("time", "ar1"),
unit_IDs = 1, estimate_theta = FALSE, DImodel = "AV", method = "ML",
data = simMVRM)
print(simModel_AV)
# This model looks like it is a better fit, with less insignificant terms, but we can test this
# formally using a likelihood ratio test, as the DImodels interaction structures are nested in
# nature (with the exception of "ADD" and "FG", which are on the same level in the nesting
# hierarchy).
# This will test the null hypothesis that the likelihood of the two models do not significantly
# differ, in other words, the added parameters of the more complex model are not worth it.
anova(simModel_FULL, simModel_AV)
# As the p-value is greater than our selected alpha value (0.05), we fail to reject the null
# hypothesis and so continue with the simpler model, simModel_AV.
# We can confirm this result by using information criteria such as AIC or BIC.
# We select the model with the lower value as it indicates a better fit.
# We use the second order versions of AIC and BIC (AICc and BICc) as we have a large number of
# terms, which can cause AIC and BIC to favour more complex models.
AICc(simModel_FULL); AICc(simModel_AV)
BICc(simModel_FULL); BICc(simModel_AV)
# We refit our chosen model using the REML estimation method to have unbiased estimates.
simModel_AV <- DImulti(prop = 2:5, y = 6:8, eco_func = c("na", "un"), time = c("time", "ar1"),
unit_IDs = 1, estimate_theta = FALSE, DImodel = "AV", method = "REML",
data = simMVRM)
# We can now examine the variance covariance matrix, Sigma, and the fixed effect coefficients,
# which can be retrieved from our initial summary() check or individually.
coef(simModel_AV)
simModel_AV$vcov
# An example of what we can infer from this is that ecosystem functions Y1 and Y2 have a positive
# covariance at time 1, while Y3 has a negative covariance with both Y1 and Y2 at the same time
# point. This means that maximising Y1 would not negatively impact Y2 but it would be at the cost
# of Y3. However, as our interaction term is positive and significant at this time point for all
# three ecosystem functions, we should be able to include a mixture of species that have positive
# ID effects for each response to help mitigate this trade-off.
# We can also predict from the model if we want to compare responses from different conditions,
# even those not included in the original data.
predict(simModel_AV, newdata = simMVRM[which(simMVRM$plot == 1:2), ])
#################################################################################################
## The Belgium dataset
#
# This real world dataset contains multiple ecosystem functions (k=3) at a single time point
# (t=1). The dataset also contains seeding density as a treatment in the form of a factor with
# two levels (1, -1). More detail can be found at ?dataBEL.
data(dataBEL)
head(dataBEL)
# We begin with the main function DImulti(), passing the initial species proportions column names
# through 'prop' and the response value column name through 'y'.
# As the data is in a long or stacked format, we specify the ecosystem function type through the
# first index of the 'eco_func' parameter, along with specifying that we want an unstructured
# Sigma for these response types.
# The experimental unit ID is stored in the column "Plot" and so we pass this to 'unit_IDs'.
# Rather than estimating the nonlinear parameter theta, we opt to provide a value for each
# ecosystem function type through the parameter 'theta', which will be applied to the
# interaction terms as a power. In this case, we use functional group (FG) interactions, which
# requires an additional argument FG to be provided, specifying which group each species present
# in 'prop' belongs to. In this case, we group the grasses and the legumes.
# We include the treatment Density as an additional fixed effect.
# We opt to use the REML estimation method as we will not be doing any model comparisons.
# Finally, we specify the data object, dataBEL, through 'data'.
belModel <- DImulti(prop = c("G1", "G2", "L1", "L2"), y = "Y", eco_func = c("Var", "un"),
unit_IDs = "Plot", theta = c(0.5, 1, 1.2), DImodel = "FG",
FG = c("Grass", "Grass", "Legume", "Legume"), extra_fixed = ~ Density,
method = "REML", data = dataBEL)
# We can now print the output from our model, stored in belModel with class DImulti.
print(belModel)
#################################################################################################
## The Sweden dataset
#
# This real-world dataset contains a single ecosystem function read at multiple time points
# (k=1 & t=3). The data contains two treatments, TREAT and DENS, each with two levels
# 1 & 2 and High & Low, respectively.
# More details can be found at ?dataSWE
data(dataSWE)
head(dataSWE)
# We transform the "YEARN" column to factors to better act as groups in our models, giving us a
# coefficient per year number as opposed to acting as a continuous scaling factor.
dataSWE$YEARN <- as.factor(dataSWE$YEARN)
# We use the DImulti() function to fit a repeated measures DI model to this data.
# We specify the column indices 5 through 8 for our initial species proportions and the response
# value column name "YIELD".
# As there are multiple time points (repeated measures), we use the parameter 'time', providing
# the column name containing the time identifier through the first index and the desired Sigma
# structure (compound symmetry) through the second.
# The experimental unit ID is stored in column index two, which is passed through 'unit_IDs'.
# The interaction structure chosen is average interaction term, "AV".
# We include both of the treatment terms, TREAT and DENS, as extra fixed effects crossed with
# each ID effect, the interaction effect, and with each other.
# We opt to use the REML estimation method for the model as we will not be doing any model
# comparisons.
# Finally, we specify the data object, dataSWE, through 'data'
SWEmodel <- DImulti(prop = 5:8, y = c("YIELD"), time = c("YEARN", "CS"),
unit_IDs = 2, DImodel = "AV", extra_fixed = ~ 1:(TREAT:DENS),
method = "REML", data = dataSWE)
print(SWEmodel)
Run the code above in your browser using DataLab