aictab: Create Model Selection Tables

Description

This function creates a model selection table based on one of the following information criteria: AIC, AICc, QAIC, QAICc. The table ranks the models based on the selected information criteria and also provides delta AIC and Akaike weights. aictab selects the appropriate function to create the model selection table based on the object class. The current version works with lists containing objects of aov, betareg, clm, clmm, clogit, coxme, coxph, fitdist, fitdistr, glm, glmmTMB, gls, gnls, hurdle, lavaan, lm, lme, lmekin, maxlikeFit, mer, merMod, lmerModLmerTest, multinom, negbin, nlme, nls, polr, rlm, survreg, vglm, and zeroinfl classes as well as various models of unmarkedFit classes but does not yet allow mixing of different classes.

Usage

aictab(cand.set, modnames = NULL, second.ord = TRUE, nobs = NULL,
       sort = TRUE, ...)
# S3 method for AICaov.lm
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICbetareg
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICsclm.clm
aictab(cand.set, modnames = NULL,
       second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICclmm
aictab(cand.set, modnames = NULL,
       second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICclm
aictab(cand.set, modnames = NULL,
       second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICcoxme
aictab(cand.set, modnames = NULL,
       second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICcoxph
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICfitdist
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICfitdistr
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICglm.lm
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)  
# S3 method for AICglmmTMB
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)  
# S3 method for AICgls
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICgnls.gls
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AIChurdle
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AIClavaan
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AIClm
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AIClme
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AIClmekin
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICmaxlikeFit.list
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...) 
# S3 method for AICmer
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AIClmerMod
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AIClmerModLmerTest
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICglmerMod
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...) 
# S3 method for AICnlmerMod
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICmultinom.nnet
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICnegbin.glm.lm
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...) 
# S3 method for AICnlme.lme
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...) 
# S3 method for AICnls
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICpolr
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICrlm.lm
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICsurvreg
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
# S3 method for AICunmarkedFitOccu
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitColExt
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitOccuRN
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitPCount
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitPCO
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitDS
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitGDS
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitOccuFP
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitMPois
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitGMM
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitGPC
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitOccuMulti
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitOccuMS
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitOccuTTD
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitMMO
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICunmarkedFitDSO
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICvglm
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
# S3 method for AICzeroinfl
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

Value

aictab creates an object of class aictab with the following components:

Modname: the name of each model of the candidate model set.
K: the number of estimated parameters for each model.
(Q)AIC(c): the information criterion requested for each model (AIC, AICc, QAIC, QAICc).
Delta_(Q)AIC(c): the appropriate delta AIC component depending on the information criteria selected.
ModelLik: the relative likelihood of the model given the data (exp(-0.5*delta[i])). This is not to be confused with the likelihood of the parameters given the data. The relative likelihood can then be normalized across all models to get the model probabilities.
(Q)AIC(c)Wt: the Akaike weights, also termed "model probabilities" sensu Burnham and Anderson (2002) and Anderson (2008). These measures indicate the level of support (i.e., weight of evidence) in favor of any given model being the most parsimonious among the candidate model set.
Cum.Wt: the cumulative Akaike weights. These are only meaningful if results in table are sorted in decreasing order of Akaike weights (i.e., sort = TRUE).
c.hat: if c.hat was specified as an argument, it is included in the table.
LL: if c.hat = 1 and parameters estimated by maximum likelihood, the log-likelihood of each model.
Quasi.LL: if c.hat > 1, the quasi log-likelihood of each model.
Res.LL: if parameters are estimated by restricted maximum-likelihood (REML), the restricted log-likelihood of each model.

Arguments

cand.set: a list storing each of the models in the candidate model set.
modnames: a character vector of model names to facilitate the identification of each model in the model selection table. If NULL, the function uses the names in the cand.set list of candidate models (i.e., a named list). If no names appear in the list and no character vector is provided, generic names (e.g., Mod1, Mod2) are supplied in the table in the same order as in the list of candidate models.
second.ord: logical. If TRUE, the function returns the second-order Akaike information criterion (i.e., AICc).
nobs: this argument allows to specify a numeric value other than total sample size to compute the AICc (i.e., nobs defaults to total number of observations). This is relevant only for mixed models or various models of unmarkedFit classes where sample size is not straightforward. In such cases, one might use total number of observations or number of independent clusters (e.g., sites) as the value of nobs.
sort: logical. If TRUE, the model selection table is ranked according to the (Q)AIC(c) values.
c.hat: value of overdispersion parameter (i.e., variance inflation factor) such as that obtained from c_hat. Note that values of c.hat different from 1 are only appropriate for binomial GLM's with trials > 1 (i.e., success/trial or cbind(success, failure) syntax), with Poisson GLM's, single-season occupancy models (MacKenzie et al. 2002), dynamic occupancy models (MacKenzie et al. 2003), or N-mixture models (Royle 2004, Dail and Madsen 2011). If c.hat > 1, aictab will return the quasi-likelihood analogue of the information criteria requested and multiply the variance-covariance matrix of the estimates by this value (i.e., SE's are multiplied by sqrt(c.hat)). This option is not supported for generalized linear mixed models of the mer or merMod classes.
...: additional arguments passed to the function.

Author

Marc J. Mazerolle

Details

aictab internally creates a new class for the cand.set list of candidate models, according to the contents of the list. The current function is implemented for clogit, coxme, coxph, fitdist, fitdistr, glm, glmmTMB, gls, gnls, hurdle, lavaan, lm, lme, lmekin, maxlikeFit, mer, merMod, lmerModLmerTest, multinom, negbin, nlme, nls, polr, rlm, survreg, vglm, and zeroinfl classes as well as various unmarkedFit classes.

The function constructs a model selection table based on one of the four information criteria: AIC, AICc, QAIC, and QAICc.

Ten guidelines for model selection:

1) Carefully construct your candidate model set. Each model should represent a specific (interesting) hypothesis to test.

2) Keep your candidate model set short. It is ill-advised to consider as many models as there are data.

3) Check model fit. Use your global model (most complex model) or subglobal models to determine if the assumptions are valid. If none of your models fit the data well, information criteria will only indicate the most parsimonious of the poor models.

4) Avoid data dredging (i.e., looking for patterns after an initial round of analysis).

5) Avoid overfitting models. You should not estimate too many parameters for the number of observations available in the sample.

6) Be careful of missing values. Remember that values that are missing only for certain variables change the data set and sample size, depending on which variable is included in any given model. I suggest to remove missing cases before starting model selection.

7) Use the same response variable for all models of the candidate model set. It is inappropriate to run some models with a transformed response variable and others with the untransformed variable. A workaround is to use a different link function for some models (e.g., identity vs log link).

8) When dealing with models with overdispersion, use the same value of c-hat for all models in the candidate model set. For binomial models with trials > 1 (i.e., success/trial or cbind(success, failure) syntax) or with Poisson GLM's, you should estimate the c-hat from the most complex model (global model). If c-hat > 1, you should use the same value for each model of the candidate model set (where appropriate) and include it in the count of parameters (K). Similarly, for negative binomial models, you should estimate the dispersion parameter from the global model and use the same value across all models.

9) Burnham and Anderson (2002) recommend to avoid mixing the information-theoretic approach and notions of significance (i.e., P values). It is best to provide estimates and a measure of their precision (standard error, confidence intervals).

10) Determining the ranking of the models is just the first step. Akaike weights sum to 1 for the entire model set and can be interpreted as the weight of evidence in favor of a given model being the best one given the candidate model set considered and the data at hand. Models with large Akaike weights have strong support. Evidence ratios, importance values, and confidence sets for the best model are all measures that assist in interpretation. In cases where the top ranking model has an Akaike weight > 0.9, one can base inference on this single most parsimonious model. When many models rank highly (i.e., delta (Q)AIC(c) < 4), one should model-average effect sizes for the parameters with most support across the entire set of models. Model averaging consists in making inference based on the whole set of candidate models, instead of basing conclusions on a single 'best' model. It is an elegant way of making inference based on the information contained in the entire model set.

References

Anderson, D. R. (2008) Model-based Inference in the Life Sciences: a primer on evidence. Springer: New York.

Burnham, K. P., Anderson, D. R. (2002) Model Selection and Multimodel Inference: a practical information-theoretic approach. Second edition. Springer: New York.

Burnham, K. P., Anderson, D. R. (2004) Multimodel inference: understanding AIC and BIC in model selection. Sociological Methods and Research 33, 261--304.

Dail, D., Madsen, L. (2011) Models for estimating abundance from repeated counts of an open population. Biometrics 67, 577--587.

MacKenzie, D. I., Nichols, J. D., Lachman, G. B., Droege, S., Royle, J. A., Langtimm, C. A. (2002) Estimating site occupancy rates when detection probabilities are less than one. Ecology 83, 2248--2255.

MacKenzie, D. I., Nichols, J. D., Hines, J. E., Knutson, M. G., Franklin, A. B. (2003) Estimating site occupancy, colonization, and local extinction when a species is detected imperfectly. Ecology 84, 2200--2207.

Mazerolle, M. J. (2006) Improving data analysis in herpetology: using Akaike's Information Criterion (AIC) to assess the strength of biological hypotheses. Amphibia-Reptilia 27, 169--180.

Royle, J. A. (2004) N-mixture models for estimating population size from spatially replicated counts. Biometrics 60, 108--115.

Examples

Run this code

##Mazerolle (2006) frog water loss example
data(dry.frog)

##setup a subset of models of Table 1
Cand.models <- list( )
Cand.models[[1]] <- lm(log_Mass_lost ~ Shade + Substrate +
                       cent_Initial_mass + Initial_mass2,
                       data = dry.frog)
Cand.models[[2]] <- lm(log_Mass_lost ~ Shade + Substrate +
                       cent_Initial_mass + Initial_mass2 +
                       Shade:Substrate, data = dry.frog)
Cand.models[[3]] <- lm(log_Mass_lost ~ cent_Initial_mass +
                       Initial_mass2, data = dry.frog)
Cand.models[[4]] <- lm(log_Mass_lost ~ Shade + cent_Initial_mass +
                       Initial_mass2, data = dry.frog)
Cand.models[[5]] <- lm(log_Mass_lost ~ Substrate + cent_Initial_mass +
                       Initial_mass2, data = dry.frog)

##create a vector of names to trace back models in set
Modnames <- paste("mod", 1:length(Cand.models), sep = " ")

##generate AICc table
aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE)
##round to 4 digits after decimal point and give log-likelihood
print(aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE),
      digits = 4, LL = TRUE)



if (FALSE) {
##Burnham and Anderson (2002) flour beetle data
data(beetle)
##models as suggested by Burnham and Anderson p. 198          
Cand.set <- list( )
Cand.set[[1]] <- glm(Mortality_rate ~ Dose, family =
                     binomial(link = "logit"), weights = Number_tested,
                     data = beetle)
Cand.set[[2]] <- glm(Mortality_rate ~ Dose, family =
                     binomial(link = "probit"), weights = Number_tested,
                     data = beetle)
Cand.set[[3]] <- glm(Mortality_rate ~ Dose, family =
                     binomial(link ="cloglog"), weights = Number_tested,
                     data = beetle)

##check c-hat
c_hat(Cand.set[[1]])
c_hat(Cand.set[[2]])
c_hat(Cand.set[[3]])
##lowest value of c-hat < 1 for these non-nested models, thus use
##c.hat = 1 
       
##set up named list
names(Cand.set) <- c("logit", "probit", "cloglog")

##compare models
##model names will be taken from the list if modnames is not specified
res.table <- aictab(cand.set = Cand.set, second.ord = FALSE)
##note that delta AIC and Akaike weights are identical to Table 4.7
print(res.table, digits = 2, LL = TRUE) #print table with 2 digits and
##print log-likelihood in table
print(res.table, digits = 4, LL = FALSE) #print table with 4 digits and
##do not print log-likelihood
}


##two-way ANOVA with interaction
data(iron)
##full model
m1 <- lm(Iron ~ Pot + Food + Pot:Food, data = iron)
##additive model
m2 <- lm(Iron ~ Pot + Food, data = iron)
##null model
m3 <- lm(Iron ~ 1, data = iron)

##candidate models
Cand.aov <- list(m1, m2, m3)
Cand.names <- c("full", "additive", "null")
aictab(Cand.aov, Cand.names)



##single-season occupancy model example modified from ?occu
if (FALSE) {
require(unmarked)
##single season example modified from ?occu
data(frogs)
pferUMF <- unmarkedFrameOccu(pfer.bin)
##add fake covariates
siteCovs(pferUMF) <- data.frame(sitevar1 = rnorm(numSites(pferUMF)),
                                sitevar2 = runif(numSites(pferUMF))) 
     
##observation covariates
obsCovs(pferUMF) <- data.frame(obsvar1 = rnorm(numSites(pferUMF) *
                                 obsNum(pferUMF))) 

##set up candidate model set
fm1 <- occu(~ obsvar1 ~ sitevar1, pferUMF)
fm2 <- occu(~ 1 ~ sitevar1, pferUMF)
fm3 <- occu(~ obsvar1 ~ sitevar2, pferUMF)
fm4 <- occu(~ 1 ~ sitevar2, pferUMF)

##assemble models in named list (alternative to using 'modnames' argument)
Cand.mods <- list("fm1" = fm1, "fm2" = fm2, "fm3" = fm3, "fm4" = fm4)

##compute table
aictab(cand.set = Cand.mods, second.ord = TRUE)

detach(package:unmarked)
}

Run the code above in your browser using DataLab