modavg: Compute Model-averaged Parameter Estimate (Multimodel Inference)

Description

This function model-averages the estimate of a parameter of interest among a set of candidate models, computes the unconditional standard error and unconditional confidence intervals as described in Buckland et al. (1997) and Burnham and Anderson (2002). This model-averaged estimate is also referred to as a natural average of the estimate by Burnham and Anderson (2002, p. 152).

Usage

modavg(cand.set, parm, modnames = NULL, second.ord = TRUE, nobs = NULL, 
       uncond.se = "revised", conf.level = 0.95, exclude = NULL, warn =
       TRUE, ...) 
## S3 method for class 'AICaov.lm':
modavg(cand.set, parm, modnames = NULL, second.ord =
        TRUE, nobs = NULL, uncond.se = "revised", conf.level = 0.95,
        exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AICsclm.clm':
modavg(cand.set, parm, modnames = NULL,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AICclmm':
modavg(cand.set, parm, modnames = NULL, second.ord 
        = TRUE, nobs = NULL, uncond.se = "revised", conf.level = 0.95,
        exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AICcoxme':
modavg(cand.set, parm, modnames = NULL, second.ord
        = TRUE, nobs = NULL, uncond.se = "revised", conf.level = 0.95,
        exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AICcoxph':
modavg(cand.set, parm, modnames = NULL, second.ord
        = TRUE, nobs = NULL, uncond.se = "revised", conf.level = 0.95,
        exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AICglm.lm':
modavg(cand.set, parm, modnames = NULL,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
        gamdisp = NULL, \dots)
## S3 method for class 'AICgls':
modavg(cand.set, parm, modnames = NULL, second.ord =
           TRUE, nobs = NULL, uncond.se = "revised", conf.level = 0.95,
           exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AIClm':
modavg(cand.set, parm, modnames = NULL, second.ord =
        TRUE, nobs = NULL, uncond.se = "revised", conf.level = 0.95,
        exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AIClme':
modavg(cand.set, parm, modnames = NULL, second.ord =
        TRUE, nobs = NULL, uncond.se = "revised", conf.level = 0.95,
        exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AIClmekin':
modavg(cand.set, parm, modnames = NULL,
         second.ord = TRUE, nobs = NULL, uncond.se = "revised",
         conf.level = 0.95, exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AICmaxlikeFit.list':
modavg(cand.set, parm, modnames = NULL,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
        \dots)
## S3 method for class 'AICmer':
modavg(cand.set, parm, modnames = NULL, second.ord =
        TRUE, nobs = NULL, uncond.se = "revised", conf.level = 0.95,
        exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AIClmerMod':
modavg(cand.set, parm, modnames = NULL,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AICglmerMod':
modavg(cand.set, parm, modnames = NULL,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AICmultinom.nnet':
modavg(cand.set, parm, modnames = NULL, 
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
        \dots)
## S3 method for class 'AICpolr':
modavg(cand.set, parm, modnames = NULL, second.ord
        = TRUE, nobs = NULL, uncond.se = "revised", conf.level = 0.95,
        exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AICrlm.lm':
modavg(cand.set, parm, modnames = NULL,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AICvglm':
modavg(cand.set, parm, modnames = NULL, second.ord
         = TRUE, nobs = NULL, uncond.se = "revised", conf.level = 0.95,
         exclude = NULL, warn = TRUE, c.hat = 1, \dots)
## S3 method for class 'AICzeroinfl':
modavg(cand.set, parm, modnames = NULL,
         second.ord = TRUE, nobs = NULL, uncond.se = "revised",
         conf.level = 0.95, exclude = NULL, warn = TRUE, \dots)
## S3 method for class 'AICunmarkedFitOccu':
modavg(cand.set, parm, modnames = NULL, 
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
        parm.type = NULL, \dots)
## S3 method for class 'AICunmarkedFitColExt':
modavg(cand.set, parm, modnames =
        NULL, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
        parm.type = NULL, \dots)
## S3 method for class 'AICunmarkedFitOccuRN':
modavg(cand.set, parm, modnames =
        NULL, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
        parm.type = NULL, \dots)
## S3 method for class 'AICunmarkedFitPCount':
modavg(cand.set, parm, modnames =
        NULL, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
        parm.type = NULL, \dots)
## S3 method for class 'AICunmarkedFitPCO':
modavg(cand.set, parm, modnames = NULL,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
        parm.type = NULL, \dots)
## S3 method for class 'AICunmarkedFitDS':
modavg(cand.set, parm, modnames = NULL,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
        parm.type = NULL, \dots)
## S3 method for class 'AICunmarkedFitGDS':
modavg(cand.set, parm, modnames = NULL,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
        parm.type = NULL, \dots)
## S3 method for class 'AICunmarkedFitOccuFP':
modavg(cand.set, parm, modnames =
        NULL, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
        parm.type = NULL, \dots)
## S3 method for class 'AICunmarkedFitMPois':
modavg(cand.set, parm, modnames =
        NULL, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
        parm.type = NULL, \dots)
## S3 method for class 'AICunmarkedFitGMM':
modavg(cand.set, parm, modnames =
       NULL, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
       conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
       parm.type = NULL, \dots)
## S3 method for class 'AICunmarkedFitGPC':
modavg(cand.set, parm, modnames =
        NULL, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, exclude = NULL, warn = TRUE, c.hat = 1,
        parm.type = NULL, \dots)

Arguments

cand.set

a list storing each of the models in the candidate model set.

parm

the parameter of interest, enclosed between quotes, for which a model-averaged estimate is required. For a categorical variable, the label of the estimate must be included as it appears in the output (see 'Details' below).

modnames

a character vector of model names to facilitate the identification of each model in the model selection table. If NULL, the function uses the names in the cand.set list of candidate models. If no names appear in the list, generic names (e.g.,

second.ord

logical. If TRUE, the function returns the second-order Akaike information criterion (i.e., AICc).

nobs

this argument allows to specify a numeric value other than total sample size to compute the AICc (i.e., nobs defaults to total number of observations). This is relevant only for mixed models or various models of unmarkedFit cla

uncond.se

either, "old", or "revised", specifying the equation used to compute the unconditional standard error of a model-averaged estimate. With uncond.se = "old", computations are based on equation 4.9 of Burnham and

conf.level

the confidence level requested for the computation of unconditional confidence intervals.

exclude

this argument excludes models based on the terms specified for the computation of a model-averaged estimate of parm. The exclude argument is set to NULL by default and does not exclude any models other than those w

warn

logical. If TRUE, modavg performs a check and issues a warning when the value in parm occurs more than once in any given model. This is a check for potential interaction/polynomial terms in the model when such ter

c.hat

value of overdispersion parameter (i.e., variance inflation factor) such as that obtained from c_hat. Note that values of c.hat different from 1 are only appropriate for binomial GLM's with trials > 1 (i.e., success/trial or cbind(success, f

gamdisp

if gamma GLM is used, the dispersion parameter should be specified here to apply the same value to each model.

parm.type

this argument specifies the parameter type on which the effect size will be computed and is only relevant for models of unmarkedFitOccu, unmarkedFitColExt, unmarkedFitOccuFP, unmarkedFitOccuRN,

...

additional arguments passed to the function.

Value

modavg creates an object of class modavg with the following components:
Parameter: the parameter for which a model-averaged estimate was obtained
Mod.avg.table: the reduced model selection table based on models including the parameter of interest
Mod.avg.beta: the model-averaged estimate based on all models including the parameter of interest (see 'Details' below regarding the exclusion of models where parameter of interest is involved in an interaction)
Uncond.SE: the unconditional standard error for the model-averaged estimate (as opposed to the conditional SE based on a single model)
Conf.level: the confidence level used to compute the confidence interval
Lower.CL: the lower confidence limit
Upper.CL: the upper confidence limit

Details

The parameter for which a model-averaged estimate is requested must be specified with the parm argument and must be identical to its label in the model output (e.g., from summary). For factors, one must specify the name of the variable and the level of interest. modavg includes checks to find variations of interaction terms specified in the parm and exclude arguments. However, to avoid problems, one should specify interaction terms consistently for all models: e.g., either a:b or b:a for all models, but not a mixture of both.

You must exercise caution when some models include interaction or polynomial terms, because main effect terms do not have the same interpretation when they also appear in an interaction/polynomial term in the same model. In such cases, one should use the exclude argument of modavg to exclude models containing interaction terms in which the main effect is involved. Note that modavg checks for cases where a variable appears more than once in a given model (presumably in an interaction) and issues a warning. To correctly compute the model-averaged estimate of a main effect involved in interaction/polynomial terms, specify the interaction term(s) that should not appear in the same model with the exclude argument. This will effectively exclude models from the computation of the model-averaged estimate.

When warn = TRUE, modavg looks for exact matches among the labels of the estimates using identical(). It then compares the results to partial matches obtained with regexpr(), and issues a warning whenever they differ. As a result, modavg may issue a warning when some variables or levels of categorical variables have nested names (e.g., treat, treat10; L, TL). When this warning is only due to the presence of similarly named variables in the models (and NOT due to interaction terms), you can suppress this warning by setting warn = FALSE.

modavg is implemented for a list containing objects of clm, clmm, clogit, coxme, coxph, glm, gls, lm, lme, lmekin, maxlikeFit, mer, glmerMod, lmerMod, multinom, polr, rlm, vglm, zeroinfl classes as well as various models of unmarkedFit classes.

References

Anderson, D. R. (2008) Model-based Inference in the Life Sciences: a primer on evidence. Springer: New York.

Buckland, S. T., Burnham, K. P., Augustin, N. H. (1997) Model selection: an integral part of inference. Biometrics 53, 603--618.

Burnham, K. P., Anderson, D. R. (2002) Model Selection and Multimodel Inference: a practical information-theoretic approach. Second edition. Springer: New York.

Burnham, K. P., Anderson, D. R. (2004) Multimodel inference: understanding AIC and BIC in model selection. Sociological Methods and Research 33, 261--304.

Dail, D., Madsen, L. (2011) Models for estimating abundance from repeated counts of an open population. Biometrics 67, 577--587.

MacKenzie, D. I., Nichols, J. D., Lachman, G. B., Droege, S., Royle, J. A., Langtimm, C. A. (2002) Estimating site occupancy rates when detection probabilities are less than one. Ecology 83, 2248--2255.

Mazerolle, M. J. (2006) Improving data analysis in herpetology: using Akaike's Information Criterion (AIC) to assess the strength of biological hypotheses. Amphibia-Reptilia 27, 169--180.

Royle, J. A. (2004) N-mixture models for estimating population size from spatially replicated counts. Biometrics 60, 108--115.

Examples

Run this code

##anuran larvae data analysed in Mazerolle (2006)
data(min.trap)
##make "UPLAND" the reference level of Type, as in Mazerolle (2006)
min.trap$Type <- relevel(min.trap$Type, ref = "UPLAND")

##candidate model set: Poisson GLMs that all share the same offset;
##the first model is the global model, the last one is intercept-only
Cand.mod <- list(
  glm(Num_anura ~ Type + log.Perimeter + Num_ranatra, family = poisson,
      offset = log(Effort), data = min.trap),
  glm(Num_anura ~ Type + log.Perimeter, family = poisson,
      offset = log(Effort), data = min.trap),
  glm(Num_anura ~ Type + Num_ranatra, family = poisson,
      offset = log(Effort), data = min.trap),
  glm(Num_anura ~ Type, family = poisson,
      offset = log(Effort), data = min.trap),
  glm(Num_anura ~ log.Perimeter + Num_ranatra, family = poisson,
      offset = log(Effort), data = min.trap),
  glm(Num_anura ~ log.Perimeter, family = poisson,
      offset = log(Effort), data = min.trap),
  glm(Num_anura ~ Num_ranatra, family = poisson,
      offset = log(Effort), data = min.trap),
  glm(Num_anura ~ 1, family = poisson,
      offset = log(Effort), data = min.trap)
)

##c-hat of the global model, based on Pearson's chi-square/df
c_hat(Cand.mod[[1]])
##overdispersion is very low here: the value is reasonably close to 1,
##so the analysis could be conducted without correcting for c-hat

##one descriptive name per model, in the same order as Cand.mod
Modnames <- c("type + logperim + invertpred", "type + logperim",
              "type + invertpred", "type", "logperim + invertpred",
              "logperim", "invertpred", "intercept only")

##model-averaged estimate of TypeBOG
modavg(cand.set = Cand.mod, parm = "TypeBOG", modnames = Modnames)
##same estimate, printed with 4 digits after the decimal point
print(modavg(cand.set = Cand.mod, parm = "TypeBOG", modnames = Modnames),
      digits = 4)

##c-hat estimate based on the residual deviance of the global model,
##as in Mazerolle (2006)
with(Cand.mod[[1]], deviance / df.residual)

##model-averaged estimate of TypeBOG corrected for overdispersion,
##as in Table 4 of Mazerolle (2006)
modavg(cand.set = Cand.mod, parm = "TypeBOG", modnames = Modnames,
       c.hat = 1.11)



##example with similarly-named variables and interaction terms
##the seed is set only once: re-seeding with the same value before
##drawing 'mass' would reuse the random draws behind 'resp' and make
##'mass' an exact linear function of the response (perfect fit in every
##model that includes 'mass')
set.seed(seed = 4)
resp <- rnorm(n = 40, mean = 3, sd = 1)
size <- rep(c("small", "medsmall", "high", "medhigh"), times = 10)
mass <- rnorm(n = 40, mean = 2, sd = 0.1)
mass2 <- mass^2
age <- rpois(n = 40, lambda = 3.2)
agecorr <- rpois(n = 40, lambda = 2)
sizecat <- rep(c("a", "ab"), times = 20)
data1 <- data.frame(resp = resp, size = size, sizecat = sizecat,
                    mass = mass, mass2 = mass2, age = age,
                    agecorr = agecorr)

##set up models in list
Cand <- list()
Cand[[1]] <- lm(resp ~ size + agecorr, data = data1)
Cand[[2]] <- lm(resp ~ size + mass + agecorr, data = data1)
Cand[[3]] <- lm(resp ~ age + mass, data = data1)
Cand[[4]] <- lm(resp ~ age + mass + mass2, data = data1)
Cand[[5]] <- lm(resp ~ mass + mass2 + size, data = data1)
Cand[[6]] <- lm(resp ~ mass + mass2 + sizecat, data = data1)
Cand[[7]] <- lm(resp ~ sizecat, data = data1)
Cand[[8]] <- lm(resp ~ sizecat + mass + sizecat:mass, data = data1)
Cand[[9]] <- lm(resp ~ agecorr + sizecat + mass + sizecat:mass,
                data = data1)

##create vector of model names ("mod1", "mod2", ...)
Modnames <- paste0("mod", seq_along(Cand))

aictab(cand.set = Cand, modnames = Modnames, sort = TRUE) #correct

##as expected, issues warning as mass occurs sometimes with "mass2" or
##"sizecatab:mass" in some of the models
modavg(cand.set = Cand, parm = "mass", modnames = Modnames)

##no warning issued, because "age" and "agecorr" never appear in same model
modavg(cand.set = Cand, parm = "age", modnames = Modnames)

##no warning issued because warn = FALSE, but suppressing the check is a
##very bad idea in this example since "mass" occurs with "mass2" and
##"sizecat:mass" in some of the models - results are INCORRECT
modavg(cand.set = Cand, parm = "mass", modnames = Modnames,
       warn = FALSE)

##correctly excludes models with quadratic term and interaction term
##results are CORRECT
modavg(cand.set = Cand, parm = "mass", modnames = Modnames,
       exclude = list("mass2", "sizecat:mass"))

##correctly computes model-averaged estimate because no other parameter
##occurs simultaneously in any of the models
modavg(cand.set = Cand, parm = "sizesmall", modnames = Modnames) #correct

##as expected, issues a warning because "sizecatab" occurs sometimes in
##an interaction in some models
modavg(cand.set = Cand, parm = "sizecatab",
       modnames = Modnames)

##exclude models with "sizecat:mass" interaction - results are CORRECT
modavg(cand.set = Cand, parm = "sizecatab", modnames = Modnames,
       exclude = list("sizecat:mass"))



##example with generalized linear mixed model
##modified example from ?glmer
##library() stops with an error right away when lme4 is not installed,
##whereas require() only returns FALSE and the failure shows up later
##at the first call to glmer()
library(lme4)
##create proportion of incidence
cbpp$prop <- cbpp$incidence/cbpp$size
##add bogus variable
cbpp$randx <- rnorm(n = nrow(cbpp), mean = 12, sd = 3)

##model with period
gm1 <- glmer(prop ~ period + (1 | herd), family = binomial,
             weights = size, data = cbpp)
gm2 <- glmer(prop ~ period + randx + (1 | herd), family = binomial,
             weights = size, data = cbpp)
##model without period
gm3 <- glmer(prop ~ 1 + (1 | herd), family = binomial,
             weights = size, data = cbpp)
Cands <- list(gm1, gm2, gm3)
Modnames <- c("period", "period + randx", "null")

##model selection
aictab(cand.set = Cands, modnames = Modnames)
##model average for difference between period 1 vs period 4
modavg(cand.set = Cands, modnames = Modnames, parm = "period4")
detach(package:lme4)


##modified example of Cox regression from ?coxph
##library() stops with an error right away when survival is not
##installed, whereas require() only returns FALSE
library(survival)
##Create a simple data set for a time-dependent model
test2 <- list(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8),
              stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17),
              event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0),
              x    =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0),
              height = c(12.3, 10.5, 9.2, 5.6, 8.9,
                11.0, 16.1, 10.2, 9.9, 14.8))
m.cox <- coxph(Surv(start, stop, event) ~ x, test2)
m.cox2 <- coxph(Surv(start, stop, event) ~ x + height, test2)
Cands <- list(m.cox, m.cox2)
Mods <- c("x", "additive")
##arguments are named explicitly: 'parm' is the second formal argument
##of modavg, so passing the model names by position is easy to misread
aictab(cand.set = Cands, modnames = Mods)
modavg(cand.set = Cands, parm = "x", modnames = Mods)



##example with multiple-season occupancy model modified from ?colext
##this is a bit longer
##library() stops with an error right away when unmarked is not
##installed, whereas require() only returns FALSE
library(unmarked)
data(frogs)
umf <- formatMult(masspcru)
obsCovs(umf) <- scale(obsCovs(umf))
siteCovs(umf) <- rnorm(numSites(umf))
yearlySiteCovs(umf) <- data.frame(year = factor(rep(1:7,
                                    numSites(umf))))

##set up model with constant transition rates
fm <- colext(psiformula = ~ 1, gammaformula = ~ 1, epsilonformula = ~ 1,
             pformula = ~ JulianDate + I(JulianDate^2), data = umf,
             control = list(trace=1, maxit=1e4))

##model with year-dependent transition rates
fm.yearly <- colext(psiformula = ~ 1, gammaformula = ~ year,
                    epsilonformula = ~ year,
                    pformula = ~ JulianDate + I(JulianDate^2),
                    data = umf)

##store in list and assign model names
Cand.mods <- list(fm, fm.yearly)
Modnames <- c("psi1(.)gam(.)eps(.)p(Date + Date2)",
              "psi1(.)gam(Year)eps(Year)p(Date + Date2)")

##compute model-averaged estimate of occupancy in the first year
modavg(cand.set = Cand.mods, modnames = Modnames, parm = "(Intercept)",
       parm.type = "psi")

##compute model-averaged estimate of Julian Day squared on detectability
modavg(cand.set = Cand.mods, modnames = Modnames,
       parm = "I(JulianDate^2)", parm.type = "detect")


##model-averaged estimate of area from distance sampling models
##(example modified from ?distsamp; takes a bit longer to run)
data(linetran)

##build the distance-sampling data frame; every column is looked up
##in linetran, transect lengths are multiplied by 1000 for tlength
ltUMF <- with(
  linetran,
  unmarkedFrameDS(y = cbind(dc1, dc2, dc3, dc4),
                  siteCovs = data.frame(Length, area, habitat),
                  dist.breaks = c(0, 5, 10, 15, 20),
                  tlength = Length * 1000,
                  survey = "line", unitsIn = "m")
)

##half-normal detection function, density output (log scale),
##no covariates
fm1 <- distsamp(~ 1 ~ 1, ltUMF)

##half-normal, covariates affecting both density and detection
fm2 <- distsamp(~ area + habitat ~ area + habitat, ltUMF)

##hazard function, covariates affecting both density and detection
fm3 <- distsamp(~ habitat ~ area + habitat, ltUMF, keyfun = "hazard")

##candidate set and generic model names ("mod1", "mod2", "mod3")
Cands <- list(fm1, fm2, fm3)
Modnames <- paste0("mod", seq_along(Cands))

##model-averaged estimate of area on abundance
modavg(cand.set = Cands, modnames = Modnames, parm = "area",
       parm.type = "lambda")
detach(package:unmarked)

Run the code above in your browser using DataLab