Functions to estimate (generalized) linear and (generalized) linear mixed models, ordinal and ordinal mixed models, and parametric (Weibull) as well as Cox proportional hazards survival models using MCMC sampling, while imputing missing values.
lm_imp(
formula,
data,
n.chains = 3,
n.adapt = 100,
n.iter = 0,
thin = 1,
monitor_params = NULL,
auxvars = NULL,
refcats = NULL,
models = NULL,
no_model = NULL,
trunc = NULL,
ridge = FALSE,
ppc = TRUE,
seed = NULL,
inits = NULL,
parallel = FALSE,
n.cores = NULL,
scale_vars = NULL,
scale_pars = NULL,
hyperpars = NULL,
modelname = NULL,
modeldir = NULL,
keep_model = FALSE,
overwrite = NULL,
quiet = TRUE,
progress.bar = "text",
warn = TRUE,
mess = TRUE,
keep_scaled_mcmc = FALSE,
...
)
glm_imp(
formula,
family,
data,
n.chains = 3,
n.adapt = 100,
n.iter = 0,
thin = 1,
monitor_params = NULL,
auxvars = NULL,
refcats = NULL,
models = NULL,
no_model = NULL,
trunc = NULL,
ridge = FALSE,
ppc = TRUE,
seed = NULL,
inits = NULL,
parallel = FALSE,
n.cores = NULL,
scale_vars = NULL,
scale_pars = NULL,
hyperpars = NULL,
modelname = NULL,
modeldir = NULL,
keep_model = FALSE,
overwrite = NULL,
quiet = TRUE,
progress.bar = "text",
warn = TRUE,
mess = TRUE,
keep_scaled_mcmc = FALSE,
...
)
clm_imp(
fixed,
data,
n.chains = 3,
n.adapt = 100,
n.iter = 0,
thin = 1,
monitor_params = NULL,
auxvars = NULL,
refcats = NULL,
models = NULL,
no_model = NULL,
trunc = NULL,
ridge = FALSE,
ppc = TRUE,
seed = NULL,
inits = NULL,
parallel = FALSE,
n.cores = NULL,
scale_vars = NULL,
scale_pars = NULL,
hyperpars = NULL,
modelname = NULL,
modeldir = NULL,
keep_model = FALSE,
overwrite = NULL,
quiet = TRUE,
progress.bar = "text",
warn = TRUE,
mess = TRUE,
keep_scaled_mcmc = FALSE,
...
)
lme_imp(
fixed,
data,
random,
n.chains = 3,
n.adapt = 100,
n.iter = 0,
thin = 1,
monitor_params = NULL,
auxvars = NULL,
refcats = NULL,
models = NULL,
no_model = NULL,
trunc = NULL,
ridge = FALSE,
ppc = TRUE,
seed = NULL,
inits = NULL,
parallel = FALSE,
n.cores = NULL,
scale_vars = NULL,
scale_pars = NULL,
hyperpars = NULL,
modelname = NULL,
modeldir = NULL,
keep_model = FALSE,
overwrite = NULL,
quiet = TRUE,
progress.bar = "text",
warn = TRUE,
mess = TRUE,
keep_scaled_mcmc = FALSE,
...
)
glme_imp(
fixed,
data,
random,
family,
n.chains = 3,
n.adapt = 100,
n.iter = 0,
thin = 1,
monitor_params = NULL,
auxvars = NULL,
refcats = NULL,
models = NULL,
no_model = NULL,
trunc = NULL,
ridge = FALSE,
ppc = TRUE,
seed = NULL,
inits = NULL,
parallel = FALSE,
n.cores = NULL,
scale_vars = NULL,
scale_pars = NULL,
hyperpars = NULL,
modelname = NULL,
modeldir = NULL,
keep_model = FALSE,
overwrite = NULL,
quiet = TRUE,
progress.bar = "text",
warn = TRUE,
mess = TRUE,
keep_scaled_mcmc = FALSE,
...
)
clmm_imp(
fixed,
data,
random,
n.chains = 3,
n.adapt = 100,
n.iter = 0,
thin = 1,
monitor_params = NULL,
auxvars = NULL,
refcats = NULL,
models = NULL,
no_model = NULL,
trunc = NULL,
ridge = FALSE,
ppc = TRUE,
seed = NULL,
inits = NULL,
parallel = FALSE,
n.cores = NULL,
scale_vars = NULL,
scale_pars = NULL,
hyperpars = NULL,
modelname = NULL,
modeldir = NULL,
keep_model = FALSE,
overwrite = NULL,
quiet = TRUE,
progress.bar = "text",
warn = TRUE,
mess = TRUE,
keep_scaled_mcmc = FALSE,
...
)
survreg_imp(
formula,
data,
n.chains = 3,
n.adapt = 100,
n.iter = 0,
thin = 1,
monitor_params = NULL,
auxvars = NULL,
refcats = NULL,
models = NULL,
no_model = NULL,
trunc = NULL,
ridge = FALSE,
ppc = TRUE,
seed = NULL,
inits = NULL,
parallel = FALSE,
n.cores = NULL,
scale_vars = NULL,
scale_pars = NULL,
hyperpars = NULL,
modelname = NULL,
modeldir = NULL,
keep_model = FALSE,
overwrite = NULL,
quiet = TRUE,
progress.bar = "text",
warn = TRUE,
mess = TRUE,
keep_scaled_mcmc = FALSE,
...
)
coxph_imp(
formula,
data,
n.chains = 3,
n.adapt = 100,
n.iter = 0,
thin = 1,
monitor_params = NULL,
auxvars = NULL,
refcats = NULL,
models = NULL,
no_model = NULL,
trunc = NULL,
ridge = FALSE,
ppc = TRUE,
seed = NULL,
inits = NULL,
parallel = FALSE,
n.cores = NULL,
scale_vars = NULL,
scale_pars = NULL,
hyperpars = NULL,
modelname = NULL,
modeldir = NULL,
keep_model = FALSE,
overwrite = NULL,
quiet = TRUE,
progress.bar = "text",
warn = TRUE,
mess = TRUE,
keep_scaled_mcmc = FALSE,
...
)
a two sided model formula (see formula
)
a data.frame
the number of MCMC chains to be used
the number of iterations for adaptation of the MCMC samplers
(see also adapt
)
the number of iterations of the MCMC chain (after adaptation;
see also coda.samples
)
thinning interval (see window.mcmc
)
named vector specifying which parameters should be monitored (see details)
optional one-sided formula of variables that should be used as predictors in the imputation procedure (and will be imputed if necessary) but are not part of the analysis model
optional; either one of "first"
, "last"
, "largest"
(which sets the category for all categorical variables)
or a named list specifying which category should be
used as reference category for each of the categorical variables.
Options are the category label, the category number, or one of
"first" (the first category), "last" (the last category)
or "largest" (chooses the category with the most observations).
Default is "first". (See also set_refcat
)
optional named vector specifying the types of models for
(incomplete) covariates.
This argument replaces the argument meth
used in earlier versions.
If NULL
(default) models will be determined
automatically based on the class of the respective columns of data
.
names of variables for which no model should be specified. Note that this is only possible for completely observed variables and implies the assumption of independence between the excluded variable and the incomplete variables.
optional named list specifying the limits of truncation for the distribution of the named incomplete variables (see the vignette ModelSpecification)
logical; should the parameters of the main model be penalized using ridge regression? Default is FALSE
logical: should monitors for posterior predictive checks be set? (not yet used)
optional seed value for reproducibility
optional specification of initial values in the form of a list
or a function (see jags.model
).
If omitted, initial values will be generated automatically by JAGS.
It is an error to supply an initial value for an observed node.
logical; should the chains be sampled using parallel computation? Default is FALSE
number of cores to use for parallel computation; if left empty all except two cores will be used
optional; named vector of (continuous) variables that will
be scaled (such that mean = 0 and sd = 1) to improve
convergence of the MCMC sampling. Default is that all
continuous variables that are not transformed by a function
(e.g. log(), ns()
) will be scaled. Variables
for which a log-normal model is used are
only scaled with regards to the standard deviation, but not
centered. Variables modeled with a Gamma or beta distribution
are not scaled.
If set to FALSE
no scaling will be done.
optional matrix of parameters used for centering and
scaling of continuous covariates. If not specified, this will
be calculated automatically. If FALSE
, no scaling
will be done.
list of hyperparameters, as obtained by default_hyperpars()
;
only needs to be supplied if hyperparameters other than the
default should be used
optional; character string specifying the name of the model file (including the ending, either .R or .txt). If unspecified a random name will be generated.
optional; directory containing the model file or directory in which the model file should be written. If unspecified a temporary directory will be created.
logical; whether the created JAGS model should be saved
or removed from the disk (FALSE
; default) when the
sampling has finished.
logical; whether an existing model file with the specified
<modeldir>/<modelname>
should be overwritten. If set to
FALSE
and a model already exists, that model will be used.
If unspecified (NULL
) and a file exists, the user is
asked for input on how to proceed.
if TRUE
then messages generated during compilation
will be suppressed, as well as the progress bar during adaptation
(see jags.model
)
character string specifying the type of progress bar.
Possible values are "text", "gui", and "none" (see
update
). Note: when sampling is performed
in parallel it is currently not possible to display a
progress bar.
logical; should warnings be given? Default is
TRUE
. (Note: this applies only to warnings
given directly by JointAI.)
logical; should messages be given? Default is
TRUE
. (Note: this applies only to messages
given directly by JointAI.)
should the "original" MCMC sample
(i.e., the scaled version returned by coda.samples()
) be kept?
(The MCMC sample that is re-scaled to the scale of the
data is always kept.)
additional, optional arguments
only for glm_imp
and glme_imp
:
a description of the distribution and link function to
be used in the model. This can be a character string naming a
family function, a family function or the result of a call to
a family function. (See family
and the
`Details` section below.)
a two sided formula describing the fixed-effects part of the
model (see formula
)
only for multi-level models:
a one-sided formula of the form ~x1 + ... + xn | g
,
where x1 + ... + xn
specifies the model for the random
effects and g
the grouping variable
An object of class JointAI.
See also the vignettes Model Specification, MCMC Settings and Parameter Selection.
Implemented distribution families and link functions for glm_imp()
and glme_imp():
gaussian |
with links: identity , log |
binomial |
with links: logit , probit , log , cloglog |
Gamma |
with links: inverse , identity , log |
Implemented imputation models that can be chosen in the argument models
are:
norm |
linear model |
lognorm |
log-normal model for skewed continuous data |
gamma |
gamma model (with log-link) for skewed continuous data |
beta |
beta model (with logit-link) for skewed continuous data in (0, 1) |
logit |
logistic model for binary data |
multilogit |
multinomial logit model for unordered categorical variables |
cumlogit |
cumulative logit model for ordered categorical variables |
lmm |
linear mixed model for continuous longitudinal covariates |
glmm_lognorm |
log-normal mixed model for skewed longitudinal covariates |
glmm_gamma |
Gamma mixed model for skewed longitudinal covariates |
glmm_logit |
logit mixed model for binary longitudinal covariates |
glmm_poisson |
Poisson mixed model for longitudinal count covariates |
Parameters to follow (monitor_params)
See also the vignette: Parameter Selection
Named vector specifying which parameters should be monitored. This can be done
either directly by specifying the name of the parameter or indirectly by one
of the key words selecting a set of parameters. Except for other
,
in which parameter names are specified directly, parameter (groups) are just
set as TRUE
or FALSE
.
If left unspecified, monitor_params = c("analysis_main" = TRUE)
will be used.
name/key word | what is monitored |
analysis_main |
betas and sigma_y (and D in multi-level models) |
analysis_random |
ranef , D , invD , RinvD |
imp_pars |
alphas , tau_imp , gamma_imp , delta_imp |
imps |
imputed values |
betas |
regression coefficients of the analysis model |
tau_y |
precision of the residuals from the analysis model |
sigma_y |
standard deviation of the residuals from the analysis model |
ranef |
random effects b |
D |
covariance matrix of the random effects |
invD |
inverse of D |
RinvD |
matrix in the prior for invD |
alphas |
regression coefficients in the covariate models |
tau_imp |
precision parameters of the residuals from covariate models |
gamma_imp |
intercepts in ordinal covariate models |
delta_imp |
increments of ordinal intercepts |
monitor_params = c(analysis_main = TRUE, tau_y = TRUE, sigma_y = FALSE)
would monitor the regression parameters betas
and the
residual precision tau_y
instead of the residual standard
deviation sigma_y.
monitor_params = c(imps = TRUE)
would monitor betas
, tau_y
,
and sigma_y
(because analysis_main = TRUE
by default) as well as
the imputed values.
set_refcat
, get_models
,
traceplot
, densplot
,
summary.JointAI
, MC_error
,
GR_crit
,
predict.JointAI
, add_samples
,
JointAIObject
, add_samples
,
parameters
, list_models
Vignettes
# NOT RUN {
# Example 1: Linear regression with incomplete covariates
mod1 <- lm_imp(y ~ C1 + C2 + M1 + B1, data = wideDF, n.iter = 100)
# Example 2: Logistic regression with incomplete covariates
mod2 <- glm_imp(B1 ~ C1 + C2 + M1, data = wideDF,
family = binomial(link = "logit"), n.iter = 100)
# Example 3: Linear mixed model with incomplete covariates
mod3 <- lme_imp(y ~ C1 + B2 + c1 + time, random = ~ time|id,
data = longDF, n.iter = 300)
# }
Run the code above in your browser using DataLab