# NOT RUN {
# }
# NOT RUN {
#------------------------------------------------------------------------------
# Example 1: modified 'mammalsleep' data ('mammal_data'), in which the column
# tuples ('ps','sws') and ('mls','gt') each share an identical missing data
# pattern
#------------------------------------------------------------------------------
# Impute 'mammal_data' with mice() to get the mids object we post-process
mids_mammal <- mice(data = mammal_data)
# No blocks are passed, so mice.post.matching() detects the column tuples with
# identical missing data patterns on its own and imputes on those
post_mammal <- mice.post.matching(obj = mids_mammal)
# Show which column tuples were imputed on
post_mammal$blocks
# Inspect the imputations stored in the resulting mice::mids object
post_mammal$midsobj$imp
#------------------------------------------------------------------------------
# Example 2: original 'mammalsleep' data set from mice. Here the imputations
# in column 'sws' are post-processed so that a value is only imputed from rows
# whose value in 'pi' equals the value of 'pi' in the row being imputed.
#------------------------------------------------------------------------------
# Impute 'mammalsleep' with mice() to get the mids object we post-process
mids_mammal <- mice(data = mammalsleep)
# 'sws' is the column to impute on; 'pi' is the observed variable that is
# considered in the matching
post_mammal <- mice.post.matching(
  obj = mids_mammal,
  blocks = "sws",
  match_vars = "pi"
)
# Inspect the imputations stored in the resulting mice::mids object
post_mammal$midsobj$imp
#------------------------------------------------------------------------------
# Example 3: combined usage of blocks and weights, relating to the examples in
# the input format section above. As before, we impute on the column tuples
# ('ps','sws') and ('mls','gt') from mammal_data, but this time the first
# block is weighted: 'sws' gets weight 3 and 'ps' gets weight 2, i.e. 'sws' is
# weighted 1.5 times as much as 'ps'. The second tuple is not weighted.
#------------------------------------------------------------------------------
# Run mice() first
mids_mammal <- mice(data = mammal_data)

## There are five ways to specify the blocks and weights:

# Option 1: blocks and weights both in list format
post_mammal <- mice.post.matching(
  obj = mids_mammal,
  blocks = list(c("sws", "ps"), c("mls", "gt")),
  weights = list(c(3, 2), NULL)
)
# ... or equivalently, using column indices instead of column names:
post_mammal <- mice.post.matching(
  obj = mids_mammal,
  blocks = list(c(4, 5), c(7, 8)),
  weights = list(c(3, 2), NULL)
)

# Option 2: blocks and weights both in vector format
post_mammal <- mice.post.matching(
  obj = mids_mammal,
  blocks = c(0, 0, 0, 1, 1, 0, 2, 2, 0, 0, 0),
  weights = c(1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1)
)

# Option 3: blocks in list format, weights in vector format
post_mammal <- mice.post.matching(
  obj = mids_mammal,
  blocks = list(c("sws", "ps"), c("mls", "gt")),
  weights = c(1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1)
)

# Option 4: blocks in vector format, weights in list format.
# The block number determines which tuple of the weights list a block
# corresponds to, and within each tuple the weights are assigned to the data
# columns in left-to-right order
post_mammal <- mice.post.matching(
  obj = mids_mammal,
  blocks = c(0, 0, 0, 1, 1, 0, 2, 2, 0, 0, 0),
  weights = list(c(3, 2), NULL)
)

# Option 5: only weights, in vector format. If the user knows beforehand that
# at least the column tuple they want to impute and weight has identical
# missing value patterns, they can assign weights to it in vector format and
# let mice.post.matching() find all other blocks with identical missing value
# patterns - possibly even more than just ('ps','sws') and ('mls','gt')
post_mammal <- mice.post.matching(
  obj = mids_mammal,
  weights = c(1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1)
)
#------------------------------------------------------------------------------
# Example 4: combined use of mice.binarize(), mice.factorize() and
# mice.post.matching() on the data set 'boys_data'. It contains the column
# blocks ('hgt','bmi') and ('hc','gen','phb') with identical missing value
# patterns, of which the columns 'gen' and 'phb' are factors. Both tuples are
# imputed blockwise after the factor columns have been binarized. Neither the
# blocks nor the columns to binarize have to be specified, since all of them
# are determined automatically.
#------------------------------------------------------------------------------
# By default, mice.binarize() expands every factor column that contains NAs,
# so 'gen' and 'phb' are binarized automatically
boys_bin <- mice.binarize(boys_data)

# Run mice on the binarized data. The actual binarized data is stored in
# boys_bin$data, and we pass the output predictor matrix boys_bin$pred_matrix,
# which is recommended for obtaining better imputation models
mids_boys <- mice(
  data = boys_bin$data,
  predictorMatrix = boys_bin$pred_matrix
)

# mice has very likely imputed several ones within one set of dummy variables,
# so post-processing is needed. As recommended, we also pass the output weights
# of mice.binarize(), which give a more balanced weighting on the column tuple
# ('hc','gen','phb') within the matching. As in the previous examples, both
# tuples are discovered and imputed on automatically
post_boys <- mice.post.matching(
  obj = mids_boys,
  weights = boys_bin$weights
)

# Now it is safe to transform back to the original data, with non-binarized
# imputations
res_boys <- mice.factorize(post_boys$midsobj, boys_bin$par_list)

# Analyze the distribution of an imputed variable, e.g. of the column 'gen',
# using the mice version of with()
with(res_boys, table(gen))
#------------------------------------------------------------------------------
# Example 5: similar to the previous example, also on 'boys_data', showing
# some more advanced functionalities of all three functions in miceExt.
# This time only the column block ('gen','phb') is post-processed, and the
# first of these columns ('gen') is weighted twice as much as the second
# ('phb'). Within the matching, we want to avoid matrix computations by using
# the Euclidean distance (distmetric value "euclidian") to determine the donor
# pool, and we want to draw from three donors only.
#------------------------------------------------------------------------------
# Binarize first. Blocks are given in list format with a single block, so the
# enclosing list can be omitted; weights are given in list format likewise.
# Both blocks and weights are expanded and can be read from the output, to be
# used in mice.post.matching() later
boys_bin <- mice.binarize(
  boys_data,
  blocks = c("gen", "phb"),
  weights = c(2, 1)
)

# Run mice on the binarized data, again with the output predictor matrix from
# mice.binarize()
mids_boys <- mice(
  data = boys_bin$data,
  predictorMatrix = boys_bin$pred_matrix
)

# Post-process the binarized columns, using the blocks and weights from the
# mice.binarize() output and setting 'distmetric' and 'donors' as announced in
# the example description
post_boys <- mice.post.matching(
  obj = mids_boys,
  blocks = boys_bin$blocks,
  weights = boys_bin$weights,
  distmetric = "euclidian",
  donors = 3L
)

# Finally, transform back to the original format
res_boys <- mice.factorize(post_boys$midsobj, boys_bin$par_list)
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab