data.graham: Datasets from Graham's Missing Data Book

Description

Datasets from Graham's missing data book (Graham, 2012).

Usage

data(data.graham.ex3)
data(data.graham.ex6)
data(data.graham.ex8a)
data(data.graham.ex8b)
data(data.graham.ex8c)

Arguments

format

Dataset data.graham.ex3: 'data.frame': 2756 obs. of 20 variables: $ school : int 1 1 1 1 1 1 1 1 1 1 ... $ alc7 : int 1 1 1 7 3 6 1 5 4 3 ... $ rskreb71: int 1 3 1 2 1 NA 1 2 1 2 ... $ rskreb72: int NA NA NA NA NA NA NA 3 2 3 ... $ rskreb73: int NA NA NA NA NA NA NA 2 1 2 ... $ rskreb74: int NA NA NA NA NA NA NA 3 2 4 ... $ likepa71: int 4 2 3 3 2 NA 1 4 3 3 ... $ likepa72: int 5 2 4 2 2 NA 5 3 3 2 ... $ likepa73: int 4 1 3 3 2 NA 1 3 2 3 ... $ likepa74: int 5 3 1 5 4 4 3 4 3 2 ... $ likepa75: int 4 4 4 4 3 3 4 4 3 3 ... $ posatt71: int 1 1 1 1 1 2 1 NA NA NA ... $ posatt72: int 1 2 1 1 1 2 4 NA NA NA ... $ posatt73: int 1 1 1 1 1 2 1 NA NA NA ... $ alc8 : int 1 8 4 8 5 7 1 3 5 3 ... $ rskreb81: int 1 4 1 2 2 3 2 3 1 4 ... $ rskreb82: int NA NA NA NA NA NA NA 3 1 4 ... $ rskreb83: int NA NA NA NA NA NA NA 2 1 2 ... $ rskreb84: int NA NA NA NA NA NA NA 3 2 4 ... $ alc9 : int 3 NA 7 NA 5 7 NA 6 6 7 ...
Dataset data.graham.ex6: 'data.frame': 2756 obs. of 9 variables: $ school : int 1 1 1 1 1 1 1 1 1 1 ... $ program : int 0 0 0 0 0 0 0 0 0 0 ... $ alc7 : int 1 1 1 7 3 6 1 5 4 3 ... $ riskreb7: int 1 3 1 2 1 NA 1 2 1 2 ... $ likepar7: int 4 2 3 3 2 NA 1 4 3 3 ... $ posatt7 : int 1 1 1 1 1 2 1 NA NA NA ... $ alc8 : int 1 8 4 8 5 7 1 3 5 3 ... $ riskreb8: int 1 4 1 2 2 3 2 3 1 4 ... $ alc9 : int 3 NA 7 NA 5 7 NA 6 6 7 ...
Dataset data.graham.ex8a: 'data.frame': 1023 obs. of 20 variables: $ skill1 : int 28 29 27 29 29 NA NA NA 29 NA ... $ skill2 : int NA NA 29 29 NA NA NA NA NA 21 ... $ skill3 : int NA NA 29 29 29 NA 28 10 29 25 ... $ skill4 : int NA 29 25 29 29 28 29 NA NA NA ... $ skill5 : int 29 29 28 28 29 NA 29 10 NA 25 ... $ iplanV1: int 14 18 15 17 16 NA NA NA 18 NA ... $ iplanV2: int NA NA 17 16 NA NA NA NA NA 16 ... $ iplanV3: int NA NA 16 18 18 NA 17 1 18 16 ... $ iplanV4: int NA 18 14 18 14 6 18 NA NA NA ... $ iplanV5: int 13 18 12 18 18 NA 18 3 NA 5 ... $ planA1 : int 1 0 2 8 3 NA NA NA 7 NA ... $ planA2 : int NA NA 0 4 NA NA NA NA NA 6 ... $ planA3 : int NA NA 1 4 7 NA 2 0 1 7 ... $ planA4 : int NA 8 0 4 6 0 0 NA NA NA ... $ planA5 : int 0 7 1 5 7 NA 2 0 NA 6 ... $ planV1 : int NA NA NA NA NA NA NA NA NA NA ... $ planV2 : int NA NA NA NA NA NA NA NA NA 1 ... $ planV3 : int NA NA 1 NA NA NA NA 0 NA 1 ... $ planV4 : int NA NA NA NA 2 NA NA NA NA NA ... $ planV5 : int 2 NA 2 NA NA NA NA 0 NA NA ...
Dataset data.graham.ex8b: 'data.frame': 2570 obs. of 6 variables: $ rskreb71: int 1 3 1 2 1 NA 1 2 1 2 ... $ rskreb72: int NA NA NA NA NA NA NA 3 2 3 ... $ posatt71: int 1 1 1 1 1 2 1 NA NA NA ... $ posatt72: int 1 2 1 1 1 2 4 NA NA NA ... $ posatt73: int 1 1 1 1 1 2 1 NA NA NA ... $ posatt : int 3 4 3 3 3 6 6 NA NA NA ...
Dataset data.graham.ex8c: 'data.frame': 2756 obs. of 16 variables: $ s1 : int 1 1 1 1 1 1 1 1 1 1 ... $ s2 : int 0 0 0 0 0 0 0 0 0 0 ... $ s3 : int 0 0 0 0 0 0 0 0 0 0 ... $ s4 : int 0 0 0 0 0 0 0 0 0 0 ... $ s5 : int 0 0 0 0 0 0 0 0 0 0 ... $ s6 : int 0 0 0 0 0 0 0 0 0 0 ... $ s7 : int 0 0 0 0 0 0 0 0 0 0 ... $ s8 : int 0 0 0 0 0 0 0 0 0 0 ... $ s9 : int 0 0 0 0 0 0 0 0 0 0 ... $ s10 : int 0 0 0 0 0 0 0 0 0 0 ... $ s11 : int 0 0 0 0 0 0 0 0 0 0 ... $ xalc7 : int 1 1 1 7 3 6 1 5 4 3 ... $ rskreb72: int NA NA NA NA NA NA NA 3 2 3 ... $ likepa71: int 4 2 3 3 2 NA 1 4 3 3 ... $ posatt71: int 1 1 1 1 1 2 1 NA NA NA ... $ alc8 : int 1 8 4 8 5 7 1 3 5 3 ...

source

The datasets were downloaded from http://methodology.psu.edu/pubs/books/missing.

References

Graham, J. W. (2012). Missing data. New York: Springer.

Examples

Run this code

library(mitools)
library(mice)
library(Amelia)
library(jomo)

#############################################################################
# EXAMPLE 1: data.graham.ex8a | Imputation under multivariate normal model
#############################################################################

# Load the dataset and keep its first ten columns only
# (skill1-skill5 and iplanV1-iplanV5).
data(data.graham.ex8a)
dat <- data.graham.ex8a
dat <- dat[, 1:10]
vars <- colnames(dat)
V <- length(vars)
# remove persons with completely missing data:
# rowMeans(is.na(dat)) equals 1 only when every selected variable is NA
dat <- dat[rowMeans(is.na(dat)) < 1, ]
summary(dat)

# some descriptive statistics
psych::describe(dat)

#**************
# imputation under a multivariate normal model
M <- 7  # number of imputations, shared by all three packages below

#--------- mice package
# define imputation method: "norm" for each of the V variables
impM <- rep("norm", V)
names(impM) <- vars
# mice imputation
# The methods have to be supplied through the `method` argument. The old
# argument name `imputationMethod` is no longer supported by current mice
# versions; it would be ignored (with a warning) and the default method
# would be used instead of "norm".
imp1a <- mice::mice(dat, method = impM, m = M, maxit = 4)
summary(imp1a)
# convert the mids object into a list of M completed datasets
datlist1a <- miceadds::mids2datlist(imp1a)

#--------- Amelia package
imp1b <- Amelia::amelia(dat, m = M)
summary(imp1b)
# the completed datasets are stored in the `imputations` element
datlist1b <- imp1b$imputations

#--------- jomo package
# all variables are passed as outcomes Y; M imputations are requested
imp1c <- jomo::jomo1con(Y = dat, nburn = 100, nbetween = 10, nimp = M)
str(imp1c)
# convert into a list of datasets
datlist1c <- miceadds::jomo2datlist(imp1c)

#############################################################################
# EXAMPLE 2: data.graham.ex8b | Imputation with categorical variables
#############################################################################

data(data.graham.ex8b)
dat <- data.graham.ex8b
vars <- colnames(dat)
V <- ncol(dat)

# descriptive statistics of the observed data
psych::describe(dat)

#*******************************
# mice: imputation using predictive mean matching
imp1a <- mice::mice(dat, m = 5, maxit = 10)
# list of completed datasets, wrapped as a mitools imputationList
datlist1a <- miceadds::mids2datlist(imp1a)
datlist1a <- mitools::imputationList(datlist1a)
print(datlist1a)

#*******************************
# jomo: imputation treating all variables as categorical

# jomo needs category codes running from 1 to N, so every variable is
# recoded with sirt::categorize (lowest category set to 1)
dat.categ <- sirt::categorize(dat, categorical = vars, lowest = 1)
dat0 <- dat.categ$data

# number of categories per variable = largest observed code in each column
Y_numcat <- apply(dat0, MARGIN = 2, FUN = max, na.rm = TRUE)
imp1b <- jomo::jomo1cat(
  Y_cat = dat0,
  Y_numcat = Y_numcat,
  nburn = 100,
  nbetween = 10,
  nimp = 5
)

# map the recoded categories back to the original category values
imp1b_recoded <- sirt::decategorize(
  imp1b,
  categ_design = dat.categ$categ_design
)
# list of completed datasets, wrapped as a mitools imputationList
datlist1b <- mitools::imputationList(miceadds::jomo2datlist(imp1b_recoded))

#****************************
# compare frequency tables for both imputation packages
# Relative frequency table of a vector: each category's count divided by
# the total count of tabulated values (values dropped by table(), such as
# NA, do not enter the total).
fun_prop <- function(variable) {
  prop.table(table(variable))
}
                
# variable rskreb71: proportions within each imputed dataset (a = mice,
# b = jomo), then combined over the imputations (NMI = FALSE, i.e. not
# nested multiple imputation)
res1a <- with(datlist1a, fun_prop(rskreb71))
res1b <- with(datlist1b, fun_prop(rskreb71))
pooled1a <- miceadds::NMIcombine(qhat = res1a, NMI = FALSE)
summary(pooled1a)
pooled1b <- miceadds::NMIcombine(qhat = res1b, NMI = FALSE)
summary(pooled1b)

# variable posatt: same comparison
res2a <- with(datlist1a, fun_prop(posatt))
res2b <- with(datlist1b, fun_prop(posatt))
pooled2a <- miceadds::NMIcombine(qhat = res2a, NMI = FALSE)
summary(pooled2a)
pooled2b <- miceadds::NMIcombine(qhat = res2b, NMI = FALSE)
summary(pooled2b)

Run the code above in your browser using DataLab