library(MASS)
# ---- Simulation settings ----
# Sample size (number of subjects)
n <- 700
# Total count of covariates across all data sources
p <- 40
# Count of missing groups the subjects are divided into
ngroup <- 4
# Count of data sources the covariates are divided into
nsource <- 4
# First covariate index of each data source
cov_index <- c(1, 13, 25, 37)
# First subject index of each missing group
sub_index <- c(1, 31, 251, 471)
# Per missing group: index of the data source that is missing
# (NULL means the group has no missing source)
miss_source <- list(NULL, 3, 2, 1)
# Simulate the complete design matrix: n draws from a p-variate normal with
# zero mean and a covariance matrix that is 1 on the diagonal and 0.4 elsewhere
set.seed(1)
sigma <- diag(1 - 0.4, nrow = p, ncol = p) + matrix(0.4, nrow = p, ncol = p)
X <- mvrnorm(n = n, mu = numeric(p), Sigma = sigma)
# Introduce block-wise missingness: for every missing group, set to NA the
# columns of each data source listed for that group in miss_source. A group
# whose miss_source entry is NULL stays fully observed; an entry may also list
# several sources, each of which is blanked out in turn.
for (i in seq_len(ngroup)) {
  # Rows of group i end just before the next group starts; the last group
  # runs through row n
  row_end <- if (i == ngroup) n else sub_index[i + 1] - 1
  miss_rows <- sub_index[i]:row_end
  # Iterating over the entry skips NULL (zero iterations) and avoids a
  # length > 1 condition inside `if` when several sources are missing
  for (src in miss_source[[i]]) {
    # Columns of a source end just before the next source starts; the last
    # source runs through column p
    col_end <- if (src == nsource) p else cov_index[src + 1] - 1
    X[miss_rows, cov_index[src]:col_end] <- NA
  }
}
# Logical matrix marking which entries of X are missing
miss <- is.na(X)
# Column indexes of the predictor and response variables
ind_x <- 13:24
ind_y <- 25:36
# Rows (subjects) whose data need imputation
newdata <- X[31:250, ]
# Run the imputation
# NOTE(review): imputeglm.predict() is neither defined nor attached in this
# script; presumably it comes from the BlockMissingData package - confirm and
# load it before running.
result <- imputeglm.predict(
  X = X,
  ind_y = ind_y,
  ind_x = ind_x,
  miss = miss,
  newdata = newdata
)
# Run the code above in your browser using DataLab