# The data sets were generated as follows
lapply(c("magrittr", "dplyr"), library, character.only = TRUE)
# Fix the RNG state so the simulated data sets are reproducible.
# NOTE: every random draw below (rnorm, sample, rbinom, sample_n) must stay in
# this order; reordering them changes the data produced from this seed.
set.seed(1234)

# Simulation settings ----
nClusters <- 10 # number of clusters in the population
p <- 5 # number of covariates
# One standardised random intercept per cluster; scale() returns a one-column
# matrix, which is indexed like a plain vector further down.
Uj <- scale(rnorm(nClusters))
nPop <- 1e6 # population size
nSample <- 1e3 # rows drawn per training cluster
nTest <- 1e3 # rows drawn per test cluster

# Simulate the full population ----
X <- replicate(p, rnorm(nPop)) # nPop x p covariate matrix
Beta <- rnorm(p) # fixed-effect coefficients
cluster <- sample(seq_len(nClusters), nPop, replace = TRUE)
table(cluster) # cluster sizes (auto-printed when run at top level)
# `cluster` already holds values in 1..nClusters, so it indexes Uj directly.
eta <- X %*% Beta + Uj[cluster]
# Binary outcome drawn with success probability inverse-logit(eta).
y <- rbinom(nPop, 1, binomial()$linkinv(eta))
Dt <- data.frame(y, X, cluster)
colnames(Dt) <- tolower(colnames(Dt))

# Split the clusters: first half for training, the remainder for testing ----
# With nClusters = 10 this is 1:5 and 6:10.
trainClusters <- seq_len(nClusters %/% 2)
testClusters <- setdiff(seq_len(nClusters), trainClusters)

# Draw nSample rows (without replacement) from each training cluster.
# sample_n() is kept as in the original to avoid any risk of altering the draws.
clustertraindata <- Dt %>%
  filter(cluster %in% trainClusters) %>%
  group_by(cluster) %>%
  sample_n(size = nSample) %>%
  as.data.frame()

# Draw nTest rows (without replacement) from each test cluster.
clustertestdata <- Dt %>%
  filter(cluster %in% testClusters) %>%
  group_by(cluster) %>%
  sample_n(size = nTest) %>%
  as.data.frame()
# Run the code above in your browser using DataLab