if (FALSE) {
##
## 1. Area-level model with binary response
##
data(DemoData2)
data(DemoMap2)
fit0 <- smoothSurvey(data=DemoData2,
Amat=DemoMap2$Amat, response.type="binary",
responseVar="tobacco.use", strataVar="strata",
weightVar="weights", regionVar="region",
clusterVar = "~clustid+id", CI = 0.95)
summary(fit0)
# if only direct estimates without smoothing is of interest
fit0.dir <- smoothSurvey(data=DemoData2,
Amat=DemoMap2$Amat, response.type="binary",
responseVar="tobacco.use", strataVar="strata",
weightVar="weights", regionVar="region",
clusterVar = "~clustid+id", CI = 0.95, smooth = FALSE)
# posterior draws can be returned with save.draws = TRUE
fit0.draws <- smoothSurvey(data=DemoData2,
Amat=DemoMap2$Amat, response.type="binary",
responseVar="tobacco.use", strataVar="strata",
weightVar="weights", regionVar="region",
clusterVar = "~clustid+id", CI = 0.95, save.draws = TRUE)
# notice the posterior draws are on the latent scale
head(fit0.draws$draws.est[, 1:10])
# Example with region-level covariates
Xmat <- aggregate(age~region, data = DemoData2,
FUN = function(x) mean(x))
fit1 <- smoothSurvey(data=DemoData2,
Amat=DemoMap2$Amat, response.type="binary",
X = Xmat,
responseVar="tobacco.use", strataVar="strata",
weightVar="weights", regionVar="region",
clusterVar = "~clustid+id", CI = 0.95)
# Example with using only direct estimates as input instead of the full data
direct <- fit0$direct[, c("region", "direct.est", "direct.var")]
fit2 <- smoothSurvey(data=NULL, direct.est = direct,
Amat=DemoMap2$Amat, regionVar="region",
responseVar="direct.est", direct.est.var = "direct.var",
response.type = "binary")
# Check it is the same as fit0
plot(fit2$smooth$mean, fit0$smooth$mean)
# Example with using only direct estimates as input,
# and after transformation into a Gaussian smoothing model
# Notice: the output are on the same scale as the input
# and in this case, the logit estimates.
direct.logit <- fit0$direct[, c("region", "direct.logit.est", "direct.logit.var")]
fit3 <- smoothSurvey(data=NULL, direct.est = direct.logit,
Amat=DemoMap2$Amat, regionVar="region",
responseVar="direct.logit.est", direct.est.var = "direct.logit.var",
response.type = "gaussian")
# Check it is the same as fit0
plot(fit3$smooth$mean, fit0$smooth$logit.mean)
# Example with non-spatial smoothing using IID random effects
fit4 <- smoothSurvey(data=DemoData2, response.type="binary",
responseVar="tobacco.use", strataVar="strata",
weightVar="weights", regionVar="region",
clusterVar = "~clustid+id", CI = 0.95)
# Example with missing regions in the raw input
DemoData2.sub <- subset(DemoData2, region != "central")
fit.without.central <- smoothSurvey(data=DemoData2.sub,
Amat=NULL, response.type="binary",
responseVar="tobacco.use", strataVar="strata",
weightVar="weights", regionVar="region",
clusterVar = "~clustid+id", CI = 0.95)
fit.without.central$direct
fit.without.central$smooth
fit.with.central <- smoothSurvey(data=DemoData2.sub,
Amat=NULL, region.list = unique(DemoData2$region),
response.type="binary",
responseVar="tobacco.use", strataVar="strata",
weightVar="weights", regionVar="region",
clusterVar = "~clustid+id", CI = 0.95)
fit.with.central$direct
fit.with.central$smooth
# Using the formula argument, further customizations can be added to the
# model fitted. For example, we can fit the Fay-Harriot model with
# IID effect instead of the BYM2 random effect as follows.
# The "region.struct" and "hyperpc1" are picked to match internal object
# names. Other object names can be inspected from the source of smoothSurvey.
fit5 <- smoothSurvey(data=DemoData2,
Amat=DemoMap2$Amat, response.type="binary",
formula = "f(region.struct, model = 'iid', hyper = hyperpc1)",
pc.u = 1, pc.alpha = 0.01,
responseVar="tobacco.use", strataVar="strata",
weightVar="weights", regionVar="region",
clusterVar = "~clustid+id", CI = 0.95)
# Check it is the same as fit4, notice the region order may be different
regions <- fit5$smooth$region
plot(fit4$smooth[match(regions, fit4$smooth$region),]$logit.mean, fit5$smooth$logit.mean)
##
## 2. Unit-level model with binary response
##
# For unit-level models, we need to create stratification variable within regions
data <- DemoData2
data$urbanicity <- "rural"
data$urbanicity[grep("urban", data$strata)] <- "urban"
# Beta-binomial likelihood is used in this model
fit6 <- smoothSurvey(data=data,
Amat=DemoMap2$Amat, response.type="binary",
X = Xmat, is.unit.level = TRUE,
responseVar="tobacco.use", strataVar.within = "urbanicity",
regionVar="region", clusterVar = "clustid", CI = 0.95)
# We may use aggregated PSU-level counts as input as well
# in the case of modeling a binary outcome
data.agg <- aggregate(tobacco.use~region + urbanicity + clustid,
data = data, FUN = sum)
data.agg.total <- aggregate(tobacco.use~region + urbanicity + clustid,
data = data, FUN = length)
colnames(data.agg.total)[4] <- "total"
data.agg <- merge(data.agg, data.agg.total)
head(data.agg)
fit7 <- smoothSurvey(data=data.agg,
Amat=DemoMap2$Amat, response.type="binary",
X = Xmat, is.unit.level = TRUE, is.agg = TRUE,
responseVar = "tobacco.use", strataVar.within = "urbanicity",
totalVar = "total", regionVar="region", clusterVar = "clustid", CI = 0.95)
# Check it is the same as fit6
plot(fit6$smooth$mean, fit7$smooth$mean)
##
## 3. Area-level model with continuous response
##
# The smoothing model is the same as area-level model with binary response
# the continuous direct estimates are smoothed instead of
# their logit-transformed versions for binary response.
fit8 <- smoothSurvey(data=DemoData2, Amat=DemoMap2$Amat,
response.type="gaussian", responseVar="age", strataVar="strata",
weightVar="weights", regionVar="region",
pc.u.phi = 0.5, pc.alpha.phi = 0.5,
clusterVar = "~clustid+id", CI = 0.95)
##
## 4. Unit-level model with continuous response
## (or nested error models)
# The unit-level model assumes for each of the i-th unit,
# Y_{i} ~ intercept + region_effect + IID_i
# where IID_i is the error term specific to i-th unit
# When more than one level of cluster sampling is carried out,
# they are ignored here. Only the input unit is considered.
# So here we do not need to specify clusterVar any more.
fit9 <- smoothSurvey(data= data,
Amat=DemoMap2$Amat, response.type="gaussian",
is.unit.level = TRUE, responseVar="age", strataVar.within = NULL,
regionVar="region", clusterVar = NULL, CI = 0.95)
# To compare, we may also model PSU-level responses. As an illustration,
data.median <- aggregate(age~region + urbanicity + clustid,
data = data, FUN = median)
fit10 <- smoothSurvey(data= data.median,
Amat=DemoMap2$Amat, response.type="gaussian",
is.unit.level = TRUE, responseVar="age", strataVar.within = NULL,
regionVar="region", clusterVar = "clustid", CI = 0.95)
# To further incorporate within-area stratification
fit11 <- smoothSurvey(data = data,
Amat = DemoMap2$Amat, response.type = "gaussian",
is.unit.level = TRUE, responseVar="age", strataVar.within = "urbanicity",
regionVar = "region", clusterVar = NULL, CI = 0.95)
# Notice the usual output is now stratified within each region
# The aggregated estimates require strata proportions for each region
# For illustration, we set strata population proportions below
prop <- data.frame(region = unique(data$region),
urban = 0.3,
rural = 0.7)
fit12 <- smoothSurvey(data=data,
Amat=DemoMap2$Amat, response.type="gaussian",
is.unit.level = TRUE, responseVar="age", strataVar.within = "urbanicity",
regionVar="region", clusterVar = NULL, CI = 0.95,
weight.strata = prop)
# aggregated outcome
head(fit12$smooth.overall)
# Compare aggregated outcome with direct aggregating posterior means.
# There could be small differences if only 1000 posterior draws are taken.
est.urb <- subset(fit11$smooth, strata == "urban")
est.rural <- subset(fit11$smooth, strata == "rural")
est.mean.post <- est.urb$mean * 0.3 + est.rural$mean * 0.7
plot(fit12$smooth.overall$mean, est.mean.post)
##
## 6. Unit-level model with continuous response and unit-level covariate
##
# For area-level prediction, area-level covariate mean needs to be
# specified in X argument. And unit-level covariate names are specified
# in X.unit argument.
set.seed(1)
sim <- data.frame(region = rep(c(1, 2, 3, 4), 1000),
X1 = rnorm(4000), X2 = rnorm(4000))
Xmean <- aggregate(.~region, data = sim, FUN = sum)
sim$Y <- rnorm(4000, mean = sim$X1 + 0.3 * sim$X2 + sim$region)
samp <- sim[sample(1:4000, 20), ]
fit.sim <- smoothSurvey(data=samp ,
X.unit = c("X1", "X2"),
X = Xmean, Amat=NULL, response.type="gaussian",
is.unit.level = TRUE, responseVar="Y", regionVar = "region",
pc.u = 1, pc.alpha = 0.01, CI = 0.95)
}
Run the code above in your browser using DataLab