if (FALSE) {
## ---------------------- Example 1 ------------------------##
## First phase is a stratified multistage sample ##
## Second phase is a simple random sample ##
##----------------------------------------------------------##
data('library_multistage_sample', package = 'svrep')
# Load first-phase sample
twophase_sample <- library_multistage_sample
# Select second-phase sample
set.seed(2022)
twophase_sample[['SECOND_PHASE_SELECTION']] <- sampling::srswor(
n = 100,
N = nrow(twophase_sample)
) |> as.logical()
# Declare survey design
twophase_design <- twophase(
method = "full",
data = twophase_sample,
# Identify the subset of first-phase elements
# which were selected into the second-phase sample
subset = ~ SECOND_PHASE_SELECTION,
# Describe clusters, probabilities, and population sizes
# at each phase of sampling
id = list(~ PSU_ID + SSU_ID,
~ 1),
probs = list(~ PSU_SAMPLING_PROB + SSU_SAMPLING_PROB,
NULL),
fpc = list(~ PSU_POP_SIZE + SSU_POP_SIZE,
NULL)
)
# Get quadratic form matrix for the first phase design
first_phase_sigma <- get_design_quad_form(
design = twophase_design$phase1$full,
variance_estimator = "Stratified Multistage SRS"
)
# Subset to only include cases sampled in second phase
first_phase_sigma <- first_phase_sigma[twophase_design$subset,
twophase_design$subset]
# Get quadratic form matrix for the second-phase design
second_phase_sigma <- get_design_quad_form(
design = twophase_design$phase2,
variance_estimator = "Ultimate Cluster"
)
# Get second-phase joint probabilities
n <- twophase_design$phase2$fpc$sampsize[1,1]
N <- twophase_design$phase2$fpc$popsize[1,1]
second_phase_joint_probs <- Matrix::Matrix((n/N)*((n-1)/(N-1)),
nrow = n, ncol = n)
diag(second_phase_joint_probs) <- rep(n/N, times = n)
# Get quadratic form for entire two-phase variance estimator
twophase_quad_form <- make_twophase_quad_form(
sigma_1 = first_phase_sigma,
sigma_2 = second_phase_sigma,
phase_2_joint_probs = second_phase_joint_probs
)
# Use for variance estimation
rep_factors <- make_gen_boot_factors(
Sigma = twophase_quad_form,
num_replicates = 500
)
library(survey)
combined_weights <- 1/twophase_design$prob
twophase_rep_design <- svrepdesign(
data = twophase_sample |>
subset(SECOND_PHASE_SELECTION),
type = 'other',
repweights = rep_factors,
weights = combined_weights,
combined.weights = FALSE,
scale = attr(rep_factors, 'scale'),
rscales = attr(rep_factors, 'rscales')
)
svymean(x = ~ LIBRARIA, design = twophase_rep_design)
## ---------------------- Example 2 ------------------------##
## First phase is a stratified systematic sample ##
## Second phase is nonresponse, modeled as Poisson sampling ##
##----------------------------------------------------------##
data('library_stsys_sample', package = 'svrep')
# Determine quadratic form for full first-phase sample variance estimator
full_phase1_quad_form <- make_quad_form_matrix(
variance_estimator = "SD2",
cluster_ids = library_stsys_sample[,'FSCSKEY',drop=FALSE],
strata_ids = library_stsys_sample[,'SAMPLING_STRATUM',drop=FALSE],
strata_pop_sizes = library_stsys_sample[,'STRATUM_POP_SIZE',drop=FALSE],
sort_order = library_stsys_sample$SAMPLING_SORT_ORDER
)
# Identify cases included in phase two sample
# (in this example, respondents)
phase2_inclusion <- (
library_stsys_sample$RESPONSE_STATUS == "Survey Respondent"
)
phase2_sample <- library_stsys_sample[phase2_inclusion,]
# Estimate response propensities
response_propensities <- glm(
data = library_stsys_sample,
family = quasibinomial('logit'),
formula = phase2_inclusion ~ 1,
weights = 1/library_stsys_sample$SAMPLING_PROB
) |>
predict(type = "response",
newdata = phase2_sample)
# Estimate conditional joint inclusion probabilities for second phase
phase2_joint_probs <- outer(response_propensities, response_propensities)
diag(phase2_joint_probs) <- response_propensities
# Determine quadratic form for variance estimator of second phase
# (Horvitz-Thompson estimator for nonresponse modeled as Poisson sampling)
phase2_quad_form <- make_quad_form_matrix(
variance_estimator = "Horvitz-Thompson",
joint_probs = phase2_joint_probs
)
# Create combined quadratic form for entire design
twophase_quad_form <- make_twophase_quad_form(
sigma_1 = full_phase1_quad_form[phase2_inclusion, phase2_inclusion],
sigma_2 = phase2_quad_form,
phase_2_joint_probs = phase2_joint_probs
)
combined_weights <- 1/(phase2_sample$SAMPLING_PROB * response_propensities)
# Use for variance estimation
rep_factors <- make_gen_boot_factors(
Sigma = twophase_quad_form,
num_replicates = 500
)
library(survey)
twophase_rep_design <- svrepdesign(
data = phase2_sample,
type = 'other',
repweights = rep_factors,
weights = combined_weights,
combined.weights = FALSE,
scale = attr(rep_factors, 'scale'),
rscales = attr(rep_factors, 'rscales')
)
svymean(x = ~ LIBRARIA, design = twophase_rep_design)
}
Run the code above in your browser using DataLab