############
## Example 1
############
# Two-stage cluster sampling on the 'swissmunicipalities' data.
data("swissmunicipalities")
# Stage 1: clusters are defined by 'REG' (region, 7 categories);
#          4 regions are selected.
# Stage 2: clusters are defined by 'CT' (canton, 26 categories);
#          1 canton is selected from each of the 4 sampled regions.
# Method: simple random sampling without replacement
# (equal probability, without replacement).
m <- mstage(
  swissmunicipalities,
  stage = list("cluster", "cluster"),
  varnames = list("REG", "CT"),
  size = list(4, rep(1, 4)),
  method = "srswor"
)
# The first stage is m$'1' and the second stage is m$'2'.
# Extract the observed data; the column order differs from the column
# order in the swissmunicipalities database.
getdata(swissmunicipalities, m)
############
## Example 2
############
# Two-stage element sampling
# Example from An and Watts (New SAS procedures for Analysis of Sample Survey Data)
# Build an artificial data frame with 235 rows and 3 columns:
#   state  - 2 categories: 'nc' (first 165 rows) and 'sc' (last 70 rows)
#   region - 3 categories (1, 2 and 3): within 'nc' 100/50/15 rows for
#            regions 1/2/3, within 'sc' 30/40 rows for regions 1/2
#   income - 100 * U(0,1) draws (no seed is set, so values vary per run)
data <- data.frame(
  state = rep(c("nc", "sc"), times = c(165, 70)),
  region = rep(c(1, 2, 3, 1, 2), times = c(100, 50, 15, 30, 40)),
  income = 100 * runif(235)
)
# Method: simple random sampling with replacement ("srswr").
# Stage 1 draws 25 units; stage 2 draws 10 units from those 25.
# No 'stage' or 'varnames' are supplied for this element sampling.
m <- mstage(
  data,
  size = list(25, 10),
  method = "srswr"
)
# Extract the observed data.
getdata(data, m)
############
## Example 3
############
# One-stage stratified cluster sampling
# Reuses the 'data' object built in Example 2.
# Stage "stratified": 'state' is the stratification variable; the sizes
#   passed are 20 for the first stratum and 10 for the second stratum.
# Stage "cluster": 'region' is the clustering variable; 1 cluster (region)
#   is drawn in each stratum.
# Method: "srswor".
m <- mstage(
  data,
  stage = list("stratified", "cluster"),
  varnames = list("state", "region"),
  size = list(c(20, 10), c(1, 1)),
  method = "srswor"
)
# Extract the observed data.
getdata(data, m)
############
## Example 4
############
# Two-stage cluster sampling
# Uses the same data set as Example 1.
data("swissmunicipalities")
# Stage 1: the clustering variable is 'REG' (region) with 7 categories;
#   4 regions are drawn, so each region gets the probability 4/7.
# Stage 2: the clustering variable is 'CT' (canton) with 26 categories;
#   1 canton is drawn from each selected region, so a canton in a region
#   with k cantons gets the probability 1/k:
#     region 1: 3 cantons -> 1/3      region 5: 7 cantons -> 1/7
#     region 2: 5 cantons -> 1/5      region 6: 6 cantons -> 1/6
#     region 3: 3 cantons -> 1/3      region 7: 1 canton  -> 1
#     region 4: 1 canton  -> 1
# A list of selection probabilities is needed at each stage.
# The method is systematic sampling (unequal probabilities, without
# replacement).
# ls is the list of sizes: 4 regions, then 1 canton per selected region.
ls <- list(4, rep(1, 4))
# prob is the list of the selection probabilities: one vector for stage 1
# and, for stage 2, one vector per region (in the region order above).
prob <- list(
  rep(4 / 7, 7),
  lapply(c(3, 5, 3, 1, 7, 6, 1), function(k) rep(1 / k, k))
)
# Draw the two-stage cluster sample using the sizes in 'ls' and the
# selection probabilities in 'prob'.
m <- mstage(
  swissmunicipalities,
  stage = list("cluster", "cluster"),
  varnames = list("REG", "CT"),
  size = ls,
  method = "systematic",
  pik = prob
)
# Extract the observed data.
getdata(swissmunicipalities, m)
# Run the code above in your browser using DataLab