Learn R Programming

sampling (version 0.1)

samplecube: Sample cube method

Description

Select a balanced sample (a vector of 0 and 1) or an almost balanced sample. First, the flight phase is applied. Next, if needed, the landing phase is applied to the result of the flight phase.

Usage

samplecube(X,pik,order=1,comment=TRUE,method=1)

Arguments

X
matrix of auxiliary variables on which the sample must be balanced.
pik
vector of inclusion probabilities.
order
1, the data are sorted randomly, 2, the data are sorted in decreasing order, 3, the order of the data remains unchanged.
comment
a comment is written during the execution if comment is equal to TRUE.
method
1, for a landing phase by linear programming, 2, for a landing phase by suppression of variables.

encoding

latin1

References

Chauvet, G. and Tillé, Y. (2004). A fast algorithm of balanced sampling. Submitted for publication. Chauvet, G. and Tillé, Y. (2005). New SAS macros for balanced sampling. In INSEE, editor, Journées de Méthodologie Statistique, Paris. Deville, J.-C. and Tillé, Y. (2004). Efficient balanced sampling: the cube method. Biometrika, 91, 893-912. Deville, J.-C. and Tillé, Y. (2005). Variance approximation under balanced sampling. Journal of Statistical Planning and Inference, 128/2:411--425.

See Also

landingcube, fastflightcube

Examples

Run this code
############
## Example 1
############
# Matrix of balancing variables: a column of ones and one
# auxiliary variable, for a population of 9 units
X <- cbind(
  rep(1, times = 9),
  c(1.1, 2.2, 3.1, 4.2, 5.1, 6.3, 7.1, 8.1, 9.1)
)
# Vector of inclusion probabilities (1/3 for every unit).
# The sample has the size equal to 3.
pik <- rep(1 / 3, times = 9)
# Selection of the sample: the data are sorted randomly (order = 1)
# and a comment is written during the execution
s <- samplecube(X, pik, order = 1, comment = TRUE)
# The selected sample
s
############
## Example 2
############
# 2 strata and 2 auxiliary variables
# Simulation for the computation of the inclusion probabilities
# One row per unit; the values are entered row by row
X <- matrix(
  c(
    1, 0,  1, 2,
    1, 0,  2, 5,
    1, 0,  3, 7,
    1, 0,  4, 9,
    1, 0,  5, 1,
    1, 0,  6, 5,
    1, 0,  7, 7,
    1, 0,  8, 6,
    1, 0,  9, 9,
    1, 0, 10, 3,
    0, 1, 11, 3,
    0, 1, 12, 2,
    0, 1, 13, 3,
    0, 1, 14, 6,
    0, 1, 15, 8,
    0, 1, 16, 9,
    0, 1, 17, 1,
    0, 1, 18, 2,
    0, 1, 19, 3,
    0, 1, 20, 4
  ),
  ncol = 4,
  byrow = TRUE
)
pik <- rep(1 / 2, times = 20)
# Accumulator for the number of times each unit is selected
ppp <- numeric(20)
sim <- 100
for (i in seq_len(sim)) {
  ppp <- ppp + samplecube(X, pik, order = 1, comment = FALSE)
}
# Empirical selection frequencies, to be compared with pik
ppp <- ppp / sim
print(ppp)
print(pik)
############
## Example 3
############
# Unequal probability sampling with the cube method
# 1 auxiliary variable: the inclusion probability itself
N <- 200
pik <- runif(N)
# The balancing matrix is pik arranged as a single column
pikfin <- samplecube(
  matrix(pik, nrow = N, ncol = 1),
  pik,
  order = 1,
  comment = TRUE
)
############
## Example 4
############
# p auxiliary variables generated randomly
# Random inclusion probabilities
N <- 1000
p <- 7
x <- rnorm(N * p, mean = 10, sd = 3)
pik <- runif(N)
# N x p matrix of the random auxiliary variables
X <- matrix(x, nrow = N, ncol = p)
# Append a column of ones and the inclusion probabilities
X <- cbind(X, rep(1, times = N), pik)
pikfin <- samplecube(X, pik, order = 1, comment = TRUE)
############
## Example 5
############
# Strata and an auxiliary variable
N <- 5000
a <- rep(1, times = N)
b <- rep(0, times = N)
# Indicator variables of three strata of N units each
V1 <- c(a, b, b)
V2 <- c(b, a, b)
V3 <- c(b, b, a)
# Auxiliary variable: the index of the unit in the population
V4 <- 1:(3 * N)
# Balance on the three strata indicators AND the auxiliary variable;
# V4 must be part of X, otherwise it is built but never used
X <- cbind(V1, V2, V3, V4)
pik <- rep(2 / 10, times = 3 * N)
pikfin <- samplecube(X, pik, order = 1, comment = TRUE)
############
## Example 6
############
# Selection of a balanced sample in the MU284 population,
# simulation and comparison of the variance with
# unequal probability sampling of fixed sample size.
#####################################################
data(MU284)
# Computation of the inclusion probabilities
pik <- inclusionprobabilities(MU284$P75, 50)
# Definition of the matrix of balancing variables
X <- cbind(
  MU284$P75, MU284$CS82, MU284$SS82,
  MU284$S82, MU284$ME84, MU284$REV84
)
# Computation of the Horvitz-Thompson estimator for a balanced sample
crossprod(MU284$RMT85 / pik, samplecube(X, pik, 1, TRUE))
# Computation of the Horvitz-Thompson estimator for an unequal
# probability sample (balanced on the inclusion probabilities only)
crossprod(MU284$RMT85 / pik, samplecube(matrix(pik), pik, 1, TRUE))
# Simulations; for a better accuracy, let sim=50
sim <- 8
resu1 <- numeric(sim)
resu2 <- numeric(sim)
for (i in seq_len(sim)) {
  # End each progress message with a newline so that successive
  # messages are not run together on a single line
  cat("Simulation number ", i, "\n")
  resu1[i] <- crossprod(MU284$RMT85 / pik, samplecube(X, pik, 1, FALSE))
  resu2[i] <- crossprod(
    MU284$RMT85 / pik,
    samplecube(matrix(pik), pik, 1, FALSE)
  )
}
# Summary and boxplot of the two series of estimates
summary(resu1)
summary(resu2)
ss <- cbind(resu1, resu2)
colnames(ss) <- c("balanced sampling", "uneq prob sampling")
boxplot(data.frame(ss), las = 1)

Run the code above in your browser using DataLab