#mean auxiliary variables for the populations in the domains
data(JoSAE.domain.data)
#data for the sampled elements
data(JoSAE.sample.data)
plot(biomass.ha~mean.canopy.ht,JoSAE.sample.data)
##small area estimation
#get an overview of the domains
#mean of the response and predictor variables from the sample. For the response this is the sample mean estimator.
tmp <- aggregate(JoSAE.sample.data[,c("biomass.ha", "mean.canopy.ht")]
, by=list(domain.ID=JoSAE.sample.data$domain.ID), mean)
names(tmp)[2:3] <- paste(names(tmp)[2:3], ".bar.sample", sep="")
#number of samples within the domains
tmp1 <- aggregate(cbind(n.i=JoSAE.sample.data$biomass.ha)
, by=list(domain.ID=JoSAE.sample.data$domain.ID), length)
#combine it with the population information of the domains
overview.domains <- cbind(JoSAE.domain.data, tmp[,-1], n.i=tmp1[,-1])
#fit the models
#lm - the auxiliary variable explains forest biomass rather good
summary(fit.lm <- lm(biomass.ha ~ mean.canopy.ht, data=JoSAE.sample.data))
#lme
summary(fit.lme <- lme(biomass.ha ~ mean.canopy.ht, data=JoSAE.sample.data
, random=~1|domain.ID))
#mean lm residual -- needed for GREG
overview.domains$mean.resid.lm <- aggregate(resid(fit.lm)
, by=list(domain.ID=JoSAE.sample.data$domain.ID)
, mean)[,2]
#mean lme residual -- needed for EBLUP.var
overview.domains$mean.resid.lme <- aggregate(resid(fit.lme, level=0)
, by=list(domain.ID=JoSAE.sample.data$domain.ID)
, mean)[,2]
#synthetic estimate
overview.domains$synth <- predict(fit.lm
, newdata=
data.frame(mean.canopy.ht=JoSAE.domain.data$mean.canopy.ht.bar))
#GREG estimate
overview.domains$GREG <- overview.domains$synth + overview.domains$mean.resid.lm
#EBLUP estimate
overview.domains$EBLUP <- predict(fit.lme
, newdata=data.frame(mean.canopy.ht=JoSAE.domain.data$mean.canopy.ht.bar
, domain.ID=JoSAE.domain.data$domain.ID)
, level=1)
#gamma
overview.domains$gamma.i <- JoSAE.gamma.i.f(lme.obj=fit.lme
, n.i=overview.domains$n.i)
#variance of the sample mean estimate
overview.domains$sample.var <-
aggregate(JoSAE.sample.data$biomass.ha
, by=list(domain.ID=JoSAE.sample.data$domain.ID), var)[,-1]/overview.domains$n.i
#variance of the GREG estimate
overview.domains$GREG.var <-
aggregate(resid(fit.lm)
, by=list(domain.ID=JoSAE.sample.data$domain.ID),var)[,-1]/overview.domains$n.i
#variance of the EBLUP
#compute the A.i matrices for all domains (only needed once)
domain.ID <- JoSAE.domain.data$domain.ID
#initialize the result vector
a.i.mats <- vector(mode="list", length=length(domain.ID))
for(i in 1:length(domain.ID)){
print(i)
a.i.mats[[i]] <- eblup.mse.f.c2.ai(gamma.i=overview.domains$gamma.i[overview.domains$domain.ID==domain.ID[i]]
, n.i=overview.domains$n.i[overview.domains$domain.ID==domain.ID[i]]
, lme.obj=fit.lme
, X.i=as.matrix(cbind(i=1
, x=JoSAE.sample.data[JoSAE.sample.data$domain.ID==domain.ID[i], "mean.canopy.ht"]
))
)
}
#add all the matrices
sum.A.i.mats <- Reduce("+", a.i.mats)
#the assymptotic var-cov matrix
asy.var.cov.mat <- eblup.mse.f.c3.asyvarcovarmat(n.i=overview.domains$n.i
, lme.obj=fit.lme)
#put together the variance components
###### Some changes are required here, if you apply it to own data!
result <- NULL
for(i in 1:length(domain.ID)){
print(i)
#first comp
mse.c1.tmp <- eblup.mse.f.c1(gamma.i=overview.domains$gamma.i[overview.domains$domain.ID==domain.ID[i]]
, n.i=overview.domains$n.i[overview.domains$domain.ID==domain.ID[i]]
, lme.obj=fit.lme)
#second comp
mse.c2.tmp <- eblup.mse.f.c2(gamma.i=overview.domains$gamma.i[overview.domains$domain.ID==domain.ID[i]]
, X.i=as.matrix(cbind(i=1##cbind!!
, x=JoSAE.sample.data[JoSAE.sample.data$domain.ID==domain.ID[i]
, "mean.canopy.ht"]##change to other varnames if necessary
))
, X.bar.i =as.matrix(rbind(i=1##rbind!!
, x=JoSAE.domain.data[JoSAE.domain.data$domain.ID==domain.ID[i]
, "mean.canopy.ht.bar"]##change to other varnames if necessary
))
, sum.A.i = sum.A.i.mats
)
#third comp
mse.c3.tmp <- eblup.mse.f.c3(n.i=overview.domains$n.i[overview.domains$domain.ID==domain.ID[i]]
, lme.obj=fit.lme
, asympt.var.covar=asy.var.cov.mat)
#third star comp
mse.c3.star.tmp <- eblup.mse.f.c3.star( n.i=overview.domains$n.i[overview.domains$domain.ID==domain.ID[i]]
, lme.obj=fit.lme
, mean.resid.i=overview.domains$mean.resid.lme[overview.domains$domain.ID==domain.ID[i]]
, asympt.var.covar=asy.var.cov.mat)
#save result
result <- rbind(result, data.frame(kommune=domain.ID[i]
, n.i=overview.domains$n.i[overview.domains$domain.ID==domain.ID[i]]
, c1=as.numeric(mse.c1.tmp), c2=as.numeric(mse.c2.tmp)
, c3=as.numeric(mse.c3.tmp), c3star=as.numeric(mse.c3.star.tmp)))
}
#derive the actual EBLUP variances
overview.domains$EBLUP.var.1 <- result$c1 + result$c2 + 2* result$c3star
overview.domains$EBLUP.var.2 <- result$c1 + result$c2 + result$c3 + result$c3star
#display the estimates and the sampling error (sqrt(var)) in two tables
round(data.frame(overview.domains[,c("domain.ID", "n.i", "N.i")],
overview.domains[,c("biomass.ha.bar.sample", "GREG", "synth", "EBLUP")]),2)
#the sampling errors of the eblup is mostly smaller than the one of the greg estimate.
#both are always smaller than the sample mean variance.
round(data.frame(overview.domains[,c("domain.ID", "n.i", "N.i")],
sqrt(overview.domains[,c("sample.var", "GREG.var", "EBLUP.var.1", "EBLUP.var.2")])),2)
##END: small area estimation
## aggregate the results to the large study area that contains all domains
#variance of the sample mean for the whole area
var.all.sampmean <- var(JoSAE.sample.data$biomass.ha)/sum(overview.domains$n.i)
#variance of the GREG estimate for the whole area
var.all.greg <- var(resid(fit.lm))/sum(overview.domains$n.i)
#EBLUP variance for the whole area
#gamma
gamma.all <- JoSAE.gamma.i.f(lme.obj=fit.lme, n.i=sum(overview.domains$n.i))
#first comp
mse.c1.tmp <- eblup.mse.f.c1(gamma.i=gamma.all
, n.i=sum(overview.domains$n.i)
, lme.obj=fit.lme)
#second comp
mse.c2.tmp <- eblup.mse.f.c2(gamma.i=gamma.all
, X.i=as.matrix(cbind(i=1##cbind!!
, x=mean(JoSAE.sample.data[#mean does not need to be weighted because unit-level observations
, "mean.canopy.ht"])##change to other varnames if necessary
))
, X.bar.i =as.matrix(rbind(i=1##rbind!!
, x=weighted.mean(JoSAE.domain.data[
, "mean.canopy.ht.bar"], w=JoSAE.domain.data$N.i)##change to other varnames if necessary
))
, sum.A.i = sum.A.i.mats
)
#third comp
mse.c3.tmp <- eblup.mse.f.c3(n.i=sum(overview.domains$n.i)
, lme.obj=fit.lme
, asympt.var.covar=asy.var.cov.mat)
#third star comp
mse.c3.star.tmp <- eblup.mse.f.c3.star( n.i=sum(overview.domains$n.i)
, lme.obj=fit.lme
, mean.resid.i=weighted.mean(overview.domains$mean.resid.lme, w=overview.domains$n.i)
, asympt.var.covar=asy.var.cov.mat)
#combine to final variance
var.all.eblup <- mse.c1.tmp + mse.c2.tmp + mse.c3.tmp + mse.c3.star.tmp
#combine the 3 estimators:
#EBLUP estimate is almost the same as sample mean -> design consitency
#GREG sampling error (SE) is slightly smaller than EBLUP SE. Both are considerably smaller
#than the sample mean SE.
tmp <- data.frame(
cbind(sample.mean=weighted.mean(overview.domains$biomass.ha.bar.sample, w=overview.domains$N.i)
, GREG.mean=weighted.mean(overview.domains$GREG, w=overview.domains$N.i)
, EBLUP.mean=weighted.mean(overview.domains$EBLUP, w=overview.domains$N.i))
,
sqrt(cbind(SE.sample=var.all.sampmean, SE.GREG=var.all.greg, SE.EBLUP=var.all.eblup[,1]))
)
tmp
##END: aggregate the results to the large study area that contains all domainsRun the code above in your browser using DataLab