# NOT RUN {
# Set up CV folds for a stratified sample and a one-stage cluster sample,
# using data from the `survey` package
library(survey)
data("api", package = "survey")
# stratified sample
dstrat <- svydesign(id = ~1, strata = ~stype, weights = ~pw, data = apistrat,
fpc = ~fpc)
dstrat <- update(dstrat, .foldID = folds.svydesign(dstrat, nfolds = 5))
# Each fold will have observations from every stratum
with(dstrat$variables, table(stype, .foldID))
# Fold sizes should be roughly equal
table(dstrat$variables$.foldID)
#
# one-stage cluster sample
dclus1 <- svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
dclus1 <- update(dclus1, .foldID = folds.svydesign(dclus1, nfolds = 5))
# For any given cluster, all its observations will be in the same fold;
# and each fold should contain roughly the same number of clusters
with(dclus1$variables, table(dnum, .foldID))
# But if cluster sizes are unequal,
# the number of individuals per fold will also vary
table(dclus1$variables$.foldID)
# See the end of `intro` vignette for an example of using such folds
# as part of a custom loop over CV folds
# to tune parameters in a design-consistent random forest model
# }
Run the code above in your browser using DataLab