# NOT RUN {
## Load absolute temperature data set:
## NOTE(review): assumes the package that ships `AbsoluteTemperature` and
## `grouped_resample()` is already attached -- confirm before running standalone.
data("AbsoluteTemperature")
df <- AbsoluteTemperature
## Find the proportion of rows that falls in each climate zone (column `z`)
pcs <- table(df$z) / nrow(df)
## Choose the approximate size of the new sample and compute resample sizes
N <- round(sqrt(nrow(df)))
resamplesizes <- as.integer(round(N * pcs))
## Sizes are rounded per zone, so print the total to compare it with N
sum(resamplesizes)
## Create the grouping matrix: one row per zone, in the order given by table().
## Group_ID is generated from the number of zones instead of a hard-coded 1:4.
## NOTE(review): presumably Group_ID must match the values of `z`; this keeps
## the original 1..k coding -- verify if the zone labels are not 1..k.
groupmat <- data.frame(
  "Group_ID" = seq_along(resamplesizes),
  "Resample_Size" = resamplesizes
)
groupmat
## Simple resampling:
## one sample requested (number_samples = 1), replace = FALSE, fixed seed
resample_simple <- grouped_resample(
  in_data = df,
  grp_vector = "z",
  grp_matrix = groupmat,
  replace = FALSE,
  option = "Simple",
  number_samples = 1,
  nworkers = NULL,
  rseed = 20191220
)
## Print the dimensions of the first (and only) element of the result
cat(dim(resample_simple[[1]]), "\n")
## Dirichlet resampling:
## identical arguments and seed as above; only `option` differs
resample_dirichlet <- grouped_resample(
  in_data = df,
  grp_vector = "z",
  grp_matrix = groupmat,
  replace = FALSE,
  option = "Dirichlet",
  number_samples = 1,
  nworkers = NULL,
  rseed = 20191220
)
## Print the dimensions of the first (and only) element of the result
cat(dim(resample_dirichlet[[1]]), "\n")
##
# ## Work in parallel and create many samples
# ## Choose a random seed
# nseed <- 20191119
# ## Simple
# reslist1 <- grouped_resample(in_data = df, grp_vector = "z", grp_matrix = groupmat,
# replace = FALSE, option = "Simple",
# number_samples = 10, nworkers = NULL,
# rseed = nseed)
# sapply(reslist1, dim)
# ## Dirichlet
# reslist2 <- grouped_resample(in_data = df, grp_vector = "z", grp_matrix = groupmat,
# replace = FALSE, option = "Dirichlet",
# number_samples = 10, nworkers = NULL,
# rseed = nseed)
# sapply(reslist2, dim)
# ## Count the row names shared by each pair of corresponding samples
# ## (1st 'Simple' vs 1st 'Dirichlet', 2nd vs 2nd, and so on)
# mapply(function(x,y){sum(rownames(x)%in%rownames(y))},reslist1,reslist2)
#
# }
# Run the code above in your browser using DataLab