# NOT RUN {
##### Testing "MIXclustering" function with simulated clustered data #####
# for more information about the data...
# }
# NOT RUN {
help(sim.cluster.data)
# }
# NOT RUN {
### Exercise (Ic) from section 5.1 in Carmona et al. (2016)
### using 3 continuous variables
# }
# NOT RUN {
set.seed(0) # for reproducibility
Y_data <- sim.cluster.data[,2:4]
colnames(Y_data) <- paste("Y",1:3,sep=".")
cluster_estim_Ic <- MIXclustering( Y_data,
var_type=c("c","c","c"),
n.iter_out=1000,
n.burn=200,
n.thin=2,
alpha=0.5,
d_0_a = 1, d_1_a = 1,
d_0_b = 1, d_1_b = 1,
d_0_z = 2.1, d_1_z = 30,
d_0_mu = 2.1, d_1_mu = 30
)
cluster_estim_Ic
# Summary of clustering results
summary(cluster_estim_Ic)
# Representation of clustering results
plot(cluster_estim_Ic,type="heatmap")
plot(cluster_estim_Ic,type="chain")
# Comparison with the original clusters in the simulated data
plot(x=jitter(sim.cluster.data$cluster),
y=jitter(cluster_estim_Ic$cluster),
main="",
xlab="Real cluster",
ylab="Model cluster",
pch=19, col="#FF000020")
# }
# NOT RUN {
### Exercise (IIb) from section 5.1 in Carmona et al. (2016)
### Two binary and one continuous variables
# }
# NOT RUN {
set.seed(0) # for reproducibility
Y_data <- matrix(NA,nrow=nrow(sim.cluster.data),ncol=3)
colnames(Y_data) <- paste("Y",1:3,sep=".")
Y_data[,1] <- findInterval( sim.cluster.data[,2], c(-Inf,5,Inf) )-1
Y_data[,2] <- sim.cluster.data[,3]
Y_data[,3] <- findInterval( sim.cluster.data[,4], c(-Inf,3,Inf) )-1
cluster_estim_IIb <- MIXclustering( Y_data,
var_type=c("o","c","o"),
n.iter_out=1000,
n.burn=200,
n.thin=2,
alpha=0.5,
d_0_a = 1, d_1_a = 1,
d_0_b = 1, d_1_b = 1,
d_0_z = 1, d_1_z = 1,
d_0_mu = 1, d_1_mu = 1
)
summary(cluster_estim_IIb)
plot(cluster_estim_IIb,type="heatmap")
plot(cluster_estim_IIb,type="chain")
# }
# NOT RUN {
### Exercise (IIIa) from section 5.1 in Carmona et al. (2016)
### Two binary, one ordinal, and one (non-informative) continuous variables
# }
# NOT RUN {
set.seed(0) # for reproducibility
Y_data <- matrix(NA,nrow=nrow(sim.cluster.data),ncol=4)
colnames(Y_data) <- paste("Y",1:4,sep=".")
Y_data[,1] <- findInterval( sim.cluster.data[,2], c(-Inf,5,Inf) )-1
Y_data[,2] <- findInterval( sim.cluster.data[,3], c(-Inf,4,5,Inf) )-1
Y_data[,3] <- findInterval( sim.cluster.data[,4], c(-Inf,3,Inf) )-1
Y_data[,4] <- rnorm(n=nrow(sim.cluster.data),mean=0,sd=1)
cluster_estim_IIIa <- MIXclustering( Y_data,
var_type=c("m","o","o","c"),
n.iter_out=1000,
n.burn=200,
n.thin=2,
alpha=0.5,
d_0_a = 1, d_1_a = 1,
d_0_b = 1, d_1_b = 1,
d_0_z = 0.1, d_1_z = 0.1,
d_0_mu = 0.1, d_1_mu = 0.1
)
summary(cluster_estim_IIIa)
plot(cluster_estim_IIIa,type="heatmap")
plot(cluster_estim_IIIa,type="chain")
# }
# NOT RUN {
##### Testing "MIXclustering" function with poverty.data #####
# Using entity 15 (Edomex) #
# }
# NOT RUN {
Y_names <- c("ict_norm",
"ic_ali","ic_asalud","ic_cv",
"ic_rezedu","ic_sbv","ic_segsoc",
"niv_ed","tam_loc")
aux_subset <- rep(T,nrow(poverty.data))
aux_subset <- aux_subset & is.element(substr(poverty.data$folioviv,1,2),"15")
Y_data <- poverty.data[aux_subset,Y_names]
sampling_prob_pov_iii <- 4 * mean(poverty.data[aux_subset,"factor_hog"])
sampling_prob_pov_iii <- sampling_prob_pov_iii * 1/poverty.data[aux_subset,"factor_hog"]
cluster_estim_pov_iii <- MIXclustering(x=Y_data,
var_type=c("c","o","o","o","o","o","o","m","m"),
n.iter_out=1000,
n.burn=200,
n.thin=2,
d_0_a = 1, d_1_a = 1,
d_0_b = 1, d_1_b = 1,
d_0_z = 2.1, d_1_z = 30,
d_0_mu = 2.1, d_1_mu = 30,
sampling_prob = sampling_prob_pov_iii
)
summary(cluster_estim_pov_iii)
plot(cluster_estim_pov_iii,type="heatmap")
plot(cluster_estim_pov_iii,type="chain")
# }
# NOT RUN {
# }
Run the code above in your browser using DataLab