# NOT RUN {
# Example: model-based clustering of the `heart` data with VarSelLCM,
# with and without variable selection, followed by inspection of the
# selected model (summary, plots, imputation).

# Package loading: library() stops with an error if the package is missing,
# whereas require() would only return FALSE and let the script fail later.
library(VarSelLCM)

# Data loading:
# x contains the observed variables
# z is the known status (i.e. 1: absence and 2: presence of heart disease)
data(heart)
z <- heart[, "Class"]
# Drop column 13 (presumably the "Class" label used for z above -- confirm
# against the dataset) so that clustering only sees the covariates.
x <- heart[, -13]

# Cluster analysis without variable selection
res_without <- VarSelCluster(x, 2, vbleSelec = FALSE)

# Cluster analysis with variable selection (with parallelisation)
res_with <- VarSelCluster(x, 2, nbcores = 2, initModel = 40)

# Confusion matrices and ARI: variable selection decreases the
# misclassification error rate
print(table(z, res_without@partitions@zMAP))
print(table(z, res_with@partitions@zMAP))
ARI(z, res_without@partitions@zMAP)
ARI(z, res_with@partitions@zMAP)

# Summary of the best model
summary(res_with)

# Opening Shiny application to easily see the results.
# Only launched in interactive sessions so that a batch run of this script
# is not blocked waiting on the application.
if (interactive()) {
  VarSelShiny(res_with)
}

# Parameters of the best model
print(res_with)

# All calls below operate on the model fitted with variable selection
# (`res_with`); the original referenced an undefined object `out`.

# Discriminative power of the variables (here, the most discriminative
# variable is MaxHeartRate)
plot(res_with, type = "bar")

# Boxplot for continuous (or integer) variable
plot(res_with, y = "MaxHeartRate", type = "boxplot")

# Empirical and theoretical distributions (to check that clustering is
# pertinent)
plot(res_with, y = "MaxHeartRate", type = "cdf")

# Summary of categorical variable
plot(res_with, y = "Sex")

# Summary of the probabilities of misclassification
plot(res_with, type = "probs-class")

# Imputation by posterior mean for the first observation:
# compare the raw first row with its imputed counterpart.
not.imputed <- heart[1, -13]
imputed <- VarSelImputation(res_with)[1, ]
rbind(not.imputed, imputed)
# }
# Run the code above in your browser using DataLab