# Example: effect of variable scaling on k-means clustering of the iris data.
# X holds the four numeric measurement columns; y holds the species labels as
# integer codes and is used only as the reference partition for the ARI below.
X <- iris[, -5]
y <- unclass(iris[, 5])

# Compute one scale per variable using different scale estimators.
# (Original note: the pooled standard deviation is considerably smaller for
# variables 3 and 4.)
# vapply() over the columns avoids apply()'s data.frame -> matrix coercion and
# guarantees exactly one numeric value per column.
sds <- vapply(X, sd, numeric(1))
round(sds, 2)
ranges <- vapply(X, function(v) diff(range(v)), numeric(1))
round(ranges, 2)
# NOTE(review): PVS() is not defined in this script; it must be provided by an
# attached package (presumably clusterHD) -- confirm before running.
psds <- PVS(X)
round(psds, 2)

# Now cluster using k-means after scaling the data.
# kmeans() draws random starting centers, so fix the seed to make the
# clustering results (and the ARI values below) reproducible across runs.
set.seed(123)
nbclus <- 3
kmeans.std <- kmeans(X, centers = nbclus, nstart = 100) # no scaling
kmeans.sd <- kmeans(scale(X), centers = nbclus, nstart = 100)
kmeans.rg <- kmeans(scale(X, scale = ranges), centers = nbclus, nstart = 100)
kmeans.psd <- kmeans(scale(X, scale = psds), centers = nbclus, nstart = 100)

# Calculate the Adjusted Rand Index of each clustering against the species
# labels (the index is invariant to how the cluster labels are numbered).
round(mclust::adjustedRandIndex(y, kmeans.std$cluster), 2)
round(mclust::adjustedRandIndex(y, kmeans.sd$cluster), 2)
round(mclust::adjustedRandIndex(y, kmeans.rg$cluster), 2)
round(mclust::adjustedRandIndex(y, kmeans.psd$cluster), 2)
# Run the code above in your browser using DataLab