# Generate synthetic data (three clusters well separated)
Z <- rnorm(600)
mues <- rep(c(-3, 0, 3), 200)
X <- matrix(Z + mues, ncol = 2)
# Generate 60 synthetic outliers (contamination level 20%)
X[sample(1:300,60), ] <- matrix(runif( 40, 3 * min(X), 3 * max(X) ),
ncol = 2, nrow = 60)
robust <- ktaucentersfast(
X, centers = X[sample(1:300, 3), ],
max_tol = 1e-3, max_iter = 100)
oldpar <- par(mfrow = c(1, 2))
plot(X,type = "n", main = "ktaucenters (Robust) \n outliers: solid black dots")
points(X[robust$cluster == 1, ], col = 2)
points(X[robust$cluster == 2, ], col = 3)
points(X[robust$cluster == 3, ], col = 4)
points(X[robust$outliers, 1], X[robust$outliers, 2], pch = 19)
# Classical (non Robust) algorithm
non_robust <- kmeans(X, centers = 3, nstart = 100)
plot(X, type = "n", main = "kmeans (Classical)")
points(X[non_robust$cluster == 1, ], col = 2)
points(X[non_robust$cluster == 2, ], col = 3)
points(X[non_robust$cluster == 3, ], col = 4)
par(oldpar)
Run the code above in your browser using DataLab