# Data generated from a mixture of three bivariate Gaussian distributions
if (FALSE) {
N <- 620
centers <- 3
n1 <- 20
n2 <- 100
n3 <- 500
x <- matrix(NA, N,2)
truegroup <- c( rep(1,n1), rep(2, n2), rep(3, n3))
x[1:n1,]=rmvnorm(n1, c(1,5), sigma=diag(2))
x[(n1+1):(n1+n2),]=rmvnorm(n2, c(4,0), sigma=diag(2))
x[(n1+n2+1):(n1+n2+n3),]=rmvnorm(n3, c(6,6), sigma=diag(2))
# Build a similarity matrix from clustering ensembles
H <- 1000
a <- matrix(NA, H, N)
for (h in 1:H){
a[h,] <- kmeans(x,centers)$cluster
}
sim_matr <- matrix(NA, N,N)
for (i in 1:(N-1)){
for (j in (i+1):N){
sim_matr[i,j] <- sum(a[,i]==a[,j])/H
sim_matr[j,i] <- sim_matr[i,j]
}
}
# Obtain a clustering solution via kmeans with multiple random seeds
cl <- KMeans(x, centers)$cluster
# Find three pivots
mus_alg <- MUS(C = sim_matr, clusters = cl)
}
Run the code above in your browser using DataLab