if (FALSE) {
# simulated data
set.seed(222)
nn = rep(200,3)
n = sum(nn)
x = rbind( cbind(rnorm(nn[1],-5,1),rnorm(nn[1],-5,1)),
cbind(rnorm(nn[2],0,1),rnorm(nn[2],0,1)),
cbind(rnorm(nn[3],5,1),rnorm(nn[3],5,1)) )
# maximum number of clusters to search
Kmax = 10
bic = bica = numeric(Kmax)
clusterbic = clusterbica = rep(1,n)
da1 = dabic(x,rep(1,n)) # daBIC
bic[1] = da1$bic # K=1
bica[1] = da1$bica # K=1
for(kk in 2:Kmax){
km = kmeans(scale(x),centers=kk,nstart=25) # K-means clustering
dakk = dabic(x,km$cluster) # daBIC
bic[kk] = dakk$bic # K
bica[kk] = dakk$bica # K
if(min(bic[1:(kk-1)])>bic[kk]) clusterbic = km$cluster # update cluster membership
if(min(bica[1:(kk-1)])>bica[kk]) clusterbica = km$cluster # update cluster membership
}
# Optimal K from BIC
which.min(bic)
# Optimal K from daBIC
which.min(bica)
# plot x colored with optimal cluster membership obtained from daBIC
plot(x,col=clusterbica)
}
Run the code above in your browser using DataLab