# NOT RUN {
N <- 3000L  # Number of points.
d <- 500L   # Dimensionality.
K <- 50L    # Number of clusters.

# Simulated data: a d x N matrix, so each column holds one point.
dat <- matrix(rnorm(N * d) + runif(N * d), nrow = d)

# Choose the K starting centroids (as column indices of dat) with kmeans++.
centroidInd <- GMKMcharlie::KMppIni(
  X = dat,
  K,
  firstSelection = 1L,
  minkP = 2,
  stochastic = FALSE,
  seed = sample(1e9L, 1),
  maxCore = 2L,
  verbose = TRUE
)
centroid <- dat[, centroidInd]

# Run 1: every cluster shares the same size upper bound, N / K * 2.
sizeConstraints <- rep(as.integer(N / K * 2), K)
system.time({
  rst <- GMKMcharlie::KMconstrained(
    X = dat,
    centroid = centroid,
    clusterWeightUB = sizeConstraints,
    maxCore = 2L,
    tailConvergedRelaErr = 1e-6,
    verbose = TRUE
  )
})

# Run 2: per-cluster size upper bounds drawn from [N / K * 1.5, N / K * 2].
sizeConstraints <- as.integer(round(
  runif(K, min = N / K * 1.5, max = N / K * 2)
))
system.time({
  rst <- GMKMcharlie::KMconstrained(
    X = dat,
    centroid = centroid,
    clusterWeightUB = sizeConstraints,
    maxCore = 2L,
    tailConvergedRelaErr = 1e-6,
    verbose = TRUE
  )
})
# }
# Run the code above in your browser using DataLab