# KMconstrainedSparse

##### K-means over sparse data input with constraints on cluster weights

Multithreaded weighted Minkowski and spherical K-means via Lloyd's algorithm over a sparse representation of the data, subject to cluster size (weight) constraints.
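As a point of reference, the distances selected by the `minkP` argument can be sketched in a few lines of base R. This is a hypothetical helper, not the package's implementation: it densifies the sparse observation first, and cosine dissimilarity is assumed here to mean 1 minus cosine similarity.

```r
# Hypothetical helper, not part of GMKMcharlie: distance between a dense
# centroid and a sparse observation (2-column data frame of integer
# indexes and numeric values), following the `minkP` semantics.
sparseDist = function(obs, centroid, minkP = 2)
{
  full = numeric(length(centroid))
  full[obs[[1L]]] = obs[[2L]]                  # densify the observation
  if (identical(minkP, "max"))                 # Chebyshev distance
    return(max(abs(full - centroid)))
  if (identical(minkP, "cosine"))              # assumed: 1 - cosine similarity
    return(1 - sum(full * centroid) /
             sqrt(sum(full ^ 2) * sum(centroid ^ 2)))
  sum(abs(full - centroid) ^ minkP) ^ (1 / minkP)  # Minkowski power p
}
```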

##### Usage

```
KMconstrainedSparse(
X,
d,
centroid,
Xw = rep(1, length(X)),
clusterWeightUB = rep(length(X) + 1, length(centroid)),
minkP = 2,
convergenceTail = 5L,
tailConvergedRelaErr = 1e-04,
maxIter = 100L,
maxCore = 7L,
paraSortInplaceMerge = FALSE,
verbose = TRUE
)
```

##### Arguments

- `X`
A list of size `N`, the number of observations. `X[[i]]` is a 2-column data frame. The 1st column is a sorted **integer vector** of the indexes of nonzero dimensions. Values in these dimensions are stored in the 2nd column as a **numeric vector**. Internally the algorithm sets a 32-bit *int* pointer to the beginning of the 1st column and a 64-bit *double* pointer to the beginning of the 2nd column, so it is critical that the input has the correct types.

- `d`
An integer, the dimensionality of `X`. `d` MUST be no less than the maximum index value in `X`.

- `centroid`
A list of size `K`, the number of clusters. `centroid[[i]]` can be in dense or sparse representation. If dense, a numeric vector of size `d`. If sparse, a 2-column data frame in the same sense as `X[[i]]`.

- `Xw`
A numeric vector of size `N`. `Xw[i]` is the weight on observation `X[[i]]`. Users should normalize `Xw` such that the elements sum up to `N`. Default is uniform weights for all observations.

- `clusterWeightUB`
An integer vector of size `K`. The upper bound of weight for each cluster. If `Xw` are all 1s, `clusterWeightUB` upper-bounds the cluster sizes.

- `minkP`
A numeric value or a character string. If numeric, `minkP` is the power `p` in the definition of Minkowski distance. If a character string, `"max"` implies Chebyshev distance and `"cosine"` implies cosine dissimilarity. Default 2.

- `convergenceTail`
An integer. The algorithm may end up in "cyclical convergence" due to the size / weight constraints, that is, every few iterations produce the same clustering. If the cost (total in-cluster distance) of each of the last `convergenceTail` iterations has a relative difference of less than `tailConvergedRelaErr` against the cost from the prior iteration, the program stops.

- `tailConvergedRelaErr`
A numeric value, explained under `convergenceTail`.

- `maxIter`
An integer. The maximal number of iterations. Default 100.

- `maxCore`
An integer. The maximal number of threads to invoke. Should be no more than the total number of logical processors on the machine. Default 7.

- `paraSortInplaceMerge`
A boolean value. `TRUE` lets the algorithm call `std::inplace_merge()` (`std` refers to the C++ STL namespace) instead of `std::merge()` for parallel-sorting the observation-centroid distances. In-place merge is slower but requires no extra memory.

- `verbose`
A boolean value. `TRUE` prints progress.
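To make the required input format concrete, here is a minimal sketch in plain base R of building one observation in the sparse representation. The column names `index` and `value` are illustrative choices, not required by the package; what matters is that column 1 is a sorted integer vector and column 2 a numeric vector.

```r
x = c(0, 0, 1.5, 0, -2.25)        # one dense observation, d = 5
nonz = which(x != 0)              # which() returns sorted integer indexes
obs = data.frame(index = nonz, value = x[nonz])
str(obs)  # column 1 is integer, column 2 is double, as required
```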

##### Details

See details for `KMconstrained()` and `KM()`.

##### Value

A list of size `K`, the number of clusters. Each element is a list of 3 vectors:

- `centroid`: a numeric vector of size `d`.

- `clusterMember`: an integer vector of the indexes of the observations grouped to `centroid`.

- a numeric vector of the same size as `clusterMember`. The `i`th element is the Minkowski distance or cosine dissimilarity from `centroid` to the `clusterMember[i]`th observation in `X`.

An empty `clusterMember` implies an empty cluster.
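Assuming the structure described above, cluster sizes can be tallied and checked against the bounds as follows. The result object here is a hand-built mock for illustration, not actual package output.

```r
# Mock a return value with the documented structure: a list of K = 3
# clusters, each a list of (centroid, cluster members, distances).
# The data are hypothetical.
rst = list(
  list(c(0.5, 0.5), c(1L, 3L), c(0.1, 0.2)),
  list(c(1.0, 0.0), 2L,        0.05),
  list(numeric(2),  integer(0), numeric(0))  # an empty cluster
)
clusterSizes = sapply(rst, function(cl) length(cl[[2L]]))
clusterSizes  # one entry per cluster; the 3rd cluster is empty
```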

##### Examples

```
N = 5000L # Number of points.
d = 500L # Dimensionality.
K = 50L # Number of clusters.

# Create a data matrix, about 95% of which are zeros.
dat = matrix(unlist(lapply(1L : N, function(x)
{
  tmp = numeric(d)
  # Nonzero entries.
  Nnz = as.integer(max(1, d * runif(1, 0, 0.05)))
  tmp[sample(d, Nnz)] = runif(Nnz) + rnorm(Nnz)
  tmp
})), nrow = d); gc()

# Convert to sparse representation.
# GMKMcharlie::d2s() is equivalent.
sparsedat = apply(dat, 2, function(x)
{
  nonz = which(x != 0)
  list(nonz, x[nonz])
}); gc()

centroidInd = sample(length(sparsedat), K)

# Test speed using sparse representation.
sparseCentroid = sparsedat[centroidInd]

# Size upper bounds vary in [N / K * 1.5, N / K * 2].
sizeConstraints = as.integer(round(runif(K, N / K * 1.5, N / K * 2)))
system.time({sparseRst = GMKMcharlie::KMconstrainedSparse(
  X = sparsedat, d = d, centroid = sparseCentroid,
  clusterWeightUB = sizeConstraints,
  tailConvergedRelaErr = 1e-6,
  maxIter = 100, minkP = 2, maxCore = 2, verbose = TRUE)})
```

*Documentation reproduced from package GMKMcharlie, version 1.0.3, License: GPL-3*