# NOT RUN {
# ===========================================================================
# Play random numbers. See speed.
# ===========================================================================
N = 10000L # Number of points.
d = 500L # Dimensionality.
K = 100L # Number of clusters.
# Create a data matrix, about 95% of which are zeros.
dat = matrix(unlist(lapply(1L : N, function(x)
{
tmp = numeric(d)
# Nonzero entries.
Nnz = as.integer(max(1, d * runif(1, 0, 0.05)))
tmp[sample(d, Nnz)] = runif(Nnz) + rnorm(Nnz)
tmp
})), nrow = d); gc()
# Convert to sparse representation.
# GMKMcharlie::d2s() acheives the same.
sparsedat = apply(dat, 2, function(x)
{
nonz = which(x != 0)
list(nonz, x[nonz])
}); gc()
centroidInd = sample(length(sparsedat), K)
# Test speed using dense representation.
centroid = dat[, centroidInd]
system.time({rst = GMKMcharlie::KM(
X = dat, centroid = centroid, maxIter = 100,
minkP = 2, maxCore = 2, verbose = TRUE)})
# Test speed using sparse representation.
sparseCentroid = sparsedat[centroidInd]
system.time({sparseRst = GMKMcharlie::KMsparse(
X = sparsedat, d = d, centroid = sparseCentroid,
maxIter = 100, minkP = 2, maxCore = 2, verbose = TRUE)})
# }
Run the code above in your browser using DataLab