# Simple example, run sequentially on one processor so that the optional
# package nws does not have to be installed.
  # 100000 x 3 big.matrix of doubles, created with init = 0 and filled below.
  x <- big.matrix(100000, 3, init = 0, type = "double")
  # Odd-numbered rows: standard normal draws (50000 rows x 3 cols = 150000).
  x[seq(1, 100000, by = 2), ] <- rnorm(150000)
  # Even-numbered rows: normal draws with mean 5 and sd 1.
  x[seq(2, 100000, by = 2), ] <- rnorm(150000, 5, 1)
  head(x)
  # Sequential multiple starts (nstart = 5) with 2 requested as second argument.
  ans <- kmeans.big.matrix(x, 2, nstart = 5)
  # To use NWS, try something like the following.  This section is
  # deliberately not run: it needs the optional 'nws' package, and the host
  # name below is only a placeholder that must be replaced with a real one.
  # Change FALSE to TRUE (after editing nwsHost) to try it.
  if (FALSE) {
    library(nws)
    s <- sleigh(nwsHost = 'yourhostname.xxx.yyy.zzz', workerCount = 2)
    # Always stop the sleigh, even if the parallel fit fails part-way.
    ans <- tryCatch(
      kmeans.big.matrix(x, 2, nstart = 5, parallel = 'nws', nwssleigh = s),
      finally = stopSleigh(s)
    )
  }
  # Both fits below run iteratively; kmeans.big.matrix() is the one with less
  # memory overhead.  Note that the first gc() report does not reflect the
  # C++ memory held by the big.matrix; the maximum memory used after
  # kmeans.big.matrix() is stated to be about 35 MB.
  gc(reset = TRUE)
  time.new <- system.time(
    print(kmeans.big.matrix(x, 2, nstart = 5)$centers)
  )
  gc()
  # Extract every row and column of x into y, then remove x itself.
  y <- x[, ]
  rm(x)
  # In contrast, the regular kmeans() really burns through the memory:
  gc(reset = TRUE)
  time.old <- system.time(
    print(kmeans(y, 2, nstart = 5)$centers)
  )
  gc()
  # The new kmeans.big.matrix() centers should match the old kmeans() centers,
  # but without the memory overhead and while running more quickly.  The
  # difference is not a problem with the guts of the kmeans() implementation
  # (the algorithm is in C, well-implemented), but lies in the traditional C/R
  # interface and the R code managing the objects and nstart.
  # Print both timings for comparison:
  time.new
  time.old