### --- maxSE() methods -------------------------------------------
(mets <- eval(formals(maxSE)$method))
fk <- c(2,3,5,4,7,8,5,4)
sk <- c(1,1,2,1,1,3,1,1)/2
## use plot.clusGap():
plot(structure(class="clusGap", list(Tab = cbind(gap=fk, SE.sim=sk))))
## Note that 'firstmax' and 'globalmax' are always at 3 and 6 :
sapply(c(1/4, 1,2,4), function(SEf)
sapply(mets, function(M) maxSE(fk, sk, method = M, SE.factor = SEf)))
### --- clusGap() -------------------------------------------------
## ridiculously nicely separated clusters in 3 D :
x <- rbind(matrix(rnorm(150, sd = 0.1), ncol = 3),
matrix(rnorm(150, mean = 1, sd = 0.1), ncol = 3),
matrix(rnorm(150, mean = 2, sd = 0.1), ncol = 3),
matrix(rnorm(150, mean = 3, sd = 0.1), ncol = 3))
## Slightly faster way to use pam (see below)
pam1 <- function(x,k) list(cluster = pam(x,k, cluster.only=TRUE))
doExtras <- cluster:::doExtras()
## or set it explicitly to TRUE for the following
if(doExtras) {
## Note we use B = 60 in the following examples to keep them "speedy".
## ---- rather keep the default B = 500 for your analysis!
## note we can pass 'nstart = 20' to kmeans() :
gskmn <- clusGap(x, FUN = kmeans, nstart = 20, K.max = 8, B = 60)
gskmn #-> its print() method
plot(gskmn, main = "clusGap(., FUN = kmeans, n.start=20, B= 60)")
set.seed(12); system.time(
gsPam0 <- clusGap(x, FUN = pam, K.max = 8, B = 60)
)
set.seed(12); system.time(
gsPam1 <- clusGap(x, FUN = pam1, K.max = 8, B = 60)
)
## and show that it gives the same:
stopifnot(identical(gsPam1[-4], gsPam0[-4]))
gsPam1
print(gsPam1, method="globalSEmax")
print(gsPam1, method="globalmax")
}
gs.pam.RU <- clusGap(ruspini, FUN = pam1, K.max = 8, B = 60)
gs.pam.RU
plot(gs.pam.RU, main = "Gap statistic for the 'ruspini' data")
mtext("k = 4 is best .. and k = 5 pretty close")
## This takes a minute..
## No clustering ==> k = 1 ("one cluster") should be optimal:
Z <- matrix(rnorm(256*3), 256,3)
gsP.Z <- clusGap(Z, FUN = pam1, K.max = 8, B = 200)
plot(gsP.Z)
gsP.Z
Run the code above in your browser using DataLab