# Load data ----
# Load the `banknote` data set and drop its first column.
data("banknote")
dat <- banknote[-1]
n <- nrow(dat)  # sample size
nc <- 2         # number of clusters

# Fit `nc` clusters using the default k-median initialization.
# In real applications set 'init.nstart' as large as possible.
set.seed(101)
fit1 <- gmix(dat, K = nc, init.nstart = 1)
print(fit1)

# Plot the partition (default settings).
plot(fit1, data = dat)

# Plot the partition onto the first 3 principal component coordinates,
# with custom per-cluster symbols and colors.
plot(
  fit1,
  data = prcomp(dat)$x,
  margins = c(1, 2, 3),
  pch_cl = c("A", "B"),
  col_cl = c("#4285F4", "#0F9D58"),
  main = "Principal Components"
)
# User-defined random initialization with hard assignment labels:
# every observation receives a label drawn at random from 1, ..., nc.
set.seed(102)
i2 <- sample(seq_len(nc), size = n, replace = TRUE)
# Use `nc` rather than a literal 2 so that K always agrees with the range of
# labels stored in `i2`.
fit2 <- gmix(dat, K = nc, init = i2)
plot(x = fit2, data = dat)
# User-defined smooth "toy" initialization:
# the points flagged in `idx` (about 50% of the data) are assigned to
# cluster 1 with probability 0.9 and to cluster 2 with probability 0.1.
# The remaining data points are assigned to cluster 1 with probability 0.1
# and to cluster 2 with probability 0.9.
#
set.seed(103)
idx <- sample(c(TRUE, FALSE), size = n, replace = TRUE)
# Build the n x 2 weight matrix one column at a time. Assigning the vector
# c(0.9, 0.1) to `i3[idx, ]` would recycle it down the columns (column-major
# order) instead of writing (0.9, 0.1) into each selected row, producing rows
# that do not sum to 1.
i3 <- matrix(0, nrow = n, ncol = nc)
i3[, 1] <- ifelse(idx, 0.9, 0.1)
i3[, 2] <- ifelse(idx, 0.1, 0.9)
# fit
fit3 <- gmix(dat, K = nc, init = i3)
plot(x = fit3, data = dat)
# User-defined function for initialization.
# Produces a 0-1 hard posterior matrix W based on kmeans.
#
# Arguments:
#   data  numeric data (matrix or data frame) passed on to kmeans()
#   K     number of clusters
# Value:
#   A numeric matrix with one row per observation and K columns; entry
#   [i, k] is 1 when kmeans puts observation i into cluster k, 0 otherwise.
#
compute_init <- function(data, K) {
  cl <- kmeans(data, K, nstart = 1, iter.max = 10)$cluster
  # Preallocate and fill through a two-column (row, column) index matrix.
  # Unlike sapply(), whose return shape depends on the input, this always
  # yields an n x K matrix.
  W <- matrix(0, nrow = length(cl), ncol = K)
  W[cbind(seq_along(cl), cl)] <- 1
  W
}
# Fit with the function-based initialization (`compute_init`), then plot the
# resulting partition.
fit4 <- gmix(dat, init = compute_init, K = nc)
plot(x = fit4, data = dat)
# Run the code above in your browser using DataLab