# NOT RUN {
## Prepare the 'swamps' data
## =========================
data(swamps) # see ?swamps
swamps <- swamps[, -1] # drop the first column

## Assess clustering solutions via the cutree() method
## ===================================================
## Complete-linkage hierarchical clustering on Canberra distances of the
## log1p-transformed data (perhaps not the best option, but it is base R)
swamps_hclust <- hclust(
  d = dist(x = log1p(swamps), method = "canberra"),
  method = "complete"
)

## Sum-of-AIC values for 10 to 25 clusters, computed from the hclust() output.
## Note that the data given to find_optimal() is the original abundance
## (response) data of interest, NOT the transformed/distance data that the
## clustering itself was built from.
swamps_hclust_aics <- find_optimal(
  data = swamps,
  clustering = swamps_hclust,
  family = "poisson",
  cutreeLevels = 10:25
)

## Plot the result - lower sum-of-AIC values indicate 'better' clustering.
## Predictive performance looks like it levels off at around 20 clusters.
plot(swamps_hclust_aics)
# }
# NOT RUN {
## Assess clustering solutions supplied as a list
## ==============================================
## Purely illustrative (we probably wouldn't do this in practice): this time
## a list of solutions is generated, one k-means cluster assignment for each
## of 2 to 40 centres
swamps_kmeans <- lapply(
  2:40,
  function(n_centers) stats::kmeans(x = swamps, centers = n_centers)$cluster
)

## Sum-of-AIC values for the list of clustering solutions; the cutreeLevels=
## argument is not needed here
swamps_kmeans_aics <- find_optimal(
  data = swamps,
  clustering = swamps_kmeans,
  family = "poisson"
)
plot(swamps_kmeans_aics)
# }
# NOT RUN {
## See vignette for more explanation than this example
## ============================================================
# }
# Run the code above in your browser using DataCamp Workspace