## Cluster the items in Groceries that have support > 5%.
## The functions and data sets used in this script come from the arules
## package, so attach it first to make the script runnable on its own.
library(arules)
data("Groceries")

## Keep only the item columns whose relative frequency exceeds 0.05.
s <- Groceries[, itemFrequency(Groceries) > 0.05]

## Dissimilarities are computed between items (columns), not transactions.
d_jaccard <- dissimilarity(s, which = "items")

## hclust() renamed its "ward" method to "ward.D" in R 3.1.0; the old name
## still works but emits a message on every call. "ward.D" selects the same
## linkage, so the dendrogram is unchanged.
plot(hclust(d_jaccard, method = "ward.D"))
## Cluster the transactions in a random sample of 500 drawn from Adult.
data("Adult")
s <- sample(Adult, size = 500)

## Dissimilarities between the sampled transactions (default method),
## followed by hierarchical clustering with hclust's default linkage.
d_jaccard <- dissimilarity(s)
hc <- hclust(d = d_jaccard)
plot(x = hc)

## Cut the tree into 20 clusters, then, for the top 20 items, compare the
## item frequencies in cluster 1 (bars) with those of the sample (line).
assign <- cutree(tree = hc, k = 20)
itemFrequencyPlot(s[assign == 1L], population = s, topN = 20)

## Repeat the clustering with affinity-based dissimilarities.
d_affinity <- dissimilarity(s, method = "affinity")
hc <- hclust(d = d_affinity)
plot(x = hc)
## Cluster association rules.
## Mine rules from Adult with minimum support 0.3, then keep only the
## rules whose lift is greater than 2.
rules <- apriori(Adult, parameter = list(support = 0.3))
rules <- subset(rules, subset = lift > 2)

## Affinity-based dissimilarity between rules. The item affinities are
## passed in explicitly via `args`; here they are computed from `s`, the
## transaction sample created earlier in this script.
d_affinity <- dissimilarity(
  rules,
  method = "affinity",
  args = list(affinity = affinity(s))
)
plot(hclust(d = d_affinity))
## Run the code above in your browser using DataLab