KQM-package: K Quantiles Medoids (KQM) Clustering.

Description

K Quantiles Medoids (KQM) clustering applies quantiles to divide data of each dimension into K mean intervals. Combining quantiles of all the dimensions of the data and fully permuting quantiles on each dimension is the strategy to determine a pool of candidate initial cluster centers. To find the best initial cluster centers from the pool of candidate initial cluster centers, two methods based on quantile stategy and PAM strategy respectively are proposed. During a clustering process, medoids of clusters are used to update cluster centers in each iteration. Comparison between KQM and the method of randomly selecting initial cluster centers shows that KQM is almost always getting clustering results with smaller total sum squares of distances.

Arguments

Author

Yarong Yang, Nader Ebrahimi, Yoram Rubin, and Jacob Zhang

Details

Package:	KQM
Type:	Package
Version:	1.1.1
Date:	2025-10-13
License:	GPL-2

References

Yarong Yang, Nader Ebrahimi, Yoram Rubin, and Jacob Zhang.(2025) KQM: K Quantiles Medoids (KQM) Clustering. technical report in preparation

Examples

Run this code


# Generate 10 bivariate normal samples
require(MASS)
mu1 <- c(0, 0)          
sigma1 <- matrix(c(1, 0.5, 0.5, 1), nrow=2)  
SP1 <- mvrnorm(n=10, mu=mu1, Sigma=sigma1)

# Generate another 10 bivariate normal samples
mu2<-c(1,1)
sigma2<-matrix(c(1,0,0,1),nrow=2)
SP2<-mvrnorm(n=10,mu=mu2,Sigma=sigma2)

# Generate 10 more new bivariate normal samples
mu3<-c(2,2)
sigma3<-matrix(c(1,0.5,0.5,1),nrow=2)
SP3<-mvrnorm(n=10,mu=mu3,Sigma=sigma3)

# Combine the three groups of bivariate normal samples
data<-rbind(SP1,SP2,SP3)

# Conduct KQM analysis with K=3 by the "YY" method and quantile strategy is 
# selected to determine the initial cluster centers
Res<-KQM(data,3,method="YY",iteration=1000,type=1,style=1)
names(Res@Classes[[1]])<-rep("red",length(Res@Classes[[1]]))
names(Res@Classes[[2]])<-rep("blue",length(Res@Classes[[2]]))
names(Res@Classes[[3]])<-rep("green",length(Res@Classes[[3]]))
Cols<-names(sort(c(Res@Classes[[1]],Res@Classes[[2]],Res@Classes[[3]])))
plot(data[,1],data[,2],type="p",pch=19,col=Cols,lwd=2,xlab=paste("Total SSE = ",
     round(Res@SSE[length(Res@SSE)],2),sep=""),ylab="",
     main="KQM Clustering Results, 'YY' method, style 1")
points(Res@Centers,pch=5,col=c("red","blue","green")) 

#  Compare the clustering results with the original samples 
oldpar<-par(mfrow=c(1,2))
plot(data[,1],data[,2],type="p",pch=19,col=rep(c("sky blue","orange","purple"),rep(10,3)),
     lwd=2,xlab="",ylab="",main="Original Data")
plot(data[,1],data[,2],type="p",pch=19,col=Cols,lwd=2,xlab=paste("Total SSE = ",
     round(Res@SSE[length(Res@SSE)],2),sep=""),ylab="",
     main="KQM Clustering Results, 'YY' method, style 1")
points(Res@Centers,pch=5,col=c("red","blue","green")) 
on.exit<-par(oldpar)

# Conduct KQM analysis with K=3 and randomly picking 3 samples as initial 
#  cluster centers
Res2<-KQM(data,3,method="initial",initial=data[sample(1:nrow(data),3),],iteration=1000,type=1)
names(Res2@Classes[[1]])<-rep("red",length(Res2@Classes[[1]]))
names(Res2@Classes[[2]])<-rep("blue",length(Res2@Classes[[2]]))
names(Res2@Classes[[3]])<-rep("green",length(Res2@Classes[[3]]))
Cols2<-names(sort(c(Res2@Classes[[1]],Res2@Classes[[2]],Res2@Classes[[3]])))
plot(data[,1],data[,2],type="p",pch=19,col=Cols2,lwd=2,xlab=paste("Total SSE = ",
     round(Res2@SSE[length(Res2@SSE)],2),sep=""),ylab="",
     main="KQM Clustering Results, 'initial' method")
points(Res2@Centers,pch=5,col=c("red","blue","green"))

#  Compare the clustering results by the above "YY" method and the "initial" method
oldpar<-par(mfrow=c(1,2))
plot(data[,1],data[,2],type="p",pch=19,col=Cols,lwd=2,xlab=paste("Total SSE = ",
     round(Res@SSE[length(Res@SSE)],2),sep=""),ylab="",
     main="KQM Clustering Results, 'YY' method, style 1")
points(Res@Centers,pch=5,col=c("red","blue","green")) 
plot(data[,1],data[,2],type="p",pch=19,col=Cols2,lwd=2,xlab=paste("Total SSE = ",
     round(Res2@SSE[length(Res2@SSE)],2),sep=""),ylab="",
     main="KQM Clustering Results, 'initial' method")
points(Res2@Centers,pch=5,col=c("red","blue","green")) 
on.exit(oldpar)

# Conduct KQM analysis with K=3 by the "YY" method and PAM strategy is selected to 
# determine the initial cluster centers
Res.2<-KQM(data,3,method="YY",iteration=1000,type=1,style=2)
names(Res.2@Classes[[1]])<-rep("red",length(Res.2@Classes[[1]]))
names(Res.2@Classes[[2]])<-rep("blue",length(Res.2@Classes[[2]]))
names(Res.2@Classes[[3]])<-rep("green",length(Res.2@Classes[[3]]))
Cols.2<-names(sort(c(Res.2@Classes[[1]],Res.2@Classes[[2]],Res.2@Classes[[3]])))
plot(data[,1],data[,2],type="p",pch=19,col=Cols.2,lwd=2,xlab=paste("Total SSE = ",
     round(Res.2@SSE[length(Res.2@SSE)],2),sep=""),ylab="",
     main="KQM Clustering Results, 'YY' method, style 2")
points(Res.2@Centers,pch=5,col=c("red","blue","green")) 

#  Compare the clustering results by the "YY" method with style 1 and style 2 respectively 
oldpar<-par(mfrow=c(1,2))
plot(data[,1],data[,2],type="p",pch=19,col=Cols,lwd=2,xlab=paste("Total SSE = ",
     round(Res@SSE[length(Res@SSE)],2),sep=""),ylab="",
     main="KQM Clustering Results, YY method, style 1")
points(Res@Centers,pch=5,col=c("red","blue","green")) 
plot(data[,1],data[,2],type="p",pch=19,col=Cols.2,lwd=2,xlab=paste("Total SSE = ",
     round(Res.2@SSE[length(Res.2@SSE)],2),sep=""),ylab="",
     main="KQM Clustering Results, 'YY' method, style 2")
points(Res.2@Centers,pch=5,col=c("red","blue","green")) 
on.exit<-par(oldpar)

Run the code above in your browser using DataLab