## Cluster the four numeric iris measurements with DBSCAN.
## NOTE: `iris` is deliberately overwritten with its numeric matrix form,
## because the rest of this script refers to the data under that name.
data(iris)
iris <- as.matrix(iris[, 1:4])

## Namespace-qualified so that attaching another package exporting a function
## called dbscan (e.g. fpc) cannot change which implementation runs here.
res <- dbscan::dbscan(iris, eps = 0.4, minPts = 4)

## `res` is used directly as a vector of cluster ids; adding 1 shifts the ids
## so that id 0 maps to colour 1 instead of "no colour".
## NOTE(review): assumes dbscan() returns a plain integer vector -- confirm
## against the installed package version.
pairs(iris, col = res + 1)

## compare with dbscan from package fpc (only if installed)
if (requireNamespace("fpc", quietly = TRUE)) {
  res2 <- fpc::dbscan(iris, eps = 0.4, MinPts = 4)
  res2 <- res2$cluster
  pairs(iris, col = res2 + 1)

  ## make sure both versions produce the same results
  ## (last expression of the top-level `if`, so its value is printed)
  all(res == res2)
}

## find suitable eps parameter (look at knee)
dbscan::kNNdistplot(iris, k = 4)
## example data from fpc
## n points in 2-D: for each coordinate, ten uniformly drawn values on [0, 10]
## are recycled across all n points and perturbed with Gaussian noise
## (sd = 0.2).
set.seed(665544)
n <- 600
x <- local({
  noise_sd <- 0.2
  # Draw order (uniform, normal, uniform, normal) matters for reproducibility
  # under the fixed seed.
  first_coord <- runif(10, 0, 10) + rnorm(n, sd = noise_sd)
  second_coord <- runif(10, 0, 10) + rnorm(n, sd = noise_sd)
  # deparse.level = 0 keeps the matrix free of column names.
  cbind(first_coord, second_coord, deparse.level = 0)
})

res <- dbscan::dbscan(x, eps = 0.2, minPts = 4)
plot(x, col = res + 1)
## compare speed against fpc version
## The benchmark calls fpc::dbscan as well as microbenchmark, so both optional
## packages must be installed; otherwise the whole comparison is skipped.
if (requireNamespace("microbenchmark", quietly = TRUE) &&
    requireNamespace("fpc", quietly = TRUE)) {
  ## default search strategy of dbscan::dbscan
  t_dbscan <- microbenchmark::microbenchmark(
    dbscan::dbscan(x, eps = 0.2, minPts = 4),
    times = 10, unit = "ms"
  )

  ## same algorithm with linear neighbour search
  t_dbscan_linear <- microbenchmark::microbenchmark(
    dbscan::dbscan(x, eps = 0.2, minPts = 4, search = "linear"),
    times = 10, unit = "ms"
  )

  ## fpc implementation
  t_fpc <- microbenchmark::microbenchmark(
    fpc::dbscan(x, eps = 0.2, MinPts = 4),
    times = 10, unit = "ms"
  )

  boxplot(rbind(t_fpc, t_dbscan_linear, t_dbscan),
    names = c("fpc (R)", "dbscan (linear)", "dbscan (kdtree)"),
    main = "Runtime comparison in ms"
  )

  ## speedup of the kd-tree-based version compared to the fpc implementation
  ## (last expression of the top-level `if`, so its value is printed)
  median(t_fpc$time) / median(t_dbscan$time)
}
## Run the code above in your browser using DataCamp Workspace