require(datanugget)
      # 2-D small example with visualization.
      # Simulate two clouds of 5000 points each in two dimensions: the first
      # drawn from N(0, 0.3^2) per coordinate, the second from N(1, 0.3^2).
      # The matrices are stacked row-wise into a single data frame.
      X <- rbind.data.frame(
        matrix(rnorm(1e4, sd = 0.3), ncol = 2),
        matrix(rnorm(1e4, mean = 1, sd = 0.3), ncol = 2)
      )
      # Create the initial data nuggets from the raw observations in X.
      # NOTE(review): the meaning of each tuning argument is defined by
      # datanugget::create.DN -- confirm against its help page before changing.
      my.DN <- create.DN(
        x = X,
        R = 500,
        delete.percent = 0.1,
        DN.num1 = 500,
        DN.num2 = 250,
        no.cores = 0,
        make.pbs = FALSE
      )
      # Refine the data nuggets produced above, again using the raw data X.
      # NOTE(review): the tolerance / size / split arguments are interpreted by
      # datanugget::refine.DN -- confirm against its help page before changing.
      my.DN2 <- refine.DN(
        x = X,
        DN = my.DN,
        EV.tol = 0.9,
        min.nugget.size = 2,
        max.splits = 5,
        no.cores = 0,
        make.pbs = FALSE
      )
      # Plot the raw large dataset.
      plot(X)
      # Transform the nugget weights into colours for the next plot:
      # first normalise the weights so they sum to 1, then divide by their
      # 80th percentile so that every nugget at or above it maps to >= 1.
      w_trans <- my.DN2$`Data Nuggets`[, "Weight"] /
        sum(my.DN2$`Data Nuggets`[, "Weight"])
      w_trans <- w_trans / quantile(w_trans, 0.8)
      # Use the rescaled weight, capped at 1, as the green channel.
      # rgb() and pmin() are vectorised, so no per-element sapply() closure is
      # needed; this also always yields a character vector (sapply() would
      # return list() on empty input).
      col <- rgb(0, pmin(w_trans, 1), 0)
      # Plot the refined data nugget centers coloured by weight:
      # brighter green means a larger weight.
      plot(
        my.DN2$`Data Nuggets`[, c("Center1", "Center2")],
        col = col,
        lty = 2,
        pch = 16,
        cex = 0.5
      )
      # Weighted K-means clustering on the refined data nuggets:
      # two clusters, a single initialisation, at most five iterations.
      DN.clus <- DN.Wkmeans(
        datanugget = my.DN2,
        k = 2,
        num.init = 1,
        max.iterations = 5
      )
      # Inspect the fitted cluster centers and the WWCSS component
      # (auto-printed when run at top level).
      DN.clus$`Cluster Centers`
      DN.clus$`WWCSS`
      # Plot the clustering result for the data nuggets: nugget centers
      # coloured by their assigned cluster.
      plot(
        my.DN2$`Data Nuggets`[, c("Center1", "Center2")],
        pch = 16,
        cex = 0.5,
        lty = 2,
        col = DN.clus$`Cluster Assignments for data nuggets`
      )
      # Overlay the cluster centers as large star symbols in colours 1 and 2.
      points(DN.clus$`Cluster Centers`, pch = 8, cex = 5, col = 1:2)
      # Plot the clustering result for the raw large dataset: every original
      # observation coloured by its assigned cluster.
      plot(X, col = DN.clus$`Cluster Assignments for original dataset`)
# Run the code above in your browser using DataLab