# Set up a Gaussian data stream (k = 3, d = 2), feed 500 points into a
# D-Stream clusterer and plot the result.
stream <- DSD_Gaussians(k = 3, d = 2)
dstream <- DSC_DStream(gridsize = 0.05, Cm = 1.5)
update(dstream, stream, n = 500)
plot(dstream, stream)

# Evaluate the micro-clusters.
# Note: only n = 100 points are used for evaluation to speed up execution.
evaluate(
  dstream, stream,
  measure = c("numMicro", "numMacro", "purity", "crand", "SSQ"),
  n = 100
)

# DStream also provides macro-clusters; evaluate them with type = "macro".
plot(dstream, stream, type = "macro")
evaluate(
  dstream, stream,
  type = "macro",
  measure = c("numMicro", "numMacro", "purity", "crand", "SSQ"),
  n = 100
)

# By default, points are assigned to the closest micro-cluster for
# evaluation. They can instead be assigned to the closest macro-cluster
# with assign = "macro".
evaluate(
  dstream, stream,
  type = "macro", assign = "macro",
  measure = c("numMicro", "numMacro", "purity", "crand", "SSQ"),
  n = 100
)
# Evaluate clustering on an evolving data stream: 600 points in total,
# with a horizon of 100 points.
stream <- DSD_Benchmark(1)
dstream <- DSC_DStream(gridsize = 0.05, lambda = 0.1)
evaluate_cluster(
  dstream, stream,
  type = "macro", assign = "micro",
  measure = c("numMicro", "numMacro", "purity", "crand"),
  n = 600, horizon = 100
)
# Animation of the clustering process. The block is guarded by `if (FALSE)`,
# so it never runs as written; execute its body by hand to see the animation.
if (FALSE) {
  reset_stream(stream)
  dstream <- DSC_DStream(gridsize = 0.05, lambda = 0.1)
  animate_cluster(
    dstream, stream,
    horizon = 100, n = 5000,
    measure = c("crand"), type = "macro", assign = "micro",
    plot.args = list(type = "both", xlim = c(0, 1), ylim = c(0, 1))
  )
}
# a simple callback example
# this example requires DSC_MCOD in the streamMOA package
# Constructor for a custom evaluation callback object.
#
# The function's own environment is captured and stored in the returned
# object as `env`; it holds the measure-name vectors defined below.
# "LowestWeightPercentage" is the only measure and is listed as external.
#
# Returns a list with elements `description` and `env`, classed
# c("CustomCallback", "EvalCallback").
CustomCallback <- function() {
  env <- environment()

  # NOTE(review): these bindings are presumably read by name through `env`
  # by the evaluation framework, so their names must not change.
  all_measures <- "LowestWeightPercentage"
  external_measures <- all_measures
  internal_measures <- NULL
  outlier_measures <- NULL

  this <- structure(
    list(description = "Custom evaluation callback", env = env),
    class = c("CustomCallback", "EvalCallback")
  )
  this
}
# Evaluation callback method for "CustomCallback" objects.
#
# Computes the custom measure "LowestWeightPercentage": the smallest weight
# returned by get_weights(dsc) divided by the sum of all weights.
#
# Only `dsc` and `measure` are used. The remaining parameters are accepted so
# the method matches the callback signature, and are ignored here.
#
# Returns a list. It contains `LowestWeightPercentage` when that name is in
# `measure` and is empty otherwise. The value is NA_real_ when the share is
# undefined (no weights, or a total weight that is not positive).
evaluate_callback.CustomCallback <- function(cb_obj, dsc, measure, points,
                                             actual, predict, outliers,
                                             predict_outliers,
                                             predict_outliers_corrid,
                                             centers, noise) {
  r <- list()
  if ("LowestWeightPercentage" %in% measure) {
    # Fetch the weights once instead of once per aggregate.
    weights <- get_weights(dsc)
    total <- sum(weights)
    # Without this guard an empty clustering yields Inf (plus a min() warning)
    # and an all-zero clustering yields NaN.
    has_share <- length(weights) > 0L && isTRUE(total > 0)
    r$LowestWeightPercentage <- if (has_share) min(weights) / total else NA_real_
  }
  r
}
# Cluster 500 points from a Gaussian stream (k = 3, d = 2, with the given
# cluster probabilities `p`) using k-means with 3 clusters, then evaluate it
# with the built-in "crand" measure and the custom "LowestWeightPercentage"
# measure supplied through the callback.
stream <- DSD_Gaussians(k = 3, d = 2, p = c(0.2, 0.4, 0.4))
km <- DSC_Kmeans(3)
update(km, stream, n = 500)
evaluate_with_callbacks(
  km, stream,
  type = "macro", n = 500,
  measure = c("crand", "LowestWeightPercentage"),
  callbacks = list(cc = CustomCallback())
)
# Run the code above in your browser using DataLab