# Create a synthetic Gaussian stream (k = 3, d = 5, outliers = 1) and write
# 10 points to disk, including the class and outlier columns.
stream <- DSD_Gaussians(
  k = 3, d = 5, outliers = 1, space_limit = c(0, 2),
  outlier_options = list(outlier_horizon = 10)
)

# Write to a temporary file rather than "data.txt" in the working directory,
# so the example can never overwrite (and later delete) a user's own file.
csv_file <- tempfile(fileext = ".txt")
write_stream(stream, csv_file,
  n = 10, header = TRUE, sep = ",",
  class = TRUE, write_outliers = TRUE
)

# Read the same data back (as a loop)
stream2 <- DSD_ReadCSV(csv_file,
  k = 3, o = 1, sep = ",", header = TRUE,
  loop = TRUE, class = "class", outlier = "outlier"
)
stream2

# Get points: first a single point, then 20 more. Only 10 points were
# written above, so the second request relies on loop = TRUE.
get_points(stream2)
p <- get_points(stream2, n = 20, outlier = TRUE)
message("Outliers ", sum(attr(p, "outlier")))

# Clean up: close the stream before removing the file it reads from
close_stream(stream2)
file.remove(csv_file)
# Example using part of the KDD Cup 1999 data shipped with the stream package.
# Only the continuous variables are kept; column 42 is passed as the class.
kdd_file <- system.file("examples", "kddcup10000.data.gz", package = "stream")
kept_columns <- c(1, 5, 6, 8:11, 13:20, 23:42)
stream <- DSD_ReadCSV(
  gzfile(kdd_file),
  take = kept_columns,
  class = 42,
  k = 7
)
stream

# Fetch five points together with their class information
get_points(stream, n = 5, class = TRUE)

# Plot 100 points (projected on the first two principal components)
plot(stream, n = 100, method = "pc")

close_stream(stream)
# Run the code above in your browser using DataLab