# \donttest{
## ------------------------------------------------------------
##
## satellite data: convert some of the classes to "outliers"
## unsupervised isopro analysis
##
## ------------------------------------------------------------
## load the satellite data and flag three of its classes as outliers
data(Satellite, package = "mlbench")
is.outlier <- Satellite$classes %in%
  c("damp grey soil", "cotton crop", "vegetation stubble")
## drop the class label column so the analysis is unsupervised
x <- Satellite[, colnames(Satellite) != "classes"]
## one isopro fit per method, all with the same sampsize
i.rnd <- isopro(data = x, method = "rnd", sampsize = 32)
i.uns <- isopro(data = x, method = "unsupv", sampsize = 32)
i.aut <- isopro(data = x, method = "auto", sampsize = 32)
## AUC and precision recall of each howbad score, evaluated against the
## true class label information; one column per method
perf <- do.call(cbind, lapply(
  list(i.rnd$howbad, i.uns$howbad, i.aut$howbad),
  function(score) get.iso.performance(is.outlier, score)
))
colnames(perf) <- c("rnd", "unsupv", "auto")
print(perf)
## ------------------------------------------------------------
##
## boston housing analysis
## isopro analysis using a previous VarPro (supervised) object
##
## ------------------------------------------------------------
data(BostonHousing, package = "mlbench")
## fit the supervised varpro model, then hand the fitted object to isopro
o <- varpro(medv ~ ., BostonHousing)
o.iso <- isopro(o)
## show the rows whose howbad value is at or below its 1% quantile
extreme.rows <- o.iso$howbad <= quantile(o.iso$howbad, probs = .01)
print(BostonHousing[extreme.rows, ])
## ------------------------------------------------------------
##
## boston housing analysis
## supervised isopro analysis - direct call using formula/data
##
## ------------------------------------------------------------
data(BostonHousing, package = "mlbench")
## supervised analysis in a single call: pass formula and data to isopro
o.iso <- isopro(formula = medv ~ ., data = BostonHousing)
## show the rows whose howbad value is at or below its 1% quantile
extreme.rows <- o.iso$howbad <= quantile(o.iso$howbad, probs = .01)
print(BostonHousing[extreme.rows, ])
## ------------------------------------------------------------
##
## monte carlo experiment to study different methods
## unsupervised isopro analysis
##
## ------------------------------------------------------------
## monte carlo settings: n is the sample size passed to twodimsim(),
## nrep is the number of replicates
n <- 1000
nrep <- 25
## simulation function
## Draws two gaussian clusters of n points each in the (x, y) plane and
## appends a single fixed point at (-1, 1) as the last row.
## Returns a list with
##   x          : data frame with columns x and y and 2 * n + 1 rows
##   is.outlier : logical vector, TRUE only for the appended last row
twodimsim <- function(n=1000) {
  ## cluster centred at (-1, -1); x is drawn before y
  lower.left <- data.frame(
    x = rnorm(n, mean = -1, sd = .4),
    y = rnorm(n, mean = -1, sd = .2)
  )
  ## cluster centred at (+1, +1); x is drawn before y
  upper.right <- data.frame(
    x = rnorm(n, mean = +1, sd = .2),
    y = rnorm(n, mean = +1, sd = .4)
  )
  ## the fixed point, kept in the last row
  lone.point <- data.frame(x = -1, y = 1)
  pts <- data.frame(rbind(lower.left, upper.right, lone.point))
  flag <- rep(c(FALSE, TRUE), times = c(2 * n, 1))
  list(x = pts, is.outlier = flag)
}
## monte carlo loop
## Each replicate simulates a fresh data set, fits isopro under the three
## methods and keeps the howbad value of the last row for each method.
## The per-replicate vectors are stacked into a matrix with one row per
## replicate. seq_len() is used so that nrep = 0 yields no iterations
## (1:nrep would count down as c(1, 0)).
hbad <- do.call(rbind, lapply(seq_len(nrep), function(b) {
  cat("iteration:", b, "\n")
  ## draw the data; only the features are needed here because twodimsim()
  ## always puts its single outlier in the last row
  x <- twodimsim(n)$x
  ## iso pro calls
  i.rnd <- isopro(data = x, method = "rnd")
  i.uns <- isopro(data = x, method = "unsupv")
  i.aut <- isopro(data = x, method = "auto")
  ## save results: howbad of the last row (the outlier) under each method
  c(tail(i.rnd$howbad, 1),
    tail(i.uns$howbad, 1),
    tail(i.aut$howbad, 1))
}))
## compare performance
colnames(hbad) <- c("rnd", "unsupv", "auto")
print(summary(hbad))
boxplot(hbad, col = "blue", ylab = "outlier percentile value")
# }
## Run the code above in your browser using DataLab