# NOT RUN {
set.seed(1)
id <- 14000:17000
# Replace missing values
modelmd <- model_missing_data(data = GBload[id, -1], tau = 0.5,
S = c(48, 336), indices.to.fix = seq_len(nrow(GBload[id, ])),
consider.as.missing = 0, min.val = 0)
# Impute missing values
data.imputed <- impute_modelled_data(modelmd)
#Detect outliers
system.time(
o.ident <- detect_outliers(data = data.imputed, S = c(48, 336))
)
# Plot of identified outliers in time series
outlier.vector <- rep(F,length(data.imputed))
outlier.vector[o.ident$outlier.pos] <- T
plot(data.imputed, type = "o", col=1 + 1 * outlier.vector,
pch = 1 + 18 * outlier.vector)
# table of identified raw outliers and corresponding probs being outlying data
df <- data.frame(o.ident$outlier.pos.raw,unlist(o.ident$outlier.probs)[o.ident$outlier.pos.raw])
colnames(df) <- c("Outlier position", "Probability of being outlying data")
df
# Plot of feature matrix
plot.ts(o.ident$features, type = "o",
col = 1 + outlier.vector,
pch = 1 + 1 * outlier.vector)
# table of outliers and corresponding features/ feature combinations,
# which caused assignment to outlier cluster
# Detect outliers with feat.int = T
set.seed(1)
system.time(
o.ident <- detect_outliers(data = data.imputed, S = c(48, 336), feat.inf = T)
)
feature.imp <- unlist(lapply(o.ident$inf.feature.combinations,
function(x) paste(o.ident$feature.inf.tab[x], collapse = " | ")))
df <- data.frame(o.ident$outlier.pos.raw,o.ident$outlier.probs[o.ident$outlier.pos.raw],
feature.imp[as.numeric(names(feature.imp)) %in% o.ident$outlier.pos.raw])
colnames(df) <- c("Outlier position", "Probability being outlying data", "Responsible features")
View(df)
# }
Run the code above in your browser using DataLab