library(outliertree)
### Category labels shared by the training and test frames
categ_levels <- c("categA", "categB", "categC")

### Training data: random values, with the last entry of `numeric_col1`
### replaced by an extreme value (1e6) so there is an obvious outlier
nrows <- 100
set.seed(1)
df <- data.frame(
  numeric_col1 = c(rnorm(nrows - 1), 1e6),
  numeric_col2 = rgamma(nrows, 1),
  categ_col = sample(categ_levels, size = nrows, replace = TRUE)
)

### Test data: this time the extreme value (-1e6) is the first entry
### of `numeric_col2`
nrows_test <- 50
df_test <- data.frame(
  numeric_col1 = rnorm(nrows_test),
  numeric_col2 = c(-1e6, rgamma(nrows_test - 1, 1)),
  categ_col = sample(categ_levels, size = nrows_test, replace = TRUE)
)

### Fit the model on the training frame
outliers_model <- outlier.tree(df, outliers_print = FALSE, nthreads = 1)

### Look for outliers in the test frame using the fitted model
test_outliers <- predict(
  outliers_model,
  df_test,
  outliers_print = 1,
  return_outliers = TRUE,
  nthreads = 1
)

### Outlier information for row 1, as an R list
test_outliers[[1]]

### To reshape the result into a 6-column table:
# dt <- t(data.table::as.data.table(test_outliers))
# Run the code above in your browser using DataLab