# Work on the built-in `mtcars` dataset (32 rows)
data <- mtcars

# For single variables ------------------------------------------------------
# Flag the outlying values of a single numeric column
mpg_outliers <- check_outliers(data$mpg)
# Printing the result shows the row index of the outliers
mpg_outliers
# Underneath, the object is a binary vector...
as.numeric(mpg_outliers)
# ...so negating it keeps only the non-outlying rows of the dataframe
mpg_filtered <- data[!mpg_outliers, ]
# Number of rows left: 28 (4 outliers removed)
nrow(mpg_filtered)

# Flag every observation lying beyond +/- 2 SD
check_outliers(data$mpg, method = "zscore", threshold = 2)
# For dataframes ------------------------------------------------------
# A whole dataframe can be passed in exactly like a single variable
check_outliers(data)

# Several detection methods can be combined in one call
outlier_methods <- c("mahalanobis", "iqr", "zscore")
outliers_list <- check_outliers(data, method = outlier_methods)
outliers_list

# Converting the result with `as.data.frame()` gives access to more details
outliers_info <- as.data.frame(outliers_list)
head(outliers_info)
# ...including the probability of being an outlier
outliers_info$Outlier

# A more stringent removal: keep only the rows whose outlier probability
# is below 0.1
is_kept <- outliers_info$Outlier < 0.1
filtered_data <- data[is_kept, ]
# We can run the function stratified by groups, using `group_by()` from the
# `{poorman}` package (same syntax as the `{dplyr}` verb). The example only
# runs when that optional package is installed.
if (requireNamespace("poorman", quietly = TRUE)) {
  # `group_by()` is called through its namespace instead of attaching the
  # package, so the example leaves the search path untouched
  check_outliers(poorman::group_by(iris, Species))
}
# Everything inside `if (FALSE)` is shown for illustration and never executed
if (FALSE) {
  # You can also run all the methods
  check_outliers(data, method = "all")

  # For statistical models ---------------------------------------------
  # Keep only the continuous variables used by the model: mpg, disp and hp
  # (selected by name rather than by position, so the intent stays explicit)
  mt1 <- mtcars[, c("mpg", "disp", "hp")]
  # Create some fake outliers and append them to the main dataframe
  mt2 <- rbind(mt1, data.frame(
    mpg = c(37, 40), disp = c(300, 400),
    hp = c(110, 120)
  ))
  # Fit the model on the data that contains the fake outliers
  model <- lm(disp ~ mpg + hp, data = mt2)
  outliers_list <- check_outliers(model)

  # Plot only when the optional `{see}` package is installed; it is checked
  # through its namespace so nothing gets attached to the search path
  if (requireNamespace("see", quietly = TRUE)) {
    plot(outliers_list)
  }
  # Show the rows of the model data that were flagged as outliers
  insight::get_data(model)[outliers_list, ]
}
# Run the code above in your browser using DataLab