library(dplyr)
# Draw one 5% random sample and reuse it for every step below, so that the
# "before" and "after" plots show the same sessions. Calling sample_frac()
# separately for each step would draw a different random subset every time.
sessions_sample <- california_ev_sessions %>%
  sample_frac(0.05)

# Plot the sampled sessions to locate the outlying ones.
# NOTE(review): `start = 3` is passed unchanged to plot_points() and
# cut_sessions(); presumably the hour at which the daily axis starts --
# confirm against the evprof documentation.
sessions_sample %>%
  plot_points(start = 3)

# For example, sessions that start before 5 AM or that are longer than
# 20 hours are considered outliers and are cut from the sample.
sessions_clean <- sessions_sample %>%
  cut_sessions(
    start = 3,
    connection_hours_max = 20,
    connection_start_min = 5
  )

# Plot the cleaned sample with the same settings for comparison.
plot_points(sessions_clean, start = 3)
# Run the code above in your browser using DataLab