library(dplyr)
# Example activity log: one participant with four timestamps on the same day.
# The scalar columns are recycled to the length of `datetime`.
data <- tibble(
  participant_id = 1,
  datetime = paste(
    "2022-06-21",
    c("15:00:00", "15:55:00", "17:05:00", "17:10:00")
  ),
  confidence = 100,
  type = "WALKING"
)
# Bin the data per hour; an interval longer than one hour is still split
# into hourly bins. `lead` holds the next timestamp and serves as the end of
# each interval.
data |>
  mutate(
    datetime = as.POSIXct(datetime),
    lead = lead(datetime)
  ) |>
  bin_data(start_time = datetime, end_time = lead, by = "hour")
# Alternatively, you can give an integer value to by to create custom-sized
# bins, but only if fixed = FALSE. Note that these bins are not rounded to
# the bin size (30 minutes in this example), but rather depend on the
# earliest time in the group.
data |>
  mutate(
    datetime = as.POSIXct(datetime),
    lead = lead(datetime)
  ) |>
  bin_data(
    start_time = datetime,
    end_time = lead,
    by = 30L * 60L, # integer bin size: 1800
    fixed = FALSE
  )
# A richer data set to showcase grouping: the activity type alternates
# between "STILL" and "WALKING".
data <- tibble(
  participant_id = 1,
  datetime = c(
    "2022-06-21 15:00:00",
    "2022-06-21 15:55:00",
    "2022-06-21 17:05:00",
    "2022-06-21 17:10:00"
  ),
  confidence = 100,
  type = rep(c("STILL", "WALKING"), times = 2)
)
# bin_data also takes into account the prior grouping structure
# `lead` is computed per participant; `type` is then added as a second
# grouping variable before binning.
out <- data |>
  group_by(participant_id) |>
  mutate(
    datetime = as.POSIXct(datetime),
    lead = lead(datetime)
  ) |>
  group_by(type, .add = TRUE) |>
  bin_data(start_time = datetime, end_time = lead, by = "hour")

print(out)
# Total duration (in seconds) per bin. Replace `lead` and `datetime` inside
# the function with your own end and start column names.
purrr::map_dbl(
  out$bin_data,
  \(bin) sum(as.numeric(bin$lead) - as.numeric(bin$datetime), na.rm = TRUE)
)
# Or, using unnest() and summarise(). The unit is fixed to seconds explicitly:
# a plain `lead - datetime` lets difftime choose its unit from the data, so
# the reported durations could otherwise be in seconds, minutes or hours.
out |>
  tidyr::unnest(bin_data, keep_empty = TRUE) |>
  mutate(
    duration = difftime(.data$lead, .data$datetime, units = "secs")
  ) |>
  group_by(bin, .add = TRUE) |>
  summarise(duration = sum(.data$duration, na.rm = TRUE), .groups = "drop")
# Run the code above in your browser using DataLab