Learn R Programming

healthyR.ts

The goal of healthyR.ts is to provide a consistent verb framework for performing time series analysis and forecasting on both administrative and clinical hospital data.

Installation

You can install the released version of healthyR.ts from CRAN with:

install.packages("healthyR.ts")

And the development version from GitHub with:

# install.packages("devtools")
devtools::install_github("spsanderson/healthyR.ts")

Example

This is a basic example which shows you how to generate random walk data.

library(healthyR.ts)
library(ggplot2)

df <- ts_random_walk()

head(df)
#> # A tibble: 6 × 4
#>     run     x        y cum_y
#>   <dbl> <dbl>    <dbl> <dbl>
#> 1     1     1 0.0521   1052.
#> 2     1     2 0.000486 1053.
#> 3     1     3 0.0567   1112.
#> 4     1     4 0.125    1252.
#> 5     1     5 0.0825   1355.
#> 6     1     6 0.00340  1360.

Now that the data has been generated, let's take a look at it.

df %>%
   ggplot(
       mapping = aes(
           x = x
           , y = cum_y
           , color = factor(run)
           , group = factor(run)
        )
    ) +
    geom_line(alpha = 0.8) +
    ts_random_walk_ggplot_layers(df)

That is still pretty noisy, so let's look at it a different way. Let's clean this up a bit to make it easier to see the full range of possible volatility across the random walks.

library(dplyr)
library(ggplot2)

df %>%
    group_by(x) %>%
    summarise(
        min_y = min(cum_y),
        max_y = max(cum_y)
    ) %>%
    ggplot(
        aes(x = x)
    ) +
    geom_line(aes(y = max_y), color = "steelblue") +
    geom_line(aes(y = min_y), color = "firebrick") +
    geom_ribbon(aes(ymin = min_y, ymax = max_y), alpha = 0.2) +
    ts_random_walk_ggplot_layers(df)

This package comes with a wide variety of functions from Data Generators to Statistics functions. The function ts_random_walk() in the above example is a Data Generator.

Let’s take a look at a plotting function.

data_tbl <- data.frame(
  date_col = seq.Date(
    from = as.Date("2020-01-01"),
    to   = as.Date("2022-06-01"),
    length.out = 365*2 + 180
    ),
  value = rnorm(365*2+180, mean = 100)
)

ts_calendar_heatmap_plot(
  .data          = data_tbl
  , .date_col    = date_col
  , .value_col   = value
  , .interactive = FALSE
)

Time Series Clustering via Features:

data_tbl <- ts_to_tbl(AirPassengers) %>%
  mutate(group_id = rep(1:12, 12))

output <- ts_feature_cluster(
  .data = data_tbl,
  .date_col = date_col,
  .value_col = value,
  group_id,
  .features = c("acf_features","entropy"),
  .scale = TRUE,
  .prefix = "ts_",
  .centers = 3
)

ts_feature_cluster_plot(
  .data = output,
  .date_col = date_col,
  .value_col = value,
  .center = 2,
  group_id
)
#> $plot
#> $plot$static_plot
#> 
#> $plot$plotly_plot
#> 
#> 
#> $data
#> $data$original_data
#> # A tibble: 144 × 4
#>    index     date_col   value group_id
#>    <yearmon> <date>     <dbl>    <int>
#>  1 Jan 1949  1949-01-01   112        1
#>  2 Feb 1949  1949-02-01   118        2
#>  3 Mar 1949  1949-03-01   132        3
#>  4 Apr 1949  1949-04-01   129        4
#>  5 May 1949  1949-05-01   121        5
#>  6 Jun 1949  1949-06-01   135        6
#>  7 Jul 1949  1949-07-01   148        7
#>  8 Aug 1949  1949-08-01   148        8
#>  9 Sep 1949  1949-09-01   136        9
#> 10 Oct 1949  1949-10-01   119       10
#> # ℹ 134 more rows
#> 
#> $data$kmm_data_tbl
#> # A tibble: 3 × 3
#>   centers k_means  glance          
#>     <int> <list>   <list>          
#> 1       1 <kmeans> <tibble [1 × 4]>
#> 2       2 <kmeans> <tibble [1 × 4]>
#> 3       3 <kmeans> <tibble [1 × 4]>
#> 
#> $data$user_item_tbl
#> # A tibble: 12 × 8
#>    group_id ts_x_acf1 ts_x_acf10 ts_diff1_acf1 ts_diff1_acf10 ts_diff2_acf1
#>       <int>     <dbl>      <dbl>         <dbl>          <dbl>         <dbl>
#>  1        1     0.741       1.55       -0.0995          0.474       -0.182 
#>  2        2     0.730       1.50       -0.0155          0.654       -0.147 
#>  3        3     0.766       1.62       -0.471           0.562       -0.620 
#>  4        4     0.715       1.46       -0.253           0.457       -0.555 
#>  5        5     0.730       1.48       -0.372           0.417       -0.649 
#>  6        6     0.751       1.61        0.122           0.646        0.0506
#>  7        7     0.745       1.58        0.260           0.236       -0.303 
#>  8        8     0.761       1.60        0.319           0.419       -0.319 
#>  9        9     0.747       1.59       -0.235           0.191       -0.650 
#> 10       10     0.732       1.50       -0.0371          0.269       -0.510 
#> 11       11     0.746       1.54       -0.310           0.357       -0.556 
#> 12       12     0.735       1.51       -0.360           0.294       -0.601 
#> # ℹ 2 more variables: ts_seas_acf1 <dbl>, ts_entropy <dbl>
#> 
#> $data$cluster_tbl
#> # A tibble: 12 × 9
#>    cluster group_id ts_x_acf1 ts_x_acf10 ts_diff1_acf1 ts_diff1_acf10
#>      <int>    <int>     <dbl>      <dbl>         <dbl>          <dbl>
#>  1       1        1     0.741       1.55       -0.0995          0.474
#>  2       1        2     0.730       1.50       -0.0155          0.654
#>  3       2        3     0.766       1.62       -0.471           0.562
#>  4       2        4     0.715       1.46       -0.253           0.457
#>  5       2        5     0.730       1.48       -0.372           0.417
#>  6       1        6     0.751       1.61        0.122           0.646
#>  7       1        7     0.745       1.58        0.260           0.236
#>  8       1        8     0.761       1.60        0.319           0.419
#>  9       2        9     0.747       1.59       -0.235           0.191
#> 10       2       10     0.732       1.50       -0.0371          0.269
#> 11       2       11     0.746       1.54       -0.310           0.357
#> 12       2       12     0.735       1.51       -0.360           0.294
#> # ℹ 3 more variables: ts_diff2_acf1 <dbl>, ts_seas_acf1 <dbl>, ts_entropy <dbl>
#> 
#> 
#> $kmeans_object
#> $kmeans_object[[1]]
#> K-means clustering with 2 clusters of sizes 5, 7
#> 
#> Cluster means:
#>   ts_x_acf1 ts_x_acf10 ts_diff1_acf1 ts_diff1_acf10 ts_diff2_acf1 ts_seas_acf1
#> 1 0.7456468   1.568532     0.1172685      0.4858013    -0.1799728    0.2876449
#> 2 0.7387865   1.528308    -0.2909349      0.3638392    -0.5916245    0.2930543
#>   ts_entropy
#> 1  0.4918321
#> 2  0.6438176
#> 
#> Clustering vector:
#>  [1] 1 1 2 2 2 1 1 1 2 2 2 2
#> 
#> Within cluster sum of squares by cluster:
#> [1] 0.3704304 0.3660630
#>  (between_SS / total_SS =  59.8 %)
#> 
#> Available components:
#> 
#> [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
#> [6] "betweenss"    "size"         "iter"         "ifault"

Time to/from Event Analysis

library(dplyr)
df <- ts_to_tbl(AirPassengers) %>% select(-index)

ts_time_event_analysis_tbl(
  .data = df,
  .horizon = 6,
  .date_col = date_col,
  .value_col = value,
  .direction = "both"
) %>%
  ts_event_analysis_plot()


ts_time_event_analysis_tbl(
  .data = df,
  .horizon = 6,
  .date_col = date_col,
  .value_col = value,
  .direction = "both"
) %>%
  ts_event_analysis_plot(.plot_type = "individual")

ARIMA Simulators

output <- ts_arima_simulator()
output$plots$static_plot

Automatic Workflows can be thought of as boilerplate time series modeling. This functionality is in its infancy in this package.

Auto Workflows | Boilerplate Workflow
ts_auto_arima() | Boilerplate Workflow
ts_auto_arima_xgboost() | Boilerplate Workflow
ts_auto_croston() | Boilerplate Workflow
ts_auto_exp_smoothing() | Boilerplate Workflow
ts_auto_glmnet() | Boilerplate Workflow
ts_auto_lm() | Boilerplate Workflow
ts_auto_mars() | Boilerplate Workflow
ts_auto_nnetar() | Boilerplate Workflow
ts_auto_prophet_boost() | Boilerplate Workflow
ts_auto_prophet_reg() | Boilerplate Workflow
ts_auto_smooth_es() | Boilerplate Workflow
ts_auto_svm_poly() | Boilerplate Workflow
ts_auto_svm_rbf() | Boilerplate Workflow
ts_auto_theta() | Boilerplate Workflow
ts_auto_xgboost() | Boilerplate Workflow

This is just a start of what is in this package!

Copy Link

Version

Install

install.packages('healthyR.ts')

Monthly Downloads

264

Version

0.3.1

License

MIT + file LICENSE

Issues

Pull Requests

Stars

Forks

Maintainer

Steven Sanderson

Last Published

October 11th, 2024

Functions in healthyR.ts (0.3.1)

ci_hi

Confidence Interval Generic
required_pkgs.step_ts_acceleration

Required Packages
step_ts_acceleration

Recipes Time Series Acceleration Generator
ts_auto_arima_xgboost

Boilerplate Workflow
auto_stationarize

Automatically Stationarize Time Series Data
ts_auto_croston

Boilerplate Workflow
calibrate_and_plot

Helper function - Calibrate and Plot
arima_string

Forecast arima.string
ts_auto_exp_smoothing

Boilerplate Workflow
assign_value

Misc for boilerplate
step_ts_velocity

Recipes Time Series Velocity Generator
color_blind

Provide Colorblind Compliant Colors
ts_auto_svm_poly

Boilerplate Workflow
tidy_fft

Tidy Style FFT
internal_ts_both_event_tbl

Event Analysis
ts_auto_glmnet

Boilerplate Workflow
ts_info_tbl

Get Time Series Information
ts_auto_smooth_es

Boilerplate Workflow
internal_ts_forward_event_tbl

Event Analysis
get_recipe_call

Misc for boilerplate
ts_arima_simulator

Simulate ARIMA Model
tidyeval

Tidy eval helpers
ts_event_analysis_plot

Time Series Event Analysis Plot
ts_brownian_motion_augment

Brownian Motion
ts_auto_arima

Boilerplate Workflow
ts_extract_auto_fitted_workflow

Extract Boilerplate Items
ts_auto_svm_rbf

Boilerplate Workflow
internal_ts_backward_event_tbl

Event Analysis
ts_velocity_augment

Augment Function Velocity
ts_vva_plot

Time Series Value, Velocity and Acceleration Plot
ts_scedacity_scatter_plot

Time Series Model Scedacity Plot
ts_auto_prophet_reg

Boilerplate Workflow
ts_acceleration_augment

Augment Function Acceleration
ts_acceleration_vec

Vector Function Time Series Acceleration
ts_model_compare

Compare Two Time Series Models
ts_model_auto_tune

Time Series Model Tuner
ts_adf_test

Augmented Dickey-Fuller Test for Time Series Stationarity
ts_auto_nnetar

Boilerplate Workflow
ts_growth_rate_vec

Vector Function Time Series Growth Rate
ts_auto_recipe

Build a Time Series Recipe
model_extraction_helper

Model Method Extraction Helper
ts_qc_run_chart

Quality Control Run Chart
ts_sma_plot

Simple Moving Average Plot
ts_auto_prophet_boost

Boilerplate Workflow
ts_auto_theta

Boilerplate Workflow
ts_calendar_heatmap_plot

Time Series Calendar Heatmap
ts_growth_rate_augment

Augment Data with Time Series Growth Rates
%>%

Pipe operator
ts_velocity_vec

Vector Function Time Series Velocity
ts_wfs_svm_rbf

Auto SVM RBF (Kernlab) Workflowset Function
ts_compare_data

Compare data over time periods
ts_brownian_motion_plot

Auto-Plot a Geometric/Brownian Motion Augment
ts_auto_lm

Boilerplate Workflow
ts_auto_mars

Boilerplate Workflow
ts_wfs_nnetar_reg

Auto NNETAR Workflowset Function
ts_wfs_prophet_reg

Auto PROPHET Regression Workflowset Function
ts_geometric_brownian_motion_augment

Geometric Brownian Motion
ts_wfs_xgboost

Auto XGBoost (XGBoost) Workflowset Function
ts_feature_cluster_plot

Time Series Feature Clustering
ts_feature_cluster

Time Series Feature Clustering
ts_auto_xgboost

Boilerplate Workflow
ts_is_date_class

Check if an object is a date class
ts_qq_plot

Time Series Model QQ Plot
ts_get_date_columns

Get date or datetime variables (column names)
ts_model_rank_tbl

Model Rank
util_difflog_ts

Differencing with Log Transformation to Make Time Series Stationary
ts_brownian_motion

Brownian Motion
ts_wfs_arima_boost

Auto Arima XGBoost Workflowset Function
ts_to_tbl

Coerce a time-series object to a tibble
ts_random_walk

Random Walk Function
ts_wfs_auto_arima

Auto Arima (Forecast auto_arima) Workflowset Function
util_doublediff_ts

Double Differencing to Make Time Series Stationary
ts_forecast_simulator

Time-series Forecasting Simulator
ts_random_walk_ggplot_layers

Get Random Walk ggplot2 layers
ts_wfs_ets_reg

Auto ETS Workflowset Function
ts_geometric_brownian_motion

Geometric Brownian Motion
ts_lag_correlation

Time Series Lag Correlation Analysis
ts_model_spec_tune_template

Time Series Model Spec Template
ts_wfs_svm_poly

Auto SVM Poly (Kernlab) Workflowset Function
ts_wfs_mars

Auto MARS (Earth) Workflowset Function
ts_wfs_lin_reg

Auto Linear Regression Workflowset Function
util_singlediff_ts

Single Differencing to Make Time Series Stationary
ts_ma_plot

Time Series Moving Average Plot
ts_scale_color_colorblind

Provide Colorblind Compliant Colors
ts_scale_fill_colorblind

Provide Colorblind Compliant Colors
ts_splits_plot

Time Series Splits Plot
ts_time_event_analysis_tbl

Event Analysis
util_doubledifflog_ts

Double Differencing with Log Transformation to Make Time Series Stationary
util_log_ts

Logarithmic Transformation to Make Time Series Stationary
chr_assign

Misc for boilerplate
ci_lo

Confidence Interval Generic