#weekly covid prevalence in California counties
#steps:
#  1. load covid_prevalence as a tsl
#  2. keep the first 10 time series
#  3. aggregate to months, keeping the maximum of each month (method = max)
tsl <- tsl_initialize(
  x = covid_prevalence,
  name_column = "name",
  time_column = "time"
) |>
  tsl_subset(names = 1:10) |>
  tsl_aggregate(
    new_time = "months",
    method = max
  )
#dissimilarity between the time series in tsl
#(lock_step = TRUE is forwarded to distantia())
distantia_df <- distantia(tsl = tsl, lock_step = TRUE)

#reshape the dissimilarity results into matrix form
#(the first element, psi_matrix[[1]], is the one used for clustering below)
psi_matrix <- distantia_matrix(df = distantia_df)
#example with kmeans clustering
#------------------------------------
#kmeans with 3 groups
#note: when `centers` is a number, stats::kmeans() starts from randomly
#chosen rows, so cluster labels may change between runs
psi_kmeans <- stats::kmeans(x = as.dist(psi_matrix[[1]]), centers = 3)

#silhouette width of each case
utils_cluster_silhouette(labels = psi_kmeans$cluster, d = psi_matrix)

#overall silhouette width (mean = TRUE)
utils_cluster_silhouette(
  labels = psi_kmeans$cluster,
  d = psi_matrix,
  mean = TRUE
)
#example with hierarchical clustering
#------------------------------------
#hierarchical clustering on the first dissimilarity matrix
psi_hclust <- stats::hclust(d = as.dist(psi_matrix[[1]]))

#cut the tree into three groups to obtain one label per case
#(no trailing comma after `k = 3`: it would pass an empty extra argument)
psi_hclust_labels <- stats::cutree(tree = psi_hclust, k = 3)

#silhouette width of each case
utils_cluster_silhouette(labels = psi_hclust_labels, d = psi_matrix)

#overall silhouette width (mean = TRUE)
utils_cluster_silhouette(
  labels = psi_hclust_labels,
  d = psi_matrix,
  mean = TRUE
)
#Run the code above in your browser using DataLab