iris_pca <- ordinate(iris, prcomp, cols = seq(4), scale. = TRUE)
# NB: Non-standard aesthetics are handled as in version > 3.5.1; see:
# https://github.com/tidyverse/ggplot2/issues/6191
# This prevents `scale_color_discrete(aesthetics = ...)` from synching them.
ggbiplot(iris_pca) +
stat_rows_bagplot(
aes(fill = Species),
median_gp = list(color = sync()),
fence_gp = list(linewidth = 0.25),
outlier_gp = list(shape = "asterisk")
) +
scale_color_discrete(name = "Species", aesthetics = c("color", "fill")) +
geom_cols_vector(aes(label = name))
ggbiplot(iris_pca) +
stat_rows_bagplot(
aes(fill = Species, color = Species),
median_gp = list(color = sync()),
fence_gp = list(linewidth = 0.25),
outlier_gp = list(shape = "asterisk")
) +
geom_cols_vector(aes(label = name))
# scaled PCA of Anderson iris measurements
iris[, -5] %>%
princomp(cor = TRUE) %>%
as_tbl_ord() %>%
mutate_rows(species = iris$Species) %>%
print() -> iris_pca
# row-principal biplot with depth median-based stars
iris_pca %>%
ggbiplot(aes(color = species)) +
theme_bw() +
scale_color_brewer(type = "qual", palette = 2) +
stat_rows_star(alpha = .5, fun.ord = "depth_median") +
geom_rows_point(alpha = .5) +
stat_rows_center(fun.ord = "depth_median", size = 4, shape = 1L) +
ggtitle(
"Row-principal PCA biplot of Anderson iris measurements",
"Segments connect each observation to its within-species depth median"
)
# correspondence analysis of combined female and male hair and eye color data
HairEyeColor %>%
rowSums(dims = 2L) %>%
MASS::corresp(nf = 2L) %>%
as_tbl_ord() %>%
augment_ord() %>%
print() -> hec_ca
# inertia across artificial coordinates (all singular values < 1)
get_inertia(hec_ca)
# in row-principal biplot, row coordinates are weighted averages of columns
# (and vice-versa)
hec_ca %>%
confer_inertia("rows") %>%
ggbiplot(aes(color = .matrix, fill = .matrix, shape = .matrix)) +
theme_bw() +
stat_cols_chull(alpha = .1) +
geom_cols_point() +
geom_rows_point() +
ggtitle("Row-principal CA of hair & eye color")
# centered principal components analysis of U.S. personal expenditure data
USPersonalExpenditure %>%
prcomp() %>%
as_tbl_ord() %>%
augment_ord() %>%
# allow radiating text to exceed plotting window
ggbiplot(aes(label = name), clip = "off",
sec.axes = "cols", scale.factor = 50) +
geom_rows_label(size = 3) +
# omit labels in the conical hull without the origin
geom_cols_vector(vector_labels = FALSE) +
stat_cols_cone(linetype = "dotted") +
geom_cols_vector(stat = "cone", vector_labels = TRUE, color = "transparent") +
ggtitle(
"U.S. Personal Expenditure data, 1940-1960",
"Row-principal biplot of centered PCA"
)
# compute row-principal components of scaled iris measurements
iris[, -5] %>%
prcomp(scale = TRUE) %>%
as_tbl_ord() %>%
mutate_rows(species = iris$Species) %>%
print() -> iris_pca
# row-principal biplot with centroids and confidence elliptical disks
iris_pca %>%
ggbiplot(aes(color = species)) +
theme_bw() +
geom_rows_point() +
geom_polygon(
aes(fill = species),
color = NA, alpha = .25, stat = "rows_ellipse"
) +
geom_cols_vector(color = "#444444") +
scale_color_brewer(
type = "qual", palette = 2,
aesthetics = c("color", "fill")
) +
ggtitle(
"Row-principal PCA biplot of Anderson iris measurements",
"Overlaid with 95% confidence disks"
)
# hull peeling with breaks below
judge_pca <- ordinate(USJudgeRatings, princomp, cols = -c(1, 12))
ggbiplot(judge_pca, axis.type = "predictive") +
geom_cols_axis() +
geom_rows_point(elements = "score") +
stat_rows_peel(
aes(alpha = after_stat(hull)), color = "black", elements = "score",
breaks = c(.9, .5, .1), cut = "below"
)
# hull peeling by groups
iris_pca <- ordinate(iris, cols = 1:4, model = prcomp)
ggbiplot(iris_pca) +
geom_rows_point(aes(color = Species), shape = "circle open") +
stat_rows_peel(
aes(fill = Species, alpha = after_stat(hull)),
num = 3
)
# unscaled PCA
iris_pca <- ordinate(iris, cols = 1:4, model = prcomp)
# biplot canvas
iris_biplot <-
iris_pca %>%
ggbiplot(aes(color = Species, label = name), axis.type = "predictive") +
geom_rows_point() +
geom_cols_axis(aes(center = center))
# print select cases
top_cases <- c(1, 51, 101)
iris[top_cases, ]
# subset variables
length_vars <- c(1, 3)
iris[, length_vars] %>%
aggregate(by = iris[, "Species", drop = FALSE], FUN = mean)
# project all cases onto all axes
iris_biplot + stat_rows_projection()
# project all cases onto select axes
iris_biplot + stat_rows_projection(ref_subset = length_vars)
# project select cases onto all axes
iris_biplot + stat_rows_projection(subset = top_cases)
# project select cases onto select axes
iris_biplot + stat_rows_projection(subset = top_cases, ref_subset = length_vars)
# project select cases onto manually provided axes
iris_cols <- as.data.frame(get_cols(iris_pca))[c(1, 2), ]
iris_biplot + stat_rows_projection(subset = top_cases, referent = iris_cols)
# project selected cases onto selected axes in full-dimensional space
iris_pca %>%
ggbiplot(ord_aes(iris_pca, color = Species, label = name),
axis.type = "predictive") +
geom_rows_point() +
geom_cols_axis(aes(center = center)) +
stat_rows_projection(subset = top_cases, ref_subset = length_vars)
# default (standardized) linear discriminant analysis
glass_lda <- MASS::lda(Site ~ SiO2 + Al2O3 + FeO + MgO + CaO, glass)
# bestow 'tbl_ord' class & augment observation, centroid, and variable fields
as_tbl_ord(glass_lda) %>%
augment_ord() %>%
print() -> glass_lda
# row-standard biplot
glass_lda %>%
confer_inertia(1) %>%
ggbiplot(aes(shape = grouping)) +
theme_bw() + theme_biplot() +
geom_rows_point(size = 4) +
geom_rows_point(elements = "score") +
stat_cols_rule(
aes(label = name), color = "#888888", num = 8L,
ref_elements = "score", fun.offset = function(x) minabspp(x, p = .1),
text.size = 2.5, label_dodge = .04
) +
scale_shape_manual(values = c(2L, 3L, 0L, 5L)) +
ggtitle(
"LDA of Freestone glass measurements",
"Row-standard biplot of standardized LDA"
)
# contribution LDA of sites on measurements
glass_lda <-
lda_ord(Site ~ SiO2 + Al2O3 + FeO + MgO + CaO, glass,
axes.scale = "contribution")
# bestow 'tbl_ord' class & augment observation, centroid, and variable fields
as_tbl_ord(glass_lda) %>%
augment_ord() %>%
print() -> glass_lda
# symmetric biplot
glass_lda %>%
confer_inertia(.5) %>%
ggbiplot(aes(shape = grouping)) +
theme_bw() + theme_biplot() +
geom_rows_point() +
stat_rows_density_2d(elements = "score", alpha = .5, color = "#444444") +
stat_cols_rule(
aes(label = name), geom = "axis", color = "#888888", num = 8L,
ref_elements = "active", fun.offset = function(x) minabspp(x, p = .1),
label_dodge = 0.04, text.size = 2.5, text_dodge = .025
) +
scale_shape_manual(values = c(16L, 17L, 15L, 18L)) +
ggtitle(
"LDA of Freestone glass measurements",
"Symmetric biplot of contribution LDA"
)
if (FALSE) {
# classical multidimensional scaling of road distances between European cities
euro_mds <- ordinate(eurodist, cmdscale_ord, k = 11)
# monoplot of city locations
euro_plot <- euro_mds %>%
negate_ord("PCo2") %>%
ggbiplot() +
geom_cols_text(aes(label = name), size = 3)
print(euro_plot)
# biplot with minimal spanning tree based on plotting window distances
euro_plot +
stat_cols_spantree(
engine = "mlpack",
alpha = .5, linetype = "dotted"
)
# biplot with minimal spanning tree based on full-dimensional distances
euro_plot +
stat_cols_spantree(
ord_aes(euro_mds), engine = "mlpack",
alpha = .5, linetype = "dotted"
)
}
Run the code above in your browser using DataLab