# NOT RUN {
### Example 1 ###
# This example dataset has two columns: a blood test result (value)
# and an anesthetic treatment (anesthetic). There are five anesthetics in the
# dataset: Midazolam, Propofol, Ketamine, Thiamylal, and Diazepam.
# Fix the RNG state so the simulated blood test values are reproducible.
set.seed(35)
anesthetic_names <- c(
  "Midazolam", "Propofol", "Ketamine", "Thiamylal", "Diazepam"
)
# Number of rows per anesthetic, in the same order as `anesthetic_names`.
group_sizes <- c(50, 20, 40, 80, 20)
df1 <- data.frame(
  anesthetic = rep(anesthetic_names, times = group_sizes),
  # Each anesthetic draws its values from its own integer range. The draws
  # stay in this order so the random stream is consumed group by group.
  value = c(
    sample(2:5, 50, replace = TRUE),
    sample(6:10, 20, replace = TRUE),
    sample(1:7, 40, replace = TRUE),
    sample(3:10, 80, replace = TRUE),
    sample(10:20, 20, replace = TRUE)
  )
)
head(df1)
# Compare `value` across the anesthetic groups at a 0.01 significance level.
variationAcrossGroups(df1, "anesthetic", "value", sigLevel = 0.01)
# The boxplot tells us that Diazepam, Propofol, and Thiamylal all have
# significantly different mean values from all other groups, including each other
# (p <= 0.01). Midazolam and Ketamine do not have significantly different mean
# values because they share the label "a", but they are significantly different
# from all the other treatments.
### Example 2 ###
# This example dataset has three columns: department, admission date, and
# blood pressure reading (BP). We will examine whether blood pressures
# vary by department and year of admission.
# Fix the RNG state so the simulated admissions are reproducible.
set.seed(2017)
n <- 200
dept_names <- c("Cardiology", "Oncology", "Gastroenterology")
# Every calendar day from 2015 through 2017 is a candidate admission date.
admit_window <- seq(as.Date("2015-01-01"), as.Date("2017-12-31"), by = "day")
bp <- data.frame(
  department = sample(dept_names, size = n, replace = TRUE,
                      prob = c(0.5, 0.3, 0.2)),
  # Drawn without replacement, so each admission date appears at most once.
  admit_date = sample(admit_window, size = n)
)
# Mean BP starts at 120, is scaled by 1.5 for admissions after 2015, and is
# raised by 80 for Cardiology. Oncology readings use sd 60, all others sd 30.
admitted_after_2015 <- bp$admit_date > as.Date("2015-12-31")
bp_mean <- 120 * ifelse(admitted_after_2015, 1.5, 1) +
  ifelse(bp$department == "Cardiology", 80, 0)
bp_sd <- ifelse(bp$department == "Oncology", 60, 30)
bp$BP <- floor(rnorm(n, bp_mean, bp_sd))
head(bp)
# Group BP by department and by admission year, and also request the
# group-differences plot.
variationAcrossGroups(
  bp,
  categoricalCols = "department",
  measureColumn = "BP",
  dateCol = "admit_date",
  levelOfDateGroup = "yearly",
  plotGroupDifferences = TRUE
)
# Since plotGroupDifferences = TRUE and the default of returnGroupStats is
# FALSE, the function prints the boxplot and the 95% family-wise confidence
# interval plot, and returns the summary statistics data frame. The two plots
# show:
#
# 1. The boxplot of BP across all combinations of the two categories.
# department has 3 levels, as does date grouped by year, so there are a total
# of 3 x 3 = 9 groupings, which are shown on the x axis of the boxplot. Groups
# that have a shared letter are *not* significantly different. For example,
# (Gastroenterology | 2015) and (Oncology | 2015) share a "b" label, so
# patients in those groups do not have significantly different mean BP. On the
# other hand, (Cardiology | 2015) and (Gastroenterology | 2015) do not share a
# label, so patients in those groups do have significantly different BP. Likewise,
# Oncology patients in 2015 have different BPs from Oncology patients in either of the subsequent
# years, but Oncology patients in 2016 and 2017 do not have significantly
# different BP (as shown by their shared "c" label).
#
# 2. Confidence-interval level plot. This plot presents the results
# of Tukey's Honest Significant Differences test. It compares all possible
# pairs of groups and adjusts p-values for multiple comparisons. Red lines
# indicate a significant difference between the two groups at the chosen
# significance level (0.05 by default). Groups are ordered by p-values. The
# group with the greater mean value is always listed first (e.g. Cardiology |
# 2016 has greater BP than Oncology | 2015).
# }
# Run the code above in your browser using DataLab