# Load Packages
library(emmeans)
library(VizTest)
library(dplyr)
# Use built-in Esophageal Cancer Data
data(esoph)
# Aggregate data by age group
ag_data <- aggregate(esoph[,c("ncases", "ncontrols")], list(age = esoph$age), sum)
# Turn counts into integerss (not required, but makes printing nicer)
ag_data$ncases <- as.integer(ag_data$ncases)
ag_data$ncontrols <- as.integer(ag_data$ncontrols)
# Make age into unordered factor
ag_data$age <- factor(as.character(ag_data$age),
levels=levels(esoph$age))
# Estimate model of prevalence by age and overall (the summary model)
model1 <- glm(cbind(ncases, ncontrols) ~ age,
data = ag_data, family = binomial())
model_sum <- glm(cbind(ncases, ncontrols) ~ 1,
data = ag_data, family = binomial())
# Make data frame of results for plotting using emmeans
fit <- emmeans(model1, "age")
fit_ci <- confint(fit)
# add in original count data
ag_data <- cbind(fit, ag_data[,c("ncases", "ncontrols")])
# turn coefficients and confidence intervals into odds ratio scale
ag_data$or <- exp(ag_data$emmean)
ag_data$lower <- exp(ag_data$asymp.LCL)
ag_data$upper <- exp(ag_data$asymp.UCL)
# Make summary data frame that we can use for plotting
fit_sum <- data.frame(age= "Summary", emmean = coef(model_sum),
SE = unname(sqrt(vcov(model_sum))), or = exp(coef(model_sum)))
sum_ci <- confint(model_sum)
fit_sum$lower <- exp(sum_ci[1])
fit_sum$upper <- exp(sum_ci[2])
fit_sum$ncases <- sum(ag_data$ncases)
fit_sum$ncontrols <- sum(ag_data$ncontrols)
rownames(fit_sum) <- NULL
# Find the optimal visual testing intervals
viztest(fit, include_zero=FALSE, make_plot=FALSE, test_level = .05)
# Add inferential CIs to data (not for summary, though)
fit_ici <- confint(fit, level = .75)
ag_data$lower_ici <- exp(fit_ici$asymp.LCL)
ag_data$upper_ici <- exp(fit_ici$asymp.UCL)
# bind together the age-specific and summary data frames for plotting
ag_data <- dplyr::bind_rows(ag_data, fit_sum)
# identify summary row
ag_data$is_sum <- ag_data$age == "Summary"
# add point-size weight
ag_data$pt_size <- 1/ag_data$SE^2
# make age_label such that ages plot smallest at top and summary at bottom
ag_data$age_label <- factor(ag_data$age, levels=rev(ag_data$age))
# Make gg forest plot
out <- gg_forest(ag_data,
y = "age_label",
x = "or",
xmin_std = "lower",
xmax_std = "upper",
xmin_ici = "lower_ici",
xmax_ici = "upper_ici",
size_prop = "pt_size",
is_summary = "is_sum",
use_log_scale = TRUE,
data_cols = c("Age" = "age_label",
"Controls" = "ncontrols",
"Cases"="ncases",
"OR" = "or"),
max_size=5,
table_header_size = 16,
table_text_size = 5,
col_nudge=c(-.085, 0,0,0),
diamond_aspect=15,
diamond_row_frac = .9)
# print plot
plot(out, widths=1, 1)
Run the code above in your browser using DataLab