# When to reach for the bootstrap:
# - the data are skewed (not normal);
# - the statistic of interest is not the mean (e.g., median, SD);
# - no particular distribution should be assumed.

# Load the example data and preview its first rows.
data(iris, package = "datasets")
head(iris)

# Example 1: CI for the median (resistant to outliers) ----
# Seed the RNG first so the bootstrap resamples are reproducible.
set.seed(123)
iris |>
  ci_boot(
    Petal.Length,
    FUN = median,
    R = 1000,
    bci.method = "perc"
  )
# Compare with a CI for the mean: the median is often more robust.
# Example 2: CI for the median by group ----
# Seed this example on its own, like every other example in this file.
# Without it the result depends on whatever RNG state the previous
# example left behind, so running Example 2 alone is not reproducible.
set.seed(234)
iris |>
  dplyr::group_by(Species) |>
  ci_boot(Petal.Length, FUN = median, R = 1000, bci.method = "perc")
# Useful when groups have different distributions
# Example 3: CI for standard deviation ----
# Question: how variable is petal length?
set.seed(456)
iris |>
  ci_boot(
    Petal.Length,
    FUN = sd,
    R = 1000,
    bci.method = "perc"
  )
# Example 4: CI for interquartile range (IQR) ----
# The IQR is the 75th percentile minus the 25th percentile.
set.seed(789)
iris |>
  ci_boot(
    Petal.Length,
    FUN = IQR,
    R = 1000,
    bci.method = "perc"
  )
# Example 5: CI for correlation coefficient (Pearson's r) ----
# Question: how related are petal length and width within each species?
set.seed(101)
iris |>
  dplyr::group_by(Species) |>
  ci_boot(
    Petal.Length,
    Petal.Width,
    FUN = cor,
    method = "pearson",
    R = 1000,
    bci.method = "perc"
  )
# A CI that excludes 0 suggests a real correlation.
# Example 6: Comparing BCa and percentile methods ----
# The RNG is re-seeded with the same value before each call so that both
# methods start from the same random state; any difference between the two
# intervals is then due to the interval method, not to different resamples.

# BCa method (often more accurate but requires more assumptions)
set.seed(111)
iris |>
  ci_boot(Petal.Length, FUN = median, R = 1000, bci.method = "bca")

# Percentile method (simpler, more robust)
set.seed(111)
iris |>
  ci_boot(Petal.Length, FUN = median, R = 1000, bci.method = "perc")
# Example 7: Effect of number of bootstrap replications ----
set.seed(222)

# Fewer replications: faster, but the interval is less stable.
iris |>
  ci_boot(
    Petal.Length,
    FUN = median,
    R = 500,
    bci.method = "perc"
  )

# More replications: slower, but the interval is more stable.
iris |>
  ci_boot(
    Petal.Length,
    FUN = median,
    R = 5000,
    bci.method = "perc"
  )
# Rule of thumb: 1000 is usually enough for teaching; use 5000-10000 for
# research.
# Example 8: Handling missing values ----
# `na.rm = TRUE` is an extra argument for `FUN` (here `median`); it is
# presumably forwarded by `ci_boot()` - confirm in the `ci_boot()` docs.
# NOTE: `iris` has no missing values, so `na.rm = TRUE` does not change the
# result here; the call only shows where the argument goes.
set.seed(333)
iris |>
  ci_boot(
    Petal.Length,
    FUN = median,
    na.rm = TRUE,
    R = 1000,
    bci.method = "bca"
  )
# Example 9: With mtcars dataset ----
# Goal: compare median MPG across cylinder counts.
set.seed(444)
data(mtcars, package = "datasets")
mtcars |>
  dplyr::group_by(cyl) |>
  ci_boot(
    mpg,
    FUN = median,
    R = 1000,
    bci.method = "perc"
  )
# Example 10: Spearman correlation ----
# Rank-based, so it is robust to outliers.
set.seed(555)
iris |>
  dplyr::group_by(Species) |>
  ci_boot(
    Petal.Length,
    Petal.Width,
    FUN = cor,
    method = "spearman",
    R = 1000,
    bci.method = "perc"
  )
# Run the code above in your browser using DataLab