Learn R Programming

RVCompare (version 0.1.8)

get_Y_AB_bounds_bootstrap: Estimate Y_A and Y_B bounds with bootstrap

Description

Estimate the confidence intervals for the cumulative distributions of Y_A and Y_B using bootstrap. Much slower than the Dvoretzky–Kiefer–Wolfowitz approach.

Usage

get_Y_AB_bounds_bootstrap(
  X_A_observed,
  X_B_observed,
  alpha = 0.05,
  EPSILON = 1e-20,
  nOfBootstrapSamples = 1000,
  ignoreMinimumLengthCheck = FALSE
)

Value

Returns a list with the following fields:

- p: values in the interval [0,1] that represent the nOfEstimationPoints points in which the densities are estimated. Useful for plotting.

- Y_A_cumulative_estimation: an array with the estimated cumulative diustribution function of Y_A from 0 to p[[i]].

- Y_A_cumulative_upper: an array with the upper bounds of confidence 1 - alpha of the cumulative density of Y_A

- Y_A_cumulative_lower: an array with the lower bounds of confidence 1 - alpha of the cumulative density of Y_A

- Y_B_cumulative_estimation: The same as Y_A_cumulative_estimation for Y_B.

- Y_B_cumulative_upper: The same as Y_A_cumulative_upper for Y_B

- Y_B_cumulative_lower: The same as Y_A_cumulative_lower for Y_B

- diff_estimation: Y_A_cumulative_estimation - Y_B_cumulative_estimation

- diff_upper: an array with the upper bounds of confidence 1 - alpha of the difference between the cumulative distributions

- diff_lower: an array with the lower bounds of confidence 1 - alpha of the difference between the cumulative distributions

Arguments

X_A_observed

array of the observed samples (real values) of X_A.

X_B_observed

array of the observed samples (real values) of X_B, it needs to have the same length as X_A.

alpha

(optional, default value 0.05) the error of the confidence interval. If alpha = 0.05 then we have 95 percent confidence interval.

EPSILON

(optional, default value 1e-20) minimum difference between two values to be considered different.

nOfBootstrapSamples

(optional, default value 1e3) how many bootstrap samples to average. Increases computation time.

ignoreMinimumLengthCheck

(optional, default value FALSE) wether to check for a minimum length in X_A and X_B.

Examples

Run this code
library(ggplot2)

### Example 1 ###
X_A_observed <- rnorm(100, mean = 2, sd = 1)
X_B_observed <- rnorm(100, mean = 2.1, sd = 0.5)
# \donttest{
 res <- get_Y_AB_bounds_bootstrap(X_A_observed, X_B_observed)
# }
# \dontshow{
# easier on computation for testing.
res <- get_Y_AB_bounds_bootstrap(X_A_observed, X_B_observed, nOfBootstrapSamples=1e2)
# }
fig1 = plot_Y_AB(res, plotDifference=FALSE)+ ggplot2::ggtitle("Example 1")
print(fig1)


# \donttest{
### Example 2 ###
# Comparing the estimations with the actual distributions for two normal distributions.
###################################
## sample size = 100 ##############
###################################
X_A_observed <- rnorm(100,mean = 1, sd = 1)
X_B_observed <- rnorm(100,mean = 1.3, sd = 0.5)
res <- get_Y_AB_bounds_bootstrap(X_A_observed, X_B_observed)

X_A_observed_large_sample <- sort(rnorm(1e4, mean = 1, sd = 1))
X_B_observed_large_sample <- sort(rnorm(1e4, mean = 1.3, sd = 0.5))
actualDistributions <- getEmpiricalCumulativeDistributions(
        X_A_observed_large_sample,
        X_B_observed_large_sample,
        nOfEstimationPoints=1e4,
        EPSILON=1e-20)


actualDistributions$Y_A_cumulative_estimation <- lm(Y_A_cumulative_estimation ~
        p + I(p^2) + I(p^3)+ I(p^4)+ I(p^5)+ I(p^6)+I(p^7)+ I(p^8),
        data = actualDistributions)$fitted.values
actualDistributions$Y_B_cumulative_estimation <- lm(Y_B_cumulative_estimation ~
        p + I(p^2) + I(p^3)+ I(p^4)+ I(p^5)+ I(p^6)+I(p^7)+ I(p^8),
        data = actualDistributions)$fitted.values

fig = plot_Y_AB(res, plotDifference=FALSE) +

geom_line(data=as.data.frame(actualDistributions),
aes(x=p, y=Y_A_cumulative_estimation, colour = "Actual Y_A", linetype="Actual Y_A")) +

geom_line(data=as.data.frame(actualDistributions),
aes(x=p, y=Y_B_cumulative_estimation, colour = "Actual Y_B", linetype="Actual Y_B")) +

scale_colour_manual("", breaks = c("X_A", "X_B","Actual Y_A", "Actual Y_B"),
values = c("X_A"="#00BFC4", "X_B"="#F8766D", "Actual Y_A"="#0000FF", "Actual Y_B"="#FF0000"))+

scale_linetype_manual("", breaks = c("X_A", "X_B","Actual Y_A", "Actual Y_B"),
values = c("X_A"="solid", "X_B"="dashed", "Actual Y_A"="solid", "Actual Y_B"="solid"))+

ggtitle("100 samples used in the estimation")
print(fig)

###################################
## sample size = 300 ##############
###################################
X_A_observed <- rnorm(300,mean = 1, sd = 1)
X_B_observed <- rnorm(300,mean = 1.3, sd = 0.5)
res <- get_Y_AB_bounds_bootstrap(X_A_observed, X_B_observed)

X_A_observed_large_sample <- sort(rnorm(1e4, mean = 1, sd = 1))
X_B_observed_large_sample <- sort(rnorm(1e4, mean = 1.3, sd = 0.5))
actualDistributions <- getEmpiricalCumulativeDistributions(
        X_A_observed_large_sample,
        X_B_observed_large_sample,
        nOfEstimationPoints=1e4,
        EPSILON=1e-20)


actualDistributions$Y_A_cumulative_estimation <- lm(Y_A_cumulative_estimation ~
        p + I(p^2) + I(p^3)+ I(p^4)+ I(p^5)+ I(p^6)+I(p^7)+ I(p^8),
        data = actualDistributions)$fitted.values

actualDistributions$Y_B_cumulative_estimation <- lm(Y_B_cumulative_estimation ~
       p + I(p^2) + I(p^3)+ I(p^4)+ I(p^5)+ I(p^6)+I(p^7)+ I(p^8),
       data = actualDistributions)$fitted.values

fig = plot_Y_AB(res, plotDifference=FALSE) +

geom_line(data=as.data.frame(actualDistributions),
aes(x=p, y=Y_A_cumulative_estimation, colour = "Actual Y_A", linetype="Actual Y_A")) +

geom_line(data=as.data.frame(actualDistributions),
aes(x=p, y=Y_B_cumulative_estimation, colour = "Actual Y_B", linetype="Actual Y_B")) +

scale_colour_manual("", breaks = c("X_A", "X_B","Actual Y_A", "Actual Y_B"),
values = c("X_A"="#00BFC4", "X_B"="#F8766D", "Actual Y_A"="#0000FF", "Actual Y_B"="#FF0000"))+

scale_linetype_manual("", breaks = c("X_A", "X_B","Actual Y_A", "Actual Y_B"),
values = c("X_A"="solid", "X_B"="dashed", "Actual Y_A"="solid", "Actual Y_B"="solid"))+

ggtitle("300 samples used in the estimation")
print(fig)
# }

Run the code above in your browser using DataLab