######***###### UNIVARIATE DATA ######***######
### Example 1: univariate statistic (median)
# generate 100 standard normal observations
set.seed(1)
n <- 100
x <- rnorm(n)
# nonparametric bootstrap
npbs <- np.boot(x = x, statistic = median)
npbs
### Example 2: multivariate statistic (quartiles)
# generate 100 standard normal observations
set.seed(1)
n <- 100
x <- rnorm(n)
# nonparametric bootstrap
npbs <- np.boot(x = x, statistic = quantile,
probs = c(0.25, 0.5, 0.75))
npbs
if (FALSE) {
######***###### MULTIVARIATE DATA ######***######
### Example 1: univariate statistic (correlation)
## Generate bivariate data with population var = 1 and cor = 0.5
# correlation matrix square root (with rho = 0.5)
rho <- 0.5
val <- c(sqrt(1 + rho), sqrt(1 - rho))
corsqrt <- matrix(c(val[1], -val[2], val), 2, 2) / sqrt(2)
# generate 100 bivariate observations (with rho = 0.5)
n <- 100
set.seed(1)
data <- cbind(rnorm(n), rnorm(n)) %*% corsqrt
## Method A: x = data frame; statistic of x
# define statistic function
statfun <- function(data) cor(data[,1], data[,2])
# nonparametric bootstrap
set.seed(2)
npbs <- np.boot(x = data, statistic = statfun)
npbs
## Method B: x = 1:n; statistic of data[ix,] with data passed via ...
# define statistic function
statfun <- function(ix, data) cor(data[ix,1], data[ix,2])
# nonparametric bootstrap
set.seed(2)
npbs <- np.boot(x = 1:n, statistic = statfun, data = data)
npbs
### Example 2: multivariate statistic (variances and covariance)
## Generate bivariate data with population var = 1 and cor = 0.5
# correlation matrix square root (with rho = 0.5)
rho <- 0.5
val <- c(sqrt(1 + rho), sqrt(1 - rho))
corsqrt <- matrix(c(val[1], -val[2], val), 2, 2) / sqrt(2)
# generate 100 bivariate observations (with rho = 0.5)
n <- 100
set.seed(1)
data <- cbind(rnorm(n), rnorm(n)) %*% corsqrt
## Method A: x = data frame; statistic of x
# define statistic function
statfun <- function(data) {
cmat <- cov(data)
ltri <- lower.tri(cmat, diag = TRUE)
cvec <- cmat[ltri]
names(cvec) <- c("var(x1)", "cov(x1,x2)", "var(x2)")
cvec
}
# nonparametric bootstrap
set.seed(2)
npbs <- np.boot(x = data, statistic = statfun)
npbs
## Method B: x = 1:n; statistic of data[ix,] with data passed via ...
# define statistic function
statfun <- function(ix, data) {
cmat <- cov(data[ix,])
ltri <- lower.tri(cmat, diag = TRUE)
cvec <- cmat[ltri]
names(cvec) <- c("var(x1)", "cov(x1,x2)", "var(x2)")
cvec
}
# nonparametric bootstrap
set.seed(2)
npbs <- np.boot(x = 1:n, statistic = statfun, data = data)
npbs
######***###### REGRESSION ######***######
### Example 1: bootstrap cases
## Generate bivariate data with E(y|x) = 1 + 2 * x
# generate 100 observations
n <- 100
set.seed(1)
x <- seq(0, 1, length.out = n)
y <- 1 + 2 * x + rnorm(n)
data <- data.frame(x = x, y = y)
## Method A: x = data frame; statistic of x
# define statistic function
statfun <- function(data) {
lm(y ~ x, data = data)$coefficients
}
# nonparametric bootstrap
set.seed(2)
npbs <- np.boot(x = data, statistic = statfun)
npbs
## Method B: x = 1:n; statistic of data[ix,] with data passed via ...
# define statistic function
statfun <- function(ix, data) {
lm(y ~ x, data = data[ix,])$coefficients
}
# nonparametric bootstrap
set.seed(2)
npbs <- np.boot(x = 1:n, statistic = statfun, data = data)
npbs
### Example 2: bootstrap residuals
# generate 100 observations
n <- 100
set.seed(1)
x <- seq(0, 1, length.out = n)
y <- 1 + 2 * x + rnorm(n)
data <- data.frame(x = x, y = y)
# prepare data
mod0 <- lm(y ~ x, data = data)
df <- data.frame(x = x, y = y, fit = mod0$fitted.values, resid = mod0$residuals)
# define statistic function
statfun <- function(ix, data) {
data$y <- data$fit + data$resid[ix]
lm(y ~ x, data = data)$coefficients
}
# define jackknife function
jackfun <- function(ix, data){
lm(y ~ x, data = data[ix,])$coefficients
}
# nonparametric bootstrap
npbs <- np.boot(x = 1:n, statistic = statfun, data = df, jackknife = jackfun)
npbs
}
Run the code above in your browser using DataLab