# NOT RUN {
  
# build a simulation-based null distribution for the number of
# hours worked per week (`hours`)
null_dist <- specify(gss, response = hours) %>%
  # null hypothesis: the mean is 40
  hypothesize(null = "point", mu = 40) %>%
  # draw 1000 bootstrap replicates
  generate(reps = 1000, type = "bootstrap") %>%
  # turn the replicates into a distribution of t statistics
  calculate(stat = "t")
  
# the null distribution can be plotted directly with visualize()
visualize(null_dist)
# we can add layers to the plot as in ggplot, as well...
# first compute the observed statistic: the t statistic for the
# number of hours worked per week under the same null (mu = 40)
point_estimate <- specify(gss, response = hours) %>%
  hypothesize(null = "point", mu = 40) %>%
  calculate(stat = "t")
  
# build a confidence interval around the point estimate:
# 95% confidence level, standard error method
ci <- get_confidence_interval(
  null_dist,
  point_estimate = point_estimate,
  level = 0.95,
  type = "se"
)
  
# shade the p-value region on the plot (two-sided, relative to
# the observed statistic)
visualize(null_dist) +
  shade_p_value(obs_stat = point_estimate, direction = "two-sided")

# ...or shade the region spanned by the confidence interval
visualize(null_dist) +
  shade_confidence_interval(ci)
  
# a theoretical null distribution needs no generate() step:
# calculate the statistic directly, then pass
# `method = "theoretical"` to `visualize()`
null_dist_theoretical <- specify(gss, response = hours) %>%
  hypothesize(null = "point", mu = 40) %>%
  calculate(stat = "t")

null_dist_theoretical %>%
  visualize(method = "theoretical")
# to overlay the theory-based and simulation-based null
# distributions, start from the simulation-based one and pass
# `method = "both"` to `visualize()`
null_dist %>%
  visualize(method = "both")
# For a more in-depth explanation of how to use the infer package,
# open the package vignette (the `vignette("infer")` call below)
# }
# NOT RUN {
vignette("infer")
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab