# Example script exercising hist() on the built-in `islands` data set and on
# simulated chi-squared data. Each section below corresponds to one example
# group of the original documentation page.

# ---- Basic histograms in a 2 x 2 layout ------------------------------------
# Keep the previous graphical parameters so they can be restored afterwards.
op <- par(mfrow = c(2, 2))
hist(islands)
utils::str(hist(islands, col = "gray", labels = TRUE))

hist(sqrt(islands), breaks = 12, col = "lightblue", border = "pink")

## -- For non-equidistant breaks, counts should NOT be graphed unscaled:
r <- hist(
  sqrt(islands),
  breaks = c(4 * 0:5, 10 * 3:5, 70, 100, 140),
  col = "blue1"
)
# Label each bar with its count, placed just above the density height.
text(r$mids, r$density, r$counts, adj = c(0.5, -0.5), col = "blue3")
# Totals of the `counts` and `density` components (elements 2 and 3 of `r`);
# vapply() pins the result to one number per component.
vapply(r[2:3], sum, numeric(1))
sum(r$density * diff(r$breaks)) # == 1
lines(r, lty = 3, border = "purple") # -> lines.histogram(*)
par(op)

# ---- Inspecting the returned object without plotting -----------------------
library(utils) # for str
str(hist(islands, breaks = 12, plot = FALSE)) #-> 10 (~= 12) breaks
str(hist(islands, breaks = c(12, 20, 36, 80, 200, 1000, 17000), plot = FALSE))

hist(
  islands,
  breaks = c(12, 20, 36, 80, 200, 1000, 17000),
  freq = TRUE,
  main = "WRONG histogram"
) # and warning

# ---- Extreme outliers (slow: the source notes this costs about 2 seconds) --
## Extreme outliers; the "FD" rule would take very large number of 'breaks':
XXL <- c(1:9, c(-1, 1) * 1e300)
hh <- hist(XXL, "FD") # did not work in R <= 3.4.1; now gives warning
## pretty() determines how many counts are used (platform dependently!):
length(hh$breaks) ## typically 1 million -- though 1e6 was "a suggestion only"

# ---- Simulated chi-squared sample ------------------------------------------
library(stats)
set.seed(14)
x <- rchisq(100, df = 4)

# ---- Comparing the sample with the model distribution ----------------------
## Comparing data with a model distribution should be done with qqplot()!
qqplot(x, qchisq(ppoints(x), df = 4))
abline(0, 1, col = 2, lty = 2)

## if you really insist on using hist() ... :
hist(x, freq = FALSE, ylim = c(0, 0.2))
curve(dchisq(x, df = 4), col = 2, lty = 2, lwd = 2, add = TRUE)
# Run the code above in your browser using DataCamp Workspace