qqtest: A self-calibrated quantile-quantile plot for testing distributional shape.

Description

Draws a quantile-quantile plot for visually assessing whether the data come from a test distribution that has been defined in one of many ways. The vertical axis plots the data quantiles, the horizontal axis those of the test distribution. Interval estimates and exemplars provide different comparative information to assess the evidence provided by the qqplot against the hypothesis that the data come from the test distribution (the default is the normal, or gaussian, distribution). Interval estimates provide test information related to individual quantiles; exemplars provide test information related to the shape of the quantile-quantile curve. Optionally, a visual test of significance (a lineup plot) can be displayed to provide a coarse level of significance for testing the null hypothesis that the data come from the test distribution. The default behaviour generates 1000 samples from the test distribution and overlays the plot with pointwise interval estimates for the ordered quantiles from the test distribution. A small number of independently generated exemplar quantile traces, sampled from the test distribution, are also overlaid. Various option choices are available to produce different visualizations of the uncertainty surrounding the quantile-quantile plot (see the argument descriptions and examples).

Usage

qqtest(data, dist = "gaussian", df = 1, qfunction = NULL,
  rfunction = NULL, dataTest = NULL, xAxisAsProbs = TRUE,
  yAxisAsProbs = TRUE, xAxisProbs = c(0.05, 0.25, 0.5, 0.75, 0.95),
  yAxisProbs = c(0.05, 0.25, 0.5, 0.75, 0.95), nreps = 1000,
  centralPercents = c(0.9, 0.95, 0.99), envelope = TRUE,
  drawPercentiles = TRUE, drawQuartiles = TRUE, legend = TRUE,
  nexemplars = 10, plainTrails = FALSE, alphaTrails = 0.5,
  lwdTrails = 1, lineup = FALSE, nsuspects = 20, col = NULL, h = 260,
  c = 90, l = 60, alpha = 1, cex = 1, pch = 19, xlab = NULL,
  ylab = NULL, xlim = NULL, ylim = NULL, ...)

Arguments

Value

Displays the qqplot. If lineup is TRUE, it also returns a list containing the location (TrueLoc) of the plot that corresponds to the data. The location is encoded as a string whose contents need to be evaluated to reveal it. This provides some simple obfuscation of the true location so that the visual assessment can be honest.

Examples

Run this code

# Default qqtest plot (the default test distribution is gaussian)
qqtest(precip)
#
# The same default plot, this time with a title supplied via `...`
qqtest(
  precip,
  main = "Precipitation (inches/year) in 70 US cities"
)
#
# Side-by-side comparison: qqnorm first, then the qqtest default
op <- par(mfrow = c(1, 2))
qqnorm(precip)
qqtest(precip)
par(op) # restore the previous graphical parameters
#
# Gaussian - no quartiles, no exemplars
qqtest(precip,
  nexemplars = 0, drawQuartiles = FALSE,
  main = "Precipitation (inches/year) in 70 US cities"
)
#
# Gaussian - no quartiles, no percentiles,
#            no envelope, just coloured exemplars
qqtest(precip,
  nexemplars = 20, envelope = FALSE,
  drawPercentiles = FALSE, drawQuartiles = FALSE,
  main = "Precipitation (inches/year) in 70 US cities"
)
#
# Gaussian - no quartiles, no percentiles, no envelope,
#            plain grey trails, wide trails show density
# NOTE: the formal argument is `alphaTrails`; it is spelled out in full
# here rather than relying on partial argument matching.
qqtest(precip,
  nexemplars = 20,
  lwdTrails = 10, plainTrails = TRUE, alphaTrails = 0.3,
  envelope = FALSE, drawPercentiles = FALSE, drawQuartiles = FALSE,
  main = "Precipitation (inches/year) in 70 US cities"
)
#
# Gaussian - no quartiles, no percentiles, no envelope,
#            colour trails, wide trails show density
qqtest(precip,
  nexemplars = 20, lwdTrails = 10, col = "black",
  plainTrails = FALSE, alphaTrails = 0.3,
  envelope = FALSE, drawPercentiles = FALSE, drawQuartiles = FALSE,
  main = "Precipitation (inches/year) in 70 US cities"
)
#
# Gaussian - common qqplot like qqnorm: axes are not shown as
#            probabilities and every overlay is switched off
qqtest(precip,
  xAxisAsProbs = FALSE, yAxisAsProbs = FALSE,
  nexemplars = 0, envelope = FALSE,
  drawPercentiles = FALSE, drawQuartiles = FALSE,
  col = "black", main = "Normal Q-Q Plot",
  xlab = "Theoretical Quantiles",
  ylab = "Precipitation (inches/year) in 70 US cities",
  pch = 21
)
#
# Gaussian - traditional qqplot, now displayed as a lineup
result <- qqtest(
  precip,
  lineup = TRUE,
  nexemplars = 0, nreps = 0,
  envelope = FALSE, drawPercentiles = FALSE, drawQuartiles = FALSE,
  col = "grey20", cex = 0.75, pch = 21,
  xlab = "", ylab = ""
)
# TrueLoc holds the location of the real data in the lineup, encoded as a
# string; evaluate the contents of that string to reveal the location
result$TrueLoc
#
# Lognormal
qqtest(precip,
  dist = "lognormal",
  main = "Precipitation (inches/year) in 70 US cities"
)
#
#
# Half normal ... using the penicillin data from Daniel(1959)
data(penicillin)

# Pass the numeric `value` column rather than the whole object: the code
# below treats `penicillin` as a data frame (it has a `value` column and
# row names), whereas `data` is a numeric sample in every other example.
qqtest(penicillin[["value"]], dist = "half-normal")

# Or the same again but with the significant contrasts labelled

with(penicillin, {
  qqtest(value,
    yAxisProbs = c(0.1, 0.75, 0.90, 0.95),
    dist = "half-normal",
    ylab = "Sample cumulative probability",
    xlab = "Half-normal cumulative probability"
  )
  # Derive the sample size from the data instead of hard-coding it
  n <- length(value)
  ppAdj <- (1 + ppoints(n)) / 2 # to get half-normals from normal
  x <- qnorm(ppAdj)
  valOrder <- order(value) # need data and rownames in increasing order
  y <- value[valOrder]
  tags <- rownames(penicillin)[valOrder]
  # Label only the largest effects: the last four positions in sorted order
  selPoints <- tail(seq_len(n), 4)
  text(x[selPoints], y[selPoints],
    tags[selPoints],
    pos = 2, cex = 0.75
  )
})
#
# Student t test distribution on 3 degrees of freedom
qqtest(
  precip,
  dist = "student",
  df = 3,
  main = "Precipitation (inches/year) in 70 US cities"
)
#
# Chi-squared test distribution on 3 degrees of freedom
qqtest(
  precip,
  dist = "chi-squared",
  df = 3,
  main = "Precipitation (inches/year) in 70 US cities"
)
#
# Test distribution given by a user supplied quantile function (qfunction)
# and random generator (rfunction) -- here a beta(2, 2) distribution
qqtest(
  precip,
  qfunction = function(p) qbeta(p, 2, 2),
  rfunction = function(n) rbeta(n, 2, 2),
  main = "Precipitation (inches/year) in 70 US cities"
)
#
#
# Only the quantile function supplied -- again comparing to beta(2, 2)
qqtest(
  precip,
  qfunction = function(p) qbeta(p, 2, 2),
  main = "Precipitation (inches/year) in 70 US cities"
)
#
# Comparing data samples: the test distribution is given by a second sample
#
# Do beaver2's temperatures look like they could have come from a
# distribution shaped like that of beaver1's temperatures?
#
qqtest(
  beaver2[["temp"]],
  dataTest = beaver1[["temp"]],
  ylab = "Beaver 2", xlab = "Beaver 1",
  main = "Beaver body temperatures"
)
#
#
# For the famous iris data, does the sample of iris versicolor
# appear to have the same (marginal) distributional shape
# as does that of iris virginica (to which it is more closely related)?
#
op <- par(mfrow = c(2, 2))
with(iris, {
  isVersicolor <- Species == "versicolor"
  isVirginica <- Species == "virginica"
  # One panel per measurement: versicolor is the data, virginica the test sample
  compareSpecies <- function(measure, title) {
    qqtest(measure[isVersicolor],
      dataTest = measure[isVirginica],
      ylab = "versicolor", xlab = "virginica",
      main = title
    )
  }
  compareSpecies(Sepal.Length, "Sepal length")
  compareSpecies(Sepal.Width, "Sepal width")
  compareSpecies(Petal.Length, "Petal length")
  compareSpecies(Petal.Width, "Petal width")
})
par(op) # restore the previous graphical parameters

Run the code above in your browser using DataLab