# NOT RUN {
  # The last part of the EXAMPLES section in the help file for 
  # cdfCompareCensored compares the empirical distribution of copper and zinc 
  # between two sites:  Alluvial Fan and Basin-Trough (Millard and Deverel, 1988).  
  # The data for this example are stored in Millard.Deverel.88.df.  Perform a 
  # test to determine if there is a significant difference between these two 
  # sites (perform a separate test for the copper and the zinc).
  Millard.Deverel.88.df
  #    Cu.orig Cu Cu.censored Zn.orig  Zn Zn.censored         Zone Location
  #1       < 1  1        TRUE     <10  10        TRUE Alluvial.Fan        1
  #2       < 1  1        TRUE       9   9       FALSE Alluvial.Fan        2
  #3         3  3       FALSE      NA  NA       FALSE Alluvial.Fan        3
  #.
  #.
  #.
  #116       5  5       FALSE      50  50       FALSE Basin.Trough       48
  #117      14 14       FALSE      90  90       FALSE Basin.Trough       49
  #118       4  4       FALSE      20  20       FALSE Basin.Trough       50
  #------------------------------
  # First look at the copper data 
  #------------------------------
  Cu.AF <- with(Millard.Deverel.88.df, 
    Cu[Zone == "Alluvial.Fan"])
  Cu.AF.cen <- with(Millard.Deverel.88.df, 
    Cu.censored[Zone == "Alluvial.Fan"]) 
  Cu.BT <- with(Millard.Deverel.88.df, 
    Cu[Zone == "Basin.Trough"])
  Cu.BT.cen <- with(Millard.Deverel.88.df, 
    Cu.censored[Zone == "Basin.Trough"])
  # Note the large number of tied observations in the copper data
  #-------------------------------------------------------------- 
  table(Cu.AF[!Cu.AF.cen]) 
  # 1  2  3  4  5  7  8  9 10 11 12 16 20 
  # 5 21  6  3  3  3  1  1  1  1  1  1  1
  table(Cu.BT[!Cu.BT.cen]) 
  # 1  2  3  4  5  6  8  9 12 14 15 17 23 
  # 7  4  8  5  1  2  1  2  1  1  1  1  1
  # Logrank test with hypergeometric variance:
  #-------------------------------------------
  twoSampleLinearRankTestCensored(x = Cu.AF, x.censored = Cu.AF.cen, 
    y = Cu.BT, y.censored = Cu.BT.cen)
  #Results of Hypothesis Test
  #Based on Censored Data
  #--------------------------
  #
  #Null Hypothesis:                 Fy(t) = Fx(t)
  #
  #Alternative Hypothesis:          Fy(t) != Fx(t) for at least one t
  #
  #Test Name:                       Two-Sample Linear Rank Test:
  #                                 Logrank Test
  #                                 with Hypergeometric Variance
  #
  #Censoring Side:                  left
  #
  #Censoring Level(s):              x =  1  5 10 20 
  #                                 y =  1  2  5 10 15 
  #
  #Data:                            x = Cu.AF
  #                                 y = Cu.BT
  #
  #Censoring Variable:              x = Cu.AF.cen
  #                                 y = Cu.BT.cen
  #
  #Number NA/NaN/Inf's Removed:     x = 3
  #                                 y = 1
  #
  #Sample Sizes:                    nx = 65
  #                                 ny = 49
  #
  #Percent Censored:                x = 26.2%
  #                                 y = 28.6%
  #
  #Test Statistics:                 nu     = -1.8791355
  #                                 var.nu = 13.6533490
  #                                 z      = -0.5085557
  #
  #P-value:                         0.6110637
  # Compare the p-values produced by the Normal Scores 2 test
  # using the hypergeomtric vs. permutation variance estimates.
  # Note how much larger the estimated variance is based on
  # the permuation variance estimate:
  #-----------------------------------------------------------
  twoSampleLinearRankTestCensored(x = Cu.AF, x.censored = Cu.AF.cen, 
    y = Cu.BT, y.censored = Cu.BT.cen, 
    test = "normal.scores.2")$p.value
  #[1] 0.2008913
  twoSampleLinearRankTestCensored(x = Cu.AF, x.censored = Cu.AF.cen, 
    y = Cu.BT, y.censored = Cu.BT.cen, 
    test = "normal.scores.2", variance = "permutation")$p.value
  #[1] [1] 0.657001
  #--------------------------
  # Now look at the zinc data
  #--------------------------
  Zn.AF <- with(Millard.Deverel.88.df, 
    Zn[Zone == "Alluvial.Fan"])
  Zn.AF.cen <- with(Millard.Deverel.88.df, 
    Zn.censored[Zone == "Alluvial.Fan"])
  Zn.BT <- with(Millard.Deverel.88.df, 
    Zn[Zone == "Basin.Trough"])
  Zn.BT.cen <- with(Millard.Deverel.88.df, 
    Zn.censored[Zone == "Basin.Trough"])
  # Note the moderate number of tied observations in the zinc data, 
  # and the "outlier" of 620 in the Alluvial Fan data.
  #--------------------------------------------------------------- 
  table(Zn.AF[!Zn.AF.cen]) 
  #  5   7   8   9  10  11  12  17  18  19  20  23  29  30  33  40  50 620 
  #  1   1   1   1  20   2   1   1   1   1  14   1   1   1   1   1   1   1 
  table(Zn.BT[!Zn.BT.cen]) 
  # 3  4  5  6  8 10 11 12 13 14 15 17 20 25 30 40 50 60 70 90 
  # 2  2  2  1  1  5  1  2  1  1  1  2 11  1  4  3  2  2  1  1
  # Logrank test with hypergeometric variance:
  #-------------------------------------------
  twoSampleLinearRankTestCensored(x = Zn.AF, x.censored = Zn.AF.cen, 
    y = Zn.BT, y.censored = Zn.BT.cen)
  #Results of Hypothesis Test
  #Based on Censored Data
  #--------------------------
  #
  #Null Hypothesis:                 Fy(t) = Fx(t)
  #
  #Alternative Hypothesis:          Fy(t) != Fx(t) for at least one t
  #
  #Test Name:                       Two-Sample Linear Rank Test:
  #                                 Logrank Test
  #                                 with Hypergeometric Variance
  #
  #Censoring Side:                  left
  #
  #Censoring Level(s):              x =  3 10 
  #                                 y =  3 10 
  #
  #Data:                            x = Zn.AF
  #                                 y = Zn.BT
  #
  #Censoring Variable:              x = Zn.AF.cen
  #                                 y = Zn.BT.cen
  #
  #Number NA/NaN/Inf's Removed:     x = 1
  #                                 y = 0
  #
  #Sample Sizes:                    nx = 67
  #                                 ny = 50
  #
  #Percent Censored:                x = 23.9%
  #                                 y =  8.0%
  #
  #Test Statistics:                 nu     = -6.992999
  #                                 var.nu = 17.203227
  #                                 z      = -1.686004
  #
  #P-value:                         0.09179512
  #----------
  # Compare the p-values produced by the Logrank, Gehan, Peto-Peto, 
  # and Tarone-Ware tests using the hypergeometric variance.
  #-----------------------------------------------------------
  twoSampleLinearRankTestCensored(x = Zn.AF, x.censored = Zn.AF.cen, 
    y = Zn.BT, y.censored = Zn.BT.cen, 
    test = "logrank")$p.value
  #[1] 0.09179512
  twoSampleLinearRankTestCensored(x = Zn.AF, x.censored = Zn.AF.cen, 
    y = Zn.BT, y.censored = Zn.BT.cen, 
    test = "gehan")$p.value
  #[1] 0.0185445
  twoSampleLinearRankTestCensored(x = Zn.AF, x.censored = Zn.AF.cen, 
    y = Zn.BT, y.censored = Zn.BT.cen, 
    test = "peto-peto")$p.value
  #[1] 0.009704529
  twoSampleLinearRankTestCensored(x = Zn.AF, x.censored = Zn.AF.cen, 
    y = Zn.BT, y.censored = Zn.BT.cen, 
    test = "tarone-ware")$p.value
  #[1] 0.03457803
  #----------
  # Clean up
  #---------
  rm(Cu.AF, Cu.AF.cen, Cu.BT, Cu.BT.cen, 
     Zn.AF, Zn.AF.cen, Zn.BT, Zn.BT.cen)
  #==========
  # Example 16.5 on pages 16-22 to 16.23 of USEPA (2009) shows how to perform 
  # the Tarone-Ware two sample linear rank test based on censored data using 
  # observations on tetrachloroethylene (PCE) (ppb) collected at one background 
  # and one compliance well.  The data for this example are stored in 
  # EPA.09.Ex.16.5.PCE.df.
  EPA.09.Ex.16.5.PCE.df
  #    Well.type PCE.Orig.ppb PCE.ppb Censored
  #1  Background           <4     4.0     TRUE
  #2  Background          1.5     1.5    FALSE
  #3  Background           <2     2.0     TRUE
  #4  Background          8.7     8.7    FALSE
  #5  Background          5.1     5.1    FALSE
  #6  Background           <5     5.0     TRUE
  #7  Compliance          6.4     6.4    FALSE
  #8  Compliance         10.9    10.9    FALSE
  #9  Compliance            7     7.0    FALSE
  #10 Compliance         14.3    14.3    FALSE
  #11 Compliance          1.9     1.9    FALSE
  #12 Compliance           10    10.0    FALSE
  #13 Compliance          6.8     6.8    FALSE
  #14 Compliance           <5     5.0     TRUE
  with(EPA.09.Ex.16.5.PCE.df, 
    twoSampleLinearRankTestCensored(
      x = PCE.ppb[Well.type == "Compliance"], 
      x.censored = Censored[Well.type == "Compliance"], 
      y = PCE.ppb[Well.type == "Background"], 
      y.censored = Censored[Well.type == "Background"], 
      test = "tarone-ware", alternative = "greater"))
  #Results of Hypothesis Test
  #Based on Censored Data
  #--------------------------
  #
  #Null Hypothesis:                 Fy(t) = Fx(t)
  #
  #Alternative Hypothesis:          Fy(t) > Fx(t) for at least one t
  #
  #Test Name:                       Two-Sample Linear Rank Test:
  #                                 Tarone-Ware Test
  #                                 with Hypergeometric Variance
  #
  #Censoring Side:                  left
  #
  #Censoring Level(s):              x = 5 
  #                                 y = 2 4 5 
  #
  #Data:                            x = PCE.ppb[Well.type == "Compliance"]
  #                                 y = PCE.ppb[Well.type == "Background"]
  #
  #Censoring Variable:              x = Censored[Well.type == "Compliance"]
  #                                 y = Censored[Well.type == "Background"]
  #
  #Sample Sizes:                    nx = 8
  #                                 ny = 6
  #
  #Percent Censored:                x = 12.5%
  #                                 y = 50.0%
  #
  #Test Statistics:                 nu     =  8.458912
  #                                 var.nu = 20.912407
  #                                 z      =  1.849748
  #
  #P-value:                         0.03217495
  # Compare the p-value for the Tarone-Ware test with p-values from 
  # the logrank, Gehan, and Peto-Peto tests
  #-----------------------------------------------------------------
  with(EPA.09.Ex.16.5.PCE.df, 
    twoSampleLinearRankTestCensored(
      x = PCE.ppb[Well.type == "Compliance"], 
      x.censored = Censored[Well.type == "Compliance"], 
      y = PCE.ppb[Well.type == "Background"], 
      y.censored = Censored[Well.type == "Background"], 
      test = "tarone-ware", alternative = "greater"))$p.value
  #[1] 0.03217495
  with(EPA.09.Ex.16.5.PCE.df, 
    twoSampleLinearRankTestCensored(
      x = PCE.ppb[Well.type == "Compliance"], 
      x.censored = Censored[Well.type == "Compliance"], 
      y = PCE.ppb[Well.type == "Background"], 
      y.censored = Censored[Well.type == "Background"], 
      test = "logrank", alternative = "greater"))$p.value
  #[1] 0.02752793
  with(EPA.09.Ex.16.5.PCE.df, 
    twoSampleLinearRankTestCensored(
      x = PCE.ppb[Well.type == "Compliance"], 
      x.censored = Censored[Well.type == "Compliance"], 
      y = PCE.ppb[Well.type == "Background"], 
      y.censored = Censored[Well.type == "Background"], 
      test = "gehan", alternative = "greater"))$p.value
  #[1] 0.03656224
  with(EPA.09.Ex.16.5.PCE.df, 
    twoSampleLinearRankTestCensored(
      x = PCE.ppb[Well.type == "Compliance"], 
      x.censored = Censored[Well.type == "Compliance"], 
      y = PCE.ppb[Well.type == "Background"], 
      y.censored = Censored[Well.type == "Background"], 
      test = "peto-peto", alternative = "greater"))$p.value
  #[1] 0.03127296
  #==========
  # The results shown in Table 9 of Millard and Deverel (1988, p.2097) are correct 
  # only for the hypergeometric variance and the modified MWW tests; the other 
  # results were computed as if there were no ties.  Re-compute the correct 
  # z-statistics and p-values for the copper and zinc data. 
  test <- c(rep(c("gehan", "logrank", "peto-peto"), 2), "peto-peto", 
    "normal.scores.1", "normal.scores.2", "normal.scores.2")
  variance <- c(rep("permutation", 3), rep("hypergeometric", 3), 
    "asymptotic", rep("permutation", 2), "hypergeometric")
  stats.mat <- matrix(as.numeric(NA), ncol = 4, nrow = 10)
  for(i in 1:10) {
    dum.list <- with(Millard.Deverel.88.df, 
        twoSampleLinearRankTestCensored(
          x = Cu[Zone == "Basin.Trough"], 
          x.censored = Cu.censored[Zone == "Basin.Trough"],
          y = Cu[Zone == "Alluvial.Fan"], 
          y.censored = Cu.censored[Zone == "Alluvial.Fan"], 
          test = test[i], variance = variance[i]))
    stats.mat[i, 1:2] <- c(dum.list$statistic["z"], dum.list$p.value)
    dum.list <- with(Millard.Deverel.88.df, 
        twoSampleLinearRankTestCensored(
          x = Zn[Zone == "Basin.Trough"], 
          x.censored = Zn.censored[Zone == "Basin.Trough"],
          y = Zn[Zone == "Alluvial.Fan"], 
          y.censored = Zn.censored[Zone == "Alluvial.Fan"], 
          test = test[i], variance = variance[i]))
    stats.mat[i, 3:4] <- c(dum.list$statistic["z"], dum.list$p.value)
  }
  dimnames(stats.mat) <- list(paste(test, variance, sep = "."), 
    c("Cu.Z", "Cu.p.value", "Zn.Z", "Zn.p.value"))
  round(stats.mat, 2)
  #                               Cu.Z Cu.p.value Zn.Z Zn.p.value
  #gehan.permutation              0.87       0.38 2.49       0.01
  #logrank.permutation            0.79       0.43 1.75       0.08
  #peto-peto.permutation          0.92       0.36 2.42       0.02
  #gehan.hypergeometric           0.71       0.48 2.35       0.02
  #logrank.hypergeometric         0.51       0.61 1.69       0.09
  #peto-peto.hypergeometric       1.03       0.30 2.59       0.01
  #peto-peto.asymptotic           0.90       0.37 2.37       0.02
  #normal.scores.1.permutation    0.94       0.34 2.37       0.02
  #normal.scores.2.permutation    0.98       0.33 2.39       0.02
  #normal.scores.2.hypergeometric 1.28       0.20 2.48       0.01
  #----------
  # Clean up
  #---------
  rm(test, variance, stats.mat, i, dum.list)
# }
Run the code above in your browser using DataLab