robest: Optimally robust estimation

Description

Function to compute optimally robust estimates for L2-differentiable parametric families via k-step construction.

Usage

robest(x, L2Fam,  fsCor = 1, risk = asMSE(), steps = 1L,
        verbose = NULL, OptOrIter = "iterate", nbCtrl = gennbCtrl(),
        startCtrl = genstartCtrl(), startICCtrl = genstartICCtrl(),
        kStepCtrl = genkStepCtrl(), na.rm = TRUE, ..., debug = FALSE,
        withTimings = FALSE)

Arguments

sample

L2Fam

object of class "L2ParamFamily"

fsCor

positive real: factor used to correct the neighborhood radius; see details.

risk

object of class "RiskType"

steps

positive integer: number of steps used for k-steps construction

verbose

logical: if TRUE, some messages are printed

OptOrIter

character; which method to be used for determining Lagrange multipliers A and a: if (partially) matched to "optimize", getLagrangeMultByOptim is used; otherwise: by default, or if matched to "iterate" or to "doubleiterate", getLagrangeMultByIter is used. More specifically, when using getLagrangeMultByIter, and if argument risk is of class "asGRisk", by default and if matched to "iterate" we use only one (inner) iteration, if matched to "doubleiterate" we use up to Maxiter (inner) iterations.

nbCtrl

a list specifying input concerning the used neighborhood; to be generated by a respective call to gennbCtrl.

startCtrl

a list specifying input concerning the used starting estimator; to be generated by a respective call to genstartCtrl.

startICCtrl

a list specifying input concerning the call to getStartIC which returns the starting influence curve; to be generated by a respective call to genstartICCtrl.

kStepCtrl

a list specifying input concerning the used variant of a kstepEstimator; to be generated by a respective call to genkStepCtrl.

na.rm

logical: if TRUE, the estimator is evaluated at complete.cases(x).

…

further arguments

debug

logical: if TRUE, only the respective calls within the function are generated for debugging purposes.

withTimings

logical: if TRUE, separate (and aggregate) timings for the three steps evaluating the starting value, finding the starting influence curve, and evaluating the k-step estimator is issued.

Value

Object of class "kStepEstimate". In addition, it has an attribute "timings" where computation time is stored.

Details

A new, more structured interface to the former function roptest. For details, see this function.

Examples

Run this code

# NOT RUN {
## Don't test to reduce check time on CRAN
# }
# NOT RUN {
#############################
## 1. Binomial data
#############################
## generate a sample of contaminated data
set.seed(123)
ind <- rbinom(100, size=1, prob=0.05)
x <- rbinom(100, size=25, prob=(1-ind)*0.25 + ind*0.9)

## Family
BF <- BinomFamily(size = 25)
## ML-estimate
MLest <- MLEstimator(x, BF)
estimate(MLest)
confint(MLest)

## compute optimally robust estimator (known contamination)
nb <- gennbCtrl(eps=0.05)
robest1 <- robest(x, BF, nbCtrl = nb, steps = 3)
estimate(robest1)

confint(robest1, method = symmetricBias())
## neglecting bias
confint(robest1)
plot(pIC(robest1))
tmp <- qqplot(x, robest1, cex.pch=1.5, exp.cex2.pch = -.25,
              exp.fadcol.pch = .55, jit.fac=.9)

## compute optimally robust estimator (unknown contamination)
nb2 <- gennbCtrl(eps.lower = 0, eps.upper = 0.2)
robest2 <- robest(x, BF, nbCtrl = nb2, steps = 3)
estimate(robest2)
confint(robest2, method = symmetricBias())
plot(pIC(robest2))

## total variation neighborhoods (known deviation)
nb3 <- gennbCtrl(eps = 0.025, neighbor = TotalVarNeighborhood())
robest3 <- robest(x, BF, nbCtrl = nb3, steps = 3)
estimate(robest3)
confint(robest3, method = symmetricBias())
plot(pIC(robest3))

## total variation neighborhoods (unknown deviation)
nb4 <- gennbCtrl(eps.lower = 0, eps.upper = 0.1,
                 neighbor = TotalVarNeighborhood())
robest3 <- robest(x, BF, nbCtrl = nb4, steps = 3)
robest4 <- robest(x, BinomFamily(size = 25), nbCtrl = nb4, steps = 3)
estimate(robest4)
confint(robest4, method = symmetricBias())
plot(pIC(robest4))


#############################
## 2. Poisson data
#############################
## Example: Rutherford-Geiger (1910); cf. Feller~(1968), Section VI.7 (a)
x <- c(rep(0, 57), rep(1, 203), rep(2, 383), rep(3, 525), rep(4, 532), 
       rep(5, 408), rep(6, 273), rep(7, 139), rep(8, 45), rep(9, 27), 
       rep(10, 10), rep(11, 4), rep(12, 0), rep(13, 1), rep(14, 1))

## Family
PF <- PoisFamily()

## ML-estimate
MLest <- MLEstimator(x, PF)
estimate(MLest)
confint(MLest)

## compute optimally robust estimator (unknown contamination)
nb1 <- gennbCtrl(eps.upper = 0.1)
robest <- robest(x, PF, nbCtrl = nb1, steps = 3)
estimate(robest)

confint(robest, symmetricBias())
plot(pIC(robest))
tmp <- qqplot(x, robest, cex.pch=1.5, exp.cex2.pch = -.25,
              exp.fadcol.pch = .55, jit.fac=.9)
 
## total variation neighborhoods (unknown deviation)
nb2 <- gennbCtrl(eps.upper = 0.05, neighbor = TotalVarNeighborhood())
robest1 <- robest(x, PF, nbCtrl = nb2, steps = 3)
estimate(robest1)
confint(robest1, symmetricBias())
plot(pIC(robest1))
# }
# NOT RUN {
#############################
## 3. Normal (Gaussian) location and scale
#############################
## 24 determinations of copper in wholemeal flour
library(MASS)
data(chem)
plot(chem, main = "copper in wholemeal flour", pch = 20)

## Family
NF <- NormLocationScaleFamily()
## ML-estimate
MLest <- MLEstimator(chem, NF)
estimate(MLest)
confint(MLest)

## Don't run to reduce check time on CRAN
# }
# NOT RUN {
## compute optimally robust estimator (known contamination)
## takes some time -> you can use package RobLox for normal 
## location and scale which is optimized for speed
nb1 <- gennbCtrl(eps = 0.05)
robEst <- robest(chem, NF, nbCtrl = nb1, steps = 3)
estimate.call(robEst)
attr(robEst,"timings")
estimate(robest)

confint(robest, symmetricBias())
plot(pIC(robest))
## plot of relative and absolute information; cf. Kohl (2005)
infoPlot(pIC(robest))

tmp <- qqplot(chem, robest, cex.pch=1.5, exp.cex2.pch = -.25,
              exp.fadcol.pch = .55, withLab = TRUE, which.Order=1:4,
              exp.cex2.lbl = .12,exp.fadcol.lbl = .45,
              nosym.pCI = TRUE, adj.lbl=c(1.7,.2),
              exact.pCI = FALSE, log ="xy")
             
## finite-sample correction
if(require(RobLox)){
    n <- length(chem)
    r <- 0.05*sqrt(n)
    r.fi <- finiteSampleCorrection(n = n, r = r)
    fsCor0 <- r.fi/r
    nb1 <- gennbCtrl(eps = 0.05)
    robest <- robest(chem, NF, nbCtrl = nb1, fsCor = fsCor0, steps = 3)
    estimate(robest)
}

## compute optimally robust estimator (unknown contamination)
## takes some time -> use package RobLox!
nb2 <- gennbCtrl(eps.lower = 0.05, eps.upper = 0.1)
robest1 <- robest(chem, NF, nbCtrl = nb2, steps = 3)
estimate(robest1)
confint(robest1, symmetricBias())
plot(pIC(robest1))
## plot of relative and absolute information; cf. Kohl (2005)
infoPlot(pIC(robest1))
# }

Run the code above in your browser using DataLab