# EXAMPLE 1: Draw 100,000 observations from a N(0, 1) DGP, evaluate the
# kernel density estimate on a grid of 50 equally spaced points via
# npksum(), then confirm the result agrees with the (identical)
# npudens() output.
n.train <- 100000
x.train <- rnorm(n.train)
eval.grid <- seq(-4, 4, length = 50)
# Bandwidth via the normal-reference rule-of-thumb
bw.nr <- npudensbw(dat = x.train, bwmethod = "normal-reference")
# Univariate kernel density estimate at the 50 evaluation points: the
# kernel sum over the 100,000 training observations divided by n*h
dens.ksum <- npksum(txdat = x.train, exdat = eval.grid, bws = bw.nr$bw)$ksum /
             (n.train * bw.nr$bw[1])
# Alternatively (easier perhaps), pass bandwidth.divide=TRUE and drop
# the *bw.nr$bw[1] factor from the denominator, as in
# npksum(txdat=x.train, exdat=eval.grid, bws=bw.nr$bw,
#        bandwidth.divide=TRUE)$ksum/n.train
# The same density computed directly with the npudens() function...
dens.direct <- fitted(npudens(tdat = x.train, edat = eval.grid, bws = bw.nr$bw))
# Overlay the true DGP, the npksum()/(nh) estimate, and the (identical)
# npudens() estimate
plot(eval.grid, dnorm(eval.grid), xlab = "X", ylab = "Density", type = "l")
points(eval.grid, dens.ksum, col = "blue")
points(eval.grid, dens.direct, col = "red", cex = 0.2)
legend(1, .4,
       c("DGP", "npksum()",
       "npudens()"),
       col = c("black", "blue", "red"),
       lty = c(1, 1, 1))
# Pause for 5 seconds so that the output can be examined...
Sys.sleep(5)
# EXAMPLE 2: For this example, we first obtain residuals from a parametric
# regression model, then compute a vector of leave-one-out kernel
# weighted sums of squared residuals where the kernel function is raised
# to the power 2. We use a normal-reference rule-of-thumb obtained by
# using the scaling factor 1.06 and bwscaling=TRUE. Note that this
# returns a vector of kernel weighted sums, one for each element of the
# error vector. Note also that you can specify the bandwidth type, 
# kernel function, kernel order etc.
data("cps71")
# Reference the cps71 columns explicitly (via data= / cps71$) rather
# than attach()/detach(), which mutates the search path and can mask
# same-named objects in the global environment.
error <- residuals(lm(logwage ~ age + I(age^2), data = cps71))
ksum <- npksum(txdat = cps71$age, 
               tydat = error^2, 
               bws = 1.06, 
               leave.one.out = TRUE, 
               bwscaling = TRUE, 
               kernel.pow = 2)
ksum
# Sleep for 5 seconds so that we can examine the output...
Sys.sleep(5)
# Note - npksum also can inherit properties from a bandwidth
# object. To again specify a value for the bandwidth, you might do so in
# the bandwidth object itself then pass this into npksum (e.g., 
# below I use a regression bandwidth object but you could use, say, a
# density bandwidth object instead).
bw <- npregbw(xdat = cps71$age, ydat = cps71$logwage, bws = c(1.06),
              bwscaling = TRUE, 
              bandwidth.compute = FALSE)
ksum <- npksum(txdat = cps71$age, 
               tydat = error^2, 
               bws = bw$bw, 
               leave.one.out = TRUE, 
               kernel.pow = 2)
# Obviously, if we wanted the sum of these weighted kernel sums then, 
# trivially, 
sum(ksum$ksum)
# Sleep for 5 seconds so that we can examine the output...
Sys.sleep(5)
# Note that weighted leave-one-out sums of squared residuals are used to
# construct consistent model specification tests. In fact, the
# npcmstest() routine in this package is constructed purely from calls
# to npksum(). You can type npcmstest to see the npcmstest()
# code and also examine some examples of the many uses of
# npksum().
# EXAMPLE 3: For this example, we conduct local-constant (i.e., 
# Nadaraya-Watson) kernel regression. We shall use cross-validated
# bandwidths using npregbw() by way of example. Note we extract
# the kernel sum from npksum() via the `$ksum' element in both
# the numerator and denominator.
data("cps71")
# Use explicit cps71$ column access instead of attach()/detach(), which
# mutates the search path and can silently mask global objects.
bw <- npregbw(xdat = cps71$age, ydat = cps71$logwage)
fit.lc <- npksum(txdat = cps71$age, tydat = cps71$logwage, bws = bw$bw)$ksum /
          npksum(txdat = cps71$age, bws = bw$bw)$ksum
plot(cps71$age, cps71$logwage, xlab = "Age", ylab = "log(wage)")
lines(cps71$age, fit.lc)
# Sleep for 5 seconds so that we can examine the output...
Sys.sleep(5)
# If you wished to compute the kernel sum for a set of evaluation points, 
# you first generate the set of points then feed this to npksum, 
# e.g., for the set (20, 30, ..., 60) we would use
age.seq <- seq(20, 60, 10)
fit.lc <- npksum(txdat = cps71$age, exdat = age.seq, tydat = cps71$logwage,
                 bws = bw$bw)$ksum /
          npksum(txdat = cps71$age, exdat = age.seq, bws = bw$bw)$ksum
# Note that now fit.lc contains the 5 values of the local constant
# estimator corresponding to age.seq...
fit.lc
# EXAMPLE 4: For this example, we conduct least-squares cross-validation
# for the local-constant regression estimator. We first write an R
# function `ss' that computes the leave-one-out sum of squares using the
# npksum() function, and then feed this function, along with
# random starting values for the bandwidth vector, to the nlm() routine
# in R (nlm = Non-Linear Minimization). Finally, we compare results with
# the function npregbw() that is written solely in C and calls
# a tightly coupled C-level search routine.  Note that one could make
# repeated calls to nlm() using different starting values for h (highly
# recommended in general).
# Print more digits by default and suppress scientific notation for
# this example (we wish to compare objective function minima)
options(scipen = 100, digits = 12)
# Simulate 100 observations from a simple DGP in which the third
# explanatory variable is irrelevant. The columns are generated inline,
# preserving the draw order runif, rnorm, runif.
n <- 100
txdat <- data.frame(x1 = runif(n),
                    x2 = rnorm(n),
                    x3 = runif(n))
# Note - x3 is irrelevant
tydat <- txdat$x1 + sin(txdat$x2) + rnorm(n)
# Write an R function that returns the average leave-one-out sum of
# squared residuals for the local constant estimator based upon
# npksum(). This function accepts one argument and presumes that
# txdat and tydat have been defined already.
# Average leave-one-out sum of squared residuals for the local-constant
# estimator, computed via npksum(). Takes a candidate bandwidth vector
# `h' and reads `txdat'/`tydat' from the calling environment.
ss <- function(h) {
  # Guard: non-positive bandwidths are invalid, so return an effectively
  # infinite penalty to steer the optimizer away
  if (min(h) <= 0) {
    return(.Machine$double.xmax)
  }
  # Leave-one-out Nadaraya-Watson fit: kernel-weighted sum of tydat
  # divided by the kernel sum (bandwidth.divide=TRUE normalizes by h)
  g.loo <- npksum(txdat,
                  tydat,
                  leave.one.out = TRUE,
                  bandwidth.divide = TRUE,
                  bws = h)$ksum /
           npksum(txdat,
                  leave.one.out = TRUE,
                  bandwidth.divide = TRUE,
                  bws = h)$ksum
  # Mean squared leave-one-out residual
  sum((tydat - g.loo)^2) / length(tydat)
}
# Minimize ss() with R's nlm() routine, starting from random bandwidth
# values, and keep the full result in `nlm.return'.
nlm.return <- nlm(ss, runif(length(txdat)))
# Cross-validated bandwidths from the C-level search in npregbw()
bw <- npregbw(xdat = txdat, ydat = tydat)
# Compare bandwidths: nlm() ...
nlm.return$estimate
# ... versus npregbw()
bw$bw
# Compare objective-function minima: nlm() ...
nlm.return$minimum
# ... versus npregbw()
bw$fval
# Pause for 5 seconds so that the output can be examined...
Sys.sleep(5)
# EXAMPLE 5: For this example, we use npksum() to plot the kernel
# function itself. Our `training data' is the singleton, 0, and our
# evaluation data a grid in [-4,4], while we use a bandwidth of 1. By
# way of example we plot a sixth order Gaussian kernel (note that this
# kernel function can assume negative values)
x <- 0
x.eval <- seq(-4, 4, length = 500)
kernel <- npksum(txdat = x, exdat = x.eval, bws = 1,
                 bandwidth.divide = TRUE,
                 ckertype = "gaussian",
                 ckerorder = 6)$ksum
plot(x.eval, kernel, type = "l", xlab = "X", ylab = "Kernel Function")
# Horizontal reference line at zero (the higher-order kernel dips below it).
# Note: stray website text ("Run the code above in your browser...") that
# had been fused onto this line has been removed - it was a scraping
# artifact, not R code, and made the script unparseable.
abline(0, 0)