#
# Midwest ozone data 'day 16' stripped of missings
data( ozone2)
y<- ozone2$y[16,]
good<- !is.na( y)
y<-y[good]
x<- ozone2$lon.lat[good,]
# nearly interpolate using defaults (Exponential)
mKrig( x,y, theta = 2.0, lambda=.01)-> out
#
# NOTE this should be identical to
# Krig( x,y, theta=2.0, lambda=.01)
# interpolate using tapered version of the exponential,
# the taper scale is set to 1.5 default taper covariance is the Wendland.
# Tapering will done at a scale of 1.5 relative to the scaling
# done through the theta passed to the covariance function.
mKrig( x,y,cov.function="stationary.taper.cov",
theta = 2.0, lambda=.01,
Taper="Wendland", Taper.args=list(theta = 1.5, k=2, dimension=2)
) -> out2
predict.surface( out2)-> out.p
surface( out.p)
# Try out GCV on a grid of lambda's.
# For this small data set
# one should really just use Krig or Tps but this is an example of
# approximate GCV that will work for much larger data sets using sparse
# covariances and the Monte Carlo trace estimate
#
# a grid of lambdas:
lgrid<- 10**seq(-1,1,,15)
GCV<- matrix( NA, 15,20)
trA<- matrix( NA, 15,20)
GCV.est<- rep( NA, 15)
eff.df<- rep( NA, 15)
logPL<- rep( NA, 15)
# loop over lambda's
for ( k in 1:15){
out<- mKrig( x,y,cov.function="stationary.taper.cov",
theta = 2.0, lambda=lgrid[k],
Taper="Wendland", Taper.args=list(theta = 1.5, k=2, dimension=2) )
GCV[k,]<- out$GCV.info
trA[k,]<- out$trA.info
eff.df[k]<- out$eff.df
GCV.est[k]<- out$GCV
logPL[k]<- out$lnProfileLike
}
#
# plot the results different curves are for individual estimates
# the two lines are whether one averages first the traces or the GCV criterion.
#
par( mar=c(5,4,4,6))
matplot( trA, GCV, type="l", col=1, lty=2, xlab="effective degrees of freedom", ylab="GCV")
lines( eff.df, GCV.est, lwd=2, col=2)
lines( eff.df, rowMeans(GCV), lwd=2)
# add exact GCV computed by Krig
out0<- Krig( x,y,cov.function="stationary.taper.cov",
theta = 2.0,
Taper="Wendland", Taper.args=list(theta = 1.5, k=2, dimension=2), spam.format=FALSE)
lines( out0$gcv.grid[,2:3], lwd=4, col="darkgreen")
# add profile likelihood
utemp<- par()$usr
utemp[3:4] <- range( -logPL)
par( usr=utemp)
lines( eff.df, -logPL, lwd=2, col="blue", lty=2)
axis( 4)
mtext( side=4,line=3, "-ln profile likelihood", col="blue")
title( "GCV ( green = exact) and -ln profile likelihood", cex=2)
# an example using a "Z" covariate and the Matern family
data(COmonthlyMet)
y<- CO.tmin.MAM.climate
good<- !is.na( y)
y<-y[good]
x<- CO.loc[good,]
Z<- CO.elev[good]
out<- mKrig( x,y, Z=Z, cov.function="stationary.cov", Covariance="Matern",
theta=4.0, smoothness=1.0, lambda=.1)
set.panel(2,1)
# quilt.plot with elevations
quilt.plot( x, predict(out))
# quilt.plot without elevation linear term included
quilt.plot( x, predict(out, drop.Z=TRUE))
set.panel()
# here is a series of examples with a bigger problem
# using a compactly supported covariance directly
set.seed( 334)
N<- 1000
x<- matrix( 2*(runif(2*N)-.5),ncol=2)
y<- sin( 1.8*pi*x[,1])*sin( 2.5*pi*x[,2]) + rnorm( 1000)*.1
look2<-mKrig( x,y, cov.function="wendland.cov",k=2, theta=.2,
lambda=.1)
# take a look at fitted surface
predict.surface(look2)-> out.p
surface( out.p)
# this works because the number of nonzero elements within distance theta
# are less than the default maximum allocated size of the
# sparse covariance matrix.
# see spam.options() for the default values
# The following will give a warning for theta=.9 because
# allocation for the covariance matirx storage is too small.
# Here theta controls the support of the covariance and so
# indirectly the number of nonzero elements in the sparse matrix
mKrig( x,y, cov.function="wendland.cov",k=2, theta=.9, lambda=.1)-> look2
# The warning resets the memory allocation for the covariance matirx according the
# values 'spam.options(nearestdistnnz=c(416052,400))'
# this is inefficient becuase the preliminary pass failed.
# the following call completes the computation in "one pass"
# without a warning and without having to reallocate more memory.
spam.options(nearestdistnnz=c(416052,400))
mKrig( x,y, cov.function="wendland.cov",k=2, theta=.9, lambda=1e-2)-> look2
# as a check notice that
# print( look2)
# report the number of nonzero elements consistent with the specifc allocation
# increase in spam.options
# new data set of 1500 locations
set.seed( 234)
N<- 1500
x<- matrix( 2*(runif(2*N)-.5),ncol=2)
y<- sin( 1.8*pi*x[,1])*sin( 2.5*pi*x[,2]) + rnorm( N)*.01
# the following is an example of where the allocation (for nnzR)
# for the cholesky factor is too small. A warning is issued and
# the allocation is increased by 25% in this example
#
mKrig( x,y,
cov.function="wendland.cov",k=2, theta=.1,
lambda=1e2 )-> look2
# to avoid the warning
mKrig( x,y,
cov.function="wendland.cov",k=2, theta=.1,
lambda=1e2,
chol.args=list(pivot=TRUE,memory=list(nnzR= 450000)) )-> look2
# success!
# fiting multiple data sets
#
#\dontrun{
y1<- sin( 1.8*pi*x[,1])*sin( 2.5*pi*x[,2]) + rnorm( N)*.01
y2<- sin( 1.8*pi*x[,1])*sin( 2.5*pi*x[,2]) + rnorm( N)*.01
Y<- cbind(y1,y2)
mKrig( x,Y,cov.function="wendland.cov",k=2, theta=.1,
lambda=1e2 )-> look3
# note slight difference in summary because two data sets have been fit.
print( look3)
#}
##################################################
# finding a good choice for theta as a taper
##################################################
# Suppose the target is a spatial prediction using roughly 50 nearest neighbors
# (tapering covariances is effective for roughly 20 or more in the situation of
# interpolation) see Furrer, Genton and Nychka (2006).
# take a look at a random set of 100 points to get idea of scale
set.seed(223)
ind<- sample( 1:N,100)
hold<- rdist( x[ind,], x)
dd<- (apply( hold, 1, sort))[65,]
dguess<- max(dd)
# dguess is now a reasonable guess at finding cutoff distance for
# 50 or so neighbors
# full distance matrix excluding distances greater than dguess
# but omit the diagonal elements -- we know these are zero!
hold<- nearest.dist( x, delta= dguess,upper=TRUE)
# exploit spam format to get quick of number of nonzero elements in each row
hold2<- diff( hold@rowpointers)
# min( hold2) = 55 which we declare close enough
# now the following will use no less than 55 nearest neighbors
# due to the tapering.
mKrig( x,y, cov.function="wendland.cov",k=2, theta=dguess,
lambda=1e2) -> look2
Run the code above in your browser using DataLab