predict: Prediction and uncertainty quantification on the testing input using a GaSP model.

Description

This function computes the predictive mean and variance on the given testing input using a GaSP model.

Usage

# S4 method for fgasp
predict(param,object, testing_input, var_data=TRUE, sigma_2=NULL)

Value

The returned value is a S4 Class predictobj.fgasp.

Arguments

param: a vector of parameters. The first parameter is the natural logarithm of the inverse range parameter in the kernel function. If the data contain noise, the second parameter is the logarithm of the nugget-variance ratio parameter.
object: an object of class fgasp.
testing_input: a vector of testing input for prediction.
var_data: a bool valueto decide whether the noise of the data is included for computing the posterior predictive variance.
sigma_2: a numerical value specifying the variance of the kernel function. If given, the package uses this parameter for prediction.

Author

tools:::Rd_package_author("FastGaSP")

Maintainer: tools:::Rd_package_maintainer("FastGaSP")

References

Hartikainen, J. and Sarkka, S. (2010). Kalman filtering and smoothing solutions to temporal gaussian process regression models, Machine Learning for Signal Processing (MLSP), 2010 IEEE International Workshop, 379-384.

M. Gu, Y. Xu (2017), Nonseparable Gaussian stochastic process: a unified view and computational strategy, arXiv:1711.11501.

M. Gu, X. Wang and J.O. Berger (2018), Robust Gaussian Stochastic Process Emulation, Annals of Statistics, 46, 3038-3066.

Examples

Run this code


library(FastGaSP)

#------------------------------------------------------------------------------
# Example 1: a simple example with noise to show fast computation algorithm 
#------------------------------------------------------------------------------

y_R<-function(x){
  cos(2*pi*x)
}

###let's test for 2000 observations
set.seed(1)
num_obs=2000
input=runif(num_obs)

output=y_R(input)+rnorm(num_obs,mean=0,sd=0.1)

##constucting the fgasp.model
fgasp.model=fgasp(input, output)

##range and noise-variance ratio (nugget) parameters 
param=c( log(1),log(.02))
## the log lik
log_lik(param,fgasp.model)
##time cost to compute the likelihood
time_cost=system.time(log_lik(param,fgasp.model))
time_cost[1]

##consider a nonparametric regression setting 
##estimate the parameter by maximum likelihood estimation

est_all<-optim(c(log(1),log(.02)),log_lik,object=fgasp.model,method="L-BFGS-B",
              control = list(fnscale=-1))  

##estimated log inverse range parameter and log nugget
est_all$par

##estimate variance 
est.var=Get_log_det_S2(est_all$par,fgasp.model@have_noise,fgasp.model@delta_x,
                          fgasp.model@output,fgasp.model@kernel_type)[[2]]/fgasp.model@num_obs
est.var

###1. Do some interpolation test 
num_test=5000
testing_input=runif(num_test) ##there are the input where you don't have observations
pred.model=predict(param=est_all$par,object=fgasp.model,testing_input=testing_input)

lb=pred.model@mean+qnorm(0.025)*sqrt(pred.model@var)
ub=pred.model@mean+qnorm(0.975)*sqrt(pred.model@var)

## calculate lb for the mean function
pred.model2=predict(param=est_all$par,object=fgasp.model,testing_input=testing_input,var_data=FALSE)
lb_mean_funct=pred.model2@mean+qnorm(0.025)*sqrt(pred.model2@var)
ub_mean_funct=pred.model2@mean+qnorm(0.975)*sqrt(pred.model2@var)

## plot the prediction
min_val=min(lb,output)
max_val=max(ub,output)

plot(pred.model@testing_input,pred.model@mean,type='l',col='blue',
     ylim=c(min_val,max_val),
     xlab='x',ylab='y')
polygon( c(pred.model@testing_input,rev(pred.model@testing_input)),
    c(lb,rev(ub)),col = "grey80", border = FALSE)
lines(pred.model@testing_input,pred.model@mean,type='l',col='blue')
lines(pred.model@testing_input,y_R(pred.model@testing_input),type='l',col='black')
lines(pred.model2@testing_input,lb_mean_funct,col='blue',lty=2)
lines(pred.model2@testing_input,ub_mean_funct,col='blue',lty=2)
lines(input,output,type='p',pch=16,col='black',cex=0.4) #one can plot data

legend("bottomleft", legend=c("predictive mean","95% predictive interval","truth"),
       col=c("blue","blue","black"), lty=c(1,2,1), cex=.8)


##mean square error for all inputs
mean((pred.model@mean- y_R(pred.model@testing_input))^2)
##coverage for the mean
length(which(y_R(pred.model@testing_input)>lb_mean_funct &
               y_R(pred.model@testing_input)

Run the code above in your browser using DataLab