## -----------------------------------------------------------------------------
## EXAMPLE 1 - Theta estimate with synthetic data
## -----------------------------------------------------------------------------
## Samples of 2-D data points drawn from three nonlinearly separable
## classes which take the form of two annular rings and one zero-centered
## Gaussian are used in this little illustrative example.
genSample <- function(n, noiseVar=0) {
## class 1 and 2 (x ~ U(0,1))
u <- 4. * matrix(runif(2*n), nrow=n, ncol=2) - 2.;
i <- which(((u[, 1]^2 + u[, 2]^2) > .1) & ((u[, 1]^2 + u[, 2]^2) < .5) );
j <- which(((u[, 1]^2 + u[, 2]^2) > .6) & ((u[, 1]^2 + u[, 2]^2) < 1) );
X <- u[c(i, j),];
t.class <- c(rep(1, length(i)),rep(2, length(j)));
## class 3 (x ~ N(0,1))
x <- 0.1 * matrix(rnorm(2*length(i)), ncol=2, nrow=length(i) );
k <- which((x[, 1]^2 + x[, 2]^2) < 0.1);
X <- rbind(X, x[k, ]);
t.class <- c(t.class, rep(3, length(k)));
## add random coloumns
if (noiseVar>0) X <- cbind(X, matrix(rnorm(noiseVar*nrow(X)), ncol=noiseVar, nrow=nrow(X)));
structure( list( t.class=t.class, X=X), class="MultiNoisyData");
}
set.seed(123); ## Init random number generator
## Generate training and test samples as an independent
## test set to assess out-of-sample prediction error
## and predictive likelihoods.
nNoisyInputs <- 0; ## number of additional noisy input parameters
Ntest <- Ntrain <- 500; ## sample sizes
dataXt.train <- genSample(Ntrain, nNoisyInputs);
dataXt.test <- genSample(Ntest, nNoisyInputs);
## Not run:
# theta <- runif(ncol(dataXt.train$X));
# res <- vbmp( dataXt.train$X, dataXt.train$t.class,
# dataXt.test$X, dataXt.test$t.class, theta,
# control=list(bThetaEstimate = T,
# bPlotFitting=T, maxIts=50));
# ## End(Not run)
## set theta params (previously estimated)
theta <- c(0.09488309, 0.16141604);
## Fit the vbmp
res <- vbmp( dataXt.train$X, dataXt.train$t.class,
dataXt.test$X, dataXt.test$t.class, theta,
control=list(maxIts=5));
## print out-of-sample error estimate
predError(res);
## Not run:
# ## ----------------------------------------------------------
# ## EXAMPLE 2 - BRCA12 genomic data
# ## ----------------------------------------------------------
# ## Leave-one-out (LOO) cross-validation prediction error of the probabilistic
# ## Gaussian process classifier used in Zsofia Kote-Jarai et al.
# ## Clin Cancer Res 2006;12(13);3896-3901
#
# if(any(installed.packages()[,1]=="Biobase")) {
# library("Biobase");
# data("BRCA12");
# brca.y <- BRCA12$Target.class;
# brca.x <- t(exprs(BRCA12));
# } else {
# print("Deprecated.....");
# load(url("http://www.dcs.gla.ac.uk/people/personal/girolami/pubs_2005/VBGP/BRCA12.RData"));
# brca.y <- as.numeric(BRCA12$y);
# brca.x <- as.matrix(BRCA12[,-1]);
# }
#
# sKernelType <- "iprod"; ## Covariance function type
# Thresh <- 1e-8; ## Iteration threshold
# InfoLevel <- 1;
# theta <- rep(1.0, ncol(brca.x));
# ITER.THETA <- 24;
# n <- nrow(brca.x) ;
# Kfold <- n; # number of folds , if equal to n then LOO
# samps <- sample(rep(1:Kfold, length=n), n, replace=FALSE);
# res <- rep(NA, n);
# print(paste("LOO crossvalidation started...... (",n,"steps)"));
# for (x in 1:Kfold) {
# cat(paste(x,", ",sep="")); flush.console();
# resX <- vbmp( brca.x[samps!=x,], brca.y[samps!=x],
# brca.x[samps==x,], brca.y[samps==x],
# theta, control=list(bThetaEstimate=F,
# bPlotFitting=F, maxIts=ITER.THETA,
# sKernelType=sKernelType, Thresh=Thresh));
# res[samps==x] <- predClass(resX);
# }
# print("(end)");
# print(paste("Crossvalidated error rate", round(sum(res!=brca.y)/n,2)));
# ## End(Not run)
Run the code above in your browser using DataCamp Workspace