MCMCCV: Inference and model selection for analysis of geographical genetic variation

Description

MCMC inference and model selection by cross-validation for the analysis of geographical genetic variation

Usage

MCMCCV(gen, D_G, D_E, nit, thinning, theta.max, theta.init, run, ud, n.validation.set,print.pct)

Arguments

gen

An array with dimensions (n,l,a) n: number of geographical locations, l: number of loci, a: max number of alleles

D_G

A matrix of geograpical distances

D_E

A matrix of environmental distances

nit

Number of iterations

thinning

Thinning of MCMC iterations

theta.max

Upper bounds for the vector of parameters in theta. Note that in theta, the parameters are assumed to be in this order: (alpha,beta_G, beta_E, gamma, delta)

theta.init

Initial state of theta

run

A vector of booleans of length 3 stating which sub-model is investigated among G+E,G,E (in this order). For example, the default value run=c(TRUE,FALSE,FALSE) means that only one MCMC run will be performed to estimate paremeters in the G+E model while e.g. run=c(TRUE,FALSE,TRUE) means that MCMC runs will be performed for models G+E and E. If n.validation.set >0 likelihoods under these two models will be returned for model selection.

A vector of booleans of length 5 stating which entries in theta=(alpha,beta_E,beta_G,gamma,delta) will be updated in the MCMC iterations. By default all parameters in theta will be updated. If one entry is not updated, the value used along the MCMC simulation for this parameter is the initial value.

n.validation.set

The number of pairs (sites x locus) used as validation set

print.pct

A boolean stating whether Fortran prints percentage of computation achieved along each MCMC run.

Value

the validation set for the various models compared.

Examples

Run this code

## Not run: 
# data(toydata,package='Sunder')
#      
#      #### Computing options
#      nit <- 10^2
#      run  <- c(1,1,1)
#      thinning  <- 1 # max(nit/10^3,1);
#      ud   <- c(0,1,1,0,0) 
#      theta.init <- c(1,2,1,1,0.01)
#      n.validation.set  <- dim(gen)[1]*dim(gen)[2]/10 
#      theta.max  <- c(10,10*max(D_G),10*max(D_E),1,0.01)
#      
#      plot  <- TRUE
#      trace <- FALSE
#      
#      #### Call Sunder ####
#      output <- MCMCCV(gen,D_G,D_E,
#                                 nit,thinning,
#                                 theta.max,
#                                 theta.init,
#                                 run,ud,n.validation.set)
#      
#      mod.lik <- output$mod.lik
#      tvt <- output$theta
#      
#      
#      ## plotting outputs
#      upd=matrix(nrow=sum(run), ncol=length(theta.init), data=1)
#      upd[2,3]=upd[3,2]=0
#      
#      plot(as.vector(D_G),as.vector(cor(t(gen[,,1]))),
#           bg=colorRampPalette(c("blue", "red"))(dim(D_E)[1]^2)[order(order(as.vector(D_E)))],
#           pch=21,
#           xlab='Geographic distance',
#           ylab='Empirical covariance genotypes')
#      
#      
#      kol=c(4,2,3) 
#      xseq=seq(thinning,nit,thinning)
#      ylab=c(expression(paste(alpha)),
#             expression(paste(beta[D])),
#             expression(paste(beta[E])),
#             expression(paste(gamma)),
#             expression(paste(delta)))
#      
#      par(mfrow=c(sum(run),length(theta.init)))
#      for (j in 1:sum(run))
#      {
#        for(k in 1:length(theta.init))
#        {
#          if (sum(upd[,k]==1)>0)
#          {
#            if(upd[j, k]==1)
#            {
#              if(exists("theta"))
#                ylim=c(min(tvt[k,,j],theta[k]),max(tvt[k,,j],theta[k])) else
#                  ylim=c(min(tvt[k,,j]),max(tvt[k,,j]))
#              plot(0, type="n",xlab="",ylab="", xlim=c(0,nit), ylim=ylim)
#              lines(xseq,tvt[k,,j],col=kol[j],xlab="",ylab="")
#              if(exists("theta")) abline(h=theta[k],lty=2)
#              title(xlab="iterations")        
#              mtext(ylab[k], side=2, line=2.3,las=1)} else plot.new()
#          }
#        }
#      }
# 
# print(mod.lik)
# print(paste('The model achieving the highest likelihood on the validation set is:',
#             names(mod.lik)[order(mod.lik,decreasing=TRUE)[1]]))
# theta.GE <- apply(output$theta[,,1],1,mean)
# print('Posterior mean theta under model G+E:')
# print(theta.GE)
# 
# theta.G <- apply(output$theta[,,2],1,mean)
# theta.G[3] <- NA
# print('Posterior mean theta under model G:')
# print(theta.G)
# 
# theta.E <- apply(output$theta[,,3],1,mean)
# theta.E[2] <- NA
# print('Posterior mean theta under model E:')
# print(theta.E)
# 
# ## End(Not run)

Run the code above in your browser using DataLab