# A basic example of synchronous running of code over 100 iterations,
# split up between 10 tasks (or 10 jobs if mgrid is not installed):
# The function to evaluate:
# Example worker, evaluated once per iteration.
#   iteration: the iteration number; echoed to the console and used
#              as the Poisson mean.
#   Returns 10 Poisson draws.
f <- function(iteration) {
  # Everything passed through object.list is visible in here, but any
  # library the code relies on has to be loaded inside the function.
  cat("Running iteration", iteration, "\n")

  # Placeholder for a lengthy computation.
  draws <- rpois(n = 10, lambda = iteration)
  draws
}
# Evaluate f on xgrid: 100 iterations in total, split between 10
# machines. The returned value is kept in `results`.
results <- xgrid.run(
  f,
  niters = 100,
  threads = 10
)
# xapply example: compute the mean of every dataset in a list.

# Three datasets, two of which contain missing values:
datasets <- list(
  c(1, 5, 6, NA),
  c(9, 2, NA, 0),
  c(-1, 4, 10, 20)
)

# The ordinary lapply call, for comparison:
results1 <- lapply(datasets, mean, na.rm = TRUE)

# The same call written with xapply; xgrid settings are passed
# separately through xgrid.options:
results2 <- xapply(
  datasets, mean,
  xgrid.options = list(wait.interval = "15s"),
  na.rm = TRUE
)

# Alternatively, only submit the job and keep what xapply returns ...
id <- xapply(
  datasets, mean,
  xgrid.options = list(submitandstop = TRUE),
  na.rm = TRUE
)

# ... and use it to retrieve the results afterwards:
results3 <- xgrid.results(id)
# A function can only run on a node that has every package it needs
# installed. This example asks each named node which packages (and
# which versions) it has available.

# The node(s) to query:
nodenames <- c("mynode", "guestnode", "othernode")

# One iteration and one thread per node; sub.options holds one option
# string per node name:
results <- xgrid.run(
  function(i) {
    installed.packages()[, "Version"]
  },
  niters = length(nodenames),
  threads = length(nodenames),
  wait.interval = "10 seconds",
  xgrid.method = "separatejobs",
  sub.options = paste0("-f -h '", nodenames, "'"),
  show.output = FALSE
)

# Name the results after the nodes that were requested:
names(results) <- nodenames

# Print the installed packages and versions for each node:
results
# An example of running an Xgrid job within another Xgrid job, using
# xgrid.submit to submit a job that runs a JAGS model to convergence
# using xgrid.autorun.jags.
#
# First write an ART script (runjagsART.sh, created in the current
# working directory) that checks the node for (a) R at /usr/bin/R,
# (b) JAGS at /usr/local/bin/jags, and (c) the runjags package.
# The script prints 0 and exits as soon as either binary is missing;
# otherwise it starts R and prints the result of require(runjags) as a
# number (1 if the package loaded, 0 if it did not):
cat('#!/bin/bash
if [ ! -f /usr/bin/R ]; then
echo 0
exit 0
fi
if [ ! -f /usr/local/bin/jags ]; then
echo 0
exit 0
fi
/usr/bin/R --slave -e "suppressMessages(r<-require(runjags,quietly=T));cat(r*1,fill=T)"
exit 0
', file='runjagsART.sh')
# Simulated data for the model: Y follows the line 2 * X + 10 with
# normally distributed noise (sd 1).
library(runjags)
X <- seq_len(100)
Y <- rnorm(n = length(X), mean = 2 * X + 10, sd = 1)
data <- dump.format(list(X = X, Y = Y, N = length(X)))

# The model in JAGS syntax: a straight line with slope m and
# intercept c, observed with the given precision.
model <- "model {
for(i in 1 : N){
Y[i] ~ dnorm(true.y[i], precision);
true.y[i] <- (m * X[i]) + c;
}
m ~ dunif(-1000,1000);
c ~ dunif(-1000,1000);
precision ~ dexp(1);
}"
# Read the Xgrid controller hostname and password from the environment
# so that they can be handed on to the slave job. A variable that is
# not set comes back as the empty string.
hostname <- Sys.getenv("XGRID_CONTROLLER_HOSTNAME", unset = "")
password <- Sys.getenv("XGRID_CONTROLLER_PASSWORD", unset = "")
# The function we are going to call on xgrid:
# Job function: runs the JAGS model through xgrid.autorun.jags.
#   iteration: iteration number handed to the function (not used).
#   Returns whatever xgrid.autorun.jags returns.
# hostname, password, model and data are not arguments; they are
# looked up from the enclosing scope when the function runs.
f <- function(iteration) {
  # Set the environment variables naming the controller:
  Sys.setenv(
    XGRID_CONTROLLER_HOSTNAME = hostname,
    XGRID_CONTROLLER_PASSWORD = password
  )

  # The package has to be loaded on the node itself:
  library(runjags)

  # Two chains, each started with a different random number generator:
  chain_inits <- list(
    list(.RNG.name = "base::Wichmann-Hill"),
    list(.RNG.name = "base::Marsaglia-Multicarry")
  )

  # Run the chains until convergence:
  fit <- xgrid.autorun.jags(
    model = model,
    monitor = c("m", "c", "precision"),
    data = data,
    n.chains = 2,
    inits = chain_inits,
    plots = FALSE,
    xgrid.method = "separatejobs",
    wait.interval = "1 min",
    jobname = "xgridslavejob"
  )
  fit
}
# Submit f to xgrid, with the ART script deciding whether a node can
# take the job. The script path has to be absolute, because xgrid is
# not called from the current working directory, and it is wrapped in
# double quotes so that spaces in the path survive. The option is
# spelled '-art' when mgrid cannot be found and '-a' when it can.
name <- xgrid.submit(
  f,
  object.list = list(
    X = X, Y = Y, model = model, data = data,
    hostname = hostname, password = password
  ),
  threads = 1,
  niters = 1,
  sub.options = paste0(
    if (!file.exists(Sys.which("mgrid"))) '-art "' else '-a "',
    getwd(), '/runjagsART.sh"'
  ),
  xgrid.method = "simple"
)

# Clean up: delete the ART script file.
unlink("runjagsART.sh")

# Once the job has finished, fetch the result of its single iteration:
results <- xgrid.results(name)$iteration.1
# Submit an xgrid job just to see which packages are installed
# on a particular machine.
# Make sure mgrid is installed first; install it if it cannot be found:
if (!file.exists(Sys.which("mgrid"))) {
  install.mgrid()
}
# A function to harvest details of R version and installed packages:
# Harvest the R version and the installed packages of the machine the
# function runs on.
#   i: iteration number handed to the function (not used).
#   Returns a character matrix with three columns (version,
#   architecture information, build information): the first row,
#   named 'R', describes R itself and is followed by one row per
#   installed package.
f <- function(i) {
  # Scan the library only once; installed.packages() can be slow.
  pkgs <- installed.packages()

  # Older versions of R have no 'Archs' column; fall back to 'OS_type'.
  middle <- if ("Archs" %in% colnames(pkgs)) "Archs" else "OS_type"

  # drop = FALSE keeps one named row per package even when the library
  # holds a single package (otherwise the subset collapses to a vector).
  packagesinst <- pkgs[, c("Version", middle, "Built"), drop = FALSE]

  # Describe R itself using the same three column labels:
  Rinst <- unlist(R.version[c("version.string", "arch", "platform")])
  names(Rinst) <- c("Version", "Archs", "Built")

  return(rbind(R = Rinst, packagesinst))
}
# Or to get more details about a particular package:
# Look up the package help for bayescount and return its 'info'
# component.
#   i: iteration number handed to the function (not used).
g <- function(i) {
  pkg_help <- library(help = "bayescount")
  pkg_help$info
}
# Get the information back from 2 specific machines called 'newnode1'
# and 'newnode2' (one iteration and one thread per machine):
results <- xgrid.run(
  f,
  niters = 2,
  threads = 2,
  sub.options = "-h newnode1:newnode2",
  wait.interval = "15 seconds"
)