
BatchExperiments (version 1.1)

addExperiments: Add experiments to the registry.

Description

Add experiments for running algorithms on problems to the registry, so they can be executed later.
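
A minimal sketch of the surrounding workflow (the ids and the trivial algorithm function are illustrative, not part of this help page):

reg <- makeExperimentRegistry(id="sketch", file.dir=tempfile())
addProblem(reg, id="p1", static=iris)
addAlgorithm(reg, id="a1", fun=function(static, dynamic, ...) nrow(static))
addExperiments(reg)  # by default crosses all problems with all algorithms
submitJobs(reg)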

Usage

addExperiments(reg, prob.designs, algo.designs, repls = 1L,
  skip.defined = FALSE)

Arguments

reg
[ExperimentRegistry] Registry.
prob.designs
[character | Design | list of Design] Either problem ids, a single problem design or a list of problem designs, the latter two created by makeDesign. If omitted, all problems in the registry are used.
algo.designs
[character | Design | list of Design] Either algorithm ids, a single algorithm design or a list of algorithm designs, the latter two created by makeDesign. If omitted, all algorithms in the registry are used.
repls
[integer(1)] Number of replications. Default is 1.
skip.defined
[logical(1)] If TRUE, experiments that are already defined are skipped; otherwise an error is thrown. Default is FALSE.
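
Designs for prob.designs and algo.designs are built with makeDesign. A minimal sketch, reusing ids and parameters from the example below; the design data.frame variant is based on makeDesign's interface and passes parameter rows as-is instead of crossing them:

# Crossed design: 2 x 2 = 4 parameter settings.
d1 <- makeDesign("tree", exhaustive=list(minsplit=c(10, 20), cp=c(0.01, 0.1)))
# Explicit design: exactly the two rows given in the data.frame.
d2 <- makeDesign("tree", design=data.frame(minsplit=c(10, 20), cp=c(0.01, 0.1)))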

Value

Invisibly returns a vector of the ids of the added experiments.
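
Since the ids are returned invisibly, assign the result if you want to work with them, e.g. for a subsequent submitJobs call:

ids <- addExperiments(reg, repls=2)
submitJobs(reg, ids)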

Examples

### EXAMPLE 1 ###
reg <- makeExperimentRegistry(id="example1", file.dir=tempfile())

# Define a problem:
# Subsampling from the iris dataset.
data(iris)
subsample <- function(static, ratio) {
  n <- nrow(static)
  train <- sample(n, floor(n * ratio))
  test <- setdiff(seq(n), train)
  list(test=test, train=train)
}
addProblem(reg, id="iris", static=iris,
           dynamic=subsample, seed=123)
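# (The static part is passed unchanged to every job, while the dynamic
# function is evaluated once per job, reproducibly seeded via 'seed',
# to generate a fresh train/test split.)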

# Define algorithm "tree":
# Decision tree on the iris dataset, modeling Species.
tree.wrapper <- function(static, dynamic, ...) {
  library(rpart)
  mod <- rpart(Species ~ ., data=static[dynamic$train, ], ...)
  pred <- predict(mod, newdata=static[dynamic$test, ], type="class")
  table(static$Species[dynamic$test], pred)
}
addAlgorithm(reg, id="tree", fun=tree.wrapper)

# Define algorithm "forest":
# Random forest on the iris dataset, modeling Species.
forest.wrapper <- function(static, dynamic, ...) {
  library(randomForest)
  mod <- randomForest(Species ~ ., data=static, subset=dynamic$train, ...)
  pred <- predict(mod, newdata=static[dynamic$test, ])
  table(static$Species[dynamic$test], pred)
}
addAlgorithm(reg, id="forest", fun=forest.wrapper)

# Define problem parameters:
pars <- list(ratio=c(0.67, 0.9))
iris.design <- makeDesign("iris", exhaustive=pars)

# Define decision tree parameters:
pars <- list(minsplit=c(10, 20), cp=c(0.01, 0.1))
tree.design <- makeDesign("tree", exhaustive=pars)

# Define random forest parameters:
pars <- list(ntree=c(100, 500))
forest.design <- makeDesign("forest", exhaustive=pars)

# Add experiments to the registry:
# Use the previously defined experimental designs.
addExperiments(reg, prob.designs=iris.design,
               algo.designs=list(tree.design, forest.design),
               repls=2) # usually you would set repls to 100 or more.

# Optional: Short summary over problems and algorithms.
summarizeExperiments(reg)

# Optional: Test one decision tree job and one expensive (ntree=500)
# random forest job. Use findExperiments to get the right job ids.
do.tests <- FALSE
if (do.tests) {
  id1 <- findExperiments(reg, algo.pattern="tree")[1]
  id2 <- findExperiments(reg, algo.pattern="forest",
                         algo.pars=(ntree == 500))[1]
  testJob(reg, id1)
  testJob(reg, id2)
}

# Submit the jobs to the batch system:
submitJobs(reg)
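
# Optionally wait until all jobs have terminated before collecting
# results (waitForJobs comes from the underlying BatchJobs package):
waitForJobs(reg)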

# Calculate the misclassification rate for all (already done) jobs.
reduce <- function(job, res) {
  n <- sum(res)
  list(mcr=(n-sum(diag(res)))/n)
}
res <- reduceResultsExperiments(reg, fun=reduce)
print(res)

# Aggregate results using 'ddply' from package 'plyr':
# Calculate the mean over all replications of identical experiments
# (same problem, same algorithm and same parameters)
library(plyr)
vars <- setdiff(names(res), c("repl", "mcr"))
aggr <- ddply(res, vars, summarise, mean.mcr=mean(mcr))
print(aggr)
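
# The same aggregation with base R (a sketch, avoiding the plyr dependency):
aggr2 <- aggregate(mcr ~ ., data=res[, c(vars, "mcr")], FUN=mean)
print(aggr2)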



### EXAMPLE 2 ###
# Define two simple test functions:
testfun1 = function(x) sum(x^2)
testfun2 = function(x) -exp(-sum(abs(x)))

# Define ExperimentRegistry:
reg = makeExperimentRegistry("example02", seed=123, file.dir=tempfile())

# Add the testfunctions to the registry:
addProblem(reg, "testfun1", static=testfun1)
addProblem(reg, "testfun2", static=testfun2)

# Use simulated annealing on the test functions:
addAlgorithm(reg, "sann", fun=function(static, dynamic) {
  start = sample(c(-10, 10), 2)
  # optim ignores box constraints for method="SANN"; passing lower/upper
  # here would trigger a warning and silently switch to "L-BFGS-B".
  res = optim(start, fn=static, method="SANN")
  res = res[c("par", "value", "counts", "convergence")]
  res$start = start
  return(res)
})

# Add experiments and submit. Without explicit designs, every problem
# is crossed with every algorithm:
addExperiments(reg, repls=10)
submitJobs(reg)

# Gather information from the experiments, in this case the function value
# and whether the algorithm converged:
reduceResultsExperiments(reg, fun=function(job, res) res[c("value", "convergence")])
