### EXAMPLE 1 ###
# Load the package; all registry functions below come from BatchExperiments:
library(BatchExperiments)
reg <- makeExperimentRegistry(id="example1", file.dir=tempfile())
# Define a problem:
# Subsampling from the iris dataset.
data(iris)
subsample <- function(static, ratio) {
  n <- nrow(static)
  train <- sample(n, floor(n * ratio))
  test <- setdiff(seq(n), train)
  list(test=test, train=train)
}
addProblem(reg, id="iris", static=iris,
  dynamic=subsample, seed=123)
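# Quick sanity check (optional, plain base R, not part of the original
# workflow): the dynamic function splits the 150 iris rows into train and
# test indices at the given ratio.
str(subsample(iris, 0.67))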
# Define algorithm "tree":
# Decision tree on the iris dataset, modeling Species.
tree.wrapper <- function(static, dynamic, ...) {
  library(rpart)
  mod <- rpart(Species ~ ., data=static[dynamic$train, ], ...)
  pred <- predict(mod, newdata=static[dynamic$test, ], type="class")
  table(static$Species[dynamic$test], pred)
}
addAlgorithm(reg, id="tree", fun=tree.wrapper)
# Define algorithm "forest":
# Random forest on the iris dataset, modeling Species.
forest.wrapper <- function(static, dynamic, ...) {
  library(randomForest)
  mod <- randomForest(Species ~ ., data=static, subset=dynamic$train, ...)
  pred <- predict(mod, newdata=static[dynamic$test, ])
  table(static$Species[dynamic$test], pred)
}
addAlgorithm(reg, id="forest", fun=forest.wrapper)
# Define problem parameters:
pars <- list(ratio=c(0.67, 0.9))
iris.design <- makeDesign("iris", exhaustive=pars)
# Define decision tree parameters:
pars <- list(minsplit=c(5, 10, 20), cp=c(0.01, 0.1))
tree.design <- makeDesign("tree", exhaustive=pars)
# Define random forest parameters:
pars <- list(ntree=c(100, 500, 1000))
forest.design <- makeDesign("forest", exhaustive=pars)
# Add experiments to the registry, using the previously defined
# experimental designs: 2 problem settings x (6 tree + 3 forest)
# algorithm settings x 2 replications = 36 jobs.
addExperiments(reg, prob.designs=iris.design,
  algo.designs=list(tree.design, forest.design),
  repls=2) # usually you would set repls to 100 or more
# Optional: Short summary over problems and algorithms.
summarizeExperiments(reg)
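# A finer breakdown is possible via the 'show' argument of
# summarizeExperiments (default c("prob", "algo")), e.g. counting
# experiments per problem, algorithm, and selected parameters. A sketch:
summarizeExperiments(reg, show=c("prob", "algo", "ratio", "ntree"))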
# Optional: Test one decision tree job and one expensive (ntree=1000)
# random forest job. Use findExperiments to get the right job ids.
do.tests <- FALSE
if (do.tests) {
  id1 <- findExperiments(reg, algo.pattern="tree")[1]
  id2 <- findExperiments(reg, algo.pattern="forest",
    algo.pars=(ntree == 1000))[1]
  testJob(reg, id1)
  testJob(reg, id2)
}
# Submit the jobs to the batch system
submitJobs(reg)
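# Jobs run asynchronously on the batch system; wait for them to finish and
# inspect their status before collecting results (waitForJobs and
# showStatus come from BatchJobs, on which BatchExperiments is built):
waitForJobs(reg)
showStatus(reg)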
# Calculate the misclassification rate for all (already done) jobs.
reduce <- function(job, res) {
  # res is the confusion matrix returned by the algorithm wrappers.
  n <- sum(res)
  list(mcr=(n - sum(diag(res))) / n)
}
res <- reduceResultsExperiments(reg, fun=reduce)
print(res)
# Aggregate results using 'ddply' from package 'plyr':
# calculate the mean over all replications of identical experiments
# (same problem, same algorithm, and same parameters).
library(plyr)
# Group by everything except the job id, the replication number, and the
# response column:
vars <- setdiff(names(res), c("id", "repl", "mcr"))
aggr <- ddply(res, vars, summarise, mean.mcr=mean(mcr))
print(aggr)
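# Optional: order the aggregated results to see the best-performing
# configurations first (plain base R):
print(aggr[order(aggr$mean.mcr), ])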
### EXAMPLE 2 ###
# Define two simple test functions:
testfun1 <- function(x) sum(x^2)
testfun2 <- function(x) -exp(-sum(abs(x)))
# Define the ExperimentRegistry:
reg <- makeExperimentRegistry("example02", seed=123, file.dir=tempfile())
# Add the test functions to the registry as static problem parts:
addProblem(reg, "testfun1", static=testfun1)
addProblem(reg, "testfun2", static=testfun2)
# Use simulated annealing on the test functions. Note that optim's "SANN"
# method does not support box constraints (supplying lower/upper makes
# optim fall back to "L-BFGS-B" with a warning), so we only randomize the
# start point:
addAlgorithm(reg, "sann", fun=function(static, dynamic) {
  start <- sample(c(-10, 10), 2)
  res <- optim(start, fn=static, method="SANN")
  res <- res[c("par", "value", "counts", "convergence")]
  res$start <- start
  return(res)
})
# Add experiments and submit:
addExperiments(reg, repls=10)
submitJobs(reg)
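# As in Example 1, wait for the jobs to finish before collecting results
# (waitForJobs is provided by BatchJobs):
waitForJobs(reg)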
# Gather information from the experiments, in this case the function value
# and whether the algorithm converged:
res <- reduceResultsExperiments(reg, fun=function(job, res) res[c("value", "convergence")])
print(res)
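# Optional follow-up (a sketch): average the objective value per test
# function over the 10 replications, assuming the returned data.frame
# contains a "prob" column naming the problem, as in Example 1.
aggregate(value ~ prob, data=res, FUN=mean)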