# skeleton functions to be saved and edited
SimFunctions()
#### Step 1 --- Define your conditions under study and create design data.frame
# (use EXPLICIT names, avoid things like N <- 100. That's fine in functions, not here)
Design <- expand.grid(sample_size = c(30, 60, 90, 120),
group_size_ratio = c(1, 4, 8),
standard_deviation_ratio = c(.5, 1, 2))
dim(Design)
head(Design)
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 2 --- Define generate, analyse, and summarise functions
# help(Generate)
Generate <- function(condition, fixed_objects = NULL){
N <- condition$sample_size # alternatively, could use Attach() to make objects available
grs <- condition$group_size_ratio
sd <- condition$standard_deviation_ratio
if(grs < 1){
N2 <- N / (1/grs + 1)
N1 <- N - N2
} else {
N1 <- N / (grs + 1)
N2 <- N - N1
}
group1 <- rnorm(N1)
group2 <- rnorm(N2, sd=sd)
dat <- data.frame(group = c(rep('g1', N1), rep('g2', N2)), DV = c(group1, group2))
dat
}
# help(Analyse)
Analyse <- function(condition, dat, fixed_objects = NULL){
welch <- t.test(DV ~ group, dat)
ind <- t.test(DV ~ group, dat, var.equal=TRUE)
# In this function the p values for the t-tests are returned,
# and make sure to name each element, for future reference
ret <- c(welch = welch$p.value, independent = ind$p.value)
ret
}
# help(Summarise)
Summarise <- function(condition, results, fixed_objects = NULL){
#find results of interest here (e.g., alpha < .1, .05, .01)
ret <- EDR(results, alpha = .05)
ret
}
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 3 --- Collect results by looping over the rows in design
# test to see if it works and for debugging
Final <- runSimulation(design=Design, replications=5,
generate=Generate, analyse=Analyse, summarise=Summarise)
head(Final)
# didactic demonstration when summarise function is not supplied (returns list of matrices)
Final2 <- runSimulation(design=Design, replications=5,
generate=Generate, analyse=Analyse)
print(Final2[1:3])
# or a single condition
Final3 <- runSimulation(design=Design[1, ], replications=5,
generate=Generate, analyse=Analyse)
Final3
## Not run:
# # complete run with 1000 replications per condition
# Final <- runSimulation(design=Design, replications=1000, parallel=TRUE,
# generate=Generate, analyse=Analyse, summarise=Summarise)
# head(Final, digits = 3)
# View(Final)
#
# ## save results to a file upon completion (not run)
# # runSimulation(design=Design, replications=1000, parallel=TRUE, save=TRUE, filename = 'mysim',
# # generate=Generate, analyse=Analyse, summarise=Summarise)
#
#
#
# ## Debug the generate function. See ?browser for help on debugging
# ## Type help to see available commands (e.g., n, c, where, ...),
# ## ls() to see what has been defined, and type Q to quit the debugger
# runSimulation(design=Design, replications=1000,
# generate=Generate, analyse=Analyse, summarise=Summarise,
# parallel=TRUE, edit='generate')
#
# ## Alternatively, place a browser() within the desired function line to
# ## jump to a specific location
# Summarise <- function(condition, results, fixed_objects = NULL){
# #find results of interest here (e.g., alpha < .1, .05, .01)
# ret <- EDR(results[,nms], alpha = .05)
# browser()
# ret
# }
#
# runSimulation(design=Design, replications=1000,
# generate=Generate, analyse=Analyse, summarise=Summarise,
# parallel=TRUE)
#
#
#
#
# ## EXTRA: To run the simulation on a MPI cluster, use the following setup on each node (not run)
# # library(doMPI)
# # cl <- startMPIcluster()
# # registerDoMPI(cl)
# # Final <- runSimulation(design=Design, replications=1000, MPI=TRUE, save=TRUE,
# # generate=Generate, analyse=Analyse, summarise=Summarise)
# # saveRDS(Final, 'mysim.rds')
# # closeCluster(cl)
# # mpi.quit()
#
#
# ## Similarly, run simulation on a network linked via ssh
# ## (two way ssh key-paired connection must be possible between master and slave nodes)
# ##
# ## define IP addresses, including primary IP
# # primary <- '192.168.2.20'
# # IPs <- list(
# # list(host=primary, user='phil', ncore=8),
# # list(host='192.168.2.17', user='phil', ncore=8)
# # )
# # spec <- lapply(IPs, function(IP)
# # rep(list(list(host=IP$host, user=IP$user)), IP$ncore))
# # spec <- unlist(spec, recursive=FALSE)
# #
# # cl <- parallel::makeCluster(type='PSOCK', master=primary, spec=spec)
# # Final <- runSimulation(design=Design, replications=1000, parallel = TRUE, save=TRUE,
# # generate=Generate, analyse=Analyse, summarise=Summarise, cl=cl)
#
# #~~~~~~~~~~~~~~~~~~~~~~~~
# # Step 4 --- Post-analysis: Create a new R file for analyzing the Final data.frame with R based
# # regression stuff, so use the lm() function to find main effects, interactions, plots, etc.
# # This is where you get to be a data analyst!
#
# library(dplyr)
# Final2 <- tbl_df(Final)
# Final2 %>% summarise(mean(welch), mean(independent))
# Final2 %>% group_by(standard_deviation_ratio, group_size_ratio) %>%
# summarise(mean(welch), mean(independent))
#
# # quick ANOVA analysis method with all two-way interactions
# SimAnova( ~ (sample_size + group_size_ratio + standard_deviation_ratio)^2, Final)
#
# # or more specific anovas
# SimAnova(independent ~ (group_size_ratio + standard_deviation_ratio)^2,
# Final)
#
# # make some plots
# library(ggplot2)
# library(reshape2)
# welch_ind <- Final[,c('group_size_ratio', "standard_deviation_ratio",
# "welch", "independent")]
# dd <- melt(welch_ind, id.vars = names(welch_ind)[1:2])
#
# ggplot(dd, aes(factor(group_size_ratio), value)) +
# geom_abline(intercept=0.05, slope=0, col = 'red') +
# geom_abline(intercept=0.075, slope=0, col = 'red', linetype='dotted') +
# geom_abline(intercept=0.025, slope=0, col = 'red', linetype='dotted') +
# geom_boxplot() + facet_wrap(~variable)
#
# ggplot(dd, aes(factor(group_size_ratio), value, fill = factor(standard_deviation_ratio))) +
# geom_abline(intercept=0.05, slope=0, col = 'red') +
# geom_abline(intercept=0.075, slope=0, col = 'red', linetype='dotted') +
# geom_abline(intercept=0.025, slope=0, col = 'red', linetype='dotted') +
# geom_boxplot() + facet_grid(variable~standard_deviation_ratio) +
# theme(legend.position = 'none')
#
# ## End(Not run)
Run the code above in your browser using DataLab