#### Step 1 --- Define your conditions under study and create design data.frame
# (use EXPLICIT names, avoid things like N <- 100. That's fine in functions, not here)
sample_sizes <- c(30, 60, 90, 120)
standard_deviation_ratios <- c(1, 4, 8)
group_size_ratios <- c(.5, 1, 2)
Design <- expand.grid(sample_size=sample_sizes,
group_size_ratio=group_size_ratios,
standard_deviation_ratio=standard_deviation_ratios)
dim(Design)
head(Design)
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 2 --- Define generate, analyse, and summarise functions
# skeleton functions to be edited
SimDesign_functions()
# help(generate)
Generate <- function(condition, fixed_objects = NULL){
#require packages/define functions if needed, or better yet index with the :: operator
N <- condition$sample_size
grs <- condition$group_size_ratio
sd <- condition$standard_deviation_ratio
if(grs < 1){
N2 <- N / (1/grs + 1)
N1 <- N - N2
} else {
N1 <- N / (grs + 1)
N2 <- N - N1
}
group1 <- rnorm(N1)
group2 <- rnorm(N2, sd=sd)
dat <- data.frame(group = c(rep('g1', N1), rep('g2', N2)), DV = c(group1, group2))
return(dat)
}
# help(analyse)
Analyse <- function(condition, dat, fixed_objects = NULL, parameters = NULL){
# require packages/define functions if needed, or better yet index with the :: operator
require(stats)
mygreatfunction <- function(x) print('Do some stuff')
#wrap computational statistics in try() statements to control estimation problems
welch <- t.test(DV ~ group, dat)
ind <- t.test(DV ~ group, dat, var.equal=TRUE)
# In this function the p values for the t-tests are returned,
# and make sure to name each element, for future reference
ret <- c(welch = welch$p.value, independent = ind$p.value)
return(ret)
}
# help(summarise)
Summarise <- function(condition, results, fixed_objects = NULL, parameters_list = NULL){
#find results of interest here (e.g., alpha < .1, .05, .01)
lessthan.05 <- EDR(results, alpha = .05)
# return the results that will be appended to the design input
ret <- c(lessthan.05=lessthan.05)
return(ret)
}
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 3 --- Collect results by looping over the rows in design
# test to see if it works and for debugging
Final <- runSimulation(design=Design, replications=5, parallel=FALSE,
generate=Generate, analyse=Analyse, summarise=Summarise)
# complete run with 1000 replications per condition
Final <- runSimulation(design=Design, replications=1000, parallel=TRUE,
generate=Generate, analyse=Analyse, summarise=Summarise)
head(Final)
View(Final)
## save results to a file upon completion (not run)
# runSimulation(design=Design, replications=1000, parallel=TRUE, filename = 'mysim',
# generate=Generate, analyse=Analyse, summarise=Summarise)
## Debug the generate function. See ?browser for help on debugging
## Type help to see available commands (e.g., n, c, where, ...),
## ls() to see what has been defined, and type Q to quit the debugger
runSimulation(design=Design, replications=1000,
generate=Generate, analyse=Analyse, summarise=Summarise,
parallel=TRUE, edit='generate')
## Alternatively, place a browser() within the desired function line to
## jump to a specific location
Summarise <- function(condition, results, parameters_list = NULL){
#find results of interest here (e.g., alpha < .1, .05, .01)
nms <- c('welch', 'independent')
lessthan.05 <- EDR(results[,nms], alpha = .05)
browser()
# return the results that will be appended to the design input
ret <- c(lessthan.05=lessthan.05)
return(ret)
}
runSimulation(design=Design, replications=1000,
generate=Generate, analyse=Analyse, summarise=Summarise,
parallel=TRUE)
## EXTRA: To run the simulation on a MPI cluster, use the following setup on each node (not run)
# library(doMPI)
# cl <- startMPIcluster()
# registerDoMPI(cl)
# Final <- runSimulation(design=Design, replications=1000, MPI=TRUE, save=TRUE,
# generate=Generate, analyse=Analyse, summarise=Summarise)
# saveRDS(Final, 'mysim.rds')
# closeCluster(cl)
# mpi.quit()
## Similarly, run simulation on a network linked via ssh
## (two way ssh key-paired connection must be possible between master and slave nodes)
##
## define IP addresses, including primary IP
# primary <- '192.168.2.20'
# IPs <- list(
# list(host=primary, user='phil', ncore=8),
# list(host='192.168.2.17', user='phil', ncore=8)
# )
# spec <- lapply(IPs, function(IP)
# rep(list(list(host=IP$host, user=IP$user)), IP$ncore))
# spec <- unlist(spec, recursive=FALSE)
#
# cl <- parallel::makeCluster(type='PSOCK', master=primary, spec=spec)
# Final <- runSimulation(design=Design, replications=1000, parallel = TRUE, save=TRUE,
# generate=Generate, analyse=Analyse, summarise=Summarise, cl=cl)
#~~~~~~~~~~~~~~~~~~~~~~~~
# Step 4 --- Post-analysis: Create a new R file for analyzing the Final data.frame with R based
# regression stuff, so use the lm() function to find main effects, interactions, plots, etc.
# This is where you get to be a data analyst!
psych::describe(Final)
psych::describeBy(Final, group = Final$standard_deviation_ratio)
# make into factors (if helpful)
Final$f_gsr <- with(Final, factor(group_size_ratio))
Final$f_sdr <- with(Final, factor(standard_deviation_ratio))
#lm analysis (might want to change DV to a logit for better stability)
mod <- lm(lessthan.05.welch ~ f_gsr * f_sdr, Final)
car::Anova(mod)
mod2 <- lm(lessthan.05.independent ~ f_gsr * f_sdr, Final)
car::Anova(mod2)
# make some plots
library(ggplot2)
library(reshape2)
welch_ind <- Final[,c('group_size_ratio', "standard_deviation_ratio",
"lessthan.05.welch", "lessthan.05.independent")]
dd <- melt(welch_ind, id.vars = names(welch_ind)[1:2])
ggplot(dd, aes(factor(group_size_ratio), value)) +
geom_abline(intercept=0.05, slope=0, col = 'red') +
geom_abline(intercept=0.075, slope=0, col = 'red', linetype='dotted') +
geom_abline(intercept=0.025, slope=0, col = 'red', linetype='dotted') +
geom_boxplot() + facet_wrap(~variable)
ggplot(dd, aes(factor(group_size_ratio), value, fill = factor(standard_deviation_ratio))) +
geom_abline(intercept=0.05, slope=0, col = 'red') +
geom_abline(intercept=0.075, slope=0, col = 'red', linetype='dotted') +
geom_abline(intercept=0.025, slope=0, col = 'red', linetype='dotted') +
geom_boxplot() + facet_grid(variable~standard_deviation_ratio) +
theme(legend.position = 'none')
Run the code above in your browser using DataLab