if (FALSE) {
minobs <- 5
maxobs <- 25
# These two values are not mandatory
meanobs <- 15
medianobs <- 16
n <- 10
# To estimate only the sd of the distribution; mean is fixed
# Note that there is an obligation to have mean even if it
# is in fixed.parameters
# By defaut a normal distribution is fitted
out_sd <- from_min_max(n=n ,
observed.Minimum=minobs ,
observed.Maximum=maxobs ,
fitted.parameters=c(sd=5) ,
fixed.parameters=c(mean=meanobs) ,
n.iter=10000 ,
trace=TRUE )
plot(out_sd, what="MarkovChain", parameters="sd")
plot(out_sd, what="posterior", parameters="sd")
as.parameters(out_sd, index="quantile")
# To estimate both the sd and mean of the distribution
out_sd_mean_norm <- from_min_max(n=n,
observed.Minimum=minobs ,
observed.Maximum=maxobs ,
fitted.parameters=c(mean=15, sd=5) ,
fixed.parameters=NULL ,
n.iter=10000 ,
D="norm**" ,
trace=FALSE )
plot(out_sd_mean_norm, what="MarkovChain", parameters="sd", ylim=c(0, 10))
plot(out_sd_mean_norm, what="MarkovChain", parameters="mean", ylim=c(10, 20))
plot(out_sd_mean_norm, what="posterior", parameters="mean",
breaks=seq(from=0, to=100, by=1), xlim=c(10, 20))
as.parameters(out_sd_mean_norm, index="quantile")
# Let see what's happened for a lognormal distribution
out_sd_mean_lnorm <- from_min_max(n=n,
observed.Minimum=minobs ,
observed.Maximum=maxobs ,
fitted.parameters=c(meanlog=15, sdlog=5) ,
fixed.parameters=NULL ,
n.iter=10000 ,
D="lnorm**" ,
trace=FALSE )
plot(out_sd_mean_lnorm, what="MarkovChain", parameters="sdlog", ylim=c(0, 10))
plot(out_sd_mean_lnorm, what="MarkovChain", parameters="meanlog", ylim=c(10, 20))
plot(out_sd_mean_lnorm, what="posterior", parameters="meanlog", xlim=c(0, 20),
breaks=seq(from=0, to=100, by=1))
as.parameters(out_sd_mean_lnorm, index="quantile")
# To be compared with the rule of thumb:
print(paste0("mean = ", as.character((maxobs + minobs) / 2))) # Mean Not so bad
print(paste0("sd = ", as.character((maxobs - minobs) / 4))) # SD Clearly biased
# Covariation of sd and mean is nearly NULL
cor(x=as.parameters(out_sd_mean_norm, index="all")[, "mean"],
y=as.parameters(out_sd_mean_norm, index="all")[, "sd"])^2
plot(x=as.parameters(out_sd_mean_norm, index="all")[, "mean"],
y=as.parameters(out_sd_mean_norm, index="all")[, "sd"],
xlab="mean", ylab="sd")
# Example when minimum, maximum and mean are known
out_sd_mean2 <- from_min_max(n=n ,
observed.Minimum=minobs ,
observed.Maximum=maxobs ,
observed.Mean=meanobs ,
fitted.parameters=c(mean=15, sd=5) ,
fixed.parameters=NULL ,
n.iter=10000 ,
trace=FALSE )
# Example when minimum, maximum, mean and median are known
out_sd_mean3 <- from_min_max(n=n ,
observed.Minimum=minobs ,
observed.Maximum=maxobs ,
observed.Mean=meanobs ,
observed.Median=medianobs ,
fitted.parameters=c(mean=15, sd=5) ,
fixed.parameters=NULL ,
n.iter=10000 ,
trace=FALSE )
plot(out_sd_mean2, what="MarkovChain", parameters="sd")
plot(out_sd_mean2, what="MarkovChain", parameters="mean")
plot(out_sd_mean2, what="posterior", parameters="mean", xlim=c(0, 100),
breaks=seq(from=0, to=100, by=5))
as.parameters(out_sd_mean2, index="quantile")
# Example of GEV density function
# Parametrisation from https://en.wikipedia.org/wiki/Generalized_extreme_value_distribution
dGEV <- getFromNamespace(".dGEV", ns="HelpersMG")
x <- seq(from=-4, to=4, by=0.1)
plot(x, y=dGEV(x=x,
location=0, scale=1, shape=-1/2, log=FALSE, sum=FALSE),
type="l", col="green", xlab="x", ylab="Density")
lines(x, y=dGEV(x=x,
location=0, scale=1, shape=0, log=FALSE, sum=FALSE), col="red")
lines(x, y=dGEV(x=x,
location=0, scale=1, shape=1/2, log=FALSE, sum=FALSE), col="blue")
legend("topleft", legend=c("shape=-1/2", "shape=0", "shape=1/2"),
lty=1, col=c("green", "red", "blue"))
# Note the different parametrisation about shape
dGEV <- getFromNamespace("dgevd", ns="EnvStats")
x <- seq(from=-4, to=4, by=0.1)
plot(x, y=dGEV(x=x,
location=0, scale=1, shape=-1/2),
type="l", col="green", xlab="x", ylab="Density")
lines(x, y=dGEV(x=x,
location=0, scale=1, shape=0), col="red")
lines(x, y=dGEV(x=x,
location=0, scale=1, shape=1/2), col="blue")
legend("topleft", legend=c("shape=-1/2", "shape=0", "shape=1/2"),
lty=1, col=c("green", "red", "blue"))
# Compute dn using the approximation from Wan et al. (2014)
get_dn <- function(n) {
if (n < 2) {
stop("Sample size n must be at least 2.")
}
qnorm((n - 0.375) / (n + 0.25)) * 2
}
# Estimate standard deviation from min and max
estimate_sd_from_range <- function(min_val, max_val, n) {
dn <- get_dn(n)
range <- max_val - min_val
sd_estimate <- range / dn
return(sd_estimate)
}
# Example usage:
n <- 10
min_val <- 5
max_val <- 25
dn_value <- get_dn(n)
sd_estimate <- estimate_sd_from_range(min_val, max_val, n)
cat("dn =", dn_value, "\n")
cat("Estimated SD =", sd_estimate, "\n")
# To generate data from publication
library(parallel)
library(embryogrowth)
# Values for the prior of SD
outSD <- subset(DatabaseTSD, subset = (((!is.na(IP.SD)) |
(!is.na(IP.SE))) & (!is.na(Hatched))),
select=c("Hatched", "IP.SE", "IP.SD", "IP.mean"))
outSD$IP.SD <- ifelse(is.na(outSD$IP.SD), outSD$IP.SE*sqrt(outSD$Hatched), outSD$IP.SD)
# Model estimation
Example <- subset(DatabaseTSD, subset = (!is.na(IP.min)) &
((is.na(IP.SE)) & (is.na(IP.SD)) & (!is.na(Hatched)) &
(is.na(IP.mean))), select=c("Species", "Incubation.temperature.set",
"Hatched", "IP.min", "IP.max",
"Reference"))
out <- universalmclapply(X=1:nrow(Example), FUN=function(i) {
n <- Example[i, "Hatched"]
priors <- structure(list(Density = c("dunif", "dlnorm"),
Prior1 = c(30, log(mean(outSD$IP.SD))),
Prior2 = c(120, log(sd(outSD$IP.SD))),
SDProp = c(1, 1),
Min = c(30, 0.1),
Max = c(120, 6),
Init = c((Example[i, "IP.min"]+ Example[i, "IP.max"])/2, log(2))),
row.names = c("mean", "sd"),
class = c("PriorsmcmcComposite", "data.frame"))
out_sd_mean_mcmc <- from_min_max(n=n, observed.Minimum=Example[i, "IP.min"],
observed.Maximum=Example[i, "IP.max"],
fitted.parameters=c(mean=(Example[i, "IP.min"]+
Example[i, "IP.max"])/2,
sd=log(2)),
priors = priors,
D="norm**",
n.iter = 10000, n.adapt=15000, thin=30,
trace=100, adaptive = TRUE)
# plot(out_sd_mean_mcmc, what = "MarkovChain", parameters = "sd")
assign(paste0("out_sd_mean_mcmc_", as.character(i)), out_sd_mean_mcmc)
save(list = paste0("out_sd_mean_mcmc_", as.character(i)),
file = file.path("dataOut", paste0("out_sd_mean_mcmc_", as.character(i), ".Rdata")))
# rm(list=paste0("out_sd_mean_mcmc_", as.character(i)))
}, progressbar = TRUE)
# Generate table with all results
Example <- subset(DatabaseTSD, subset = (!is.na(IP.min)) &
((is.na(IP.SE)) & (is.na(IP.SD)) & (!is.na(Hatched)) &
(is.na(IP.mean))), select=c("Species", "Incubation.temperature.set",
"Hatched", "IP.min", "IP.max",
"Reference"))
Example <- cbind(Example, dn=NA)
Example <- cbind(Example, "SD(Hozo 2005)"=NA)
Example <- cbind(Example, "SD(Wan 2014)"=NA)
Example <- cbind(Example, "mean(Wan 2014)"=NA)
Example <- cbind(Example, "median(SD)"=NA)
Example <- cbind(Example, "2.5%(SD)"=NA)
Example <- cbind(Example, "97.5%(SD)"=NA)
Example <- cbind(Example, "25%(SD)"=NA)
Example <- cbind(Example, "75%(SD)"=NA)
Example <- cbind(Example, "median(mean)"=NA)
Example <- cbind(Example, "2.5%(mean)"=NA)
Example <- cbind(Example, "97.5%(mean)"=NA)
Example <- cbind(Example, "25%(mean)"=NA)
Example <- cbind(Example, "75%(mean)"=NA)
Example <- cbind(Example, "z(mean)"=NA)
Example <- cbind(Example, "z(SD)"=NA)
library(coda)
for (i in 1:nrow(Example)) {
n <- Example[i, "Hatched"]
if (n<= 15) {
Example[i, "SD(Hozo 2005)"] <- (1/sqrt(12))*sqrt(((Example[i, "IP.max"] -
Example[i, "IP.min"]))^2+((Example[i, "IP.max"] - Example[i, "IP.min"]))^2/4)
} else {
if (n<=70) {
Example[i, "SD(Hozo 2005)"] <- (Example[i, "IP.max"] - Example[i, "IP.min"])/4
} else {
Example[i, "SD(Hozo 2005)"] <- (Example[i, "IP.max"] - Example[i, "IP.min"])/6
}
}
Example[i, "dn"] <- get_dn(n)
Example[i, "SD(Wan 2014)"] <- estimate_sd_from_range(Example[i, "IP.min"],
Example[i, "IP.max"], n)
Example[i, "mean(Wan 2014)"] <- (Example[i, "IP.min"]+ Example[i, "IP.max"])/2
load(file = file.path("dataOut", paste0("out_sd_mean_mcmc_", as.character(i), ".Rdata")))
out_sd_mean_mcmc <- get(paste0("out_sd_mean_mcmc_", as.character(i)))
k <- as.parameters(out_sd_mean_mcmc, index="quantile", probs=c(0.025, 0.25, 0.5, 0.75, 0.975))
outgk <- geweke.diag(as.mcmc(out_sd_mean_mcmc))
rm(out_sd_mean_mcmc)
rm(list=paste0("out_sd_mean_mcmc_", as.character(i)))
Example[i, "median(SD)"] <- k["50%", "sd"]
Example[i, "2.5%(SD)"] <- k["2.5%", "sd"]
Example[i, "97.5%(SD)"] <- k["97.5%", "sd"]
Example[i, "25%(SD)"] <- k["25%", "sd"]
Example[i, "75%(SD)"] <- k["75%", "sd"]
Example[i, "median(mean)"] <- k["50%", "mean"]
Example[i, "2.5%(mean)"] <- k["2.5%", "mean"]
Example[i, "97.5%(mean)"] <- k["97.5%", "mean"]
Example[i, "25%(mean)"] <- k["25%", "mean"]
Example[i, "75%(mean)"] <- k["75%", "mean"]
Example[i, "z(mean)"] <- outgk$`1`$z["mean"]
Example[i, "z(SD)"] <- outgk$`1`$z["sd"]
}
rownames(Example) <- as.character(1:nrow(Example))
# Figure 1
layout(mat = matrix(1:4, nrow=2))
par(mar=c(4, 4, 0, 0))
plot(out_sd_mean_mcmc_11, what = "MarkovChain", parameters = "mean", ylim=c(70, 76))
text(x=ScalePreviousPlot(x=0.05, y=0.95)$x, y=ScalePreviousPlot(x=0.85, y=0.95)$y,
labels = "A: mean", cex=1.5, pos=4)
plot(out_sd_mean_mcmc_8, what = "MarkovChain", parameters = "mean", ylim=c(50, 52))
text(x=ScalePreviousPlot(x=0.05, y=0.95)$x, y=ScalePreviousPlot(x=0.85, y=0.95)$y,
labels = "C: mean", cex=1.5, pos=4)
plot(out_sd_mean_mcmc_11, what = "MarkovChain", parameters = "sd")
text(x=ScalePreviousPlot(x=0.05, y=0.95)$x, y=ScalePreviousPlot(x=0.85, y=0.95)$y,
labels = "B: sd", cex=1.5, pos=4)
plot(out_sd_mean_mcmc_8, what = "MarkovChain", parameters = "sd")
text(x=ScalePreviousPlot(x=0.05, y=0.95)$x, y=ScalePreviousPlot(x=0.85, y=0.95)$y,
labels = "D: sd", cex=1.5, pos=4)
# Figure 2
dtafigure2 <- matrix(NA, nrow=nrow(as.parameters(out_sd_mean_mcmc, index = "all")),
ncol=nrow(Example))
for (i in 1:nrow(Example)) {
# i <- 1
load(file=file.path("dataOut", paste0("out_sd_mean_mcmc_", as.character(i), ".Rdata")))
out_sd_mean_mcmc <- get(paste0("out_sd_mean_mcmc_", as.character(i)))
PPM <- rnorm(nrow(as.parameters(out_sd_mean_mcmc, index = "all")),
mean = as.parameters(out_sd_mean_mcmc, index = "all")[, "mean"],
sd=as.parameters(out_sd_mean_mcmc, index = "all")[, "sd"])
dtafigure2[, i] <- PPM
}
layout(mat = 1)
par(mar=c(3, 4, 0, 0))
boxplot(dtafigure2, outline=FALSE, las=1, bty="n", xaxt="n", frame=FALSE, ylim=c(40, 90),
col=sapply(as.character(Example$Species),
FUN=function(i) switch(i, "Caretta caretta"="white",
"Chelonia mydas"="green",
"Dermochelys coriacea"="lightblue")),
ylab="Incubation duration in days")
axis(1, at=1:30, labels = rep(NA, 30))
Cc <- sum(as.character(Example$Species) == "Caretta caretta")
CcCm <- sum(as.character(Example$Species) == "Caretta caretta" |
as.character(Example$Species) == "Chelonia mydas")
segments(x0= Cc + 0.5,
x1= Cc + 0.5,
y0=40, y1=90, lty = 2)
segments(x0= CcCm + 0.5,
x1= CcCm + 0.5,
y0=40, y1=90, lty = 2)
par(xpd=TRUE)
text(x=Cc/2, y=33, labels = expression(italic("Caretta caretta")), cex=0.9)
text(x=Cc+(CcCm-Cc)/2, y=32, labels = expression(italic("Chelonia\n mydas")), cex=0.9)
text(x=CcCm+(30-CcCm)/2, y=32, labels = expression(italic("Dermochelys\n coriacea")), cex=0.9)
# With Lognormal
# To generate data from publication
library(parallel)
library(embryogrowth)
# Values for the prior of SD
outSD <- subset(DatabaseTSD, subset = (((!is.na(IP.SD)) |
(!is.na(IP.SE))) & (!is.na(Hatched))),
select=c("Hatched", "IP.SE", "IP.SD", "IP.mean"))
outSD$IP.SD <- ifelse(is.na(outSD$IP.SD), outSD$IP.SE*sqrt(outSD$Hatched), outSD$IP.SD)
# Model estimation
Example <- subset(DatabaseTSD, subset = (!is.na(IP.min)) &
((is.na(IP.SE)) & (is.na(IP.SD)) & (!is.na(Hatched)) &
(is.na(IP.mean))), select=c("Species", "Incubation.temperature.set",
"Hatched", "IP.min", "IP.max",
"Reference"))
out <- universalmclapply(X=1:nrow(Example), FUN=function(i) {
n <- Example[i, "Hatched"]
priors <- structure(list(Density = c("dunif", "dlnorm"),
Prior1 = c(30, log(mean(outSD$IP.SD))),
Prior2 = c(120, log(sd(outSD$IP.SD))),
SDProp = c(1, 1),
Min = c(30, 0.1),
Max = c(120, 6),
Init = c((Example[i, "IP.min"]+ Example[i, "IP.max"])/2, log(2))),
row.names = c("meanlog", "sdlog"),
class = c("PriorsmcmcComposite", "data.frame"))
out_sd_mean_mcmc_LN <- from_min_max(n=n, observed.Minimum=Example[i, "IP.min"],
observed.Maximum=Example[i, "IP.max"],
fitted.parameters=c(mean=(Example[i, "IP.min"]+
Example[i, "IP.max"])/2,
sd=log(2)),
priors = priors,
D="lnorm**",
n.iter = 10000, n.adapt=15000, thin=30,
trace=100, adaptive = TRUE)
# plot(out_sd_mean_mcmc, what = "MarkovChain", parameters = "sd")
assign(paste0("out_sd_mean_mcmc_LN_", as.character(i)), out_sd_mean_mcmc_LN)
save(list = paste0("out_sd_mean_mcmc_LN_", as.character(i)),
file = file.path("dataOut", paste0("out_sd_mean_mcmc_LN_", as.character(i), ".Rdata")))
# rm(list=paste0("out_sd_mean_mcmc_LN_", as.character(i)))
}, progressbar = TRUE)
}
Run the code above in your browser using DataLab