from_min_max: Distribution from minimum and maximum

Description

Bayesian estimate of distribution when minimum, maximum, median or mean are known.
If D="norm**" or "lnorm**", it will use the approximation of Gumbel based on D.
If D="norm*" or "lnorm*", it will generate D distribution using replicates number of random numbers, and estimate Gumbel parameters from the simulated D distribution.
Otherwise it will estimate parameters of Gumbel distribution based on maximum likelihood.
For D="pois*", "beta*" or "chisq*" only second and third solutions are available.

Usage

from_min_max(
  n = stop("n must be known."),
  fitted.parameters = stop("At least one parameter must be supplied."),
  observed.Minimum,
  observed.Maximum,
  observed.Median = NULL,
  observed.Mean = NULL,
  priors = "dnorm",
  fixed.parameters = NULL,
  D = "norm**",
  n.iter = 10000,
  n.chains = 1,
  n.adapt = 100,
  thin = 30,
  adaptive = FALSE,
  trace = 100,
  replicates = 10000,
  silent = FALSE
)

Value

from_min_max returns a list with output, ML, Bayesian results

Arguments

n: Number of observations
fitted.parameters: The initial value to fit
observed.Minimum: The observed minimum
observed.Maximum: The observed maximum
observed.Median: The observed median (can be omitted)
observed.Mean: The observed mean (can be omitted)
priors: The priors (see MHAlgogen()) or character "dnorm" or "dunif"
fixed.parameters: The fixed parameters
D: The distribution to fit as character. ex "norm" or "lnorm"
n.iter: Number of iterations for each chain
n.chains: Number of chains
n.adapt: Number of iteration to stabilize likelihood
thin: Interval for thinning likelihoods
adaptive: Should an adaptive process for SDProp be used
trace: Or FALSE or period to show progress
replicates: Number of replicates to model D
silent: If TRUE, do not print information

Author

Marc Girondot marc.girondot@gmail.com

Details

from_min_max returns standard deviation and/or mean from minimum and maximum

Examples

Run this code

if (FALSE) {
minobs <- 5
maxobs <- 25
# These two values are not mandatory
meanobs <- 15
medianobs <- 16
n <- 10
# To estimate only the sd of the distribution; mean is fixed
# Note that there is an obligation to have mean even if it
# is in fixed.parameters
# By defaut a normal distribution is fitted

out_sd <- from_min_max(n=n                                , 
                       observed.Minimum=minobs                    , 
                       observed.Maximum=maxobs                    , 
                       fitted.parameters=c(sd=5)                  , 
                       fixed.parameters=c(mean=meanobs)           ,
                       n.iter=10000                               ,
                       trace=TRUE                                 )
                            
plot(out_sd, what="MarkovChain", parameters="sd")
plot(out_sd, what="posterior", parameters="sd")
as.parameters(out_sd, index="quantile")

# To estimate both the sd and mean of the distribution

out_sd_mean_norm <- from_min_max(n=n, 
                            observed.Minimum=minobs                    , 
                            observed.Maximum=maxobs                    , 
                            fitted.parameters=c(mean=15, sd=5)         , 
                            fixed.parameters=NULL                      ,
                            n.iter=10000                               ,
                            D="norm**"                                 ,
                            trace=FALSE                                )
                            
plot(out_sd_mean_norm, what="MarkovChain", parameters="sd", ylim=c(0, 10))
plot(out_sd_mean_norm, what="MarkovChain", parameters="mean", ylim=c(10, 20))
plot(out_sd_mean_norm, what="posterior", parameters="mean",  
     breaks=seq(from=0, to=100, by=1), xlim=c(10, 20))
as.parameters(out_sd_mean_norm, index="quantile")

# Let see what's happened for a lognormal distribution

out_sd_mean_lnorm <- from_min_max(n=n, 
                            observed.Minimum=minobs                    , 
                            observed.Maximum=maxobs                    , 
                            fitted.parameters=c(meanlog=15, sdlog=5)   , 
                            fixed.parameters=NULL                      ,
                            n.iter=10000                               ,
                            D="lnorm**"                                ,
                            trace=FALSE                                )
                            
plot(out_sd_mean_lnorm, what="MarkovChain", parameters="sdlog", ylim=c(0, 10))
plot(out_sd_mean_lnorm, what="MarkovChain", parameters="meanlog", ylim=c(10, 20))
plot(out_sd_mean_lnorm, what="posterior", parameters="meanlog", xlim=c(0, 20), 
     breaks=seq(from=0, to=100, by=1))
as.parameters(out_sd_mean_lnorm, index="quantile")

# To be compared with the rule of thumb:
print(paste0("mean = ", as.character((maxobs + minobs) / 2))) # Mean Not so bad
print(paste0("sd = ", as.character((maxobs - minobs) / 4))) # SD Clearly biased

# Covariation of sd and mean is nearly NULL
cor(x=as.parameters(out_sd_mean_norm, index="all")[, "mean"], 
    y=as.parameters(out_sd_mean_norm, index="all")[, "sd"])^2
plot(x=as.parameters(out_sd_mean_norm, index="all")[, "mean"], 
     y=as.parameters(out_sd_mean_norm, index="all")[, "sd"], 
     xlab="mean", ylab="sd")
     
# Example when minimum, maximum and mean are known

out_sd_mean2 <- from_min_max(n=n                                   , 
                             observed.Minimum=minobs               , 
                             observed.Maximum=maxobs               , 
                             observed.Mean=meanobs                 ,
                             fitted.parameters=c(mean=15, sd=5)    , 
                             fixed.parameters=NULL                 ,
                             n.iter=10000                          ,
                             trace=FALSE                            )
                            
# Example when minimum, maximum, mean and median are known

out_sd_mean3 <- from_min_max(n=n                                   , 
                             observed.Minimum=minobs               , 
                             observed.Maximum=maxobs               , 
                             observed.Mean=meanobs                 ,
                             observed.Median=medianobs             ,
                             fitted.parameters=c(mean=15, sd=5)    , 
                             fixed.parameters=NULL                 ,
                             n.iter=10000                          ,
                             trace=FALSE                            )
                             
plot(out_sd_mean2, what="MarkovChain", parameters="sd")
plot(out_sd_mean2, what="MarkovChain", parameters="mean")
plot(out_sd_mean2, what="posterior", parameters="mean", xlim=c(0, 100), 
     breaks=seq(from=0, to=100, by=5))
as.parameters(out_sd_mean2, index="quantile")

# Example of GEV density function  
# Parametrisation from https://en.wikipedia.org/wiki/Generalized_extreme_value_distribution
dGEV <- getFromNamespace(".dGEV", ns="HelpersMG")
x <- seq(from=-4, to=4, by=0.1)
plot(x, y=dGEV(x=x, 
               location=0, scale=1, shape=-1/2, log=FALSE, sum=FALSE), 
     type="l", col="green", xlab="x", ylab="Density")
lines(x, y=dGEV(x=x, 
                location=0, scale=1, shape=0, log=FALSE, sum=FALSE), col="red")
lines(x, y=dGEV(x=x, 
                location=0, scale=1, shape=1/2, log=FALSE, sum=FALSE), col="blue")
legend("topleft", legend=c("shape=-1/2", "shape=0", "shape=1/2"), 
       lty=1, col=c("green", "red", "blue"))
       
# Note the different parametrisation about shape
dGEV <- getFromNamespace("dgevd", ns="EnvStats")
x <- seq(from=-4, to=4, by=0.1)
plot(x, y=dGEV(x=x, 
               location=0, scale=1, shape=-1/2), 
     type="l", col="green", xlab="x", ylab="Density")
lines(x, y=dGEV(x=x, 
                location=0, scale=1, shape=0), col="red")
lines(x, y=dGEV(x=x, 
                location=0, scale=1, shape=1/2), col="blue")
legend("topleft", legend=c("shape=-1/2", "shape=0", "shape=1/2"), 
       lty=1, col=c("green", "red", "blue"))

# Compute dn using the approximation from Wan et al. (2014)
   get_dn <- function(n) {
     if (n < 2) {
       stop("Sample size n must be at least 2.")
     }
     qnorm((n - 0.375) / (n + 0.25)) * 2
   }
   
   # Estimate standard deviation from min and max
   estimate_sd_from_range <- function(min_val, max_val, n) {
     dn <- get_dn(n)
     range <- max_val - min_val
     sd_estimate <- range / dn
     return(sd_estimate)
   }
   
   # Example usage:
   n <- 10
   min_val <- 5
   max_val <- 25
   
   dn_value <- get_dn(n)
   sd_estimate <- estimate_sd_from_range(min_val, max_val, n)
   
   cat("dn =", dn_value, "\n")
   cat("Estimated SD =", sd_estimate, "\n")
   
   # To generate data from publication
   
   library(parallel)
   library(embryogrowth)
   
   # Values for the prior of SD
   outSD <- subset(DatabaseTSD, subset = (((!is.na(IP.SD)) | 
                   (!is.na(IP.SE))) & (!is.na(Hatched))), 
                   select=c("Hatched", "IP.SE", "IP.SD", "IP.mean"))
   outSD$IP.SD <- ifelse(is.na(outSD$IP.SD), outSD$IP.SE*sqrt(outSD$Hatched), outSD$IP.SD)
   
   # Model estimation
   
   Example <- subset(DatabaseTSD, subset = (!is.na(IP.min)) & 
                       ((is.na(IP.SE)) & (is.na(IP.SD)) & (!is.na(Hatched)) & 
                          (is.na(IP.mean))), select=c("Species", "Incubation.temperature.set", 
                                                      "Hatched", "IP.min", "IP.max", 
                                                      "Reference"))
   
   out <- universalmclapply(X=1:nrow(Example), FUN=function(i) {
     n <- Example[i, "Hatched"]
     
     priors <- structure(list(Density = c("dunif", "dlnorm"), 
                              Prior1 = c(30, log(mean(outSD$IP.SD))), 
                              Prior2 = c(120, log(sd(outSD$IP.SD))), 
                              SDProp = c(1, 1), 
                              Min = c(30, 0.1), 
                              Max = c(120, 6), 
                              Init = c((Example[i, "IP.min"]+ Example[i, "IP.max"])/2, log(2))), 
                         row.names = c("mean", "sd"), 
                         class = c("PriorsmcmcComposite", "data.frame"))
     
     
     out_sd_mean_mcmc <- from_min_max(n=n, observed.Minimum=Example[i, "IP.min"], 
                                      observed.Maximum=Example[i, "IP.max"], 
                                      fitted.parameters=c(mean=(Example[i, "IP.min"]+ 
                                                             Example[i, "IP.max"])/2, 
                                                          sd=log(2)), 
                                      priors = priors, 
                                      D="norm**", 
                                      n.iter = 10000, n.adapt=15000, thin=30, 
                                      trace=100, adaptive = TRUE)
     
     # plot(out_sd_mean_mcmc, what = "MarkovChain", parameters = "sd")
     
     assign(paste0("out_sd_mean_mcmc_", as.character(i)), out_sd_mean_mcmc)
     save(list = paste0("out_sd_mean_mcmc_", as.character(i)), 
          file = file.path("dataOut", paste0("out_sd_mean_mcmc_", as.character(i), ".Rdata")))
     # rm(list=paste0("out_sd_mean_mcmc_", as.character(i)))
   }, progressbar = TRUE)
   
   # Generate table with all results
   
   Example <- subset(DatabaseTSD, subset = (!is.na(IP.min)) & 
   ((is.na(IP.SE)) & (is.na(IP.SD)) & (!is.na(Hatched)) & 
     (is.na(IP.mean))), select=c("Species", "Incubation.temperature.set", 
                                 "Hatched", "IP.min", "IP.max", 
                                 "Reference"))
   
   Example <- cbind(Example, dn=NA)
   Example <- cbind(Example, "SD(Hozo 2005)"=NA)
   Example <- cbind(Example, "SD(Wan 2014)"=NA)
   Example <- cbind(Example, "mean(Wan 2014)"=NA)
   Example <- cbind(Example, "median(SD)"=NA)
   Example <- cbind(Example, "2.5%(SD)"=NA)
   Example <- cbind(Example, "97.5%(SD)"=NA)
   Example <- cbind(Example, "25%(SD)"=NA)
   Example <- cbind(Example, "75%(SD)"=NA)
   Example <- cbind(Example, "median(mean)"=NA)
   Example <- cbind(Example, "2.5%(mean)"=NA)
   Example <- cbind(Example, "97.5%(mean)"=NA)
   Example <- cbind(Example, "25%(mean)"=NA)
   Example <- cbind(Example, "75%(mean)"=NA)
   Example <- cbind(Example, "z(mean)"=NA)
   Example <- cbind(Example, "z(SD)"=NA)
   
   
   library(coda)
   
   for (i in 1:nrow(Example)) {
     
     n <- Example[i, "Hatched"]
     if (n<= 15) {
       Example[i, "SD(Hozo 2005)"] <- (1/sqrt(12))*sqrt(((Example[i, "IP.max"] - 
        Example[i, "IP.min"]))^2+((Example[i, "IP.max"] - Example[i, "IP.min"]))^2/4)
     } else {
       if (n<=70) {
         Example[i, "SD(Hozo 2005)"] <- (Example[i, "IP.max"] - Example[i, "IP.min"])/4
       } else {
         Example[i, "SD(Hozo 2005)"] <- (Example[i, "IP.max"] - Example[i, "IP.min"])/6
       }
     }
     
     Example[i, "dn"] <- get_dn(n)
     Example[i, "SD(Wan 2014)"] <- estimate_sd_from_range(Example[i, "IP.min"], 
            Example[i, "IP.max"], n)
     Example[i, "mean(Wan 2014)"] <- (Example[i, "IP.min"]+ Example[i, "IP.max"])/2
     load(file = file.path("dataOut", paste0("out_sd_mean_mcmc_", as.character(i), ".Rdata")))
     out_sd_mean_mcmc <- get(paste0("out_sd_mean_mcmc_", as.character(i)))
     k <- as.parameters(out_sd_mean_mcmc, index="quantile", probs=c(0.025, 0.25, 0.5, 0.75, 0.975))
     outgk <- geweke.diag(as.mcmc(out_sd_mean_mcmc))
     rm(out_sd_mean_mcmc)
     rm(list=paste0("out_sd_mean_mcmc_", as.character(i)))
     Example[i, "median(SD)"] <- k["50%", "sd"]
     Example[i, "2.5%(SD)"] <- k["2.5%", "sd"]
     Example[i, "97.5%(SD)"] <- k["97.5%", "sd"]
     Example[i, "25%(SD)"] <- k["25%", "sd"]
     Example[i, "75%(SD)"] <- k["75%", "sd"]
     Example[i, "median(mean)"] <- k["50%", "mean"]
     Example[i, "2.5%(mean)"] <- k["2.5%", "mean"]
     Example[i, "97.5%(mean)"] <- k["97.5%", "mean"]
     Example[i, "25%(mean)"] <- k["25%", "mean"]
     Example[i, "75%(mean)"] <- k["75%", "mean"]
     Example[i, "z(mean)"] <- outgk$`1`$z["mean"]
     Example[i, "z(SD)"] <- outgk$`1`$z["sd"]
   }
   
   rownames(Example) <- as.character(1:nrow(Example))
  
  # Figure 1
  layout(mat = matrix(1:4, nrow=2))
  par(mar=c(4, 4, 0, 0))
  plot(out_sd_mean_mcmc_11, what = "MarkovChain", parameters = "mean", ylim=c(70, 76))
  text(x=ScalePreviousPlot(x=0.05, y=0.95)$x, y=ScalePreviousPlot(x=0.85, y=0.95)$y, 
     labels = "A: mean", cex=1.5, pos=4)
  plot(out_sd_mean_mcmc_8, what = "MarkovChain", parameters = "mean", ylim=c(50, 52))
  text(x=ScalePreviousPlot(x=0.05, y=0.95)$x, y=ScalePreviousPlot(x=0.85, y=0.95)$y, 
      labels = "C: mean", cex=1.5, pos=4)
  plot(out_sd_mean_mcmc_11, what = "MarkovChain", parameters = "sd")
  text(x=ScalePreviousPlot(x=0.05, y=0.95)$x, y=ScalePreviousPlot(x=0.85, y=0.95)$y, 
      labels = "B: sd", cex=1.5, pos=4)
  plot(out_sd_mean_mcmc_8, what = "MarkovChain", parameters = "sd")
  text(x=ScalePreviousPlot(x=0.05, y=0.95)$x, y=ScalePreviousPlot(x=0.85, y=0.95)$y, 
      labels = "D: sd", cex=1.5, pos=4)
  
  # Figure 2
  dtafigure2 <- matrix(NA, nrow=nrow(as.parameters(out_sd_mean_mcmc, index = "all")), 
                       ncol=nrow(Example))
  
  for (i in 1:nrow(Example)) {
    # i <- 1
    load(file=file.path("dataOut", paste0("out_sd_mean_mcmc_", as.character(i), ".Rdata")))
    out_sd_mean_mcmc <- get(paste0("out_sd_mean_mcmc_", as.character(i)))
    PPM <- rnorm(nrow(as.parameters(out_sd_mean_mcmc, index = "all")), 
       mean = as.parameters(out_sd_mean_mcmc, index = "all")[, "mean"], 
       sd=as.parameters(out_sd_mean_mcmc, index = "all")[, "sd"])
    dtafigure2[, i] <- PPM
  }
  
  layout(mat = 1)
  par(mar=c(3, 4, 0, 0))
  boxplot(dtafigure2, outline=FALSE, las=1, bty="n", xaxt="n", frame=FALSE, ylim=c(40, 90), 
    col=sapply(as.character(Example$Species), 
      FUN=function(i) switch(i, "Caretta caretta"="white", 
                                 "Chelonia mydas"="green", 
                                 "Dermochelys coriacea"="lightblue")), 
    ylab="Incubation duration in days")
  axis(1, at=1:30, labels = rep(NA, 30))
  Cc <- sum(as.character(Example$Species) == "Caretta caretta")
  CcCm <- sum(as.character(Example$Species) == "Caretta caretta" | 
              as.character(Example$Species) == "Chelonia mydas")
  segments(x0= Cc + 0.5, 
           x1= Cc + 0.5, 
           y0=40, y1=90, lty = 2)
  
  segments(x0= CcCm + 0.5, 
           x1= CcCm + 0.5, 
           y0=40, y1=90, lty = 2)
  
  par(xpd=TRUE)
  text(x=Cc/2, y=33, labels = expression(italic("Caretta caretta")), cex=0.9)
  
  text(x=Cc+(CcCm-Cc)/2, y=32, labels = expression(italic("Chelonia\n  mydas")), cex=0.9)
  text(x=CcCm+(30-CcCm)/2, y=32, labels = expression(italic("Dermochelys\n  coriacea")), cex=0.9)
  
  # With Lognormal
   # To generate data from publication
   
   library(parallel)
   library(embryogrowth)
   
   # Values for the prior of SD
   outSD <- subset(DatabaseTSD, subset = (((!is.na(IP.SD)) | 
                   (!is.na(IP.SE))) & (!is.na(Hatched))), 
                   select=c("Hatched", "IP.SE", "IP.SD", "IP.mean"))
   outSD$IP.SD <- ifelse(is.na(outSD$IP.SD), outSD$IP.SE*sqrt(outSD$Hatched), outSD$IP.SD)
   
   # Model estimation
   
   Example <- subset(DatabaseTSD, subset = (!is.na(IP.min)) & 
                       ((is.na(IP.SE)) & (is.na(IP.SD)) & (!is.na(Hatched)) & 
                          (is.na(IP.mean))), select=c("Species", "Incubation.temperature.set", 
                                                      "Hatched", "IP.min", "IP.max", 
                                                      "Reference"))
   
   out <- universalmclapply(X=1:nrow(Example), FUN=function(i) {
     n <- Example[i, "Hatched"]
     
     priors <- structure(list(Density = c("dunif", "dlnorm"), 
                              Prior1 = c(30, log(mean(outSD$IP.SD))), 
                              Prior2 = c(120, log(sd(outSD$IP.SD))), 
                              SDProp = c(1, 1), 
                              Min = c(30, 0.1), 
                              Max = c(120, 6), 
                              Init = c((Example[i, "IP.min"]+ Example[i, "IP.max"])/2, log(2))), 
                         row.names = c("meanlog", "sdlog"), 
                         class = c("PriorsmcmcComposite", "data.frame"))
     
     
     out_sd_mean_mcmc_LN <- from_min_max(n=n, observed.Minimum=Example[i, "IP.min"], 
                                      observed.Maximum=Example[i, "IP.max"], 
                                      fitted.parameters=c(mean=(Example[i, "IP.min"]+ 
                                                             Example[i, "IP.max"])/2, 
                                                          sd=log(2)), 
                                      priors = priors, 
                                      D="lnorm**", 
                                      n.iter = 10000, n.adapt=15000, thin=30, 
                                      trace=100, adaptive = TRUE)
     
     # plot(out_sd_mean_mcmc, what = "MarkovChain", parameters = "sd")
     
     assign(paste0("out_sd_mean_mcmc_LN_", as.character(i)), out_sd_mean_mcmc_LN)
     save(list = paste0("out_sd_mean_mcmc_LN_", as.character(i)), 
          file = file.path("dataOut", paste0("out_sd_mean_mcmc_LN_", as.character(i), ".Rdata")))
     # rm(list=paste0("out_sd_mean_mcmc_LN_", as.character(i)))
   }, progressbar = TRUE)
   
}

Run the code above in your browser using DataLab