Learn R Programming

HelpersMG (version 6.6)

from_min_max: Distribution from minimum and maximum

Description

Bayesian estimate of distribution when minimum, maximum, median or mean are known.
If D="norm**" or "lnorm**", it will use the approximation of Gumbel based on D.
If D="norm*" or "lnorm*", it will generate D distribution using replicates number of random numbers, and estimate Gumbel parameters from the simulated D distribution.
Otherwise it will estimate parameters of Gumbel distribution based on maximum likelihood.
For D="pois*", "beta*" or "chisq*" only second and third solutions are available.

Usage

from_min_max(
  n = stop("n must be known."),
  fitted.parameters = stop("At least one parameter must be supplied."),
  observed.Minimum,
  observed.Maximum,
  observed.Median = NULL,
  observed.Mean = NULL,
  priors = "dnorm",
  fixed.parameters = NULL,
  D = "norm**",
  n.iter = 10000,
  n.chains = 1,
  n.adapt = 100,
  thin = 30,
  adaptive = FALSE,
  trace = 100,
  replicates = 10000,
  silent = FALSE
)

Value

from_min_max returns a list with output, ML, Bayesian results

Arguments

n

Number of observations

fitted.parameters

The initial value to fit

observed.Minimum

The observed minimum

observed.Maximum

The observed maximum

observed.Median

The observed median (can be omitted)

observed.Mean

The observed mean (can be omitted)

priors

The priors (see MHAlgogen()) or character "dnorm" or "dunif"

fixed.parameters

The fixed parameters

D

The distribution to fit as character. ex "norm" or "lnorm"

n.iter

Number of iterations for each chain

n.chains

Number of chains

n.adapt

Number of iteration to stabilize likelihood

thin

Interval for thinning likelihoods

adaptive

Should an adaptive process for SDProp be used

trace

Or FALSE or period to show progress

replicates

Number of replicates to model D

silent

If TRUE, do not print information

Author

Marc Girondot marc.girondot@gmail.com

Details

from_min_max returns standard deviation and/or mean from minimum and maximum

Examples

Run this code
if (FALSE) {
minobs <- 5
maxobs <- 25
# These two values are not mandatory
meanobs <- 15
medianobs <- 16
n <- 10
# To estimate only the sd of the distribution; mean is fixed
# Note that there is an obligation to have mean even if it
# is in fixed.parameters
# By defaut a normal distribution is fitted

out_sd <- from_min_max(n=n                                , 
                       observed.Minimum=minobs                    , 
                       observed.Maximum=maxobs                    , 
                       fitted.parameters=c(sd=5)                  , 
                       fixed.parameters=c(mean=meanobs)           ,
                       n.iter=10000                               ,
                       trace=TRUE                                 )
                            
plot(out_sd, what="MarkovChain", parameters="sd")
plot(out_sd, what="posterior", parameters="sd")
as.parameters(out_sd, index="quantile")

# To estimate both the sd and mean of the distribution

out_sd_mean_norm <- from_min_max(n=n, 
                            observed.Minimum=minobs                    , 
                            observed.Maximum=maxobs                    , 
                            fitted.parameters=c(mean=15, sd=5)         , 
                            fixed.parameters=NULL                      ,
                            n.iter=10000                               ,
                            D="norm**"                                 ,
                            trace=FALSE                                )
                            
plot(out_sd_mean_norm, what="MarkovChain", parameters="sd", ylim=c(0, 10))
plot(out_sd_mean_norm, what="MarkovChain", parameters="mean", ylim=c(10, 20))
plot(out_sd_mean_norm, what="posterior", parameters="mean",  
     breaks=seq(from=0, to=100, by=1), xlim=c(10, 20))
as.parameters(out_sd_mean_norm, index="quantile")

# Let see what's happened for a lognormal distribution

out_sd_mean_lnorm <- from_min_max(n=n, 
                            observed.Minimum=minobs                    , 
                            observed.Maximum=maxobs                    , 
                            fitted.parameters=c(meanlog=15, sdlog=5)   , 
                            fixed.parameters=NULL                      ,
                            n.iter=10000                               ,
                            D="lnorm**"                                ,
                            trace=FALSE                                )
                            
plot(out_sd_mean_lnorm, what="MarkovChain", parameters="sdlog", ylim=c(0, 10))
plot(out_sd_mean_lnorm, what="MarkovChain", parameters="meanlog", ylim=c(10, 20))
plot(out_sd_mean_lnorm, what="posterior", parameters="meanlog", xlim=c(0, 20), 
     breaks=seq(from=0, to=100, by=1))
as.parameters(out_sd_mean_lnorm, index="quantile")

# To be compared with the rule of thumb:
print(paste0("mean = ", as.character((maxobs + minobs) / 2))) # Mean Not so bad
print(paste0("sd = ", as.character((maxobs - minobs) / 4))) # SD Clearly biased

# Covariation of sd and mean is nearly NULL
cor(x=as.parameters(out_sd_mean_norm, index="all")[, "mean"], 
    y=as.parameters(out_sd_mean_norm, index="all")[, "sd"])^2
plot(x=as.parameters(out_sd_mean_norm, index="all")[, "mean"], 
     y=as.parameters(out_sd_mean_norm, index="all")[, "sd"], 
     xlab="mean", ylab="sd")
     
# Example when minimum, maximum and mean are known

out_sd_mean2 <- from_min_max(n=n                                   , 
                             observed.Minimum=minobs               , 
                             observed.Maximum=maxobs               , 
                             observed.Mean=meanobs                 ,
                             fitted.parameters=c(mean=15, sd=5)    , 
                             fixed.parameters=NULL                 ,
                             n.iter=10000                          ,
                             trace=FALSE                            )
                            
# Example when minimum, maximum, mean and median are known

out_sd_mean3 <- from_min_max(n=n                                   , 
                             observed.Minimum=minobs               , 
                             observed.Maximum=maxobs               , 
                             observed.Mean=meanobs                 ,
                             observed.Median=medianobs             ,
                             fitted.parameters=c(mean=15, sd=5)    , 
                             fixed.parameters=NULL                 ,
                             n.iter=10000                          ,
                             trace=FALSE                            )
                             
plot(out_sd_mean2, what="MarkovChain", parameters="sd")
plot(out_sd_mean2, what="MarkovChain", parameters="mean")
plot(out_sd_mean2, what="posterior", parameters="mean", xlim=c(0, 100), 
     breaks=seq(from=0, to=100, by=5))
as.parameters(out_sd_mean2, index="quantile")

# Example of GEV density function  
# Parametrisation from https://en.wikipedia.org/wiki/Generalized_extreme_value_distribution
dGEV <- getFromNamespace(".dGEV", ns="HelpersMG")
x <- seq(from=-4, to=4, by=0.1)
plot(x, y=dGEV(x=x, 
               location=0, scale=1, shape=-1/2, log=FALSE, sum=FALSE), 
     type="l", col="green", xlab="x", ylab="Density")
lines(x, y=dGEV(x=x, 
                location=0, scale=1, shape=0, log=FALSE, sum=FALSE), col="red")
lines(x, y=dGEV(x=x, 
                location=0, scale=1, shape=1/2, log=FALSE, sum=FALSE), col="blue")
legend("topleft", legend=c("shape=-1/2", "shape=0", "shape=1/2"), 
       lty=1, col=c("green", "red", "blue"))
       
# Note the different parametrisation about shape
dGEV <- getFromNamespace("dgevd", ns="EnvStats")
x <- seq(from=-4, to=4, by=0.1)
plot(x, y=dGEV(x=x, 
               location=0, scale=1, shape=-1/2), 
     type="l", col="green", xlab="x", ylab="Density")
lines(x, y=dGEV(x=x, 
                location=0, scale=1, shape=0), col="red")
lines(x, y=dGEV(x=x, 
                location=0, scale=1, shape=1/2), col="blue")
legend("topleft", legend=c("shape=-1/2", "shape=0", "shape=1/2"), 
       lty=1, col=c("green", "red", "blue"))

# Compute dn using the approximation from Wan et al. (2014)
   get_dn <- function(n) {
     if (n < 2) {
       stop("Sample size n must be at least 2.")
     }
     qnorm((n - 0.375) / (n + 0.25)) * 2
   }
   
   # Estimate standard deviation from min and max
   estimate_sd_from_range <- function(min_val, max_val, n) {
     dn <- get_dn(n)
     range <- max_val - min_val
     sd_estimate <- range / dn
     return(sd_estimate)
   }
   
   # Example usage:
   n <- 10
   min_val <- 5
   max_val <- 25
   
   dn_value <- get_dn(n)
   sd_estimate <- estimate_sd_from_range(min_val, max_val, n)
   
   cat("dn =", dn_value, "\n")
   cat("Estimated SD =", sd_estimate, "\n")
   
   # To generate data from publication
   
   library(parallel)
   library(embryogrowth)
   
   # Values for the prior of SD
   outSD <- subset(DatabaseTSD, subset = (((!is.na(IP.SD)) | 
                   (!is.na(IP.SE))) & (!is.na(Hatched))), 
                   select=c("Hatched", "IP.SE", "IP.SD", "IP.mean"))
   outSD$IP.SD <- ifelse(is.na(outSD$IP.SD), outSD$IP.SE*sqrt(outSD$Hatched), outSD$IP.SD)
   
   # Model estimation
   
   Example <- subset(DatabaseTSD, subset = (!is.na(IP.min)) & 
                       ((is.na(IP.SE)) & (is.na(IP.SD)) & (!is.na(Hatched)) & 
                          (is.na(IP.mean))), select=c("Species", "Incubation.temperature.set", 
                                                      "Hatched", "IP.min", "IP.max", 
                                                      "Reference"))
   
   out <- universalmclapply(X=1:nrow(Example), FUN=function(i) {
     n <- Example[i, "Hatched"]
     
     priors <- structure(list(Density = c("dunif", "dlnorm"), 
                              Prior1 = c(30, log(mean(outSD$IP.SD))), 
                              Prior2 = c(120, log(sd(outSD$IP.SD))), 
                              SDProp = c(1, 1), 
                              Min = c(30, 0.1), 
                              Max = c(120, 6), 
                              Init = c((Example[i, "IP.min"]+ Example[i, "IP.max"])/2, log(2))), 
                         row.names = c("mean", "sd"), 
                         class = c("PriorsmcmcComposite", "data.frame"))
     
     
     out_sd_mean_mcmc <- from_min_max(n=n, observed.Minimum=Example[i, "IP.min"], 
                                      observed.Maximum=Example[i, "IP.max"], 
                                      fitted.parameters=c(mean=(Example[i, "IP.min"]+ 
                                                             Example[i, "IP.max"])/2, 
                                                          sd=log(2)), 
                                      priors = priors, 
                                      D="norm**", 
                                      n.iter = 10000, n.adapt=15000, thin=30, 
                                      trace=100, adaptive = TRUE)
     
     # plot(out_sd_mean_mcmc, what = "MarkovChain", parameters = "sd")
     
     assign(paste0("out_sd_mean_mcmc_", as.character(i)), out_sd_mean_mcmc)
     save(list = paste0("out_sd_mean_mcmc_", as.character(i)), 
          file = file.path("dataOut", paste0("out_sd_mean_mcmc_", as.character(i), ".Rdata")))
     # rm(list=paste0("out_sd_mean_mcmc_", as.character(i)))
   }, progressbar = TRUE)
   
   # Generate table with all results
   
   Example <- subset(DatabaseTSD, subset = (!is.na(IP.min)) & 
   ((is.na(IP.SE)) & (is.na(IP.SD)) & (!is.na(Hatched)) & 
     (is.na(IP.mean))), select=c("Species", "Incubation.temperature.set", 
                                 "Hatched", "IP.min", "IP.max", 
                                 "Reference"))
   
   Example <- cbind(Example, dn=NA)
   Example <- cbind(Example, "SD(Hozo 2005)"=NA)
   Example <- cbind(Example, "SD(Wan 2014)"=NA)
   Example <- cbind(Example, "mean(Wan 2014)"=NA)
   Example <- cbind(Example, "median(SD)"=NA)
   Example <- cbind(Example, "2.5%(SD)"=NA)
   Example <- cbind(Example, "97.5%(SD)"=NA)
   Example <- cbind(Example, "25%(SD)"=NA)
   Example <- cbind(Example, "75%(SD)"=NA)
   Example <- cbind(Example, "median(mean)"=NA)
   Example <- cbind(Example, "2.5%(mean)"=NA)
   Example <- cbind(Example, "97.5%(mean)"=NA)
   Example <- cbind(Example, "25%(mean)"=NA)
   Example <- cbind(Example, "75%(mean)"=NA)
   Example <- cbind(Example, "z(mean)"=NA)
   Example <- cbind(Example, "z(SD)"=NA)
   
   
   library(coda)
   
   for (i in 1:nrow(Example)) {
     
     n <- Example[i, "Hatched"]
     if (n<= 15) {
       Example[i, "SD(Hozo 2005)"] <- (1/sqrt(12))*sqrt(((Example[i, "IP.max"] - 
        Example[i, "IP.min"]))^2+((Example[i, "IP.max"] - Example[i, "IP.min"]))^2/4)
     } else {
       if (n<=70) {
         Example[i, "SD(Hozo 2005)"] <- (Example[i, "IP.max"] - Example[i, "IP.min"])/4
       } else {
         Example[i, "SD(Hozo 2005)"] <- (Example[i, "IP.max"] - Example[i, "IP.min"])/6
       }
     }
     
     Example[i, "dn"] <- get_dn(n)
     Example[i, "SD(Wan 2014)"] <- estimate_sd_from_range(Example[i, "IP.min"], 
            Example[i, "IP.max"], n)
     Example[i, "mean(Wan 2014)"] <- (Example[i, "IP.min"]+ Example[i, "IP.max"])/2
     load(file = file.path("dataOut", paste0("out_sd_mean_mcmc_", as.character(i), ".Rdata")))
     out_sd_mean_mcmc <- get(paste0("out_sd_mean_mcmc_", as.character(i)))
     k <- as.parameters(out_sd_mean_mcmc, index="quantile", probs=c(0.025, 0.25, 0.5, 0.75, 0.975))
     outgk <- geweke.diag(as.mcmc(out_sd_mean_mcmc))
     rm(out_sd_mean_mcmc)
     rm(list=paste0("out_sd_mean_mcmc_", as.character(i)))
     Example[i, "median(SD)"] <- k["50%", "sd"]
     Example[i, "2.5%(SD)"] <- k["2.5%", "sd"]
     Example[i, "97.5%(SD)"] <- k["97.5%", "sd"]
     Example[i, "25%(SD)"] <- k["25%", "sd"]
     Example[i, "75%(SD)"] <- k["75%", "sd"]
     Example[i, "median(mean)"] <- k["50%", "mean"]
     Example[i, "2.5%(mean)"] <- k["2.5%", "mean"]
     Example[i, "97.5%(mean)"] <- k["97.5%", "mean"]
     Example[i, "25%(mean)"] <- k["25%", "mean"]
     Example[i, "75%(mean)"] <- k["75%", "mean"]
     Example[i, "z(mean)"] <- outgk$`1`$z["mean"]
     Example[i, "z(SD)"] <- outgk$`1`$z["sd"]
   }
   
   rownames(Example) <- as.character(1:nrow(Example))
  
  # Figure 1
  layout(mat = matrix(1:4, nrow=2))
  par(mar=c(4, 4, 0, 0))
  plot(out_sd_mean_mcmc_11, what = "MarkovChain", parameters = "mean", ylim=c(70, 76))
  text(x=ScalePreviousPlot(x=0.05, y=0.95)$x, y=ScalePreviousPlot(x=0.85, y=0.95)$y, 
     labels = "A: mean", cex=1.5, pos=4)
  plot(out_sd_mean_mcmc_8, what = "MarkovChain", parameters = "mean", ylim=c(50, 52))
  text(x=ScalePreviousPlot(x=0.05, y=0.95)$x, y=ScalePreviousPlot(x=0.85, y=0.95)$y, 
      labels = "C: mean", cex=1.5, pos=4)
  plot(out_sd_mean_mcmc_11, what = "MarkovChain", parameters = "sd")
  text(x=ScalePreviousPlot(x=0.05, y=0.95)$x, y=ScalePreviousPlot(x=0.85, y=0.95)$y, 
      labels = "B: sd", cex=1.5, pos=4)
  plot(out_sd_mean_mcmc_8, what = "MarkovChain", parameters = "sd")
  text(x=ScalePreviousPlot(x=0.05, y=0.95)$x, y=ScalePreviousPlot(x=0.85, y=0.95)$y, 
      labels = "D: sd", cex=1.5, pos=4)
  
  # Figure 2
  dtafigure2 <- matrix(NA, nrow=nrow(as.parameters(out_sd_mean_mcmc, index = "all")), 
                       ncol=nrow(Example))
  
  for (i in 1:nrow(Example)) {
    # i <- 1
    load(file=file.path("dataOut", paste0("out_sd_mean_mcmc_", as.character(i), ".Rdata")))
    out_sd_mean_mcmc <- get(paste0("out_sd_mean_mcmc_", as.character(i)))
    PPM <- rnorm(nrow(as.parameters(out_sd_mean_mcmc, index = "all")), 
       mean = as.parameters(out_sd_mean_mcmc, index = "all")[, "mean"], 
       sd=as.parameters(out_sd_mean_mcmc, index = "all")[, "sd"])
    dtafigure2[, i] <- PPM
  }
  
  layout(mat = 1)
  par(mar=c(3, 4, 0, 0))
  boxplot(dtafigure2, outline=FALSE, las=1, bty="n", xaxt="n", frame=FALSE, ylim=c(40, 90), 
    col=sapply(as.character(Example$Species), 
      FUN=function(i) switch(i, "Caretta caretta"="white", 
                                 "Chelonia mydas"="green", 
                                 "Dermochelys coriacea"="lightblue")), 
    ylab="Incubation duration in days")
  axis(1, at=1:30, labels = rep(NA, 30))
  Cc <- sum(as.character(Example$Species) == "Caretta caretta")
  CcCm <- sum(as.character(Example$Species) == "Caretta caretta" | 
              as.character(Example$Species) == "Chelonia mydas")
  segments(x0= Cc + 0.5, 
           x1= Cc + 0.5, 
           y0=40, y1=90, lty = 2)
  
  segments(x0= CcCm + 0.5, 
           x1= CcCm + 0.5, 
           y0=40, y1=90, lty = 2)
  
  par(xpd=TRUE)
  text(x=Cc/2, y=33, labels = expression(italic("Caretta caretta")), cex=0.9)
  
  text(x=Cc+(CcCm-Cc)/2, y=32, labels = expression(italic("Chelonia\n  mydas")), cex=0.9)
  text(x=CcCm+(30-CcCm)/2, y=32, labels = expression(italic("Dermochelys\n  coriacea")), cex=0.9)
  
  # With Lognormal
   # To generate data from publication
   
   library(parallel)
   library(embryogrowth)
   
   # Values for the prior of SD
   outSD <- subset(DatabaseTSD, subset = (((!is.na(IP.SD)) | 
                   (!is.na(IP.SE))) & (!is.na(Hatched))), 
                   select=c("Hatched", "IP.SE", "IP.SD", "IP.mean"))
   outSD$IP.SD <- ifelse(is.na(outSD$IP.SD), outSD$IP.SE*sqrt(outSD$Hatched), outSD$IP.SD)
   
   # Model estimation
   
   Example <- subset(DatabaseTSD, subset = (!is.na(IP.min)) & 
                       ((is.na(IP.SE)) & (is.na(IP.SD)) & (!is.na(Hatched)) & 
                          (is.na(IP.mean))), select=c("Species", "Incubation.temperature.set", 
                                                      "Hatched", "IP.min", "IP.max", 
                                                      "Reference"))
   
   out <- universalmclapply(X=1:nrow(Example), FUN=function(i) {
     n <- Example[i, "Hatched"]
     
     priors <- structure(list(Density = c("dunif", "dlnorm"), 
                              Prior1 = c(30, log(mean(outSD$IP.SD))), 
                              Prior2 = c(120, log(sd(outSD$IP.SD))), 
                              SDProp = c(1, 1), 
                              Min = c(30, 0.1), 
                              Max = c(120, 6), 
                              Init = c((Example[i, "IP.min"]+ Example[i, "IP.max"])/2, log(2))), 
                         row.names = c("meanlog", "sdlog"), 
                         class = c("PriorsmcmcComposite", "data.frame"))
     
     
     out_sd_mean_mcmc_LN <- from_min_max(n=n, observed.Minimum=Example[i, "IP.min"], 
                                      observed.Maximum=Example[i, "IP.max"], 
                                      fitted.parameters=c(mean=(Example[i, "IP.min"]+ 
                                                             Example[i, "IP.max"])/2, 
                                                          sd=log(2)), 
                                      priors = priors, 
                                      D="lnorm**", 
                                      n.iter = 10000, n.adapt=15000, thin=30, 
                                      trace=100, adaptive = TRUE)
     
     # plot(out_sd_mean_mcmc, what = "MarkovChain", parameters = "sd")
     
     assign(paste0("out_sd_mean_mcmc_LN_", as.character(i)), out_sd_mean_mcmc_LN)
     save(list = paste0("out_sd_mean_mcmc_LN_", as.character(i)), 
          file = file.path("dataOut", paste0("out_sd_mean_mcmc_LN_", as.character(i), ".Rdata")))
     # rm(list=paste0("out_sd_mean_mcmc_LN_", as.character(i)))
   }, progressbar = TRUE)
   
}

Run the code above in your browser using DataLab