# NOT RUN {
#Simulate some fossil ranges with simFossilRecord
set.seed(444)
record <- simFossilRecord(p = 0.1, q = 0.1, nruns = 1,
nTotalTaxa = c(30,40), nExtant = 0)
taxa <- fossilRecord2fossilTaxa(record)
#simulate a fossil record with imperfect sampling with sampleRanges
rangesCont <- sampleRanges(taxa,r = 0.1)
#Now let's use binTimeData to bin in intervals of 5 time units
rangesDisc <- binTimeData(rangesCont,int.length = 5)
#now, get an estimate of the sampling rate (we set it to 0.5 above)
#for discrete data we can estimate the sampling probability per interval (R)
#i.e. this is not the same thing as the instantaneous sampling rate (r)
#can use sRate2sProb to see what we would expect
sRate2sProb(r = 0.1,int.length = 5)
#expect R = ~0.39
#now we can apply freqRat to get sampling probability
SampProb <- freqRat(rangesDisc,plot = TRUE)
SampProb
#est. R = ~0.25
#Not wildly accurate, is it?
#can also calculate extinction rate per interval of time
freqRat(rangesDisc,calcExtinction = TRUE)
#est. ext rate = ~0.44 per interval
#5 time-unit intervals, so ~0.44 / 5 = ~0.08 per time-unite
#That's pretty close to the generating value of 0.01, used in sampleRanges
# }
# NOT RUN {
#################
#The following example code (which is not run by default) examines how
#the freqRat estimates vary with sample size, interval length
#and compare it to using make_durationFreqDisc
#how good is the freqRat at 20 sampled taxa on avg?
set.seed(444)
r <- runif(100)
int.length = 1
R <- sapply(r,sRate2sProb,int.length = 1) #estimate R from r, assuming stuff like p = q
ntaxa <- freqRats <- numeric()
for(i in 1:length(r)){
#assuming budding model
record <- simFossilRecord(p = 0.1, q = 0.1, r = r[i], nruns = 1,
nSamp = c(15,25), nExtant = 0, plot = TRUE)
ranges <- fossilRecord2fossilRanges(record)
timeList <- binTimeData(ranges,int.length = int.length)
ntaxa[i] <- nrow(timeList[[2]])
freqRats[i] <- freqRat(timeList)
}
plot(R,freqRats);abline(0,1)
#without the gigantic artifacts bigger than 1...
plot(R,freqRats,ylim = c(0,1));abline(0,1)
#very worrisome lookin'!
#how good is it at 100 sampled taxa on average?
set.seed(444)
r <- runif(100)
int.length = 1
R <- sapply(r,sRate2sProb,int.length = 1)
ntaxa <- freqRats <- numeric()
for(i in 1:length(r)){
#assuming budding model
record <- simFossilRecord(p = 0.1, q = 0.1, r = r[i], nruns = 1,
nSamp = c(80,150), nExtant = 0, plot = TRUE)
ranges <- fossilRecord2fossilRanges(record)
timeList <- binTimeData(ranges,int.length = int.length)
ntaxa[i] <- nrow(timeList[[2]])
freqRats[i] <- freqRat(timeList)
}
plot(R,freqRats,ylim = c(0,1));abline(0,1)
#not so hot, eh?
################
#LETS CHANGE THE TIME BIN LENGTH!
#how good is it at 100 sampled taxa on average, with longer time bins?
set.seed(444)
r <- runif(100)
int.length <- 10
R <- sapply(r,sRate2sProb,int.length = int.length)
ntaxa <- freqRats <- numeric()
for(i in 1:length(r)){
#assuming budding model
record <- simFossilRecord(p = 0.1, q = 0.1, r = r[i], nruns = 1,
nSamp = c(80,150), nExtant = 0, plot = TRUE)
ranges <- fossilRecord2fossilRanges(record)
timeList <- binTimeData(ranges,int.length = int.length)
ntaxa[i] <- nrow(timeList[[2]])
freqRats[i] <- freqRat(timeList)
}
plot(R,freqRats,ylim = c(0,1));abline(0,1)
#things get more accurate as interval length increases... odd, eh?
#how good is it at 20 sampled taxa on average, with longer time bins?
set.seed(444)
r <- runif(100)
int.length <- 10
R <- sapply(r,sRate2sProb,int.length = int.length)
ntaxa <- freqRats <- numeric()
for(i in 1:length(r)){
#assuming budding model
record <- simFossilRecord(p = 0.1, q = 0.1, r = r[i], nruns = 1,
nSamp = c(15,25), nExtant = 0, plot = TRUE)
ranges <- fossilRecord2fossilRanges(record)
timeList <- binTimeData(ranges,int.length = int.length)
ntaxa[i] <- nrow(timeList[[2]])
freqRats[i] <- freqRat(timeList)
}
plot(R,freqRats,ylim = c(0,1));abline(0,1)
#still not so hot at low sample sizes, even with longer bins
########################
#ML METHOD
#how good is the ML method at 20 taxa, 1 time-unit bins?
set.seed(444)
r <- runif(100)
int.length <- 1
R <- sapply(r,sRate2sProb,int.length = int.length)
ntaxa <- ML_sampProb <- numeric()
for(i in 1:length(r)){
#assuming budding model
record <- simFossilRecord(p = 0.1, q = 0.1, r = r[i], nruns = 1,
nSamp = c(15,25), nExtant = 0, plot = TRUE)
ranges <- fossilRecord2fossilRanges(record)
timeList <- binTimeData(ranges,int.length = int.length)
ntaxa[i] <- nrow(timeList[[2]])
likFun <- make_durationFreqDisc(timeList)
ML_sampProb[i] <- optim(parInit(likFun),likFun,
lower = parLower(likFun),upper = parUpper(likFun),
method = "L-BFGS-B",control = list(maxit = 1000000))[[1]][2]
}
plot(R,ML_sampProb);abline(0,1)
# Not so great due to likelihood surface ridges
# but it returns values between 0-1
#how good is the ML method at 100 taxa, 1 time-unit bins?
set.seed(444)
r <- runif(100)
int.length <- 1
R <- sapply(r,sRate2sProb,int.length = int.length)
ntaxa <- ML_sampProb <- numeric()
for(i in 1:length(r)){
#assuming budding model
record <- simFossilRecord(p = 0.1, q = 0.1, r = r[i], nruns = 1,
nSamp = c(80,150), nExtant = 0, plot = TRUE)
ranges <- fossilRecord2fossilRanges(record)
timeList <- binTimeData(ranges,int.length = int.length)
ntaxa[i] <- nrow(timeList[[2]])
likFun <- make_durationFreqDisc(timeList)
ML_sampProb[i] <- optim(parInit(likFun),likFun,
lower = parLower(likFun),upper = parUpper(likFun),
method = "L-BFGS-B",control = list(maxit = 1000000))[[1]][2]
}
plot(R,ML_sampProb);abline(0,1)
#Oh, fairly nice, although still a biased uptick as R gets larger
# }
# NOT RUN {
# }
Run the code above in your browser using DataLab