#Simulate some fossil ranges with simFossilRecord
set.seed(444)
record<-simFossilRecord(p=0.1, q=0.1, nruns=1,
nTotalTaxa=c(30,40), nExtant=0)
taxa<-fossilRecord2fossilTaxa(record)
#simulate a fossil record with imperfect sampling with sampleRanges
rangesCont <- sampleRanges(taxa,r=0.1)
#Now let's use binTimeData to bin in intervals of 5 time units
rangesDisc <- binTimeData(rangesCont,int.length=5)
#now, get an estimate of the sampling rate (we set it to 0.5 above)
#for discrete data we can estimate the sampling probability per interval (R)
#i.e. this is not the same thing as the instantaneous sampling rate (r)
#can use sRate2sProb to see what we would expect
sRate2sProb(r=0.1,int.length=5)
#expect R = ~0.39
#now we can apply freqRat to get sampling probability
SampProb <- freqRat(rangesDisc,plot=TRUE)
SampProb
#est. R = ~0.25
#Not wildly accurate, is it?
#can also calculate extinction rate per interval of time
freqRat(rangesDisc,calcExtinction=TRUE)
#est. ext rate = ~0.44 per interval
#5 time-unit intervals, so ~0.44 / 5 = ~0.08 per time-unite
#That's pretty close to the generating value of 0.01, used in sampleRanges
#################
#The following example code (which is not run by default) examines how
#the freqRat estimates vary with sample size, interval length
#and compare it to using make_durationFreqDisc
#how good is the freqRat at 20 sampled taxa on avg?
set.seed(444)
r<-runif(100)
int.length=1
R<-sapply(r,sRate2sProb,int.length=1) #estimate R from r, assuming stuff like p=q
ntaxa<-freqRats<-numeric()
for(i in 1:length(r)){
#assuming budding model
record<-simFossilRecord(p=0.1, q=0.1, r=r[i], nruns=1,
nSamp=c(15,25), nExtant=0, plot=TRUE)
ranges<-fossilRecord2fossilRanges(record)
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
freqRats[i]<-freqRat(timeList)
}
plot(R,freqRats);abline(0,1)
#without the gigantic artifacts bigger than 1...
plot(R,freqRats,ylim=c(0,1));abline(0,1)
#very worrisome lookin'!
#how good is it at 100 sampled taxa on average?
set.seed(444)
r<-runif(100)
int.length=1
R<-sapply(r,sRate2sProb,int.length=1)
ntaxa<-freqRats<-numeric()
for(i in 1:length(r)){
#assuming budding model
record<-simFossilRecord(p=0.1, q=0.1, r=r[i], nruns=1,
nSamp=c(80,150), nExtant=0, plot=TRUE)
ranges<-fossilRecord2fossilRanges(record)
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
freqRats[i]<-freqRat(timeList)
}
plot(R,freqRats,ylim=c(0,1));abline(0,1)
#not so hot, eh?
################
#LETS CHANGE THE TIME BIN LENGTH!
#how good is it at 100 sampled taxa on average, with longer time bins?
set.seed(444)
r<-runif(100)
int.length<-10
R<-sapply(r,sRate2sProb,int.length=int.length)
ntaxa<-freqRats<-numeric()
for(i in 1:length(r)){
#assuming budding model
record<-simFossilRecord(p=0.1, q=0.1, r=r[i], nruns=1,
nSamp=c(80,150), nExtant=0, plot=TRUE)
ranges<-fossilRecord2fossilRanges(record)
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
freqRats[i]<-freqRat(timeList)
}
plot(R,freqRats,ylim=c(0,1));abline(0,1)
#things get more accurate as interval length increases... odd, eh?
#how good is it at 20 sampled taxa on average, with longer time bins?
set.seed(444)
r<-runif(100)
int.length<-10
R<-sapply(r,sRate2sProb,int.length=int.length)
ntaxa<-freqRats<-numeric()
for(i in 1:length(r)){
#assuming budding model
record<-simFossilRecord(p=0.1, q=0.1, r=r[i], nruns=1,
nSamp=c(15,25), nExtant=0, plot=TRUE)
ranges<-fossilRecord2fossilRanges(record)
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
freqRats[i]<-freqRat(timeList)
}
plot(R,freqRats,ylim=c(0,1));abline(0,1)
#still not so hot at low sample sizes, even with longer bins
########################
#ML METHOD
#how good is the ML method at 20 taxa, 1 time-unit bins?
set.seed(444)
r<-runif(100)
int.length<-1
R<-sapply(r,sRate2sProb,int.length=int.length)
ntaxa<-ML_sampProb<-numeric()
for(i in 1:length(r)){
#assuming budding model
record<-simFossilRecord(p=0.1, q=0.1, r=r[i], nruns=1,
nSamp=c(15,25), nExtant=0, plot=TRUE)
ranges<-fossilRecord2fossilRanges(record)
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
likFun<-make_durationFreqDisc(timeList)
ML_sampProb[i]<-optim(parInit(likFun),likFun,
lower=parLower(likFun),upper=parUpper(likFun),
method="L-BFGS-B",control=list(maxit=1000000))[[1]][2]
}
plot(R,ML_sampProb);abline(0,1)
# Not so great due to likelihood surface ridges
# but it returns values between 0-1
#how good is the ML method at 100 taxa, 1 time-unit bins?
set.seed(444)
r<-runif(100)
int.length<-1
R<-sapply(r,sRate2sProb,int.length=int.length)
ntaxa<-ML_sampProb<-numeric()
for(i in 1:length(r)){
#assuming budding model
record<-simFossilRecord(p=0.1, q=0.1, r=r[i], nruns=1,
nSamp=c(80,150), nExtant=0, plot=TRUE)
ranges<-fossilRecord2fossilRanges(record)
timeList<-binTimeData(ranges,int.length=int.length)
ntaxa[i]<-nrow(timeList[[2]])
likFun<-make_durationFreqDisc(timeList)
ML_sampProb[i]<-optim(parInit(likFun),likFun,
lower=parLower(likFun),upper=parUpper(likFun),
method="L-BFGS-B",control=list(maxit=1000000))[[1]][2]
}
plot(R,ML_sampProb);abline(0,1)
#Oh, fairly nice, although still a biased uptick as R gets largerRun the code above in your browser using DataLab