# example of the contaminated sunspot data
example1 <- data.frame(year = 1700:1988,sunspot = as.numeric(sunspot.year))
example1[sample(1:289,30),'sunspot'] <- NA # contaminate data with missing values
example1[c(5,30,50),'sunspot'] <- c(-50,300,400) # contaminate data with outliers
example1 <- example1[-(70:100),] # create gap in the data
bin.period <- 11 # aggregation performed every 11 years (the year is numeric here)
bin.side <- 1989 # give one side of a bin
bin.FUN <- 'mean'
bin.max.f.NA <- 0.2 # maximum of 20% of missing data per bin
ylim <- c(0,Inf) # negative values are impossible
list.main <- ctbi(example1,bin.period=bin.period,
bin.side=bin.side,bin.FUN=bin.FUN,
ylim=ylim,bin.max.f.NA=bin.max.f.NA)
data0.example1 <- list.main$data0 # cleaned raw dataset
data1.example1 <- list.main$data1 # aggregated dataset.
mean.cycle.example1 <- list.main$mean.cycle # this data set shows a moderate seasonality
summary.bin.example1 <- list.main$summary.bin # confirmed with SCI = 0.50
summary.outlier.example1 <- list.main$summary.outlier
plot(mean.cycle.example1[,'generic.time.bin1'],
mean.cycle.example1[,'mean'],type='l',ylim=c(-80,80),
ylab='sunspot cycle',
xlab='11 years window')
lines(mean.cycle.example1[,'generic.time.bin1'],
mean.cycle.example1[,'mean']+mean.cycle.example1[,'sd'],type='l',lty=2)
lines(mean.cycle.example1[,'generic.time.bin1'],
mean.cycle.example1[,'mean']-mean.cycle.example1[,'sd'],type='l',lty=2)
title(paste0('mean cycle (moderate cyclicity: SCI = ',summary.bin.example1['SCI'],')'))
# plot tool:
ctbi.plot(list.main,show.n.bin=10)
# example of the beaver data
temp.beaver <- beaver1[,'temp']
t.char <- as.character(beaver1[,'time'])
minutes <- substr(t.char,nchar(t.char)-1,nchar(t.char))
hours <- substr(t.char,nchar(t.char)-3,nchar(t.char)-2)
hours[hours==""] <- '0'
days <- c(rep(12,91),rep(13,23))
time.beaver <- as.POSIXct(paste0('2000-12-',days,' ',hours,':',minutes,':00'),tz='UTC')
example2 <- data.frame(time=time.beaver,temp=temp.beaver)
bin.period <- '1 hour' # aggregation performed every hour
bin.side <- as.POSIXct('2000-12-12 00:00:00',tz='UTC') # give one side of a bin
bin.FUN <- 'mean' # aggregation operator
bin.max.f.NA <- 0.2 # maximum of 20% of missing data per bin
ylim <- c(-Inf,Inf)
list.main <- ctbi(example2,bin.period=bin.period,
bin.side=bin.side,bin.FUN=bin.FUN,
ylim=ylim,bin.max.f.NA=bin.max.f.NA)
data0.example2 <- list.main$data0 # cleaned raw dataset
data1.example2 <- list.main$data1 # aggregated dataset.
lower.threshold <- list.main$summary.outlier['lower.outlier.threshold']
upper.threshold <- list.main$summary.outlier['upper.outlier.threshold']
hist(data0.example2[,'residuals'],xlim=c(-0.5,0.5),30,main='beaver residuals')
abline(v=c(lower.threshold,upper.threshold),col='red',lwd=2) # show the histogram of the residuals
Run the code above in your browser using DataLab