`k`.
An active bin is a bin which contains at least `min_n` observations.

getbw(x, k = NULL, min_n = NULL, warn = FALSE)
`min_n` observations. The default is `k <- 1 + 2*ceiling(log(N)/log(2))`.
`min_n = max(log(N/10)/log(10),1)`.
`"bw"`, `"k"` and `"outlier"`. The first one is the binwidth, the second one is the number of active bins which is as close as possible to the specified parameter `k`. `"outlier"` is a logical vector indicating which bins are not active.

require(scales)
# Histograms drawn with getbw() breakpoints. The "outlier" attribute is a
# logical vector flagging the bins that are not active; adding 1 maps it to
# colour 1 (active) or colour 2 (not active), and alpha() makes the fill
# translucent.
#
# The sample and the breakpoints are computed in their own statements instead
# of being assigned inside the hist() call. That way the example does not
# depend on the order in which hist() forces its lazily evaluated arguments,
# and the plot title reads "Histogram of x" rather than the deparsed
# assignment expression.

# Exponential sample, n = 200.
x <- rexp(200, 1 / 10)
gb <- getbw(x, 24, min_n = 5, warn = TRUE)
hist(x, breaks = gb, col = alpha(attr(gb, "outlier") + 1, 0.3))

# Exponential sample, n = 2000.
x <- rexp(2000, 1 / 10)
gb <- getbw(x, 24, min_n = 5, warn = TRUE)
hist(x, breaks = gb, col = alpha(attr(gb, "outlier") + 1, 0.3))

# Log-normal sample plus a normal cluster centred at 400.
x <- rlnorm(1000, log(10), log(4))
x <- c(x, rnorm(500, 400, 30))
gb <- getbw(x, 24, min_n = 5, warn = TRUE)
hist(x, breaks = gb, col = alpha(attr(gb, "outlier") + 1, 0.3))

# Same log-normal sample shape, with the normal cluster moved out to 800.
x <- rlnorm(1000, log(10), log(4))
x <- c(x, rnorm(500, 800, 30))
gb <- getbw(x, 24, min_n = 5, warn = TRUE)
hist(x, breaks = gb, col = alpha(attr(gb, "outlier") + 1, 0.3))
# Sampling distribution of the binwidth chosen by getbw() (k = 20) over
# 1000 exponential samples of size 200.
bws1 <- replicate(1000, {
  smp <- rexp(200, 1 / 10)
  breaks_smp <- getbw(smp, 20)
  attr(breaks_smp, "bw")
})
hist(bws1, breaks = getbw(bws1, 30))

# Same experiment for the ratio of two independent standard normal samples.
# The numerator is drawn before the denominator, as in a two-step assignment.
bws2 <- replicate(1000, {
  ratio <- rnorm(200) / rnorm(200)
  breaks_ratio <- getbw(ratio, 20)
  attr(breaks_ratio, "bw")
})
hist(bws2, breaks = getbw(bws2, 30))
# MovieLens examples.
# library() is used instead of require(): it stops with an error when
# extracat is not installed, whereas require() only returns FALSE and lets
# the later cutbw()/fluctile() calls fail with a less helpful message.
library(extracat)

# Tab-separated file with a header row; TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
mov <- read.table(
  "http://www.rosuda.org/lehre/WS1213-f/MovieLens.txt",
  header = TRUE, sep = "\t"
)

# Raw scatterplot with translucent points.
with(mov, plot(MovieVotes, meanMovieRat,
  pch = 19, col = alpha("black", 0.2)
))

# The same two variables cross-tabulated after cutbw() with 30 as the second
# argument (presumably the binned counterpart of getbw() - confirm).
with(mov, fluctile(table(
  cutbw(MovieVotes, 30),
  cutbw(meanMovieRat, 30)
)))

# NOTE(review): USR is not created in this snippet; presumably it is a dataset
# provided by extracat - confirm.
with(USR, fluctile(table(occupation, cutbw(meanUserRat, 30)),
  dir = "h", just = "b"
))
with(USR, fluctile(log(1 + table(
  cutbw(Pct.Animation, 50),
  cutbw(Pct.Children.s, 50)
))))
with(USR, barplot(table(cutbw(Pct.Animation, 50, min_n = 5))))
Run the code above in your browser using DataLab