## Basic calls on the built-in iris data set
## All columns
tablefreq(iris)
## Only the two named columns
tablefreq(iris, c("Sepal.Length", "Species"))
## Same function with the `freq` argument set to a column name;
## the result is kept and passed to tablefreq() again with another column
a <- tablefreq(iris, freq = "Sepal.Length")
tablefreq(a, freq = "Sepal.Width")
## The data can also be supplied through the pipe
library(dplyr)
iris %>%
  tablefreq("Species")
## Table of the first two columns, built from the whole data set at once
tfq <- tablefreq(iris[, 1:2])
## Split iris by rows into three chunks: rows 1-10 (first two columns only),
## rows 11-20, and every row after the 20th
chunk1 <- iris[1:10, 1:2]
chunk2 <- iris[11:20, ]
chunk3 <- iris[-(1:20), ]
## Build the table from the first chunk, then feed the remaining chunks
## to update() one after another
a <- tablefreq(chunk1)
a <- update(a, chunk2)
a <- update(a, chunk3)
a
## External databases
library(dplyr)
if (require(RSQLite)) {
  ## NOTE(review): hflights_sqlite() is not defined in this script; it is
  ## presumably supplied by the attached dplyr version -- confirm it exists
  ## in the installed release.
  hflights_sqlite <- tbl(hflights_sqlite(), "hflights")
  ## Values of bare expressions inside `{ }` are not auto-printed (only the
  ## value of the whole `if` expression is), so print each result explicitly.
  print(hflights_sqlite)
  print(tbl_vars(hflights_sqlite))
  print(
    tablefreq(hflights_sqlite, vars = c("Year", "Month"), freq = "DayofMonth")
  )
}
##
## Graphs
##
if (require(ggplot2) && require(hflights)) {
  library(dplyr)

  ## One variable
  ## NOTE(review): the aesthetics below refer to a column named `x`;
  ## presumably tablefreq() gives that name to the values of a bare
  ## vector -- confirm.
  tt <- as.data.frame(tablefreq(hflights[, "ArrDelay"]))

  ## Bar plot
  p <- ggplot() + geom_bar(aes(x = x, y = freq), data = tt, stat = "identity")
  print(p)

  ## Histogram
  p <- ggplot() + geom_histogram(aes(x = x, weight = freq), data = tt)
  print(p)

  ## Density
  tt <- tt[complete.cases(tt), ]  ## remove missing values
  tt$w <- tt$freq / sum(tt$freq)  ## weights must sum 1
  p <- ggplot() + geom_density(aes(x = x, weight = w), data = tt)
  print(p)

  ##
  ## Two distributions
  ##
  ## A numeric and a factor variable
  td <- tablefreq(hflights[, c("TaxiIn", "Origin")])
  td <- td[complete.cases(td), ]

  ## Bar plot
  p <- ggplot() +
    geom_bar(aes(x = TaxiIn, weight = freq, colour = Origin),
             data = td, position = "dodge")
  print(p)

  ## Density
  ## compute the relative frequencies for each group
  ## (`%>%` replaces the `%.%` operator, which dplyr no longer provides;
  ## ungroup() keeps the grouping from leaking into later uses of `td`)
  td <- td %>%
    group_by(Origin) %>%
    mutate(ngroup = sum(freq), wgroup = freq / ngroup) %>%
    ungroup()
  p <- ggplot() +
    geom_density(aes(x = TaxiIn, weight = wgroup, colour = Origin),
                 data = td)
  print(p)

  ## For each group, plot its values
  p <- ggplot() +
    geom_point(aes(x = Origin, y = TaxiIn, size = freq),
               data = td, alpha = 0.6)
  print(p)

  ## Two numeric variables
  tc <- tablefreq(hflights[, c("TaxiIn", "TaxiOut")])
  tc <- tc[complete.cases(tc), ]
  p <- ggplot() +
    geom_point(aes(x = TaxiIn, y = TaxiOut, size = freq),
               data = tc, color = "red", alpha = 0.5)
  print(p)

  ## Two factors
  tf <- tablefreq(hflights[, c("UniqueCarrier", "Origin")])
  tf <- tf[complete.cases(tf), ]

  ## Bar plot
  p <- ggplot() +
    geom_bar(aes(x = Origin, fill = UniqueCarrier, weight = freq),
             data = tf)
  print(p)
}
## Run the code above in your browser using DataLab