## Example script exercising Summarize() on numeric and factor variables,
## both as bare vectors and through the formula interface with a data.frame.
## NOTE(review): Summarize() is not defined in this snippet; it is presumably
## provided by an attached package (FSA?) -- confirm it is loaded beforehand.
## No RNG seed is set in this snippet, so the simulated values (and therefore
## the printed summaries) differ from run to run.

## Create a numeric vector (with missing values):
## two zeros, three NAs, then n - 5 uniform random numbers (length n overall)
n <- 102
y <- c(0, 0, NA, NA, NA, runif(n - 5))

## Create a factor vector whose levels include the literal string "NA"
## (a real level named "NA", not a true missing value)
g1 <- factor(sample(c("A", "B", "C", "NA"), n, replace = TRUE))

## Create a factor vector with unknowns coded as the level "UNKNOWN"
g2 <- factor(sample(c("male", "female", "UNKNOWN"), n, replace = TRUE))

## Put into a data.frame (with some extra integer-coded variables)
d <- data.frame(
  dy = y,
  dg1 = g1,
  dg2 = g2,
  dw = sample(1:3, n, replace = TRUE),
  dv = sample(1:3, n, replace = TRUE)
)

## Typical output of summary() for a numeric variable
summary(y)

## This function: the same numeric variable given as a vector, as a
## one-sided formula, and as a two-sided formula with no grouping
Summarize(y, digits = 3)
Summarize(~dy, data = d, digits = 3)
Summarize(dy ~ 1, data = d, digits = 3)

## Factor vector (excluding "NA"s in second call)
Summarize(~dg1, data = d)
Summarize(~dg1, data = d, exclude = "NA")

## Factor vector with UNKNOWNs
Summarize(~dg2, data = d)
Summarize(~dg2, data = d, exclude = "UNKNOWN")

## Numeric vector by levels of a factor variable
Summarize(dy ~ dg1, data = d, digits = 3)
Summarize(dy ~ dg1, data = d, digits = 3, exclude = "NA")
Summarize(dy ~ dg2, data = d, digits = 3)
Summarize(dy ~ dg2, data = d, digits = 3, exclude = "UNKNOWN")

## What happens if RHS of formula is not a factor
Summarize(dy ~ dw, data = d, digits = 3)
# Use the data.frame column dy here (d has no column named y), so that every
# variable in the formula is resolved from d rather than from the workspace.
Summarize(dy ~ dw * dv, data = d, digits = 3)

## Summarize factor variable by a factor variable
Summarize(dg1 ~ dg2, data = d)
Summarize(dg1 ~ dg2, data = d, exclude = "NA")
Summarize(dg1 ~ dg2, data = d, exclude = c("NA", "UNKNOWN"))
Summarize(dg1 ~ dg2, data = d, percent = "none")
Summarize(dg1 ~ dg2, data = d, percent = "column")
Summarize(dg1 ~ dg2, data = d, percent = "total")

## Summarizing all variables in a data frame
# lapply() iterates over the columns of a data.frame directly
lapply(d, Summarize, digits = 4)
# Run the code above in your browser using DataLab