# Setup: build the same nine-row table once as a data.frame and once as a
# data.table, print both, and confirm they agree on the basics.
library(data.table) # attach the package so the calls below resolve in a fresh session
example(data.table) # to run these examples at the prompt
DF <- data.frame(x=rep(c("a","b","c"),each=3), y=c(1,3,6), v=1:9)
DT <- data.table(x=rep(c("a","b","c"),each=3), y=c(1,3,6), v=1:9)
DF
DT
identical(dim(DT),dim(DF)) # TRUE
# Compare a column that exists in both tables (x, y and v are the only
# columns); a name with no matching column would compare NULL with NULL.
identical(DF$v, DT$v) # TRUE
is.list(DF) # TRUE
is.list(DT) # TRUE
is.data.frame(DT) # TRUE
tables()
# Basic queries: the first argument (i) selects rows, the second (j) selects
# or computes on columns.
DT[2] # 2nd row
DT[,v] # v column (as vector)
DT[,list(v)] # v column (as data.table)
DT[2:3,sum(v)] # sum(v) over rows 2 and 3
DT[2:5,cat(v,"")] # just for j's side effect: cat() prints v for rows 2 to 5
DT[c(FALSE,TRUE)] # even rows (usual recycling of the logical vector)
DT[,2,with=FALSE] # 2nd column
# The column number can also come from a variable.
colNum = 2
DT[,colNum,with=FALSE] # same
# Keys: set a key on column x (three equivalent ways), then use it for
# lookups and grouping.
setkey(DT,x) # set a 1-column key. No quotes, for convenience.
setkeyv(DT,"x") # same (v in setkeyv stands for vector)
# NOTE(review): this global v shares its name with column v of DT; the
# sum(v) calls below are presumably meant to see the column -- confirm.
v="x"
setkeyv(DT,v) # same
# key(DT)<-"x" # copies whole table, please use set* functions instead
DT["a"] # binary search (fast)
DT[x=="a"] # vector scan (slow)
DT[,sum(v),by=x] # keyed by
DT[,sum(v),by=key(DT)] # same
DT[,sum(v),by=y] # ad hoc by
DT["a",sum(v)] # j for one group
# NOTE(review): whether j runs once per group here or once over all matched
# rows appears to depend on the data.table version -- confirm.
DT[c("a","b"),sum(v)] # j for two groups
# Joins: X is a small lookup table whose first (unnamed) column holds key
# values "b" and "c", and whose foo column carries extra data.
X = data.table(c("b","c"),foo=c(4,2))
X
DT[X] # join
# NOTE(review): evaluating j once per row of i matches older data.table
# releases; newer releases appear to need by=.EACHI for that -- confirm
# against the installed version before relying on these two results.
DT[X,sum(v)] # join and eval j for each row in i
DT[X,mult="first"] # first row of each group
DT[X,mult="last"] # last row of each group
DT[X,sum(v)*foo] # join inherited scope: j can use foo, a column of X
# Two-column key: re-key DT on (x, y), then join on one or both key columns.
setkey(DT,x,y) # 2-column key
setkeyv(DT,c("x","y")) # same
DT["a"] # join to 1st column of key
DT[J("a")] # same. J() stands for Join, an alias for list()
DT[list("a")] # same
DT[.("a")] # same. In the style of package plyr.
DT[J("a",3)] # join to 2 columns
DT[.("a",3)] # same
# y only takes the values 1, 3 and 6, so of 3:6 only 3 and 6 find a match.
DT[J("a",3:6)] # join 4 rows (2 missing)
DT[J("a",3:6),nomatch=0] # remove missing
DT[J("a",3:6),roll=TRUE] # rolling join (locf)
# Grouping: by can be an expression or a list of expressions, and .SD is
# used in j to work on each group's rows.
DT[,sum(v),by=list(y%%2)] # by expression
DT[,.SD[2],by=x] # 2nd row of each group
DT[,tail(.SD,2),by=x] # last 2 rows of each group
DT[,lapply(.SD,sum),by=x] # apply through columns by group
# Several named aggregates at once, grouped by x and by y%%2.
DT[,list(MySum=sum(v),
MyMin=min(v),
MyMax=max(v)),
by=list(x,y%%2)] # by 2 expressions
# The unnamed sum(v) result is referred to as V1 in the chained query.
DT[,sum(v),x][V1<20] # compound query
DT[,sum(v),x][order(-V1)] # ordering results
# := adds, removes or updates columns of DT by reference; each result is
# wrapped in print() (or followed by []) so that it is displayed.
print(DT[,z:=42L]) # add new column by reference
print(DT[,z:=NULL]) # remove column by reference
print(DT["a",v:=42L]) # subassign to existing v column by reference
print(DT["b",v2:=84L]) # subassign to new column by reference (NA padded)
DT[,m:=mean(v),by=x][] # add new column by reference by group
# NB: postfix [] is shortcut to print()
DT[,.SD[which.min(v)],by=x][] # nested query by group
# Not-join: a leading ! in i selects the rows that do NOT match.
DT[!J("a")] # not join
DT[!"a"] # same
DT[!2:4] # all rows other than 2:4
# Two ways to drop the rows where x is "b" and y is 3.
DT[x!="b" | y!=3] # multiple vector scanning approach, slow
DT[!J("b",3)] # same result but much faster
# Follow the r-help posting guide; support is available here (*not* on r-help):
# datatable-help@lists.r-forge.r-project.org
# or
# http://stackoverflow.com/questions/tagged/data.table
# Further reading and maintenance: open the package vignettes, run the
# package's own test suite, and update installed packages.
vignette("datatable-intro")
vignette("datatable-faq")
vignette("datatable-timings")
test.data.table() # over 700 low level tests
update.packages() # keep up to date
# Run the code above in your browser using DataLab