# Side-by-side comparison of data.frame and data.table on the same data.
# Each section performs one operation with base R (result in `tt`) and the
# equivalent with data.table (result in `ss`), then checks the results agree.
# NOTE(review): assumes the data.table package is already attached
# (data.table(), as.data.table() and last() are used) -- confirm in the caller.

# --- Build two identical two-column tables of nr rows ------------------------
nr = 1000000
D = rep(1:5, nr/5)   # 1,2,3,4,5 repeated until there are nr values
# The assignment is done inside system.time() so that construction is what gets
# timed; `<<-` is used because `=` inside a call would be parsed as a named
# argument. Trailing numbers are the original author's timings, not guarantees.
system.time(DF <<- data.frame(colA=D, colB=D)) # 2.08
system.time(DT <<- data.table(colA=D, colB=D)) # 0.15 (over 10 times faster to create)
identical(as.data.table(DF), DT)
identical(dim(DT), dim(DF))
# Ratio of in-memory sizes; the figure below is the original author's
# measurement and will vary with the R / data.table version in use.
object.size(DF)/object.size(DT) # 10 times less memory

# --- Row subset by a condition on a column -----------------------------------
tt = subset(DF, colA>3)
ss = DT[colA>3]
identical(as.data.table(tt), ss)

# --- Mean of one column over a filtered subset -------------------------------
# subset(..., "colB") returns a one-column data.frame, and mean() has no
# data.frame method in current R (it yields NA with a warning), so the column
# is extracted with $colB first -- mirroring the data.table line below.
mean(subset(DF, colA+colB>5, "colB")$colB)
mean(DT[colA+colB>5]$colB)

# --- Evaluate an expression on the columns of a filtered subset --------------
tt = with(subset(DF, colA>3), colA+colB)
ss = with(DT[colA>3], colA+colB) # but could be: DT[colA>3,colA+colB] (not yet implemented)
identical(tt, ss)

# --- Select the last row of each group ---------------------------------------
# tapply() hands the row numbers of each colB group to last(), giving one row
# index per group; seq_len() is used instead of 1:nrow() so that a zero-row
# table would produce an empty index rather than c(1, 0).
tt = DF[with(DF, tapply(seq_len(nrow(DF)), colB, last)),] # select last row grouping by colB
ss = DT[tapply(seq_len(nrow(DT)), colB, last)] # but could be: DT[last,group=colB] (not yet implemented)
identical(as.data.table(tt), ss)

# --- Filter against a lookup vector defined outside the table ----------------
Lkp = 1:3
tt = DF[with(DF, colA %in% Lkp),]
ss = DT[colA %in% Lkp] # expressions inside the [] can see objects in the calling frame
identical(as.data.table(tt), ss)
# Run the code above in your browser using DataLab