# Example inputs: three Matrix objects that share an "orderNum" key column.

# One row per order number, each paired with a randomly drawn customer id.
orders <- Matrix(
  as.matrix(
    data.frame(
      orderNum = seq_len(1000),
      customer = sample(100, size = 1000, replace = TRUE)
    )
  )
)

# 100 distinct order numbers, each carrying a constant `cancelled` column of 1.
cancelledOrders <- Matrix(
  as.matrix(
    data.frame(
      orderNum = sample(1000, size = 100),
      cancelled = 1
    )
  )
)

# 10000 line items; order numbers are drawn with replacement, so they repeat.
skus <- Matrix(
  as.matrix(
    data.frame(
      orderNum = sample(1000, size = 10000, replace = TRUE),
      sku = sample(1000, size = 10000, replace = TRUE),
      amount = runif(10000)
    )
  )
)

# Merge orders with cancellations, keyed on the orderNum column of each side.
a <- merge(
  orders,
  cancelledOrders,
  by.x = orders[, "orderNum"],
  by.y = cancelledOrders[, "orderNum"]
)

# Same merge, but with all.x switched off.
b <- merge(
  orders,
  cancelledOrders,
  by.x = orders[, "orderNum"],
  by.y = cancelledOrders[, "orderNum"],
  all.x = FALSE
)

# Merge orders with line items; the key repeats on the skus side.
c <- merge(
  orders,
  skus,
  by.x = orders[, "orderNum"],
  by.y = skus[, "orderNum"]
)

# The Matrix objects above could instead be converted to base matrices or
# data.frames and merged by other methods. That is not possible in the sparse
# case, which this function can handle:
sm <- cbind2(
  seq_len(200000),
  rsparsematrix(200000, 10000, density = 0.0001)
)
sm2 <- cbind2(
  sample(seq_len(200000), size = 50000, replace = TRUE),
  rsparsematrix(200000, 10, density = 0.01)
)
sm3 <- merge.Matrix(sm, sm2, by.x = sm[, 1], by.y = sm2[, 1])
## Not run:
# #merge.Matrix can also handle many other data types, such as data frames, and is generally fast.
# orders<-data.frame(orderNum=as.character(sample(1e5, 1e6, TRUE)),
# sku=sample(1e3, 1e6, TRUE),
# customer=sample(1e4,1e6,TRUE),stringsAsFactors=FALSE)
# cancelledOrders<-data.frame(orderNum=as.character(sample(1e5,1e4)),
# cancelled=1,stringsAsFactors=FALSE)
# system.time(a<-merge.Matrix(orders,cancelledOrders,orders[,'orderNum'],
# cancelledOrders[,'orderNum']))
# system.time(b<-merge.data.frame(orders,cancelledOrders,all.x = TRUE,all.y=TRUE))
# system.time(c<-dplyr::full_join(orders,cancelledOrders))
# system.time({require(data.table);
# d<-merge(data.table(orders),data.table(cancelledOrders),
# by='orderNum',all=TRUE,allow.cartesian=TRUE)})
#
# orders<-data.frame(orderNum=sample(1e5, 1e6, TRUE), sku=sample(1e3, 1e6,
# TRUE), customer=sample(1e4,1e6,TRUE),stringsAsFactors=FALSE)
# cancelledOrders<-data.frame(orderNum=sample(1e5,1e4),cancelled=1,stringsAsFactors=FALSE)
# system.time(b<-merge.Matrix(orders,cancelledOrders,orders[,'orderNum'],
# cancelledOrders[,'orderNum']))
# system.time(e<-dplyr::full_join(orders,cancelledOrders))
# system.time({require(data.table);
# d<-merge(data.table(orders),data.table(cancelledOrders),
# by='orderNum',all=TRUE,allow.cartesian=TRUE)})
#
# #In certain cases, merge.Matrix can be much faster than alternatives.
# one<-as.character(1:1000000)
# two<-as.character(sample(1:1000000,1e5,TRUE))
# system.time(b<-merge.Matrix(one,two,one,two))
# system.time(c<-dplyr::full_join(data.frame(key=one),data.frame(key=two)))
# system.time({require(data.table);
# d<-merge(data.table(data.frame(key=one)),data.table(data.frame(key=two)),
# by='key',all=TRUE,allow.cartesian=TRUE)})
# ## End(Not run)
# Run the code above in your browser using DataLab