Matrix.utils (version 0.9.1)

merge.Matrix: Merges two Matrices or matrix-like objects

Description

Implementation of merge for Matrix. When merge.Matrix is called explicitly, it also works for matrix, data.frame, and vector objects, as a much faster alternative to the built-in merge.

Usage

"merge"(x, y, by.x, by.y, all.x = TRUE, all.y = TRUE, ...)
join.Matrix(x, y, by.x, by.y, all.x = TRUE, all.y = TRUE, ...)

Arguments

x
Matrix or matrix-like object
y
Matrix or matrix-like object
by.x
vector indicating the names to match from Matrix x
by.y
vector indicating the names to match from Matrix y
all.x
logical; if TRUE, then each value in x will be included even if it has no matching values in y
all.y
logical; if TRUE, then each value in y will be included even if it has no matching values in x
...
arguments to be passed to or from methods. Currently ignored

Details

all.x/all.y correspond to the four types of database joins in the following way:

left
all.x=TRUE, all.y=FALSE
right
all.x=FALSE, all.y=TRUE
inner
all.x=FALSE, all.y=FALSE
full
all.x=TRUE, all.y=TRUE

Note that NA values will match other NA values.

Examples

Run this code

# Build three dense example Matrices that share an 'orderNum' key column.
orders <- Matrix(as.matrix(data.frame(
  orderNum = 1:1000,
  customer = sample(100, 1000, TRUE)
)))
cancelledOrders <- Matrix(as.matrix(data.frame(
  orderNum = sample(1000, 100),
  cancelled = 1
)))
skus <- Matrix(as.matrix(data.frame(
  orderNum = sample(1000, 10000, TRUE),
  sku = sample(1000, 10000, TRUE),
  amount = runif(10000)
)))

# Full join: all.x and all.y are both left at their default of TRUE.
a <- merge(
  orders, cancelledOrders,
  by.x = orders[, "orderNum"],
  by.y = cancelledOrders[, "orderNum"]
)
# Right join: all.x = FALSE while all.y keeps its default of TRUE.
b <- merge(
  orders, cancelledOrders,
  by.x = orders[, "orderNum"],
  by.y = cancelledOrders[, "orderNum"],
  all.x = FALSE
)
# Full join against skus, whose orderNum keys are sampled with replacement.
c <- merge(
  orders, skus,
  by.x = orders[, "orderNum"],
  by.y = skus[, "orderNum"]
)

# The dense Matrices above could instead be converted to base matrices or
# data.frames and merged with other tools. That is not possible in the
# sparse case, which this function can handle:
sm <- cbind2(1:200000, rsparsematrix(200000, 10000, density = 0.0001))
sm2 <- cbind2(
  sample(1:200000, 50000, TRUE),
  rsparsematrix(200000, 10, density = 0.01)
)
sm3 <- merge.Matrix(sm, sm2, by.x = sm[, 1], by.y = sm2[, 1])

 ## Not run: 
# #merge.Matrix can also handle many other data types, such as data frames, and is generally fast.
# orders<-data.frame(orderNum=as.character(sample(1e5, 1e6, TRUE)),
#    sku=sample(1e3, 1e6, TRUE),
#    customer=sample(1e4,1e6,TRUE),stringsAsFactors=FALSE)
# cancelledOrders<-data.frame(orderNum=as.character(sample(1e5,1e4)),
#    cancelled=1,stringsAsFactors=FALSE)
# system.time(a<-merge.Matrix(orders,cancelledOrders,orders[,'orderNum'],
#    cancelledOrders[,'orderNum']))
# system.time(b<-merge.data.frame(orders,cancelledOrders,all.x = TRUE,all.y=TRUE))
# system.time(c<-dplyr::full_join(orders,cancelledOrders))
# system.time({require(data.table);
# d<-merge(data.table(orders),data.table(cancelledOrders),
#    by='orderNum',all=TRUE,allow.cartesian=TRUE)})
# 
# orders<-data.frame(orderNum=sample(1e5, 1e6, TRUE), sku=sample(1e3, 1e6,
# TRUE), customer=sample(1e4,1e6,TRUE),stringsAsFactors=FALSE) 
# cancelledOrders<-data.frame(orderNum=sample(1e5,1e4),cancelled=1,stringsAsFactors=FALSE)
# system.time(b<-merge.Matrix(orders,cancelledOrders,orders[,'orderNum'], 
# cancelledOrders[,'orderNum'])) 
# system.time(e<-dplyr::full_join(orders,cancelledOrders)) 
# system.time({require(data.table);
#  d<-merge(data.table(orders),data.table(cancelledOrders),
#  by='orderNum',all=TRUE,allow.cartesian=TRUE)})
# 
# #In certain cases, merge.Matrix can be much faster than alternatives. 
# one<-as.character(1:1000000)
# two<-as.character(sample(1:1000000,1e5,TRUE))
# system.time(b<-merge.Matrix(one,two,one,two)) 
# system.time(c<-dplyr::full_join(data.frame(key=one),data.frame(key=two))) 
# system.time({require(data.table);
#  d<-merge(data.table(data.frame(key=one)),data.table(data.frame(key=two)),
#  by='key',all=TRUE,allow.cartesian=TRUE)})
# ## End(Not run)

Run the code above in your browser using DataLab