## Two small example tables that share the key column `y`. `df2` has
## one more key value ("k") than `df1`.
df1 <- data.frame(
  x = seq_len(10),
  y = head(letters, 10),
  stringsAsFactors = FALSE
)
df2 <- data.frame(
  y = head(letters, 11),
  x2 = seq_len(11),
  stringsAsFactors = FALSE
)
## Merge the two tables on `y` using `mergeCheck`, with `df1` as `x`.
mc1 <- mergeCheck(x = df1, y = df2, by = "y")
## Notice that, as opposed to most merge/join algorithms, `mergeCheck`
## by default retains both row and column order from x.
## For comparison: the same two tables, converted to data.tables and
## merged with `merge` (no `by` given, so the default columns are used).
library(data.table)
merge(
  x = as.data.table(df1),
  y = as.data.table(df2)
)
## Here we get a duplicate of a df1 row in the result. If we only
## check dimensions, we make a mistake. `mergeCheck` captures the
## error and tells us where to find the problem (ID 31 and 180):
## Not run (wrapped in `if (FALSE)`): the closing `mergeCheck` call is
## meant to report a problem with the merge.
if (FALSE) {
  ## Example data file located inside the installed NMdata package.
  pk <- readRDS(
    file = system.file("examples/data/xgxr2.rds", package = "NMdata")
  )
  ## One row per distinct ID, each given a random covariate value in 1..5.
  dt.cov <- pk[, .(ID = unique(ID))]
  dt.cov[, COV := sample.int(5L, size = .N, replace = TRUE)]
  ## Repeat the first row and leave out the last one: the number of rows
  ## is unchanged, but one ID now occurs twice and another is missing.
  dt.cov <- dt.cov[c(1, 1:(.N - 1))]
  ## A plain merge on ID, followed by a look at the dimensions only.
  res.merge <- merge(pk, dt.cov, by = "ID")
  dims(pk, dt.cov, res.merge)
  ## The same merge through `mergeCheck`.
  mergeCheck(pk, dt.cov, by = "ID")
}
## Run the code above in your browser using DataLab