# Example: choosing one representative row per duplicate group in tbl.dups.
# Locate the sample data shipped in the RefNet package's extdata directory
# and load it into the current environment (the code below uses tbl.dups).
filename <- system.file("extdata", "tbl.dups.RData", package = "RefNet")
load(filename)

# Interaction types handed to pickBestFromDupGroup as its third argument.
preferred.types <- c("direct", "physical", "aggregation")

# Columns shown whenever selected rows are displayed.
display.cols <- c("A.common", "B.common", "type", "provider", "publicationID")

# Best entry for duplicate group 1; the result is used as a row index.
best.1 <- pickBestFromDupGroup(1, tbl.dups, preferred.types)
tbl.dups[best.1, display.cols]

# Repeat for every duplicate group, in sorted order. The extra arguments
# are forwarded to pickBestFromDupGroup through lapply's `...`.
dupGroups <- sort(unique(tbl.dups$dupGroup))
bestOfDups <- unlist(lapply(dupGroups, pickBestFromDupGroup,
                            tbl.dups, preferred.types))

# Some groups yield NA instead of a usable pick; record their positions
# and keep only the remaining entries.
deleters <- which(is.na(bestOfDups))
bestOfDups <- bestOfDups[!(seq_along(bestOfDups) %in% deleters)]

length(bestOfDups)
tbl.dups[bestOfDups, display.cols]
Run the code above in your browser using DataLab