MergeProbDup: Merge two objects of class `ProbDup`.

Description

MergeProbDup merges two objects of class ProbDup into a single one.

Usage

MergeProbDup(pdup1, pdup2)

Arguments

pdup1

An object of class ProbDup.

pdup2

An object of class ProbDup.

Value

An object of class ProbDup with the merged list of fuzzy, phonetic and semantic probable duplicate sets.

Examples

Run this code

## Not run: 
# # Load PGR passport database
# GN <- GN1000
# 
# # Specify as a vector the database fields to be used
# GNfields <- c("NationalID", "CollNo", "DonorID", "OtherID1", "OtherID2")
# 
# # Clean the data
# GN[GNfields] <- lapply(GN[GNfields], function(x) DataClean(x))
# y1 <- list(c("Gujarat", "Dwarf"), c("Castle", "Cary"), c("Small", "Japan"),
# c("Big", "Japan"), c("Mani", "Blanco"), c("Uganda", "Erect"),
# c("Mota", "Company"))
# y2 <- c("Dark", "Light", "Small", "Improved", "Punjab", "SAM")
# y3 <- c("Local", "Bold", "Cary", "Mutant", "Runner", "Giant", "No.",
#         "Bunch", "Peanut")
# GN[GNfields] <- lapply(GN[GNfields], function(x) MergeKW(x, y1, delim = c("space", "dash")))
# GN[GNfields] <- lapply(GN[GNfields], function(x) MergePrefix(x, y2, delim = c("space", "dash")))
# GN[GNfields] <- lapply(GN[GNfields], function(x) MergeSuffix(x, y3, delim = c("space", "dash")))
# 
# # Generate KWIC index
# GNKWIC <- KWIC(GN, GNfields)
# 
# # Specify the exceptions as a vector
# exep <- c("A", "B", "BIG", "BOLD", "BUNCH", "C", "COMPANY", "CULTURE", 
#          "DARK", "E", "EARLY", "EC", "ERECT", "EXOTIC", "FLESH", "GROUNDNUT", 
#          "GUTHUKAI", "IMPROVED", "K", "KUTHUKADAL", "KUTHUKAI", "LARGE", 
#          "LIGHT", "LOCAL", "OF", "OVERO", "P", "PEANUT", "PURPLE", "R", 
#          "RED", "RUNNER", "S1", "SAM", "SMALL", "SPANISH", "TAN", "TYPE", 
#          "U", "VALENCIA", "VIRGINIA", "WHITE")
#           
# # Specify the synsets as a list
# syn <- list(c("CHANDRA", "AH114"), c("TG1", "VIKRAM"))
# 
# # Fetch probable duplicate sets
# GNdup <- ProbDup(kwic1 = GNKWIC, method = "a", excep = exep, fuzzy = TRUE,
#                  phonetic = TRUE, encoding = "primary", 
#                  semantic = TRUE, syn = syn)
#                  
# # Split the probable duplicate sets
# GNdupSplit <- SplitProbDup(GNdup, splitat = c(10, 10, 10))
# 
# # Merge the split sets
# GNdupMerged <- MergeProbDup(GNdupSplit[[1]], GNdupSplit[[3]])
# 
# ## End(Not run)

Run the code above in your browser using DataLab

Description

Usage

Arguments

Value

See Also

Examples