# Examples calling soundexBR() on character vectors of personal names.
# Results are auto-printed when the script is run at top level.

# Surname spelled with Z
first <- 'João'
last <- 'Souza'
middle <- 'Santos'
soundexBR(c(first, middle, last))

# Same surname spelled with S instead of Z; compare the value printed for
# the surname here with the one printed for the Z spelling above.
first <- 'João'
last <- 'Sousa'
soundexBR(c(first, last))

# Miscellaneous: full names with accent, spelling and middle-name variations
names <- c('João Souza', 'Joao Sousa', 'Joao dos Santos Souza',
           'John Souza')
soundexBR(names)

# Further spelling variants (K/C, double/single consonants, missing parts)
names <- c('Ana Karolina Kuhnen',
           'Ana Carolina Kuhnen', 'Ana Karolina',
           'Dilma Vana Rousseff', 'Dilma Rousef')
soundexBR(names)
# Example with RecordLinkage
# Some data: two small sample data sets with the same five columns
# (fname, lname, age, birth, date), six rows each.
#
# Columns are named with `=`. Writing `fname <- c(...)` inside data.frame()
# would instead create a stray variable `fname` in the calling environment
# and leave the column with an auto-generated name derived from the
# expression text, so the two data frames would not share column names.
mydata1 <- data.frame(
  fname = c('Ricardo', 'Maria', 'Tereza', 'Pedro', 'José', 'Germano'),
  lname = c('Cunha', 'Andrade', 'Silva', 'Soares', 'Silva', 'Lima'),
  age = c(67, 89, 78, 65, 68, 67),
  birth = c(1945, 1923, 1934, 1947, 1944, 1945),
  # dates are stored as plain numbers in yyyymmdd layout
  date = c(20120907, 20120703, 20120301, 20120805, 20121004, 20121209)
)
mydata2 <- data.frame(
  fname = c('Maria', 'Lúcia', 'Paulo', 'Marcos', 'Ricardo', 'Germanio'),
  lname = c('Andrade', 'Silva', 'Soares', 'Pereira', 'Cunha', 'Lima'),
  age = c(67, 88, 78, 60, 68, 80),
  birth = c(1945, 1924, 1934, 1952, 1944, 1932),
  date = c(20121208, 20121103, 20120302, 20120105, 20121004, 20121209)
)
# compare.linkage(), jarowinkler, editMatch(), epiWeights() and getPairs()
# are used below and are not defined in this script; they come from the
# RecordLinkage package, so attach it first.
library(RecordLinkage)

# Build comparison patterns between the two data sets, passing soundexBR as
# the phonetic function for columns 1 and 2.
# Every option is passed by name with `=`: using `<-` here would pass the
# value positionally (correct only by coincidence of argument order) and
# would also create stray variables in the calling environment.
pairs <- compare.linkage(
  mydata1, mydata2,
  blockfld = list(c(1, 2, 4), c(1, 2)),
  phonetic = c(1, 2), phonfun = soundexBR,
  strcmp = FALSE, strcmpfun = jarowinkler,
  exclude = FALSE,
  identity1 = NA, identity2 = NA,
  n_match = NA, n_non_match = NA
)
print(pairs)

# editMatch() is documented as opening an editor for the matching status,
# so only call it when a user is present to interact with it.
if (interactive()) {
  editMatch(pairs)
}

# To access information in the object:
weights <- epiWeights(pairs, e = 0.01, f = pairs$frequencies)
hist(weights$Wdata, plot = FALSE) # set plot = TRUE to draw the histogram
getPairs(pairs, max.weight = Inf, min.weight = -Inf)