##
## 1. Parse standard first-last name format
##
library(Ecdat)

# Inputs written as "given name(s) surname", some with a suffix
# (Jr., III, I) and one with a parenthetical qualifier.
# NOTE(review): the underscores in the last entry presumably stand in for
# accented characters; the expectation below has them come back as
# "Raul" / "Velazquez" -- confirm against the parseName documentation.
tst <- c(
  "Joe Smith (AL)",
  "Teresa Angelica Sanchez de Gomez",
  "John Brown, Jr.",
  "John Brown Jr.",
  "John W. Brown III",
  "John Q. Brown,I",
  "Linda Rosa Smith-Johnson",
  "Anastasio Somoza Debayle",
  "Ra_l Vel_zquez"
)
parsed <- parseName(tst)

# Expected result: a two-column character matrix with one row per input,
# in input order.
# NOTE: The second-to-last example ("Anastasio Somoza Debayle") is in the
# Spanish tradition and is handled incorrectly by the current algorithm.
# The correct answer should be "Somoza Debayle", "Anastasio".
# However, fixing that would complicate the algorithm excessively for now.
tst2 <- cbind(
  surname = c(
    "Smith", "Gomez", "Brown", "Brown", "Brown", "Brown",
    "Smith-Johnson", "Debayle", "Velazquez"
  ),
  givenName = c(
    "Joe", "Teresa Angelica Sanchez de", "John, Jr.", "John, Jr.",
    "John W., III", "John Q., I", "Linda Rosa", "Anastasio Somoza",
    "Raul"
  )
)
stopifnot(all.equal(parsed, tst2))
##
## 2. Parse "surname, given name" format
##
# Inputs written surname first, separated from the given name(s) by a
# comma; a suffix (Jr., III, I) follows a further comma.
tst3 <- c(
  "Smith (AL),Joe",
  "Sanchez de Gomez, Teresa Angelica",
  "Brown, John, Jr.",
  "Brown, John W., III",
  "Brown, John Q., I",
  "Smith-Johnson, Linda Rosa",
  "Somoza Debayle, Anastasio",
  "Vel_zquez, Ra_l"
)
tst4 <- parseName(tst3)

# Expected result, one (surname, givenName) pair per row, in input order.
tst5 <- matrix(
  c(
    "Smith", "Joe",
    "Sanchez de Gomez", "Teresa Angelica",
    "Brown", "John, Jr.",
    "Brown", "John W., III",
    "Brown", "John Q., I",
    "Smith-Johnson", "Linda Rosa",
    "Somoza Debayle", "Anastasio",
    "Velazquez", "Raul"
  ),
  ncol = 2, byrow = TRUE
)
colnames(tst5) <- c("surname", "givenName")

# all.equal() also compares the dimnames, so the column names must match.
stopifnot(all.equal(tst4, tst5))