Usage
subNonStandardNames(x,
standardCharacters=c(letters, LETTERS, ' ','.', ',', 0:9,
'"', "'", '-', '_', '(', ')', '[', ']', ''),
replacement='_',
gsubList=list(list(pattern='\\\\\\\\|\\\\',
replacement='"')),
removeSecondLine=TRUE,
nonStandardNames=Ecdat::nonEnglishNames, ...)
- x
{
character vector in which it is desired to replace
nonStandardNames[, 1] in
subNonStandardCharacters(x, ...) with the corresponding
element of nonStandardNames[, 2].
}
- standardCharacters, replacement, gsubList, ...
{
arguments passed to subNonStandardCharacters
}
- removeSecondLine
{
logical: If TRUE, delete anything following "\n" and return it as
an attribute "secondLine"
}
- nonStandardNames
{
data.frame or character matrix with two columns: Replace any
substring of x matching nonStandardNames[, 1] with the
corresponding element of nonStandardNames[, 2]
}
1. If removeSecondLine is TRUE, delete anything following "\n" and
save it as the attribute "secondLine".
2. x. <- subNonStandardCharacters(x, standardCharacters, replacement,
...)
3. Loop over all rows of nonStandardNames substituting anything
matching nonStandardNames[i, 1] with nonStandardNames[i,
2].
4. Eliminate leading and trailing blanks.
a character vector with all nonStandardCharacters replaced first
by replacement and then by the second column of
nonStandardNames for any that match the first column.
[object Object]
sub
nonEnglishNames
subNonStandardCharacters
# Example input: names showing typical character-set damage
# (a stray accent mark, doubled backslashes standing in for quotes)
# plus one entry with a second line after "\n".
# NOTE(review): the "\n" in the last element is restored here because
# removeSecondLine only strips text following "\n", and the expected
# result below has 'Ed' with secondLine ' --Vacancy' -- confirm the
# exact spacing against the package source.
Names <- c('Raul', 'Ra`l', 'Torres,Raul', 'Torres, Raul',
           "Robert C. \\Bobby\\\\", 'Ed \n --Vacancy')
# confusion in character sets can create
# names like Names[2]
library(Ecdat)
Name2 <- subNonStandardNames(Names)
Name2
# Expected result: cleaned names, with the removed second line
# kept in the "secondLine" attribute (NA where there was none).
Name2. <- c('Raul', 'Raul', Names[3:4],
            'Robert C. "Bobby"', 'Ed')
attr(Name2., 'secondLine') <- c(rep(NA, 5), ' --Vacancy')
Name2.
# Fail loudly if the function's output drifts from the expected value.
stopifnot(
  all.equal(Name2, Name2.)
)
manip