Usage
subNonStandardCharacters(x,
standardCharacters=c(letters, LETTERS, ' ','.', ',', 0:9,
'"', "'", '-', '_', '(', ')', '[', ']', ''),
replacement='_',
gsubList=list(list(pattern='\\\\\\\\|\\\\',
replacement='"')),
... )
- x
{
character vector in which it is desired to find the first and last
character not in standardCharacters and replace that
substring by replacement.
}
- standardCharacters
{
a character vector of acceptable characters to keep.
}
- replacement
{
a character to replace the substring starting and ending with
characters not in standardCharacters.
}
- gsubList
{
list of lists of pattern and replacement arguments
to be called in succession before looking for nonStandardCharacters
}
- ...
{
optional arguments passed to strsplit
}
1. for(il in 1:length(gsubList))x <- gsub(
gsubList[[il]][["pattern"]], gsubList[[il]][['replacement']], x)
2. nx <- length(x)
3. x. <- strsplit(x, "", ...)
4. for(ix in 1:nx) find the first and last characters not in
standardCharacters in x.[[ix]] and substitute replacement for the
whole substring from the first through the last of them.
NOTE: On 13 May 2013 Jeff Newmiller at the University of California,
Davis, wrote, 'I think it is a fools errand to think that you can
automatically "normalize" arbitrary Unicode characters to an ASCII
form that everyone will agree on.' (This was a reply on
r-help@r-project.org, subject: "Re: [R] Matching names with non-
English characters".) Doubtless someone has software to do a better
job of this than what this function does, but I've so far been unable
to find it in R. If you know of a better solution to this problem,
I'd be pleased to hear from you. Spencer Graves
a character vector with everything between the first and last character
not in standardCharacters replaced by replacement.
[object Object]
sub, strsplit,
grepNonStandardCharacters,
subNonStandardNames
encoded_text_to_latex
subNonStandardNames
# Consider Names = Ruben, Avila and Jose, where "e" and "A" in
# these examples carry an accent. With the default values
# for standardCharacters and replacement, these would become
# Rub_n, _vila, and Jos_.
# (The standard checks for R packages complains about
# non-standard characters, so none are included here.)
#
# Sample input. A backtick stands in for a non-standard character; the
# vector also holds a fully standard name, a nickname delimited by
# backslashes, NA, an empty string, and a single blank.
# Confusion in character sets can create names like the second element.
nonstdNames <- c(
  "Ra`l", "Ra`", "`l", "Torres, Raul",
  "Robert C. \\Bobby\\\\", NA, "", " "
)

# Apply the function with all arguments at their defaults.
Name2 <- subNonStandardCharacters(nonstdNames)

# Expected result: each backtick becomes the default replacement "_",
# the backslashes around the nickname become double quotes (default
# gsubList), and the remaining elements come back unchanged.
Name2. <- c(
  "Ra_l", "Ra_", "_l", nonstdNames[4],
  'Robert C. "Bobby"', NA, "", " "
)

# all.equal() yields TRUE only on a match, so stopifnot() halts otherwise.
stopifnot(all.equal(Name2, Name2.))
manip