rebus.base (version 0.0-3)

ClassGroups: Character classes

Description

Match character classes.

Usage

alnum(lo, hi, char_class = TRUE)

alpha(lo, hi, char_class = TRUE)

blank(lo, hi, char_class = TRUE)

cntrl(lo, hi, char_class = TRUE)

digit(lo, hi, char_class = TRUE)

graph(lo, hi, char_class = TRUE)

lower(lo, hi, char_class = TRUE)

printable(lo, hi, char_class = TRUE)

punct(lo, hi, char_class = TRUE)

space(lo, hi, char_class = TRUE)

upper(lo, hi, char_class = TRUE)

hex_digit(lo, hi, char_class = TRUE)

any_char(lo, hi)

grapheme(lo, hi)

newline(lo, hi)

dgt(lo, hi, char_class = FALSE)

wrd(lo, hi, char_class = FALSE)

spc(lo, hi, char_class = FALSE)

not_dgt(lo, hi, char_class = FALSE)

not_wrd(lo, hi, char_class = FALSE)

not_spc(lo, hi, char_class = FALSE)

ascii_digit(lo, hi, char_class = TRUE)

ascii_lower(lo, hi, char_class = TRUE)

ascii_upper(lo, hi, char_class = TRUE)

ascii_alpha(lo, hi, char_class = TRUE)

ascii_alnum(lo, hi, char_class = TRUE)

char_range(lo, hi, char_class = lo < hi)

Arguments

lo
A non-negative integer. Minimum number of repeats, when grouped.
hi
positive integer. Maximum number of repeats, when grouped.
char_class
A logical value. Should x be wrapped in a character class? If NA, the function guesses whether that's a good idea.

Value

A character vector representing part or all of a regular expression.

References

http://www.regular-expressions.info/shorthand.html and http://www.rexegg.com/regex-quickstart.html#posix

See Also

regex, Unicode

Examples

Run this code
# R character classes
alnum()
alpha()
blank()
cntrl()
digit()
graph()
lower()
printable()
punct()
space()
upper()
hex_digit()

# Special chars
any_char()
grapheme()
newline()

# Generic classes
dgt()
wrd()
spc()

# Generic negated classes
not_dgt()
not_wrd()
not_spc()

# Non-locale-specific classes
ascii_digit()
ascii_lower()
ascii_upper()

# Don't provide a class wrapper
digit(char_class = FALSE) # same as DIGIT

# Match repeated values
digit(3)
digit(3, 5)
digit(0)
digit(1)
digit(0, 1)

# Ranges of characters
char_range(0, 7) # octal number

# Usage
(rx <- digit(3))
stringi::stri_detect_regex(c("123", "one23"), rx)

# Some classes behave differently under different engines
# In particular PRCE and Perl recognise all these characters
# as punctuation but ICU does not
p <- c(
  "!", "@", "#", "$", "%", "^", "&", "*", "(", ")", "[", "]", "{", "}", ";",
  ":", "'", '"', ",", "<", ">", ".", "/", "?", "\\", "|", "`", "~"
)
icu_matched <- stringi::stri_detect_regex(p, punct())
p[icu_matched]
p[!icu_matched]
pcre_matched <- grepl(punct(), p)
p[pcre_matched]
p[!pcre_matched]

# A grapheme is a character that can be defined by more than one code point
# PCRE does not recognise the concept.
x <- c("Chloe", "Chlo\u00e9", "Chlo\u0065\u0301")
stringi::stri_match_first_regex(x, "Chlo" %R% capture(grapheme()))

# newline() matches three types of line ending: \r, \n, \r\n.
# You can standardize line endings using
stringi::stri_replace_all_regex("foo\nbar\r\nbaz\rquux", NEWLINE, "\n")

Run the code above in your browser using DataCamp Workspace