### Match a hex colour, like `"#99af01"`
# This reads *Match a hash, followed by six hexadecimal values.*
"#" %R% hex_digit(6)
# To match only a hex colour and nothing else, you can add anchors to the
# start and end of the expression.
START %R% "#" %R% hex_digit(6) %R% END
### Simple email address matching.
# This reads *Match one or more letters, numbers, dots, underscores, percents,
# plusses or hyphens. Then match an 'at' symbol. Then match one or more letters,
# numbers, dots, or hyphens. Then match a dot. Then match two to four letters.*
one_or_more(char_class(ASCII_ALNUM %R% "._%+-")) %R%
"@" %R%
one_or_more(char_class(ASCII_ALNUM %R% ".-")) %R%
DOT %R%
ascii_alpha(2, 4)
### IP address matching.
# First we need an expression to match numbers between 0 and 255. Both the
# following syntaxes read *Match two then five then a number between zero and
# five. Or match two then a number between zero and four then a digit. Or match
# an optional zero or one followed by an optional digit folowed by a compulsory
# digit. Make this a single token, but don't capture it.*
# Using the \%|\% operator
ip_element <- group(
"25" %R% char_range(0, 5) %|%
"2" %R% char_range(0, 4) %R% ascii_digit() %|%
optional(char_class("01")) %R% optional(ascii_digit()) %R% ascii_digit()
)
# The same again, this time using the or function
ip_element <- or(
"25" %R% char_range(0, 5),
"2" %R% char_range(0, 4) %R% ascii_digit(),
optional(char_class("01")) %R% optional(ascii_digit()) %R% ascii_digit()
)
# It's easier to write using number_range, though it isn't quite as optimal
# as handcrafted regexes.
number_range(0, 255, allow_leading_zeroes = TRUE)
# Now an IP address consists of 4 of these numbers separated by dots. This
# reads *Match a word boundary. Then create a token from an `ip_element`
# followed by a dot, and repeat it three times. Then match another `ip_element`
# followed by a word boundary.*
BOUNDARY %R%
repeated(group(ip_element %R% DOT), 3) %R%
ip_element %R%
BOUNDARY
Run the code above in your browser using DataLab