Unicode

0th

Percentile

Unicode classes

Match ranges of Unicode characters. For example, you can match the characters used by a particular language or script.

Keywords
datasets
Usage
armenian(lo, hi, char_class = TRUE)
armenian_ligatures(lo, hi, char_class = TRUE)
caucasian_albanian(lo, hi, char_class = TRUE)
cypriot_syllabary(lo, hi, char_class = TRUE)
cyrillic(lo, hi, char_class = TRUE)
cyrillic_supplement(lo, hi, char_class = TRUE)
cyrillic_extended_a(lo, hi, char_class = TRUE)
cyrillic_extended_b(lo, hi, char_class = TRUE)
elbasan(lo, hi, char_class = TRUE)
georgian(lo, hi, char_class = TRUE)
georgian_supplement(lo, hi, char_class = TRUE)
glagolitic(lo, hi, char_class = TRUE)
gothic(lo, hi, char_class = TRUE)
greek_and_coptic(lo, hi, char_class = TRUE)
greek_extended(lo, hi, char_class = TRUE)
latin(lo, hi, char_class = TRUE)
latin_1_supplement(lo, hi, char_class = TRUE)
latin_extended_a(lo, hi, char_class = TRUE)
latin_extended_b(lo, hi, char_class = TRUE)
latin_extended_c(lo, hi, char_class = TRUE)
latin_extended_d(lo, hi, char_class = TRUE)
latin_extended_e(lo, hi, char_class = TRUE)
latin_extended_additional(lo, hi, char_class = TRUE)
latin_ligatures(lo, hi, char_class = TRUE)
linear_a(lo, hi, char_class = TRUE)
linear_b_syllabary(lo, hi, char_class = TRUE)
linear_b_ideograms(lo, hi, char_class = TRUE)
ogham(lo, hi, char_class = TRUE)
old_italic(lo, hi, char_class = TRUE)
old_permic(lo, hi, char_class = TRUE)
phaistos_disc(lo, hi, char_class = TRUE)
runic(lo, hi, char_class = TRUE)
shavian(lo, hi, char_class = TRUE)
duployan(lo, hi, char_class = TRUE)
shorthand_format_controls(lo, hi, char_class = TRUE)
ipa_extensions(lo, hi, char_class = TRUE)
phonetic_extensions(lo, hi, char_class = TRUE)
phonetic_extensions_supplement(lo, hi, char_class = TRUE)
modifier_tone_letters(lo, hi, char_class = TRUE)
spacing_modifier_letters(lo, hi, char_class = TRUE)
superscripts_and_subscripts(lo, hi, char_class = TRUE)
combining_diacritic_marks(lo, hi, char_class = TRUE)
combining_diacritic_supplement(lo, hi, char_class = TRUE)
combining_diacritic_extended(lo, hi, char_class = TRUE)
combining_half_marks(lo, hi, char_class = TRUE)
bamun(lo, hi, char_class = TRUE)
bamun_supplement(lo, hi, char_class = TRUE)
bassa_vah(lo, hi, char_class = TRUE)
coptic(lo, hi, char_class = TRUE)
coptic_epact_numbers(lo, hi, char_class = TRUE)
egyptian_hieroglyphs(lo, hi, char_class = TRUE)
ethiopic(lo, hi, char_class = TRUE)
ethiopic_supplement(lo, hi, char_class = TRUE)
ethiopic_extended(lo, hi, char_class = TRUE)
ethiopic_extended_a(lo, hi, char_class = TRUE)
mende_kikakui(lo, hi, char_class = TRUE)
meroitic_cursive(lo, hi, char_class = TRUE)
meroitic_hieroglyphs(lo, hi, char_class = TRUE)
nko(lo, hi, char_class = TRUE)
osmanya(lo, hi, char_class = TRUE)
tifinagh(lo, hi, char_class = TRUE)
vai(lo, hi, char_class = TRUE)
arabic(lo, hi, char_class = TRUE)
arabic_supplement(lo, hi, char_class = TRUE)
arabic_extended_a(lo, hi, char_class = TRUE)
arabic_presentation_forms_a(lo, hi, char_class = TRUE)
arabic_presentation_forms_b(lo, hi, char_class = TRUE)
imperial_aramaic(lo, hi, char_class = TRUE)
avestan(lo, hi, char_class = TRUE)
carian(lo, hi, char_class = TRUE)
cuneiform(lo, hi, char_class = TRUE)
cuneiform_numbers_and_punctuation(lo, hi, char_class = TRUE)
old_persian(lo, hi, char_class = TRUE)
ugaritic(lo, hi, char_class = TRUE)
hebrew(lo, hi, char_class = TRUE)
lycian(lo, hi, char_class = TRUE)
lydian(lo, hi, char_class = TRUE)
mandaic(lo, hi, char_class = TRUE)
nabataean(lo, hi, char_class = TRUE)
old_north_arabian(lo, hi, char_class = TRUE)
old_south_arabian(lo, hi, char_class = TRUE)
pahlavi_inscriptional(lo, hi, char_class = TRUE)
pahlavi_psalter(lo, hi, char_class = TRUE)
palmyrene(lo, hi, char_class = TRUE)
phoenician(lo, hi, char_class = TRUE)
samaritan(lo, hi, char_class = TRUE)
syriac(lo, hi, char_class = TRUE)
manichaean(lo, hi, char_class = TRUE)
mongolian(lo, hi, char_class = TRUE)
old_turkic(lo, hi, char_class = TRUE)
phags_pa(lo, hi, char_class = TRUE)
tibetan(lo, hi, char_class = TRUE)
bengali_and_assamese(lo, hi, char_class = TRUE)
brahmi(lo, hi, char_class = TRUE)
chakma(lo, hi, char_class = TRUE)
devanagari(lo, hi, char_class = TRUE)
devanagari_extended(lo, hi, char_class = TRUE)
grantha(lo, hi, char_class = TRUE)
gujarati(lo, hi, char_class = TRUE)
gurmukhi(lo, hi, char_class = TRUE)
kaithi(lo, hi, char_class = TRUE)
kannada(lo, hi, char_class = TRUE)
kharoshthi(lo, hi, char_class = TRUE)
khojki(lo, hi, char_class = TRUE)
khudawadi(lo, hi, char_class = TRUE)
lepcha(lo, hi, char_class = TRUE)
limbu(lo, hi, char_class = TRUE)
mahajani(lo, hi, char_class = TRUE)
malayalam(lo, hi, char_class = TRUE)
meetei_mayek(lo, hi, char_class = TRUE)
meetei_mayek_extensions(lo, hi, char_class = TRUE)
modi(lo, hi, char_class = TRUE)
mro(lo, hi, char_class = TRUE)
ol_chiki(lo, hi, char_class = TRUE)
oriya(lo, hi, char_class = TRUE)
saurashtra(lo, hi, char_class = TRUE)
sharada(lo, hi, char_class = TRUE)
siddham(lo, hi, char_class = TRUE)
sinhala(lo, hi, char_class = TRUE)
sinhala_archaic_numbers(lo, hi, char_class = TRUE)
sora_sompeng(lo, hi, char_class = TRUE)
syloti_nagri(lo, hi, char_class = TRUE)
takri(lo, hi, char_class = TRUE)
tamil(lo, hi, char_class = TRUE)
telugu(lo, hi, char_class = TRUE)
thaana(lo, hi, char_class = TRUE)
tirhuta(lo, hi, char_class = TRUE)
vedic_extensions(lo, hi, char_class = TRUE)
warang_citi(lo, hi, char_class = TRUE)
cham(lo, hi, char_class = TRUE)
kayah_li(lo, hi, char_class = TRUE)
khmer(lo, hi, char_class = TRUE)
khmer_symbols(lo, hi, char_class = TRUE)
lao(lo, hi, char_class = TRUE)
myanmar(lo, hi, char_class = TRUE)
myanmar_extended_a(lo, hi, char_class = TRUE)
myanmar_extended_b(lo, hi, char_class = TRUE)
new_tai_lue(lo, hi, char_class = TRUE)
pahawh_hmong(lo, hi, char_class = TRUE)
pau_cin_hau(lo, hi, char_class = TRUE)
tai_le(lo, hi, char_class = TRUE)
tai_tham(lo, hi, char_class = TRUE)
tai_viet(lo, hi, char_class = TRUE)
thai(lo, hi, char_class = TRUE)
balinese(lo, hi, char_class = TRUE)
batak(lo, hi, char_class = TRUE)
buginese(lo, hi, char_class = TRUE)
buhid(lo, hi, char_class = TRUE)
hanunoo(lo, hi, char_class = TRUE)
javanese(lo, hi, char_class = TRUE)
rejang(lo, hi, char_class = TRUE)
sundanese(lo, hi, char_class = TRUE)
sundanese_supplement(lo, hi, char_class = TRUE)
tagalog(lo, hi, char_class = TRUE)
tagbanwa(lo, hi, char_class = TRUE)
bopomofo(lo, hi, char_class = TRUE)
bopomofo_extended(lo, hi, char_class = TRUE)
cjk_unified_ideographs(lo, hi, char_class = TRUE)
cjk_unified_ideographs_extension_a(lo, hi, char_class = TRUE)
cjk_unified_ideographs_extension_b(lo, hi, char_class = TRUE)
cjk_unified_ideographs_extension_c(lo, hi, char_class = TRUE)
cjk_unified_ideographs_extension_d(lo, hi, char_class = TRUE)
cjk_compatibility_ideographs(lo, hi, char_class = TRUE)
cjk_compatibility_ideographs_supplement(lo, hi, char_class = TRUE)
kangxi_radicals(lo, hi, char_class = TRUE)
kangxi_radicals_supplement(lo, hi, char_class = TRUE)
cjk_strokes(lo, hi, char_class = TRUE)
cjk_ideographic_description_characters(lo, hi, char_class = TRUE)
hangul_jamo(lo, hi, char_class = TRUE)
hangul_jamo_extended_a(lo, hi, char_class = TRUE)
hangul_jamo_extended_b(lo, hi, char_class = TRUE)
hangul_compatibility_jamo(lo, hi, char_class = TRUE)
hangul_syllables(lo, hi, char_class = TRUE)
hiragana(lo, hi, char_class = TRUE)
katakana(lo, hi, char_class = TRUE)
katakana_phonetic_extensions(lo, hi, char_class = TRUE)
kana_supplement(lo, hi, char_class = TRUE)
kanbun(lo, hi, char_class = TRUE)
lisu(lo, hi, char_class = TRUE)
miao(lo, hi, char_class = TRUE)
yi_syllables(lo, hi, char_class = TRUE)
yi_radicals(lo, hi, char_class = TRUE)
cherokee(lo, hi, char_class = TRUE)
deseret(lo, hi, char_class = TRUE)
unified_canadian_aboriginal_syllabics(lo, hi, char_class = TRUE)
unified_canadian_aboriginal_syllabics_extended(lo, hi, char_class = TRUE)
alphabetic_presentation_forms(lo, hi, char_class = TRUE)
halfwidth_and_fullwidth_forms(lo, hi, char_class = TRUE)
general_punctuation(lo, hi, char_class = TRUE)
latin_1_punctuation(lo, hi, char_class = TRUE)
small_form_variants(lo, hi, char_class = TRUE)
supplemental_punctuation(lo, hi, char_class = TRUE)
cjk_symbols_and_punctuation(lo, hi, char_class = TRUE)
cjk_compatibility_forms(lo, hi, char_class = TRUE)
fullwidth_ascii_punctuation(lo, hi, char_class = TRUE)
vertical_forms(lo, hi, char_class = TRUE)
letterlike_symbols(lo, hi, char_class = TRUE)
ancient_symbols(lo, hi, char_class = TRUE)
mathematical_alphanumeric_symbols(lo, hi, char_class = TRUE)
arabic_mathematical_alphanumeric_symbols(lo, hi, char_class = TRUE)
enclosed_alphanumerics(lo, hi, char_class = TRUE)
enclosed_alphanumeric_supplement(lo, hi, char_class = TRUE)
enclosed_cjk_letters_and_months(lo, hi, char_class = TRUE)
enclosed_ideographic_supplement(lo, hi, char_class = TRUE)
cjk_compatibility(lo, hi, char_class = TRUE)
miscellaneous_technical(lo, hi, char_class = TRUE)
control_pictures(lo, hi, char_class = TRUE)
optical_character_recognition(lo, hi, char_class = TRUE)
combining_diacritic_marks_for_symbols(lo, hi, char_class = TRUE)
aegean_numbers(lo, hi, char_class = TRUE)
ancient_greek_numbers(lo, hi, char_class = TRUE)
fullwidth_ascii_digits(lo, hi, char_class = TRUE)
common_indic_number_forms(lo, hi, char_class = TRUE)
coptic_epact_numbers(lo, hi, char_class = TRUE)
counting_rod_numerals(lo, hi, char_class = TRUE)
number_forms(lo, hi, char_class = TRUE)
rumi_numeral_symbols(lo, hi, char_class = TRUE)
sinhala_archaic_numbers(lo, hi, char_class = TRUE)
math_arrows(lo, hi, char_class = TRUE)
supplemental_arrows_a(lo, hi, char_class = TRUE)
supplemental_arrows_a(lo, hi, char_class = TRUE)
supplemental_arrows_a(lo, hi, char_class = TRUE)
additional_arrows(lo, hi, char_class = TRUE)
supplemental_mathematical_operators(lo, hi, char_class = TRUE)
miscellaneous_mathematical_symbols_a(lo, hi, char_class = TRUE)
miscellaneous_mathematical_symbols_b(lo, hi, char_class = TRUE)
floors_and_ceilings(lo, hi, char_class = TRUE)
invisible_operators(lo, hi, char_class = TRUE)
geometric_shapes(lo, hi, char_class = TRUE)
box_drawing(lo, hi, char_class = TRUE)
block_elements(lo, hi, char_class = TRUE)
geometric_shapes_extended(lo, hi, char_class = TRUE)
alchemical_symbols(lo, hi, char_class = TRUE)
braille_patterns(lo, hi, char_class = TRUE)
currency_symbols(lo, hi, char_class = TRUE)
dingbats(lo, hi, char_class = TRUE)
ornamental_dingbats(lo, hi, char_class = TRUE)
emoticons(lo, hi, char_class = TRUE)
chess_checkers_draughts(lo, hi, char_class = TRUE)
domino_tiles(lo, hi, char_class = TRUE)
japanese_chess(lo, hi, char_class = TRUE)
mahjong_tiles(lo, hi, char_class = TRUE)
playing_cards(lo, hi, char_class = TRUE)
card_suits(lo, hi, char_class = TRUE)
miscellaneous_symbols_and_pictographs(lo, hi, char_class = TRUE)
musical_symbols(lo, hi, char_class = TRUE)
ancient_greek_musical_notation(lo, hi, char_class = TRUE)
byzantine_musical_symbols(lo, hi, char_class = TRUE)
transport_and_map_symbols(lo, hi, char_class = TRUE)
yijing_mono_di_and_trigrams(lo, hi, char_class = TRUE)
yijing_hexagram_symbols(lo, hi, char_class = TRUE)
tai_xuan_jing_symbols(lo, hi, char_class = TRUE)
specials(lo, hi, char_class = TRUE)
tags(lo, hi, char_class = TRUE)
variation_selectors(lo, hi, char_class = TRUE)
variation_selectors_supplement(lo, hi, char_class = TRUE)
private_use_area(lo, hi, char_class = TRUE)
supplementary_private_use_area_a(lo, hi, char_class = TRUE)
supplementary_private_use_area_b(lo, hi, char_class = TRUE)
ARMENIAN
ARMENIAN_LIGATURES
CAUCASIAN_ALBANIAN
CYPRIOT_SYLLABARY
CYRILLIC
CYRILLIC_SUPPLEMENT
CYRILLIC_EXTENDED_A
CYRILLIC_EXTENDED_B
ELBASAN
GEORGIAN
GEORGIAN_SUPPLEMENT
GLAGOLITIC
GOTHIC
GREEK_AND_COPTIC
GREEK_EXTENDED
LATIN
LATIN_1_SUPPLEMENT
LATIN_EXTENDED_A
LATIN_EXTENDED_B
LATIN_EXTENDED_C
LATIN_EXTENDED_D
LATIN_EXTENDED_E
LATIN_EXTENDED_ADDITIONAL
LATIN_LIGATURES
LINEAR_A
LINEAR_B_SYLLABARY
LINEAR_B_IDEOGRAMS
OGHAM
OLD_ITALIC
OLD_PERMIC
PHAISTOS_DISC
RUNIC
SHAVIAN
DUPLOYAN
SHORTHAND_FORMAT_CONTROLS
IPA_EXTENSIONS
PHONETIC_EXTENSIONS
PHONETIC_EXTENSIONS_SUPPLEMENT
MODIFIER_TONE_LETTERS
SPACING_MODIFIER_LETTERS
SUPERSCRIPTS_AND_SUBSCRIPTS
COMBINING_DIACRITIC_MARKS
COMBINING_DIACRITIC_SUPPLEMENT
COMBINING_DIACRITIC_EXTENDED
COMBINING_HALF_MARKS
BAMUN
BAMUN_SUPPLEMENT
BASSA_VAH
COPTIC
COPTIC_EPACT_NUMBERS
EGYPTIAN_HIEROGLYPHS
ETHIOPIC
ETHIOPIC_SUPPLEMENT
ETHIOPIC_EXTENDED
ETHIOPIC_EXTENDED_A
MENDE_KIKAKUI
MEROITIC_CURSIVE
MEROITIC_HIEROGLYPHS
NKO
OSMANYA
TIFINAGH
VAI
ARABIC
ARABIC_SUPPLEMENT
ARABIC_EXTENDED_A
ARABIC_PRESENTATION_FORMS_A
ARABIC_PRESENTATION_FORMS_B
IMPERIAL_ARAMAIC
AVESTAN
CARIAN
CUNEIFORM
CUNEIFORM_NUMBERS_AND_PUNCTUATION
OLD_PERSIAN
UGARITIC
HEBREW
LYCIAN
LYDIAN
MANDAIC
NABATAEAN
OLD_NORTH_ARABIAN
OLD_SOUTH_ARABIAN
PAHLAVI_INSCRIPTIONAL
PAHLAVI_PSALTER
PALMYRENE
PHOENICIAN
SAMARITAN
SYRIAC
MANICHAEAN
MONGOLIAN
OLD_TURKIC
PHAGS_PA
TIBETAN
BENGALI_AND_ASSAMESE
BRAHMI
CHAKMA
DEVANAGARI
DEVANAGARI_EXTENDED
GRANTHA
GUJARATI
GURMUKHI
KAITHI
KANNADA
KHAROSHTHI
KHOJKI
KHUDAWADI
LEPCHA
LIMBU
MAHAJANI
MALAYALAM
MEETEI_MAYEK
MEETEI_MAYEK_EXTENSIONS
MODI
MRO
OL_CHIKI
ORIYA
SAURASHTRA
SHARADA
SIDDHAM
SINHALA
SINHALA_ARCHAIC_NUMBERS
SORA_SOMPENG
SYLOTI_NAGRI
TAKRI
TAMIL
TELUGU
THAANA
TIRHUTA
VEDIC_EXTENSIONS
WARANG_CITI
CHAM
KAYAH_LI
KHMER
KHMER_SYMBOLS
LAO
MYANMAR
MYANMAR_EXTENDED_A
MYANMAR_EXTENDED_B
NEW_TAI_LUE
PAHAWH_HMONG
PAU_CIN_HAU
TAI_LE
TAI_THAM
TAI_VIET
THAI
BALINESE
BATAK
BUGINESE
BUHID
HANUNOO
JAVANESE
REJANG
SUNDANESE
SUNDANESE_SUPPLEMENT
TAGALOG
TAGBANWA
BOPOMOFO
BOPOMOFO_EXTENDED
CJK_UNIFIED_IDEOGRAPHS
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
CJK_COMPATIBILITY_IDEOGRAPHS
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
KANGXI_RADICALS
KANGXI_RADICALS_SUPPLEMENT
CJK_STROKES
CJK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS
HANGUL_JAMO
HANGUL_JAMO_EXTENDED_A
HANGUL_JAMO_EXTENDED_B
HANGUL_COMPATIBILITY_JAMO
HANGUL_SYLLABLES
HIRAGANA
KATAKANA
KATAKANA_PHONETIC_EXTENSIONS
KANA_SUPPLEMENT
KANBUN
LISU
MIAO
YI_SYLLABLES
YI_RADICALS
CHEROKEE
DESERET
UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED
ALPHABETIC_PRESENTATION_FORMS
HALFWIDTH_AND_FULLWIDTH_FORMS
GENERAL_PUNCTUATION
LATIN_1_PUNCTUATION
SMALL_FORM_VARIANTS
SUPPLEMENTAL_PUNCTUATION
CJK_SYMBOLS_AND_PUNCTUATION
CJK_COMPATIBILITY_FORMS
FULLWIDTH_ASCII_PUNCTUATION
VERTICAL_FORMS
LETTERLIKE_SYMBOLS
ANCIENT_SYMBOLS
MATHEMATICAL_ALPHANUMERIC_SYMBOLS
ARABIC_MATHEMATICAL_ALPHANUMERIC_SYMBOLS
ENCLOSED_ALPHANUMERICS
ENCLOSED_ALPHANUMERIC_SUPPLEMENT
ENCLOSED_CJK_LETTERS_AND_MONTHS
ENCLOSED_IDEOGRAPHIC_SUPPLEMENT
CJK_COMPATIBILITY
MISCELLANEOUS_TECHNICAL
CONTROL_PICTURES
OPTICAL_CHARACTER_RECOGNITION
COMBINING_DIACRITIC_MARKS_FOR_SYMBOLS
AEGEAN_NUMBERS
ANCIENT_GREEK_NUMBERS
FULLWIDTH_ASCII_DIGITS
COMMON_INDIC_NUMBER_FORMS
COPTIC_EPACT_NUMBERS
COUNTING_ROD_NUMERALS
NUMBER_FORMS
RUMI_NUMERAL_SYMBOLS
SINHALA_ARCHAIC_NUMBERS
MATH_ARROWS
SUPPLEMENTAL_ARROWS_A
SUPPLEMENTAL_ARROWS_A
SUPPLEMENTAL_ARROWS_A
ADDITIONAL_ARROWS
SUPPLEMENTAL_MATHEMATICAL_OPERATORS
MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
FLOORS_AND_CEILINGS
INVISIBLE_OPERATORS
GEOMETRIC_SHAPES
BOX_DRAWING
BLOCK_ELEMENTS
GEOMETRIC_SHAPES_EXTENDED
ALCHEMICAL_SYMBOLS
BRAILLE_PATTERNS
CURRENCY_SYMBOLS
DINGBATS
ORNAMENTAL_DINGBATS
EMOTICONS
CHESS_CHECKERS_DRAUGHTS
DOMINO_TILES
JAPANESE_CHESS
MAHJONG_TILES
PLAYING_CARDS
CARD_SUITS
MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS
MUSICAL_SYMBOLS
ANCIENT_GREEK_MUSICAL_NOTATION
BYZANTINE_MUSICAL_SYMBOLS
TRANSPORT_AND_MAP_SYMBOLS
YIJING_MONO_DI_AND_TRIGRAMS
YIJING_HEXAGRAM_SYMBOLS
TAI_XUAN_JING_SYMBOLS
SPECIALS
TAGS
VARIATION_SELECTORS
VARIATION_SELECTORS_SUPPLEMENT
PRIVATE_USE_AREA
SUPPLEMENTARY_PRIVATE_USE_AREA_A
SUPPLEMENTARY_PRIVATE_USE_AREA_B
Arguments
lo
A non-negative integer. Minimum number of repeats, when grouped.
hi
A positive integer. Maximum number of repeats, when grouped.
char_class
TRUE or FALSE. Should the values be wrapped into a character class?
Value

A character vector representing part or all of a regular expression.

Note

Windows currently doesn't correctly handle Unicode code points that need more than four hexadecimal digits. See https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=16098

Format

An object of class regex (inherits from character) of length 1.

References

http://www.unicode.org/charts

See Also

ClassGroups

Aliases
  • ADDITIONAL_ARROWS
  • AEGEAN_NUMBERS
  • ALCHEMICAL_SYMBOLS
  • ALPHABETIC_PRESENTATION_FORMS
  • ANCIENT_GREEK_MUSICAL_NOTATION
  • ANCIENT_GREEK_NUMBERS
  • ANCIENT_SYMBOLS
  • ARABIC
  • ARABIC_EXTENDED_A
  • ARABIC_MATHEMATICAL_ALPHANUMERIC_SYMBOLS
  • ARABIC_PRESENTATION_FORMS_A
  • ARABIC_PRESENTATION_FORMS_B
  • ARABIC_SUPPLEMENT
  • ARMENIAN
  • ARMENIAN_LIGATURES
  • AVESTAN
  • BALINESE
  • BAMUN
  • BAMUN_SUPPLEMENT
  • BASSA_VAH
  • BATAK
  • BENGALI_AND_ASSAMESE
  • BLOCK_ELEMENTS
  • BOPOMOFO
  • BOPOMOFO_EXTENDED
  • BOX_DRAWING
  • BRAHMI
  • BRAILLE_PATTERNS
  • BUGINESE
  • BUHID
  • BYZANTINE_MUSICAL_SYMBOLS
  • CARD_SUITS
  • CARIAN
  • CAUCASIAN_ALBANIAN
  • CHAKMA
  • CHAM
  • CHEROKEE
  • CHESS_CHECKERS_DRAUGHTS
  • CJK_COMPATIBILITY
  • CJK_COMPATIBILITY_FORMS
  • CJK_COMPATIBILITY_IDEOGRAPHS
  • CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
  • CJK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS
  • CJK_STROKES
  • CJK_SYMBOLS_AND_PUNCTUATION
  • CJK_UNIFIED_IDEOGRAPHS
  • CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
  • CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
  • CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
  • CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
  • COMBINING_DIACRITIC_EXTENDED
  • COMBINING_DIACRITIC_MARKS
  • COMBINING_DIACRITIC_MARKS_FOR_SYMBOLS
  • COMBINING_DIACRITIC_SUPPLEMENT
  • COMBINING_HALF_MARKS
  • COMMON_INDIC_NUMBER_FORMS
  • CONTROL_PICTURES
  • COPTIC
  • COPTIC_EPACT_NUMBERS
  • COUNTING_ROD_NUMERALS
  • CUNEIFORM
  • CUNEIFORM_NUMBERS_AND_PUNCTUATION
  • CURRENCY_SYMBOLS
  • CYPRIOT_SYLLABARY
  • CYRILLIC
  • CYRILLIC_EXTENDED_A
  • CYRILLIC_EXTENDED_B
  • CYRILLIC_SUPPLEMENT
  • DESERET
  • DEVANAGARI
  • DEVANAGARI_EXTENDED
  • DINGBATS
  • DOMINO_TILES
  • DUPLOYAN
  • EGYPTIAN_HIEROGLYPHS
  • ELBASAN
  • EMOTICONS
  • ENCLOSED_ALPHANUMERICS
  • ENCLOSED_ALPHANUMERIC_SUPPLEMENT
  • ENCLOSED_CJK_LETTERS_AND_MONTHS
  • ENCLOSED_IDEOGRAPHIC_SUPPLEMENT
  • ETHIOPIC
  • ETHIOPIC_EXTENDED
  • ETHIOPIC_EXTENDED_A
  • ETHIOPIC_SUPPLEMENT
  • FLOORS_AND_CEILINGS
  • FULLWIDTH_ASCII_DIGITS
  • FULLWIDTH_ASCII_PUNCTUATION
  • GENERAL_PUNCTUATION
  • GEOMETRIC_SHAPES
  • GEOMETRIC_SHAPES_EXTENDED
  • GEORGIAN
  • GEORGIAN_SUPPLEMENT
  • GLAGOLITIC
  • GOTHIC
  • GRANTHA
  • GREEK_AND_COPTIC
  • GREEK_EXTENDED
  • GUJARATI
  • GURMUKHI
  • HALFWIDTH_AND_FULLWIDTH_FORMS
  • HANGUL_COMPATIBILITY_JAMO
  • HANGUL_JAMO
  • HANGUL_JAMO_EXTENDED_A
  • HANGUL_JAMO_EXTENDED_B
  • HANGUL_SYLLABLES
  • HANUNOO
  • HEBREW
  • HIRAGANA
  • IMPERIAL_ARAMAIC
  • INVISIBLE_OPERATORS
  • IPA_EXTENSIONS
  • JAPANESE_CHESS
  • JAVANESE
  • KAITHI
  • KANA_SUPPLEMENT
  • KANBUN
  • KANGXI_RADICALS
  • KANGXI_RADICALS_SUPPLEMENT
  • KANNADA
  • KATAKANA
  • KATAKANA_PHONETIC_EXTENSIONS
  • KAYAH_LI
  • KHAROSHTHI
  • KHMER
  • KHMER_SYMBOLS
  • KHOJKI
  • KHUDAWADI
  • LAO
  • LATIN
  • LATIN_1_PUNCTUATION
  • LATIN_1_SUPPLEMENT
  • LATIN_EXTENDED_A
  • LATIN_EXTENDED_ADDITIONAL
  • LATIN_EXTENDED_B
  • LATIN_EXTENDED_C
  • LATIN_EXTENDED_D
  • LATIN_EXTENDED_E
  • LATIN_LIGATURES
  • LEPCHA
  • LETTERLIKE_SYMBOLS
  • LIMBU
  • LINEAR_A
  • LINEAR_B_IDEOGRAMS
  • LINEAR_B_SYLLABARY
  • LISU
  • LYCIAN
  • LYDIAN
  • MAHAJANI
  • MAHJONG_TILES
  • MALAYALAM
  • MANDAIC
  • MANICHAEAN
  • MATHEMATICAL_ALPHANUMERIC_SYMBOLS
  • MATH_ARROWS
  • MEETEI_MAYEK
  • MEETEI_MAYEK_EXTENSIONS
  • MENDE_KIKAKUI
  • MEROITIC_CURSIVE
  • MEROITIC_HIEROGLYPHS
  • MIAO
  • MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
  • MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
  • MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS
  • MISCELLANEOUS_TECHNICAL
  • MODI
  • MODIFIER_TONE_LETTERS
  • MONGOLIAN
  • MRO
  • MUSICAL_SYMBOLS
  • MYANMAR
  • MYANMAR_EXTENDED_A
  • MYANMAR_EXTENDED_B
  • NABATAEAN
  • NEW_TAI_LUE
  • NKO
  • NUMBER_FORMS
  • OGHAM
  • OLD_ITALIC
  • OLD_NORTH_ARABIAN
  • OLD_PERMIC
  • OLD_PERSIAN
  • OLD_SOUTH_ARABIAN
  • OLD_TURKIC
  • OL_CHIKI
  • OPTICAL_CHARACTER_RECOGNITION
  • ORIYA
  • ORNAMENTAL_DINGBATS
  • OSMANYA
  • PAHAWH_HMONG
  • PAHLAVI_INSCRIPTIONAL
  • PAHLAVI_PSALTER
  • PALMYRENE
  • PAU_CIN_HAU
  • PHAGS_PA
  • PHAISTOS_DISC
  • PHOENICIAN
  • PHONETIC_EXTENSIONS
  • PHONETIC_EXTENSIONS_SUPPLEMENT
  • PLAYING_CARDS
  • PRIVATE_USE_AREA
  • REJANG
  • RUMI_NUMERAL_SYMBOLS
  • RUNIC
  • SAMARITAN
  • SAURASHTRA
  • SHARADA
  • SHAVIAN
  • SHORTHAND_FORMAT_CONTROLS
  • SIDDHAM
  • SINHALA
  • SINHALA_ARCHAIC_NUMBERS
  • SMALL_FORM_VARIANTS
  • SORA_SOMPENG
  • SPACING_MODIFIER_LETTERS
  • SPECIALS
  • SUNDANESE
  • SUNDANESE_SUPPLEMENT
  • SUPERSCRIPTS_AND_SUBSCRIPTS
  • SUPPLEMENTAL_ARROWS_A
  • SUPPLEMENTAL_MATHEMATICAL_OPERATORS
  • SUPPLEMENTAL_PUNCTUATION
  • SUPPLEMENTARY_PRIVATE_USE_AREA_A
  • SUPPLEMENTARY_PRIVATE_USE_AREA_B
  • SYLOTI_NAGRI
  • SYRIAC
  • TAGALOG
  • TAGBANWA
  • TAGS
  • TAI_LE
  • TAI_THAM
  • TAI_VIET
  • TAI_XUAN_JING_SYMBOLS
  • TAKRI
  • TAMIL
  • TELUGU
  • THAANA
  • THAI
  • TIBETAN
  • TIFINAGH
  • TIRHUTA
  • TRANSPORT_AND_MAP_SYMBOLS
  • UGARITIC
  • UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
  • UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED
  • Unicode
  • VAI
  • VARIATION_SELECTORS
  • VARIATION_SELECTORS_SUPPLEMENT
  • VEDIC_EXTENSIONS
  • VERTICAL_FORMS
  • WARANG_CITI
  • YIJING_HEXAGRAM_SYMBOLS
  • YIJING_MONO_DI_AND_TRIGRAMS
  • YI_RADICALS
  • YI_SYLLABLES
  • additional_arrows
  • aegean_numbers
  • alchemical_symbols
  • alphabetic_presentation_forms
  • ancient_greek_musical_notation
  • ancient_greek_numbers
  • ancient_symbols
  • arabic
  • arabic_extended_a
  • arabic_mathematical_alphanumeric_symbols
  • arabic_presentation_forms_a
  • arabic_presentation_forms_b
  • arabic_supplement
  • armenian
  • armenian_ligatures
  • avestan
  • balinese
  • bamun
  • bamun_supplement
  • bassa_vah
  • batak
  • bengali_and_assamese
  • block_elements
  • bopomofo
  • bopomofo_extended
  • box_drawing
  • brahmi
  • braille_patterns
  • buginese
  • buhid
  • byzantine_musical_symbols
  • card_suits
  • carian
  • caucasian_albanian
  • chakma
  • cham
  • cherokee
  • chess_checkers_draughts
  • cjk_compatibility
  • cjk_compatibility_forms
  • cjk_compatibility_ideographs
  • cjk_compatibility_ideographs_supplement
  • cjk_ideographic_description_characters
  • cjk_strokes
  • cjk_symbols_and_punctuation
  • cjk_unified_ideographs
  • cjk_unified_ideographs_extension_a
  • cjk_unified_ideographs_extension_b
  • cjk_unified_ideographs_extension_c
  • cjk_unified_ideographs_extension_d
  • combining_diacritic_extended
  • combining_diacritic_marks
  • combining_diacritic_marks_for_symbols
  • combining_diacritic_supplement
  • combining_half_marks
  • common_indic_number_forms
  • control_pictures
  • coptic
  • coptic_epact_numbers
  • counting_rod_numerals
  • cuneiform
  • cuneiform_numbers_and_punctuation
  • currency_symbols
  • cypriot_syllabary
  • cyrillic
  • cyrillic_extended_a
  • cyrillic_extended_b
  • cyrillic_supplement
  • deseret
  • devanagari
  • devanagari_extended
  • dingbats
  • domino_tiles
  • duployan
  • egyptian_hieroglyphs
  • elbasan
  • emoticons
  • enclosed_alphanumeric_supplement
  • enclosed_alphanumerics
  • enclosed_cjk_letters_and_months
  • enclosed_ideographic_supplement
  • ethiopic
  • ethiopic_extended
  • ethiopic_extended_a
  • ethiopic_supplement
  • floors_and_ceilings
  • fullwidth_ascii_digits
  • fullwidth_ascii_punctuation
  • general_punctuation
  • geometric_shapes
  • geometric_shapes_extended
  • georgian
  • georgian_supplement
  • glagolitic
  • gothic
  • grantha
  • greek_and_coptic
  • greek_extended
  • gujarati
  • gurmukhi
  • halfwidth_and_fullwidth_forms
  • hangul_compatibility_jamo
  • hangul_jamo
  • hangul_jamo_extended_a
  • hangul_jamo_extended_b
  • hangul_syllables
  • hanunoo
  • hebrew
  • hiragana
  • imperial_aramaic
  • invisible_operators
  • ipa_extensions
  • japanese_chess
  • javanese
  • kaithi
  • kana_supplement
  • kanbun
  • kangxi_radicals
  • kangxi_radicals_supplement
  • kannada
  • katakana
  • katakana_phonetic_extensions
  • kayah_li
  • kharoshthi
  • khmer
  • khmer_symbols
  • khojki
  • khudawadi
  • lao
  • latin
  • latin_1_punctuation
  • latin_1_supplement
  • latin_extended_a
  • latin_extended_additional
  • latin_extended_b
  • latin_extended_c
  • latin_extended_d
  • latin_extended_e
  • latin_ligatures
  • lepcha
  • letterlike_symbols
  • limbu
  • linear_a
  • linear_b_ideograms
  • linear_b_syllabary
  • lisu
  • lycian
  • lydian
  • mahajani
  • mahjong_tiles
  • malayalam
  • mandaic
  • manichaean
  • math_arrows
  • mathematical_alphanumeric_symbols
  • meetei_mayek
  • meetei_mayek_extensions
  • mende_kikakui
  • meroitic_cursive
  • meroitic_hieroglyphs
  • miao
  • miscellaneous_mathematical_symbols_a
  • miscellaneous_mathematical_symbols_b
  • miscellaneous_symbols_and_pictographs
  • miscellaneous_technical
  • modi
  • modifier_tone_letters
  • mongolian
  • mro
  • musical_symbols
  • myanmar
  • myanmar_extended_a
  • myanmar_extended_b
  • nabataean
  • new_tai_lue
  • nko
  • number_forms
  • ogham
  • ol_chiki
  • old_italic
  • old_north_arabian
  • old_permic
  • old_persian
  • old_south_arabian
  • old_turkic
  • optical_character_recognition
  • oriya
  • ornamental_dingbats
  • osmanya
  • pahawh_hmong
  • pahlavi_inscriptional
  • pahlavi_psalter
  • palmyrene
  • pau_cin_hau
  • phags_pa
  • phaistos_disc
  • phoenician
  • phonetic_extensions
  • phonetic_extensions_supplement
  • playing_cards
  • private_use_area
  • rejang
  • rumi_numeral_symbols
  • runic
  • samaritan
  • saurashtra
  • sharada
  • shavian
  • shorthand_format_controls
  • siddham
  • sinhala
  • sinhala_archaic_numbers
  • small_form_variants
  • sora_sompeng
  • spacing_modifier_letters
  • specials
  • sundanese
  • sundanese_supplement
  • superscripts_and_subscripts
  • supplemental_arrows_a
  • supplemental_mathematical_operators
  • supplemental_punctuation
  • supplementary_private_use_area_a
  • supplementary_private_use_area_b
  • syloti_nagri
  • syriac
  • tagalog
  • tagbanwa
  • tags
  • tai_le
  • tai_tham
  • tai_viet
  • tai_xuan_jing_symbols
  • takri
  • tamil
  • telugu
  • thaana
  • thai
  • tibetan
  • tifinagh
  • tirhuta
  • transport_and_map_symbols
  • ugaritic
  • unified_canadian_aboriginal_syllabics
  • unified_canadian_aboriginal_syllabics_extended
  • vai
  • variation_selectors
  • variation_selectors_supplement
  • vedic_extensions
  • vertical_forms
  • warang_citi
  • yi_radicals
  • yi_syllables
  • yijing_hexagram_symbols
  • yijing_mono_di_and_trigrams
Examples
# Classes
latin()
greek_and_coptic()
cyrillic()
arabic()

# With repetition
hebrew(3, 6)
hiragana(1, Inf)
katakana(0, Inf)

# Without a class wrapper
cjk_unified_ideographs(char_class = FALSE)

# Constants
ARMENIAN
LINEAR_B_IDEOGRAMS
DUPLOYAN
OSMANYA

## Not run: 
# # All the Unicode character classes
# # Not run, since it generates lots of output
# setdiff(
#   ls("package:rebus.unicode", pattern = lower()), 
#   ls(
#     "package:rebus.unicode", 
#     pattern = START %R% case_insensitive(or("up", "ugc", "unicode")))
# )
# ## End(Not run)

# Usage
pythag <- "\u03b1^2 + \u03b2^2 = \u03b3^2"
stringi::stri_extract_all_regex(pythag, greek_and_coptic())
Documentation reproduced from package rebus.unicode, version 0.0-2, License: Unlimited

Community examples

Looks like there are no examples yet.