[M] Change in code/re2[abseil]: Update Unicode data to 15.0.0.

5 views
Skip to first unread message

Paul Wankadia (Gerrit)

unread,
Sep 14, 2022, 9:00:08 AM (9/14/22)
to Paul Wankadia, re2...@googlegroups.com

Paul Wankadia has uploaded this change for review.

View Change

Update Unicode data to 15.0.0.

Change-Id: I73650fd2844d4a892275b4234cfde343152e415a
---
M doc/syntax.html
M doc/syntax.txt
M re2/unicode.py
M re2/unicode_groups.cc
4 files changed, 164 insertions(+), 76 deletions(-)

diff --git a/doc/syntax.html b/doc/syntax.html
index f0e0138..eed4fd2 100644
--- a/doc/syntax.html
+++ b/doc/syntax.html
@@ -303,6 +303,7 @@
<tr><td colspan=2>Kaithi</td></tr>
<tr><td colspan=2>Kannada</td></tr>
<tr><td colspan=2>Katakana</td></tr>
+<tr><td colspan=2>Kawi</td></tr>
<tr><td colspan=2>Kayah_Li</td></tr>
<tr><td colspan=2>Kharoshthi</td></tr>
<tr><td colspan=2>Khitan_Small_Script</td></tr>
@@ -337,6 +338,7 @@
<tr><td colspan=2>Multani</td></tr>
<tr><td colspan=2>Myanmar</td></tr>
<tr><td colspan=2>Nabataean</td></tr>
+<tr><td colspan=2>Nag_Mundari</td></tr>
<tr><td colspan=2>Nandinagari</td></tr>
<tr><td colspan=2>New_Tai_Lue</td></tr>
<tr><td colspan=2>Newa</td></tr>
diff --git a/doc/syntax.txt b/doc/syntax.txt
index c12a482..5bb2067 100644
--- a/doc/syntax.txt
+++ b/doc/syntax.txt
@@ -292,6 +292,7 @@
Kaithi
Kannada
Katakana
+Kawi
Kayah_Li
Kharoshthi
Khitan_Small_Script
@@ -326,6 +327,7 @@
Multani
Myanmar
Nabataean
+Nag_Mundari
Nandinagari
New_Tai_Lue
Newa
diff --git a/re2/unicode.py b/re2/unicode.py
index e329383..1b68cbe 100644
--- a/re2/unicode.py
+++ b/re2/unicode.py
@@ -13,7 +13,7 @@
import urllib.request

# Directory or URL where Unicode tables reside.
-_UNICODE_DIR = "https://www.unicode.org/Public/14.0.0/ucd"
+_UNICODE_DIR = "https://www.unicode.org/Public/15.0.0/ucd"

# Largest valid Unicode code value.
_RUNE_MAX = 0x10FFFF
diff --git a/re2/unicode_groups.cc b/re2/unicode_groups.cc
index 2a8d7da..3b58be4 100644
--- a/re2/unicode_groups.cc
+++ b/re2/unicode_groups.cc
@@ -29,7 +29,7 @@
static const URange32 C_range32[] = {
{ 69821, 69821 },
{ 69837, 69837 },
- { 78896, 78904 },
+ { 78896, 78911 },
{ 113824, 113827 },
{ 119155, 119162 },
{ 917505, 917505 },
@@ -60,7 +60,7 @@
static const URange32 Cf_range32[] = {
{ 69821, 69821 },
{ 69837, 69837 },
- { 78896, 78904 },
+ { 78896, 78911 },
{ 113824, 113827 },
{ 119155, 119162 },
{ 917505, 917505 },
@@ -548,6 +548,7 @@
{ 70108, 70108 },
{ 70144, 70161 },
{ 70163, 70187 },
+ { 70207, 70208 },
{ 70272, 70278 },
{ 70280, 70280 },
{ 70282, 70285 },
@@ -610,11 +611,15 @@
{ 73066, 73097 },
{ 73112, 73112 },
{ 73440, 73458 },
+ { 73474, 73474 },
+ { 73476, 73488 },
+ { 73490, 73523 },
{ 73648, 73648 },
{ 73728, 74649 },
{ 74880, 75075 },
{ 77712, 77808 },
- { 77824, 78894 },
+ { 77824, 78895 },
+ { 78913, 78918 },
{ 82944, 83526 },
{ 92160, 92728 },
{ 92736, 92766 },
@@ -637,7 +642,9 @@
{ 110581, 110587 },
{ 110589, 110590 },
{ 110592, 110882 },
+ { 110898, 110898 },
{ 110928, 110930 },
+ { 110933, 110933 },
{ 110948, 110951 },
{ 110960, 111355 },
{ 113664, 113770 },
@@ -675,11 +682,14 @@
{ 120746, 120770 },
{ 120772, 120779 },
{ 122624, 122654 },
+ { 122661, 122666 },
+ { 122928, 122989 },
{ 123136, 123180 },
{ 123191, 123197 },
{ 123214, 123214 },
{ 123536, 123565 },
{ 123584, 123627 },
+ { 124112, 124139 },
{ 124896, 124902 },
{ 124904, 124907 },
{ 124909, 124910 },
@@ -721,12 +731,13 @@
{ 126629, 126633 },
{ 126635, 126651 },
{ 131072, 173791 },
- { 173824, 177976 },
+ { 173824, 177977 },
{ 177984, 178205 },
{ 178208, 183969 },
{ 183984, 191456 },
{ 194560, 195101 },
{ 196608, 201546 },
+ { 201552, 205743 },
};
static const URange16 Ll_range16[] = {
{ 97, 122 },
@@ -1387,6 +1398,7 @@
{ 120779, 120779 },
{ 122624, 122633 },
{ 122635, 122654 },
+ { 122661, 122666 },
{ 125218, 125251 },
};
static const URange16 Lm_range16[] = {
@@ -1459,7 +1471,9 @@
{ 110576, 110579 },
{ 110581, 110587 },
{ 110589, 110590 },
+ { 122928, 122989 },
{ 123191, 123197 },
+ { 124139, 124139 },
{ 125259, 125259 },
};
static const URange16 Lo_range16[] = {
@@ -1829,6 +1843,7 @@
{ 70108, 70108 },
{ 70144, 70161 },
{ 70163, 70187 },
+ { 70207, 70208 },
{ 70272, 70278 },
{ 70280, 70280 },
{ 70282, 70285 },
@@ -1890,11 +1905,15 @@
{ 73066, 73097 },
{ 73112, 73112 },
{ 73440, 73458 },
+ { 73474, 73474 },
+ { 73476, 73488 },
+ { 73490, 73523 },
{ 73648, 73648 },
{ 73728, 74649 },
{ 74880, 75075 },
{ 77712, 77808 },
- { 77824, 78894 },
+ { 77824, 78895 },
+ { 78913, 78918 },
{ 82944, 83526 },
{ 92160, 92728 },
{ 92736, 92766 },
@@ -1909,7 +1928,9 @@
{ 100352, 101589 },
{ 101632, 101640 },
{ 110592, 110882 },
+ { 110898, 110898 },
{ 110928, 110930 },
+ { 110933, 110933 },
{ 110948, 110951 },
{ 110960, 111355 },
{ 113664, 113770 },
@@ -1921,6 +1942,7 @@
{ 123214, 123214 },
{ 123536, 123565 },
{ 123584, 123627 },
+ { 124112, 124138 },
{ 124896, 124902 },
{ 124904, 124907 },
{ 124909, 124910 },
@@ -1960,12 +1982,13 @@
{ 126629, 126633 },
{ 126635, 126651 },
{ 131072, 173791 },
- { 173824, 177976 },
+ { 173824, 177977 },
{ 177984, 178205 },
{ 178208, 183969 },
{ 183984, 191456 },
{ 194560, 195101 },
{ 196608, 201546 },
+ { 201552, 205743 },
};
static const URange16 Lt_range16[] = {
{ 453, 453 },
@@ -2710,6 +2733,7 @@
{ 3274, 3277 },
{ 3285, 3286 },
{ 3298, 3299 },
+ { 3315, 3315 },
{ 3328, 3331 },
{ 3387, 3388 },
{ 3390, 3396 },
@@ -2728,7 +2752,7 @@
{ 3655, 3662 },
{ 3761, 3761 },
{ 3764, 3772 },
- { 3784, 3789 },
+ { 3784, 3790 },
{ 3864, 3865 },
{ 3893, 3893 },
{ 3895, 3895 },
@@ -2832,6 +2856,7 @@
{ 68325, 68326 },
{ 68900, 68903 },
{ 69291, 69292 },
+ { 69373, 69375 },
{ 69446, 69456 },
{ 69506, 69509 },
{ 69632, 69634 },
@@ -2851,6 +2876,7 @@
{ 70094, 70095 },
{ 70188, 70199 },
{ 70206, 70206 },
+ { 70209, 70209 },
{ 70367, 70378 },
{ 70400, 70403 },
{ 70459, 70460 },
@@ -2898,6 +2924,12 @@
{ 73104, 73105 },
{ 73107, 73111 },
{ 73459, 73462 },
+ { 73472, 73473 },
+ { 73475, 73475 },
+ { 73524, 73530 },
+ { 73534, 73538 },
+ { 78912, 78912 },
+ { 78919, 78933 },
{ 92912, 92916 },
{ 92976, 92982 },
{ 94031, 94031 },
@@ -2925,9 +2957,11 @@
{ 122907, 122913 },
{ 122915, 122916 },
{ 122918, 122922 },
+ { 123023, 123023 },
{ 123184, 123190 },
{ 123566, 123566 },
{ 123628, 123631 },
+ { 124140, 124143 },
{ 125136, 125142 },
{ 125252, 125258 },
{ 917760, 917999 },
@@ -2968,6 +3002,7 @@
{ 3271, 3272 },
{ 3274, 3275 },
{ 3285, 3286 },
+ { 3315, 3315 },
{ 3330, 3331 },
{ 3390, 3392 },
{ 3398, 3400 },
@@ -3108,6 +3143,10 @@
{ 73107, 73108 },
{ 73110, 73110 },
{ 73461, 73462 },
+ { 73475, 73475 },
+ { 73524, 73525 },
+ { 73534, 73535 },
+ { 73537, 73537 },
{ 94033, 94087 },
{ 94192, 94193 },
{ 119141, 119142 },
@@ -3213,7 +3252,7 @@
{ 3655, 3662 },
{ 3761, 3761 },
{ 3764, 3772 },
- { 3784, 3789 },
+ { 3784, 3790 },
{ 3864, 3865 },
{ 3893, 3893 },
{ 3895, 3895 },
@@ -3346,6 +3385,7 @@
{ 68325, 68326 },
{ 68900, 68903 },
{ 69291, 69292 },
+ { 69373, 69375 },
{ 69446, 69456 },
{ 69506, 69509 },
{ 69633, 69633 },
@@ -3368,6 +3408,7 @@
{ 70196, 70196 },
{ 70198, 70199 },
{ 70206, 70206 },
+ { 70209, 70209 },
{ 70367, 70367 },
{ 70371, 70378 },
{ 70400, 70401 },
@@ -3429,6 +3470,12 @@
{ 73109, 73109 },
{ 73111, 73111 },
{ 73459, 73460 },
+ { 73472, 73473 },
+ { 73526, 73530 },
+ { 73536, 73536 },
+ { 73538, 73538 },
+ { 78912, 78912 },
+ { 78919, 78933 },
{ 92912, 92916 },
{ 92976, 92982 },
{ 94031, 94031 },
@@ -3453,9 +3500,11 @@
{ 122907, 122913 },
{ 122915, 122916 },
{ 122918, 122922 },
+ { 123023, 123023 },
{ 123184, 123190 },
{ 123566, 123566 },
{ 123628, 123631 },
+ { 124140, 124143 },
{ 125136, 125142 },
{ 125252, 125258 },
{ 917760, 917999 },
@@ -3576,6 +3625,7 @@
{ 72784, 72812 },
{ 73040, 73049 },
{ 73120, 73129 },
+ { 73552, 73561 },
{ 73664, 73684 },
{ 74752, 74862 },
{ 92768, 92777 },
@@ -3583,11 +3633,13 @@
{ 93008, 93017 },
{ 93019, 93025 },
{ 93824, 93846 },
+ { 119488, 119507 },
{ 119520, 119539 },
{ 119648, 119672 },
{ 120782, 120831 },
{ 123200, 123209 },
{ 123632, 123641 },
+ { 124144, 124153 },
{ 125127, 125135 },
{ 125264, 125273 },
{ 126065, 126123 },
@@ -3655,12 +3707,14 @@
{ 72784, 72793 },
{ 73040, 73049 },
{ 73120, 73129 },
+ { 73552, 73561 },
{ 92768, 92777 },
{ 92864, 92873 },
{ 93008, 93017 },
{ 120782, 120831 },
{ 123200, 123209 },
{ 123632, 123641 },
+ { 124144, 124153 },
{ 125264, 125273 },
{ 130032, 130041 },
};
@@ -3745,6 +3799,7 @@
{ 73664, 73684 },
{ 93019, 93025 },
{ 93824, 93846 },
+ { 119488, 119507 },
{ 119520, 119539 },
{ 119648, 119672 },
{ 125127, 125135 },
@@ -3932,9 +3987,11 @@
{ 72255, 72262 },
{ 72346, 72348 },
{ 72350, 72354 },
+ { 72448, 72457 },
{ 72769, 72773 },
{ 72816, 72817 },
{ 73463, 73464 },
+ { 73539, 73551 },
{ 73727, 73727 },
{ 74864, 74868 },
{ 77809, 77810 },
@@ -4255,9 +4312,11 @@
{ 72255, 72262 },
{ 72346, 72348 },
{ 72350, 72354 },
+ { 72448, 72457 },
{ 72769, 72773 },
{ 72816, 72817 },
{ 73463, 73464 },
+ { 73539, 73551 },
{ 73727, 73727 },
{ 74864, 74868 },
{ 77809, 77810 },
@@ -4564,10 +4623,10 @@
{ 127568, 127569 },
{ 127584, 127589 },
{ 127744, 128727 },
- { 128733, 128748 },
+ { 128732, 128748 },
{ 128752, 128764 },
- { 128768, 128883 },
- { 128896, 128984 },
+ { 128768, 128886 },
+ { 128891, 128985 },
{ 128992, 129003 },
{ 129008, 129008 },
{ 129024, 129035 },
@@ -4578,15 +4637,13 @@
{ 129200, 129201 },
{ 129280, 129619 },
{ 129632, 129645 },
- { 129648, 129652 },
- { 129656, 129660 },
- { 129664, 129670 },
- { 129680, 129708 },
- { 129712, 129722 },
- { 129728, 129733 },
- { 129744, 129753 },
- { 129760, 129767 },
- { 129776, 129782 },
+ { 129648, 129660 },
+ { 129664, 129672 },
+ { 129680, 129725 },
+ { 129727, 129733 },
+ { 129742, 129755 },
+ { 129760, 129768 },
+ { 129776, 129784 },
{ 129792, 129938 },
{ 129940, 129994 },
};
@@ -4882,10 +4939,10 @@
{ 127584, 127589 },
{ 127744, 127994 },
{ 128000, 128727 },
- { 128733, 128748 },
+ { 128732, 128748 },
{ 128752, 128764 },
- { 128768, 128883 },
- { 128896, 128984 },
+ { 128768, 128886 },
+ { 128891, 128985 },
{ 128992, 129003 },
{ 129008, 129008 },
{ 129024, 129035 },
@@ -4896,15 +4953,13 @@
{ 129200, 129201 },
{ 129280, 129619 },
{ 129632, 129645 },
- { 129648, 129652 },
- { 129656, 129660 },
- { 129664, 129670 },
- { 129680, 129708 },
- { 129712, 129722 },
- { 129728, 129733 },
- { 129744, 129753 },
- { 129760, 129767 },
- { 129776, 129782 },
+ { 129648, 129660 },
+ { 129664, 129672 },
+ { 129680, 129725 },
+ { 129727, 129733 },
+ { 129742, 129755 },
+ { 129760, 129768 },
+ { 129776, 129784 },
{ 129792, 129938 },
{ 129940, 129994 },
};
@@ -4972,6 +5027,7 @@
};
static const URange32 Arabic_range32[] = {
{ 69216, 69246 },
+ { 69373, 69375 },
{ 126464, 126467 },
{ 126469, 126495 },
{ 126497, 126498 },
@@ -5218,6 +5274,7 @@
{ 119171, 119172 },
{ 119180, 119209 },
{ 119214, 119274 },
+ { 119488, 119507 },
{ 119520, 119539 },
{ 119552, 119638 },
{ 119648, 119672 },
@@ -5258,10 +5315,10 @@
{ 127568, 127569 },
{ 127584, 127589 },
{ 127744, 128727 },
- { 128733, 128748 },
+ { 128732, 128748 },
{ 128752, 128764 },
- { 128768, 128883 },
- { 128896, 128984 },
+ { 128768, 128886 },
+ { 128891, 128985 },
{ 128992, 129003 },
{ 129008, 129008 },
{ 129024, 129035 },
@@ -5272,15 +5329,13 @@
{ 129200, 129201 },
{ 129280, 129619 },
{ 129632, 129645 },
- { 129648, 129652 },
- { 129656, 129660 },
- { 129664, 129670 },
- { 129680, 129708 },
- { 129712, 129722 },
- { 129728, 129733 },
- { 129744, 129753 },
- { 129760, 129767 },
- { 129776, 129782 },
+ { 129648, 129660 },
+ { 129664, 129672 },
+ { 129680, 129725 },
+ { 129727, 129733 },
+ { 129742, 129755 },
+ { 129760, 129768 },
+ { 129776, 129784 },
{ 129792, 129938 },
{ 129940, 129994 },
{ 130032, 130041 },
@@ -5319,6 +5374,10 @@
{ 42560, 42655 },
{ 65070, 65071 },
};
+static const URange32 Cyrillic_range32[] = {
+ { 122928, 122989 },
+ { 123023, 123023 },
+};
static const URange32 Deseret_range32[] = {
{ 66560, 66639 },
};
@@ -5328,6 +5387,9 @@
{ 2406, 2431 },
{ 43232, 43263 },
};
+static const URange32 Devanagari_range32[] = {
+ { 72448, 72457 },
+};
static const URange32 Dives_Akuru_range32[] = {
{ 71936, 71942 },
{ 71945, 71945 },
@@ -5349,8 +5411,7 @@
{ 113820, 113823 },
};
static const URange32 Egyptian_Hieroglyphs_range32[] = {
- { 77824, 78894 },
- { 78896, 78904 },
+ { 77824, 78933 },
};
static const URange32 Elbasan_range32[] = {
{ 66816, 66855 },
@@ -5539,12 +5600,13 @@
{ 94178, 94179 },
{ 94192, 94193 },
{ 131072, 173791 },
- { 173824, 177976 },
+ { 173824, 177977 },
{ 177984, 178205 },
{ 178208, 183969 },
{ 183984, 191456 },
{ 194560, 195101 },
{ 196608, 201546 },
+ { 201552, 205743 },
};
static const URange16 Hangul_range16[] = {
{ 4352, 4607 },
@@ -5591,6 +5653,7 @@
};
static const URange32 Hiragana_range32[] = {
{ 110593, 110879 },
+ { 110898, 110898 },
{ 110928, 110930 },
{ 127488, 127488 },
};
@@ -5661,7 +5724,7 @@
{ 3293, 3294 },
{ 3296, 3299 },
{ 3302, 3311 },
- { 3313, 3314 },
+ { 3313, 3315 },
};
static const URange16 Katakana_range16[] = {
{ 12449, 12538 },
@@ -5678,8 +5741,14 @@
{ 110589, 110590 },
{ 110592, 110592 },
{ 110880, 110882 },
+ { 110933, 110933 },
{ 110948, 110951 },
};
+static const URange32 Kawi_range32[] = {
+ { 73472, 73488 },
+ { 73490, 73530 },
+ { 73534, 73561 },
+};
static const URange16 Kayah_Li_range16[] = {
{ 43264, 43309 },
{ 43311, 43311 },
@@ -5706,7 +5775,7 @@
};
static const URange32 Khojki_range32[] = {
{ 70144, 70161 },
- { 70163, 70206 },
+ { 70163, 70209 },
};
static const URange32 Khudawadi_range32[] = {
{ 70320, 70378 },
@@ -5721,7 +5790,7 @@
{ 3751, 3773 },
{ 3776, 3780 },
{ 3782, 3782 },
- { 3784, 3789 },
+ { 3784, 3790 },
{ 3792, 3801 },
{ 3804, 3807 },
};
@@ -5766,6 +5835,7 @@
{ 67463, 67504 },
{ 67506, 67514 },
{ 122624, 122654 },
+ { 122661, 122666 },
};
static const URange16 Lepcha_range16[] = {
{ 7168, 7223 },
@@ -5903,6 +5973,9 @@
{ 67712, 67742 },
{ 67751, 67759 },
};
+static const URange32 Nag_Mundari_range32[] = {
+ { 124112, 124153 },
+};
static const URange32 Nandinagari_range32[] = {
{ 72096, 72103 },
{ 72106, 72151 },
@@ -6229,12 +6302,12 @@
static const URange32 Zanabazar_Square_range32[] = {
{ 72192, 72263 },
};
-// 4038 16-bit ranges, 1712 32-bit ranges
+// 4040 16-bit ranges, 1775 32-bit ranges
const UGroup unicode_groups[] = {
{ "Adlam", +1, 0, 0, Adlam_range32, 3 },
{ "Ahom", +1, 0, 0, Ahom_range32, 3 },
{ "Anatolian_Hieroglyphs", +1, 0, 0, Anatolian_Hieroglyphs_range32, 1 },
- { "Arabic", +1, Arabic_range16, 22, Arabic_range32, 35 },
+ { "Arabic", +1, Arabic_range16, 22, Arabic_range32, 36 },
{ "Armenian", +1, Armenian_range16, 4, 0, 0 },
{ "Avestan", +1, 0, 0, Avestan_range32, 2 },
{ "Balinese", +1, Balinese_range16, 2, 0, 0 },
@@ -6259,19 +6332,19 @@
{ "Cherokee", +1, Cherokee_range16, 3, 0, 0 },
{ "Chorasmian", +1, 0, 0, Chorasmian_range32, 1 },
{ "Co", +1, Co_range16, 1, Co_range32, 2 },
- { "Common", +1, Common_range16, 91, Common_range32, 83 },
+ { "Common", +1, Common_range16, 91, Common_range32, 82 },
{ "Coptic", +1, Coptic_range16, 3, 0, 0 },
{ "Cs", +1, Cs_range16, 1, 0, 0 },
{ "Cuneiform", +1, 0, 0, Cuneiform_range32, 4 },
{ "Cypriot", +1, 0, 0, Cypriot_range32, 6 },
{ "Cypro_Minoan", +1, 0, 0, Cypro_Minoan_range32, 1 },
- { "Cyrillic", +1, Cyrillic_range16, 8, 0, 0 },
+ { "Cyrillic", +1, Cyrillic_range16, 8, Cyrillic_range32, 2 },
{ "Deseret", +1, 0, 0, Deseret_range32, 1 },
- { "Devanagari", +1, Devanagari_range16, 4, 0, 0 },
+ { "Devanagari", +1, Devanagari_range16, 4, Devanagari_range32, 1 },
{ "Dives_Akuru", +1, 0, 0, Dives_Akuru_range32, 8 },
{ "Dogra", +1, 0, 0, Dogra_range32, 1 },
{ "Duployan", +1, 0, 0, Duployan_range32, 5 },
- { "Egyptian_Hieroglyphs", +1, 0, 0, Egyptian_Hieroglyphs_range32, 2 },
+ { "Egyptian_Hieroglyphs", +1, 0, 0, Egyptian_Hieroglyphs_range32, 1 },
{ "Elbasan", +1, 0, 0, Elbasan_range32, 1 },
{ "Elymaic", +1, 0, 0, Elymaic_range32, 1 },
{ "Ethiopic", +1, Ethiopic_range16, 32, Ethiopic_range32, 4 },
@@ -6283,13 +6356,13 @@
{ "Gujarati", +1, Gujarati_range16, 14, 0, 0 },
{ "Gunjala_Gondi", +1, 0, 0, Gunjala_Gondi_range32, 6 },
{ "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 },
- { "Han", +1, Han_range16, 11, Han_range32, 9 },
+ { "Han", +1, Han_range16, 11, Han_range32, 10 },
{ "Hangul", +1, Hangul_range16, 14, 0, 0 },
{ "Hanifi_Rohingya", +1, 0, 0, Hanifi_Rohingya_range32, 2 },
{ "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 },
{ "Hatran", +1, 0, 0, Hatran_range32, 3 },
{ "Hebrew", +1, Hebrew_range16, 9, 0, 0 },
- { "Hiragana", +1, Hiragana_range16, 2, Hiragana_range32, 3 },
+ { "Hiragana", +1, Hiragana_range16, 2, Hiragana_range32, 4 },
{ "Imperial_Aramaic", +1, 0, 0, Imperial_Aramaic_range32, 2 },
{ "Inherited", +1, Inherited_range16, 19, Inherited_range32, 10 },
{ "Inscriptional_Pahlavi", +1, 0, 0, Inscriptional_Pahlavi_range32, 2 },
@@ -6297,29 +6370,30 @@
{ "Javanese", +1, Javanese_range16, 3, 0, 0 },
{ "Kaithi", +1, 0, 0, Kaithi_range32, 2 },
{ "Kannada", +1, Kannada_range16, 13, 0, 0 },
- { "Katakana", +1, Katakana_range16, 7, Katakana_range32, 6 },
+ { "Katakana", +1, Katakana_range16, 7, Katakana_range32, 7 },
+ { "Kawi", +1, 0, 0, Kawi_range32, 3 },
{ "Kayah_Li", +1, Kayah_Li_range16, 2, 0, 0 },
{ "Kharoshthi", +1, 0, 0, Kharoshthi_range32, 8 },
{ "Khitan_Small_Script", +1, 0, 0, Khitan_Small_Script_range32, 2 },
{ "Khmer", +1, Khmer_range16, 4, 0, 0 },
{ "Khojki", +1, 0, 0, Khojki_range32, 2 },
{ "Khudawadi", +1, 0, 0, Khudawadi_range32, 2 },
- { "L", +1, L_range16, 380, L_range32, 268 },
+ { "L", +1, L_range16, 380, L_range32, 279 },
{ "Lao", +1, Lao_range16, 11, 0, 0 },
- { "Latin", +1, Latin_range16, 34, Latin_range32, 4 },
+ { "Latin", +1, Latin_range16, 34, Latin_range32, 5 },
{ "Lepcha", +1, Lepcha_range16, 3, 0, 0 },
{ "Limbu", +1, Limbu_range16, 5, 0, 0 },
{ "Linear_A", +1, 0, 0, Linear_A_range32, 3 },
{ "Linear_B", +1, 0, 0, Linear_B_range32, 7 },
{ "Lisu", +1, Lisu_range16, 1, Lisu_range32, 1 },
- { "Ll", +1, Ll_range16, 617, Ll_range32, 40 },
- { "Lm", +1, Lm_range16, 57, Lm_range32, 12 },
- { "Lo", +1, Lo_range16, 290, Lo_range32, 211 },
+ { "Ll", +1, Ll_range16, 617, Ll_range32, 41 },
+ { "Lm", +1, Lm_range16, 57, Lm_range32, 14 },
+ { "Lo", +1, Lo_range16, 290, Lo_range32, 220 },
{ "Lt", +1, Lt_range16, 10, 0, 0 },
{ "Lu", +1, Lu_range16, 605, Lu_range32, 41 },
{ "Lycian", +1, 0, 0, Lycian_range32, 1 },
{ "Lydian", +1, 0, 0, Lydian_range32, 2 },
- { "M", +1, M_range16, 189, M_range32, 110 },
+ { "M", +1, M_range16, 190, M_range32, 120 },
{ "Mahajani", +1, 0, 0, Mahajani_range32, 1 },
{ "Makasar", +1, 0, 0, Makasar_range32, 1 },
{ "Malayalam", +1, Malayalam_range16, 7, 0, 0 },
@@ -6327,7 +6401,7 @@
{ "Manichaean", +1, 0, 0, Manichaean_range32, 2 },
{ "Marchen", +1, 0, 0, Marchen_range32, 3 },
{ "Masaram_Gondi", +1, 0, 0, Masaram_Gondi_range32, 7 },
- { "Mc", +1, Mc_range16, 111, Mc_range32, 66 },
+ { "Mc", +1, Mc_range16, 112, Mc_range32, 70 },
{ "Me", +1, Me_range16, 5, 0, 0 },
{ "Medefaidrin", +1, 0, 0, Medefaidrin_range32, 1 },
{ "Meetei_Mayek", +1, Meetei_Mayek_range16, 3, 0, 0 },
@@ -6335,21 +6409,22 @@
{ "Meroitic_Cursive", +1, 0, 0, Meroitic_Cursive_range32, 3 },
{ "Meroitic_Hieroglyphs", +1, 0, 0, Meroitic_Hieroglyphs_range32, 1 },
{ "Miao", +1, 0, 0, Miao_range32, 3 },
- { "Mn", +1, Mn_range16, 212, Mn_range32, 124 },
+ { "Mn", +1, Mn_range16, 212, Mn_range32, 134 },
{ "Modi", +1, 0, 0, Modi_range32, 2 },
{ "Mongolian", +1, Mongolian_range16, 5, Mongolian_range32, 1 },
{ "Mro", +1, 0, 0, Mro_range32, 3 },
{ "Multani", +1, 0, 0, Multani_range32, 5 },
{ "Myanmar", +1, Myanmar_range16, 3, 0, 0 },
- { "N", +1, N_range16, 67, N_range32, 67 },
+ { "N", +1, N_range16, 67, N_range32, 70 },
{ "Nabataean", +1, 0, 0, Nabataean_range32, 2 },
+ { "Nag_Mundari", +1, 0, 0, Nag_Mundari_range32, 1 },
{ "Nandinagari", +1, 0, 0, Nandinagari_range32, 3 },
- { "Nd", +1, Nd_range16, 37, Nd_range32, 25 },
+ { "Nd", +1, Nd_range16, 37, Nd_range32, 27 },
{ "New_Tai_Lue", +1, New_Tai_Lue_range16, 4, 0, 0 },
{ "Newa", +1, 0, 0, Newa_range32, 2 },
{ "Nko", +1, Nko_range16, 2, 0, 0 },
{ "Nl", +1, Nl_range16, 7, Nl_range32, 5 },
- { "No", +1, No_range16, 29, No_range32, 42 },
+ { "No", +1, No_range16, 29, No_range32, 43 },
{ "Nushu", +1, 0, 0, Nushu_range32, 2 },
{ "Nyiakeng_Puachue_Hmong", +1, 0, 0, Nyiakeng_Puachue_Hmong_range32, 4 },
{ "Ogham", +1, Ogham_range16, 1, 0, 0 },
@@ -6366,7 +6441,7 @@
{ "Oriya", +1, Oriya_range16, 14, 0, 0 },
{ "Osage", +1, 0, 0, Osage_range32, 2 },
{ "Osmanya", +1, 0, 0, Osmanya_range32, 2 },
- { "P", +1, P_range16, 133, P_range32, 56 },
+ { "P", +1, P_range16, 133, P_range32, 58 },
{ "Pahawh_Hmong", +1, 0, 0, Pahawh_Hmong_range32, 5 },
{ "Palmyrene", +1, 0, 0, Palmyrene_range32, 1 },
{ "Pau_Cin_Hau", +1, 0, 0, Pau_Cin_Hau_range32, 1 },
@@ -6377,12 +6452,12 @@
{ "Phags_Pa", +1, Phags_Pa_range16, 1, 0, 0 },
{ "Phoenician", +1, 0, 0, Phoenician_range32, 2 },
{ "Pi", +1, Pi_range16, 11, 0, 0 },
- { "Po", +1, Po_range16, 130, Po_range32, 55 },
+ { "Po", +1, Po_range16, 130, Po_range32, 57 },
{ "Ps", +1, Ps_range16, 79, 0, 0 },
{ "Psalter_Pahlavi", +1, 0, 0, Psalter_Pahlavi_range32, 3 },
{ "Rejang", +1, Rejang_range16, 2, 0, 0 },
{ "Runic", +1, Runic_range16, 2, 0, 0 },
- { "S", +1, S_range16, 151, S_range32, 83 },
+ { "S", +1, S_range16, 151, S_range32, 81 },
{ "Samaritan", +1, Samaritan_range16, 2, 0, 0 },
{ "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 },
{ "Sc", +1, Sc_range16, 18, Sc_range32, 3 },
@@ -6393,7 +6468,7 @@
{ "Sinhala", +1, Sinhala_range16, 12, Sinhala_range32, 1 },
{ "Sk", +1, Sk_range16, 30, Sk_range32, 1 },
{ "Sm", +1, Sm_range16, 53, Sm_range32, 11 },
- { "So", +1, So_range16, 114, So_range32, 72 },
+ { "So", +1, So_range16, 114, So_range32, 70 },
{ "Sogdian", +1, 0, 0, Sogdian_range32, 1 },
{ "Sora_Sompeng", +1, 0, 0, Sora_Sompeng_range32, 2 },
{ "Soyombo", +1, 0, 0, Soyombo_range32, 1 },
@@ -6429,7 +6504,7 @@
{ "Zp", +1, Zp_range16, 1, 0, 0 },
{ "Zs", +1, Zs_range16, 7, 0, 0 },
};
-const int num_unicode_groups = 197;
+const int num_unicode_groups = 199;


} // namespace re2

To view, visit change 60471. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: re2
Gerrit-Branch: abseil
Gerrit-Change-Id: I73650fd2844d4a892275b4234cfde343152e415a
Gerrit-Change-Number: 60471
Gerrit-PatchSet: 1
Gerrit-Owner: Paul Wankadia <jun...@google.com>
Gerrit-MessageType: newchange

Perry Lorier (Gerrit)

unread,
Sep 14, 2022, 10:51:21 AM (9/14/22)
to Paul Wankadia, Randall Bosetti, re2...@googlegroups.com

Attention is currently required from: Paul Wankadia.

Patch set 1:Code-Review +1

View Change

    To view, visit change 60471. To unsubscribe, or for help writing mail filters, visit settings.

    Gerrit-Project: re2
    Gerrit-Branch: abseil
    Gerrit-Change-Id: I73650fd2844d4a892275b4234cfde343152e415a
    Gerrit-Change-Number: 60471
    Gerrit-PatchSet: 1
    Gerrit-Owner: Paul Wankadia <jun...@google.com>
    Gerrit-Reviewer: Perry Lorier <per...@google.com>
    Gerrit-CC: Randall Bosetti <r...@google.com>
    Gerrit-Attention: Paul Wankadia <jun...@google.com>
    Gerrit-Comment-Date: Wed, 14 Sep 2022 14:51:16 +0000
    Gerrit-HasComments: No
    Gerrit-Has-Labels: Yes
    Gerrit-MessageType: comment

    Paul Wankadia (Gerrit)

    unread,
    Sep 14, 2022, 10:59:02 AM (9/14/22)
    to Paul Wankadia, Perry Lorier, Randall Bosetti, re2...@googlegroups.com

    Patch set 1:Code-Review +2

    View Change

      To view, visit change 60471. To unsubscribe, or for help writing mail filters, visit settings.

      Gerrit-Project: re2
      Gerrit-Branch: abseil
      Gerrit-Change-Id: I73650fd2844d4a892275b4234cfde343152e415a
      Gerrit-Change-Number: 60471
      Gerrit-PatchSet: 1
      Gerrit-Owner: Paul Wankadia <jun...@google.com>
      Gerrit-Reviewer: Paul Wankadia <jun...@google.com>
      Gerrit-Reviewer: Perry Lorier <per...@google.com>
      Gerrit-CC: Randall Bosetti <r...@google.com>
      Gerrit-Comment-Date: Wed, 14 Sep 2022 14:58:57 +0000

      Paul Wankadia (Gerrit)

      unread,
      Sep 14, 2022, 10:59:05 AM (9/14/22)
      to Paul Wankadia, Perry Lorier, Randall Bosetti, re2...@googlegroups.com

      Paul Wankadia submitted this change.

      View Change


      Approvals: Paul Wankadia: Looks good to me, approved; Perry Lorier: Looks good to me, but someone else must approve
      Update Unicode data to 15.0.0.

      Change-Id: I73650fd2844d4a892275b4234cfde343152e415a
      Reviewed-on: https://code-review.googlesource.com/c/re2/+/60471
      Reviewed-by: Paul Wankadia <jun...@google.com>
      Reviewed-by: Perry Lorier <per...@google.com>

      ---
      M doc/syntax.html
      M doc/syntax.txt
      M re2/unicode.py
      M re2/unicode_groups.cc
      4 files changed, 167 insertions(+), 76 deletions(-)

      To view, visit change 60471. To unsubscribe, or for help writing mail filters, visit settings.

      Gerrit-Project: re2
      Gerrit-Branch: abseil
      Gerrit-Change-Id: I73650fd2844d4a892275b4234cfde343152e415a
      Gerrit-Change-Number: 60471
      Gerrit-PatchSet: 2
      Gerrit-Owner: Paul Wankadia <jun...@google.com>
      Gerrit-Reviewer: Paul Wankadia <jun...@google.com>
      Gerrit-Reviewer: Perry Lorier <per...@google.com>
      Gerrit-CC: Randall Bosetti <r...@google.com>
      Gerrit-MessageType: merged
      Reply all
      Reply to author
      Forward
      0 new messages