diff --git a/scripts/unicode.py b/scripts/unicode.py index 1df6a75..aa0d86b 100755 --- a/scripts/unicode.py +++ b/scripts/unicode.py @@ -18,7 +18,6 @@ # - NormalizationTest.txt (for tests only) # - PropList.txt # - ReadMe.txt -# - Scripts.txt # - UnicodeData.txt # - auxiliary/GraphemeBreakProperty.txt # - emoji/emoji-data.txt @@ -430,22 +429,10 @@ def load_east_asian_widths() -> list[EastAsianWidth]: # Catch any leftover codepoints and assign them implicit Neutral/narrow width. width_map.append(EastAsianWidth.NARROW) - # Characters from alphabetic scripts are narrow - load_property( - "Scripts.txt", - r"(?:Latin|Greek|Cyrillic)", - lambda cp: ( - operator.setitem(width_map, cp, EastAsianWidth.NARROW) - if width_map[cp] == EastAsianWidth.AMBIGUOUS - and not (0x2160 <= cp <= 0x217F) # Roman numerals remain ambiguous - else None - ), - ) - - # Ambiguous `Modifier_Letter`s and `Modifier_Symbol`s are narrow + # Ambiguous `Letter`s and `Modifier_Symbol`s are narrow load_property( "extracted/DerivedGeneralCategory.txt", - r"(:?Lm|Sk)", + r"(?:Lu|Ll|Lt|Lm|Lo|Sk)", lambda cp: ( operator.setitem(width_map, cp, EastAsianWidth.NARROW) if width_map[cp] == EastAsianWidth.AMBIGUOUS diff --git a/src/lib.rs b/src/lib.rs index ccb59ef..71b5d70 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -122,8 +122,7 @@ //! - Has an [`East_Asian_Width`] of [`Ambiguous`], or //! has a canonical decomposition to an [`Ambiguous`] character followed by [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY], or //! is [`'\u{0387}'` GREEK ANO TELEIA](https://util.unicode.org/UnicodeJsps/character.jsp?a=0387), and -//! - Does not have a [`General_Category`] of `Modifier_Letter` or `Modifier_Symbol`, and -//! - Does not have a [`Script`] of `Latin`, `Greek`, or `Cyrillic`, or is a Roman numeral in the range `'\u{2160}'..='\u{217F}'`. +//! - Does not have a [`General_Category`] of `Letter` or `Modifier_Symbol`. //! 7. All other characters have width 1. //! //! 
[`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY]: https://util.unicode.org/UnicodeJsps/character.jsp?a=0338 diff --git a/src/tables.rs b/src/tables.rs index c97d505..fa632d6 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -1877,7 +1877,7 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([ ], #[cfg(feature = "cjk")] [ - 0x95, 0x59, 0x59, 0x55, 0x95, 0x65, 0x55, 0x55, 0x69, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, + 0x95, 0x59, 0x59, 0x55, 0x55, 0x65, 0x55, 0x55, 0x69, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x95, 0x56, 0x95, 0x6A, 0xAA, 0xAA, 0xAA, 0x55, 0xAA, 0xAA, 0x5A, 0x55, ],