Skip to content

Commit

Permalink
Simplify derivation of ambiguous-width characters
Browse files Browse the repository at this point in the history
Use the `Letter` general category instead of script and block.
This changes `ℓ` (U+2113 SCRIPT SMALL L) to narrow, matching common fonts.
  • Loading branch information
Jules-Bertholet committed Jun 17, 2024
1 parent 5a5c031 commit be1aa17
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 18 deletions.
17 changes: 2 additions & 15 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
# - NormalizationTest.txt (for tests only)
# - PropList.txt
# - ReadMe.txt
# - Scripts.txt
# - UnicodeData.txt
# - auxiliary/GraphemeBreakProperty.txt
# - emoji/emoji-data.txt
Expand Down Expand Up @@ -430,22 +429,10 @@ def load_east_asian_widths() -> list[EastAsianWidth]:
# Catch any leftover codepoints and assign them implicit Neutral/narrow width.
width_map.append(EastAsianWidth.NARROW)

# Characters from alphabetic scripts are narrow
load_property(
"Scripts.txt",
r"(?:Latin|Greek|Cyrillic)",
lambda cp: (
operator.setitem(width_map, cp, EastAsianWidth.NARROW)
if width_map[cp] == EastAsianWidth.AMBIGUOUS
and not (0x2160 <= cp <= 0x217F) # Roman numerals remain ambiguous
else None
),
)

# Ambiguous `Modifier_Letter`s and `Modifier_Symbol`s are narrow
# Ambiguous `Letter`s and `Modifier_Symbol`s are narrow
load_property(
"extracted/DerivedGeneralCategory.txt",
r"(:?Lm|Sk)",
r"(:?Lu|Ll|Lt|Lm|Lo|Sk)",
lambda cp: (
operator.setitem(width_map, cp, EastAsianWidth.NARROW)
if width_map[cp] == EastAsianWidth.AMBIGUOUS
Expand Down
3 changes: 1 addition & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,7 @@
//! - Has an [`East_Asian_Width`] of [`Ambiguous`], or
//! has a canonical decomposition to an [`Ambiguous`] character followed by [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY], or
//! is [`'\u{0387}'` GREEK ANO TELEIA](https://util.unicode.org/UnicodeJsps/character.jsp?a=0387), and
//! - Does not have a [`General_Category`] of `Modifier_Letter` or `Modifier_Symbol`, and
//! - Does not have a [`Script`] of `Latin`, `Greek`, or `Cyrillic`, or is a Roman numeral in the range `'\u{2160}'..='\u{217F}'`.
//! - Does not have a [`General_Category`] of `Letter` or `Modifier_Symbol`.
//! 7. All other characters have width 1.
//!
//! [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY]: https://util.unicode.org/UnicodeJsps/character.jsp?a=0338
Expand Down
2 changes: 1 addition & 1 deletion src/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1877,7 +1877,7 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([
],
#[cfg(feature = "cjk")]
[
0x95, 0x59, 0x59, 0x55, 0x95, 0x65, 0x55, 0x55, 0x69, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x95, 0x59, 0x59, 0x55, 0x55, 0x65, 0x55, 0x55, 0x69, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55, 0x55, 0x95, 0x56, 0x95, 0x6A, 0xAA, 0xAA, 0xAA, 0x55, 0xAA, 0xAA,
0x5A, 0x55,
],
Expand Down

0 comments on commit be1aa17

Please sign in to comment.