From 0444c614da05ff6d5609bb973e62d90e9d1113d2 Mon Sep 17 00:00:00 2001 From: Jacob Sandlund Date: Thu, 21 Aug 2025 22:29:34 -0400 Subject: [PATCH] update for new grapheme_break --- build.zig.zon | 8 ++++---- src/build/uucode_config.zig | 2 ++ src/unicode/props.zig | 18 ++++++++++++++---- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index 2a44c0220..db0b63596 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -37,12 +37,12 @@ .lazy = true, }, .uucode = .{ - .url = "https://github.com/jacobsandlund/uucode/archive/aaa2aef70dd37e7c3975bb973fcc36bf93faab9f.tar.gz", - .hash = "uucode-0.0.0-ZZjBPi6BPAC5vZ7yoeYp_5uMNSVx_JsgzY-r54DEgt3a", + .url = "https://github.com/jacobsandlund/uucode/archive/907218a2c8097688554e54bb0999e6dbd59b226e.tar.gz", + .hash = "uucode-0.0.0-ZZjBPopfPwDqH70dn65Zklni_Yo8KWdLcVMEvmPoj1vW", }, .uucode_x = .{ - .url = "https://github.com/jacobsandlund/uucode.x/archive/ca9a9a4560307a30319d206b1ac68a7fc2f2fce9.tar.gz", - .hash = "uucode_x-0.0.0-5_D0j04hAADjn00a4Jfsjqz-gO6oF8FTLWUXmmvO1_MQ", + .url = "https://github.com/jacobsandlund/uucode.x/archive/9f5cfb1b48ab923677e837e22aa33c2a4380fc47.tar.gz", + .hash = "uucode_x-0.0.0-5_D0j2YhAAC8KvciYTFrV3hKANPbXke5havA5OIEf7XT", }, .zig_wayland = .{ // codeberg ifreund/zig-wayland diff --git a/src/build/uucode_config.zig b/src/build/uucode_config.zig index 42c755d8c..69d0d2fd3 100644 --- a/src/build/uucode_config.zig +++ b/src/build/uucode_config.zig @@ -15,6 +15,8 @@ pub const tables = [_]config.Table{ // Alternative: // d.field("case_folding_simple"), d.field("grapheme_break"), + d.field("is_emoji_modifier"), + d.field("is_emoji_modifier_base"), }, }, }; diff --git a/src/unicode/props.zig b/src/unicode/props.zig index 879ada7a8..c06329876 100644 --- a/src/unicode/props.zig +++ b/src/unicode/props.zig @@ -81,9 +81,10 @@ pub const GraphemeBoundaryClass = enum(u4) { /// The use case for this is only in generating lookup tables. pub fn init(cp: u21) GraphemeBoundaryClass { if (cp < uucode.code_point_range_end) { + if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier; + if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base; + return switch (uucode.get(.grapheme_break, cp)) { - .emoji_modifier_base => .extended_pictographic_base, - .emoji_modifier => .emoji_modifier, .extended_pictographic => .extended_pictographic, .l => .L, .v => .V, @@ -91,15 +92,24 @@ pub const GraphemeBoundaryClass = enum(u4) { .lv => .LV, .lvt => .LVT, .prepend => .prepend, - .extend => .extend, .zwj => .zwj, .spacing_mark => .spacing_mark, .regional_indicator => .regional_indicator, + .zwnj, + .indic_conjunct_break_extend, + .indic_conjunct_break_linker, + => .extend, + // This is obviously not INVALID invalid, there is SOME grapheme // boundary class for every codepoint. But we don't care about // anything that doesn't fit into the above categories. - .other, .cr, .lf, .control => .invalid, + .other, + .indic_conjunct_break_consonant, + .cr, + .lf, + .control, + => .invalid, }; } else { return .invalid;