update for new grapheme_break

This commit is contained in:
Jacob Sandlund
2025-08-21 22:29:34 -04:00
parent 90832d89b3
commit 0444c614da
3 changed files with 20 additions and 8 deletions

View File

@@ -37,12 +37,12 @@
.lazy = true,
},
.uucode = .{
.url = "https://github.com/jacobsandlund/uucode/archive/aaa2aef70dd37e7c3975bb973fcc36bf93faab9f.tar.gz",
.hash = "uucode-0.0.0-ZZjBPi6BPAC5vZ7yoeYp_5uMNSVx_JsgzY-r54DEgt3a",
.url = "https://github.com/jacobsandlund/uucode/archive/907218a2c8097688554e54bb0999e6dbd59b226e.tar.gz",
.hash = "uucode-0.0.0-ZZjBPopfPwDqH70dn65Zklni_Yo8KWdLcVMEvmPoj1vW",
},
.uucode_x = .{
.url = "https://github.com/jacobsandlund/uucode.x/archive/ca9a9a4560307a30319d206b1ac68a7fc2f2fce9.tar.gz",
.hash = "uucode_x-0.0.0-5_D0j04hAADjn00a4Jfsjqz-gO6oF8FTLWUXmmvO1_MQ",
.url = "https://github.com/jacobsandlund/uucode.x/archive/9f5cfb1b48ab923677e837e22aa33c2a4380fc47.tar.gz",
.hash = "uucode_x-0.0.0-5_D0j2YhAAC8KvciYTFrV3hKANPbXke5havA5OIEf7XT",
},
.zig_wayland = .{
// codeberg ifreund/zig-wayland

View File

@@ -15,6 +15,8 @@ pub const tables = [_]config.Table{
// Alternative:
// d.field("case_folding_simple"),
d.field("grapheme_break"),
d.field("is_emoji_modifier"),
d.field("is_emoji_modifier_base"),
},
},
};

View File

@@ -81,9 +81,10 @@ pub const GraphemeBoundaryClass = enum(u4) {
/// The use case for this is only in generating lookup tables.
pub fn init(cp: u21) GraphemeBoundaryClass {
if (cp < uucode.code_point_range_end) {
if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
return switch (uucode.get(.grapheme_break, cp)) {
.emoji_modifier_base => .extended_pictographic_base,
.emoji_modifier => .emoji_modifier,
.extended_pictographic => .extended_pictographic,
.l => .L,
.v => .V,
@@ -91,15 +92,24 @@ pub const GraphemeBoundaryClass = enum(u4) {
.lv => .LV,
.lvt => .LVT,
.prepend => .prepend,
.extend => .extend,
.zwj => .zwj,
.spacing_mark => .spacing_mark,
.regional_indicator => .regional_indicator,
.zwnj,
.indic_conjunct_break_extend,
.indic_conjunct_break_linker,
=> .extend,
// `.invalid` here does not mean the codepoint is literally invalid: every
// codepoint has SOME grapheme boundary class. We just don't care about
// anything that doesn't fit into the above categories.
.other, .cr, .lf, .control => .invalid,
.other,
.indic_conjunct_break_consonant,
.cr,
.lf,
.control,
=> .invalid,
};
} else {
return .invalid;