From e84d8535f5504d7a47bcba8792f019f6f421336f Mon Sep 17 00:00:00 2001 From: Jacob Sandlund Date: Sun, 17 Aug 2025 21:24:27 -0400 Subject: [PATCH] removing all ziglyph imports (aside from unicode/grapheme.zig) --- build.zig.zon | 4 +- src/build/SharedDeps.zig | 6 --- src/build/UnicodeTables.zig | 9 ----- src/build/uucode_config.zig | 1 + src/simd/codepoint_width.zig | 16 ++++---- src/terminal/Terminal.zig | 4 +- src/unicode/grapheme.zig | 6 +++ src/unicode/props.zig | 77 ++++++++++++++++++------------------ 8 files changed, 57 insertions(+), 66 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index f2f8362c3..3d4e59ef3 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -42,8 +42,8 @@ .lazy = true, }, .uucode = .{ - .url = "https://github.com/jacobsandlund/uucode/archive/a50e106b57f406ada41d380ec59b6b33cdb77667.tar.gz", - .hash = "uucode-0.0.0-ZZjBPoF_PADS8lyIfgw-C-j5lM-CznP5808p9OMSxytN", + .url = "https://github.com/jacobsandlund/uucode/archive/658743f845f25f8f8d30f535329829660c808eaf.tar.gz", + .hash = "uucode-0.0.0-ZZjBPjWBPACBbQFG11xoSRCP8NztUnPCieiCtBx0t57i", }, .uucode_x = .{ .url = "https://github.com/jacobsandlund/uucode.x/archive/ca9a9a4560307a30319d206b1ac68a7fc2f2fce9.tar.gz", diff --git a/src/build/SharedDeps.zig b/src/build/SharedDeps.zig index 66563d173..0c2cc96d0 100644 --- a/src/build/SharedDeps.zig +++ b/src/build/SharedDeps.zig @@ -421,12 +421,6 @@ pub fn add( })) |dep| { step.root_module.addImport("z2d", dep.module("z2d")); } - if (b.lazyDependency("ziglyph", .{ - .target = target, - .optimize = optimize, - })) |dep| { - step.root_module.addImport("ziglyph", dep.module("ziglyph")); - } if (b.lazyDependency("uucode", .{ .target = target, .optimize = optimize, diff --git a/src/build/UnicodeTables.zig b/src/build/UnicodeTables.zig index bb625c3b8..78bcef2c9 100644 --- a/src/build/UnicodeTables.zig +++ b/src/build/UnicodeTables.zig @@ -21,15 +21,6 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables }), }); - if (b.lazyDependency("ziglyph", .{ - .target = b.graph.host, - })) |ziglyph_dep| { - exe.root_module.addImport( - "ziglyph", - ziglyph_dep.module("ziglyph"), - ); - } - if (b.lazyDependency("uucode", .{ .target = b.graph.host, .@"tables.zig" = uucode_tables_zig, diff --git a/src/build/uucode_config.zig b/src/build/uucode_config.zig index bc6ff7eb7..de1b71717 100644 --- a/src/build/uucode_config.zig +++ b/src/build/uucode_config.zig @@ -14,6 +14,7 @@ pub const tables = [_]config.Table{ d.field("case_folding_full"), // Alternative: // d.field("case_folding_simple"), + d.field("grapheme_break"), }, }, }; diff --git a/src/simd/codepoint_width.zig b/src/simd/codepoint_width.zig index aab4bdd95..e2383aff1 100644 --- a/src/simd/codepoint_width.zig +++ b/src/simd/codepoint_width.zig @@ -4,7 +4,7 @@ const std = @import("std"); extern "c" fn ghostty_simd_codepoint_width(u32) i8; pub fn codepointWidth(cp: u32) i8 { - //return @import("ziglyph").display_width.codePointWidth(@intCast(cp), .half); + //return @import("uucode").get(.wcwidth, @intCast(cp)); return ghostty_simd_codepoint_width(cp); } @@ -19,26 +19,26 @@ test "codepointWidth basic" { try testing.expectEqual(@as(i8, 2), codepointWidth(0xF900)); // 豈 try testing.expectEqual(@as(i8, 2), codepointWidth(0x20000)); // 𠀀 try testing.expectEqual(@as(i8, 2), codepointWidth(0x30000)); // 𠀀 - // try testing.expectEqual(@as(i8, 1), @import("ziglyph").display_width.codePointWidth(0x100, .half)); + // try testing.expectEqual(@as(i8, 1), @import("uucode").get(.wcwidth, 0x100)); } // This is not very fast in debug modes, so its commented by default. // IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES. -// test "codepointWidth matches ziglyph" { +// test "codepointWidth matches uucode" { // const testing = std.testing; -// const ziglyph = @import("ziglyph"); +// const uucode = @import("uucode"); // // const min = 0xFF + 1; // start outside ascii -// for (min..std.math.maxInt(u21)) |cp| { +// for (min..uucode.code_point_range_end) |cp| { // const simd = codepointWidth(@intCast(cp)); -// const zg = ziglyph.display_width.codePointWidth(@intCast(cp), .half); -// if (simd != zg) mismatch: { +// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp)))); +// if (simd != uu) mismatch: { // if (cp == 0x2E3B) { // try testing.expectEqual(@as(i8, 2), simd); // break :mismatch; // } // -// std.log.warn("mismatch cp=U+{x} simd={} zg={}", .{ cp, simd, zg }); +// std.log.warn("mismatch cp=U+{x} simd={} uucode={}", .{ cp, simd, uu }); // try testing.expect(false); // } // } diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig index dd7207f6d..d08c31b34 100644 --- a/src/terminal/Terminal.zig +++ b/src/terminal/Terminal.zig @@ -415,8 +415,8 @@ pub fn print(self: *Terminal, c: u21) !void { // control characters because they're always filtered prior. const width: usize = if (c <= 0xFF) 1 else @intCast(unicode.table.get(c).width); - // Note: it is possible to have a width of "3" and a width of "-1" - // from ziglyph. We should look into those cases and handle them + // Note: it is possible to have a width of "3" and a width of "-1" from + // uucode.x's wcwidth. We should look into those cases and handle them // appropriately. assert(width <= 2); // log.debug("c={x} width={}", .{ c, width }); diff --git a/src/unicode/grapheme.zig b/src/unicode/grapheme.zig index 7847ef6f5..0950bedba 100644 --- a/src/unicode/grapheme.zig +++ b/src/unicode/grapheme.zig @@ -152,6 +152,12 @@ fn graphemeBreakClass( /// If you build this file as a binary, we will verify the grapheme break /// implementation. This iterates over billions of codepoints so it is /// SLOW. It's not meant to be run in CI, but it's useful for debugging. +/// TODO: this is hard to build with newer zig build, so +/// https://github.com/ghostty-org/ghostty/pull/7806 took the approach of +/// adding a `-Demit-unicode-test` option for `zig build`, but that +/// hasn't been done here yet. +/// TODO: this also still uses `ziglyph`, but could be switched to use +/// `uucode`'s grapheme break once that is implemented. pub fn main() !void { const ziglyph = @import("ziglyph"); diff --git a/src/unicode/props.zig b/src/unicode/props.zig index b7bf6e3c1..879ada7a8 100644 --- a/src/unicode/props.zig +++ b/src/unicode/props.zig @@ -1,7 +1,6 @@ const props = @This(); const std = @import("std"); const assert = std.debug.assert; -const ziglyph = @import("ziglyph"); const uucode = @import("uucode"); const lut = @import("lut.zig"); @@ -78,33 +77,33 @@ pub const GraphemeBoundaryClass = enum(u4) { extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base} emoji_modifier, // \p{Emoji_Modifier} - /// Gets the grapheme boundary class for a codepoint. This is VERY - /// SLOW. The use case for this is only in generating lookup tables. + /// Gets the grapheme boundary class for a codepoint. + /// The use case for this is only in generating lookup tables. pub fn init(cp: u21) GraphemeBoundaryClass { - // We special-case modifier bases because we should not break - // if a modifier isn't next to a base. - if (ziglyph.emoji.isEmojiModifierBase(cp)) { - assert(ziglyph.emoji.isExtendedPictographic(cp)); - return .extended_pictographic_base; + if (cp < uucode.code_point_range_end) { + return switch (uucode.get(.grapheme_break, cp)) { + .emoji_modifier_base => .extended_pictographic_base, + .emoji_modifier => .emoji_modifier, + .extended_pictographic => .extended_pictographic, + .l => .L, + .v => .V, + .t => .T, + .lv => .LV, + .lvt => .LVT, + .prepend => .prepend, + .extend => .extend, + .zwj => .zwj, + .spacing_mark => .spacing_mark, + .regional_indicator => .regional_indicator, + + // This is obviously not INVALID invalid, there is SOME grapheme + // boundary class for every codepoint. But we don't care about + // anything that doesn't fit into the above categories. + .other, .cr, .lf, .control => .invalid, + }; + } else { + return .invalid; } - - if (ziglyph.emoji.isEmojiModifier(cp)) return .emoji_modifier; - if (ziglyph.emoji.isExtendedPictographic(cp)) return .extended_pictographic; - if (ziglyph.grapheme_break.isL(cp)) return .L; - if (ziglyph.grapheme_break.isV(cp)) return .V; - if (ziglyph.grapheme_break.isT(cp)) return .T; - if (ziglyph.grapheme_break.isLv(cp)) return .LV; - if (ziglyph.grapheme_break.isLvt(cp)) return .LVT; - if (ziglyph.grapheme_break.isPrepend(cp)) return .prepend; - if (ziglyph.grapheme_break.isExtend(cp)) return .extend; - if (ziglyph.grapheme_break.isZwj(cp)) return .zwj; - if (ziglyph.grapheme_break.isSpacingmark(cp)) return .spacing_mark; - if (ziglyph.grapheme_break.isRegionalIndicator(cp)) return .regional_indicator; - - // This is obviously not INVALID invalid, there is SOME grapheme - // boundary class for every codepoint. But we don't care about - // anything that doesn't fit into the above categories. - return .invalid; } /// Returns true if this is an extended pictographic type. This @@ -122,7 +121,7 @@ pub const GraphemeBoundaryClass = enum(u4) { }; pub fn get(cp: u21) Properties { - const wcwidth = if (cp < 0x110000) uucode.get(.wcwidth, cp) else 0; + const wcwidth = if (cp < uucode.code_point_range_end) uucode.get(.wcwidth, cp) else 0; return .{ .width = @intCast(@min(2, @max(0, wcwidth))), @@ -167,16 +166,16 @@ pub fn main() !void { // This is not very fast in debug modes, so its commented by default. // IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES. -// test "tables match ziglyph" { -// const testing = std.testing; +//test "tables match uucode" { +// const testing = std.testing; // -// const min = 0xFF + 1; // start outside ascii -// for (min..std.math.maxInt(u21)) |cp| { -// const t = table.get(@intCast(cp)); -// const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half))); -// if (t.width != zg) { -// std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg }); -// try testing.expect(false); -// } -// } -// } +// const min = 0xFF + 1; // start outside ascii +// for (min..uucode.code_point_range_end) |cp| { +// const t = table.get(@intCast(cp)); +// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp)))); +// if (t.width != uu) { +// std.log.warn("mismatch cp=U+{x} t={} uucode={}", .{ cp, t, uu }); +// try testing.expect(false); +// } +// } +//}