trying a bunch of things to get performance to match

2025-10-01 15:38:35 +00:00 · 2025-09-06 10:42:02 -04:00
parent c67f51f3ee
commit 2af08bdbe3
10 changed files with 367 additions and 76 deletions
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -37,8 +37,8 @@
            .lazy = true,
        },
        .uucode = .{
-            .url = "https://github.com/jacobsandlund/uucode/archive/38b82297e69a3b2dc55dc8df25f3851be37f9327.tar.gz",
-            .hash = "uucode-0.0.0-ZZjBPiqdPwB-rG3ieaq3c6tMpnksWYs4_rGj2IvFGjjB",
+            .url = "https://github.com/jacobsandlund/uucode/archive/69782fbe79e06a34ee177978d3479ed5801ce0af.tar.gz",
+            .hash = "uucode-0.0.0-ZZjBPl_dPwC-BPhSJLID4Hs9O0zw-vZKGXdaOBFch8c8",
        },
        .zig_wayland = .{
            // codeberg ifreund/zig-wayland
--- a/src/benchmark/CodepointWidth.zig
+++ b/src/benchmark/CodepointWidth.zig
@@ -10,6 +10,7 @@ const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 const Benchmark = @import("Benchmark.zig");
 const options = @import("options.zig");
+const uucode = @import("uucode");
 const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
 const simd = @import("../simd/main.zig");
 const table = @import("../unicode/main.zig").table;
@@ -47,6 +48,9 @@ pub const Mode = enum {

    /// Test our lookup table implementation.
    table,
+
+    /// Using uucode, with custom `width` extension based on `wcwidth`.
+    uucode,
 };

 /// Create a new terminal stream handler for the given arguments.
@@ -71,6 +75,7 @@ pub fn benchmark(self: *CodepointWidth) Benchmark {
            .wcwidth => stepWcwidth,
            .table => stepTable,
            .simd => stepSimd,
+            .uucode => stepUucode,
        },
        .setupFn = setup,
        .teardownFn = teardown,
@@ -192,6 +197,41 @@ fn stepSimd(ptr: *anyopaque) Benchmark.Error!void {
    }
 }

+fn stepUucode(ptr: *anyopaque) Benchmark.Error!void {
+    const self: *CodepointWidth = @ptrCast(@alignCast(ptr));
+
+    const f = self.data_f orelse return;
+    var r = std.io.bufferedReader(f.reader());
+    var d: UTF8Decoder = .{};
+    var buf: [4096]u8 = undefined;
+    while (true) {
+        const n = r.read(&buf) catch |err| {
+            log.warn("error reading data file err={}", .{err});
+            return error.BenchmarkFailed;
+        };
+        if (n == 0) break; // EOF reached
+
+        for (buf[0..n]) |c| {
+            const cp_, const consumed = d.next(c);
+            assert(consumed);
+            if (cp_) |cp| {
+                // This is the same trick we do in terminal.zig so we
+                // keep it here.
+                const width = if (cp <= 0xFF)
+                    1
+                else
+                    //uucode.getX(.width, @intCast(cp));
+                    //uucode.getWidth(@intCast(cp));
+                    uucode.getSpecial(@intCast(cp)).width;
+
+                // Write the width to the buffer to avoid it being compiled
+                // away
+                buf[0] = @intCast(width);
+            }
+        }
+    }
+}
+
 test CodepointWidth {
    const testing = std.testing;
    const alloc = testing.allocator;
--- a/src/benchmark/GraphemeBreak.zig
+++ b/src/benchmark/GraphemeBreak.zig
@@ -8,6 +8,7 @@ const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 const Benchmark = @import("Benchmark.zig");
 const options = @import("options.zig");
+const uucode = @import("uucode");
 const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
 const unicode = @import("../unicode/main.zig");

@@ -38,6 +39,9 @@ pub const Mode = enum {

    /// Ghostty's table-based approach.
    table,
+
+    /// uucode's precomputed break table over the custom `grapheme_boundary_class`.
+    uucode,
 };

 /// Create a new terminal stream handler for the given arguments.
@@ -60,6 +64,7 @@ pub fn benchmark(self: *GraphemeBreak) Benchmark {
        .stepFn = switch (self.opts.mode) {
            .noop => stepNoop,
            .table => stepTable,
+            .uucode => stepUucode,
        },
        .setupFn = setup,
        .teardownFn = teardown,
@@ -134,6 +139,160 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
    }
 }

+const GraphemeBoundaryClass = uucode.TypeOfX(.grapheme_boundary_class);
+
+pub fn computeGraphemeBoundaryClass(
+    gb1: GraphemeBoundaryClass,
+    gb2: GraphemeBoundaryClass,
+    state: *uucode.grapheme.BreakState,
+) bool {
+    // Set state back to default when `gb1` or `gb2` is not expected in sequence.
+    switch (state.*) {
+        .regional_indicator => {
+            if (gb1 != .regional_indicator or gb2 != .regional_indicator) {
+                state.* = .default;
+            }
+        },
+        .extended_pictographic => {
+            switch (gb1) {
+                .extend,
+                .zwj,
+                .extended_pictographic,
+                => {},
+
+                else => state.* = .default,
+            }
+
+            switch (gb2) {
+                .extend,
+                .zwj,
+                .extended_pictographic,
+                => {},
+
+                else => state.* = .default,
+            }
+        },
+        .default, .indic_conjunct_break_consonant, .indic_conjunct_break_linker => {},
+    }
+
+    // GB6: L x (L | V | LV | LVT)
+    if (gb1 == .L) {
+        if (gb2 == .L or
+            gb2 == .V or
+            gb2 == .LV or
+            gb2 == .LVT) return false;
+    }
+
+    // GB7: (LV | V) x (V | T)
+    if (gb1 == .LV or gb1 == .V) {
+        if (gb2 == .V or gb2 == .T) return false;
+    }
+
+    // GB8: (LVT | T) x T
+    if (gb1 == .LVT or gb1 == .T) {
+        if (gb2 == .T) return false;
+    }
+
+    // Handle GB9 (Extend | ZWJ) later, since it can also match the start of
+    // GB9c (Indic) and GB11 (Emoji ZWJ)
+
+    // GB9a: x SpacingMark
+    if (gb2 == .spacing_mark) return false;
+
+    // GB9b: Prepend x
+    if (gb1 == .prepend) return false;
+
+    // GB11: Emoji ZWJ sequence
+    if (gb1 == .extended_pictographic) {
+        // start of sequence:
+
+        // In normal operation the state is `.default` here, but
+        // precomputeGraphemeBreak iterates all states, so we can't assert:
+        // std.debug.assert(state.* == .default);
+
+        if (gb2 == .extend or gb2 == .zwj) {
+            state.* = .extended_pictographic;
+            return false;
+        }
+        // else, not an Emoji ZWJ sequence
+    } else if (state.* == .extended_pictographic) {
+        // continue or end sequence:
+
+        if (gb1 == .extend and (gb2 == .extend or gb2 == .zwj)) {
+            // continue extend* ZWJ sequence
+            return false;
+        } else if (gb1 == .zwj and gb2 == .extended_pictographic) {
+            // ZWJ -> end of sequence
+            state.* = .default;
+            return false;
+        } else {
+            // Not a valid Emoji ZWJ sequence
+            state.* = .default;
+        }
+    }
+
+    // GB12 and GB13: Regional Indicator
+    if (gb1 == .regional_indicator and gb2 == .regional_indicator) {
+        if (state.* == .default) {
+            state.* = .regional_indicator;
+            return false;
+        } else {
+            state.* = .default;
+            return true;
+        }
+    }
+
+    // GB9: x (Extend | ZWJ)
+    if (gb2 == .extend or gb2 == .zwj) return false;
+
+    // GB999: Otherwise, break everywhere
+    return true;
+}
+
+pub fn isBreak(
+    cp1: u21,
+    cp2: u21,
+    state: *uucode.grapheme.BreakState,
+) bool {
+    const table = comptime uucode.grapheme.precomputeGraphemeBreak(
+        GraphemeBoundaryClass,
+        computeGraphemeBoundaryClass,
+    );
+    const gb1 = uucode.getX(.grapheme_boundary_class, cp1);
+    const gb2 = uucode.getX(.grapheme_boundary_class, cp2);
+    const result = table.get(gb1, gb2, state.*);
+    state.* = result.state;
+    return result.result;
+}
+
+fn stepUucode(ptr: *anyopaque) Benchmark.Error!void {
+    const self: *GraphemeBreak = @ptrCast(@alignCast(ptr));
+
+    const f = self.data_f orelse return;
+    var r = std.io.bufferedReader(f.reader());
+    var d: UTF8Decoder = .{};
+    var state: uucode.grapheme.BreakState = .default;
+    var cp1: u21 = 0;
+    var buf: [4096]u8 = undefined;
+    while (true) {
+        const n = r.read(&buf) catch |err| {
+            log.warn("error reading data file err={}", .{err});
+            return error.BenchmarkFailed;
+        };
+        if (n == 0) break; // EOF reached
+
+        for (buf[0..n]) |c| {
+            const cp_, const consumed = d.next(c);
+            assert(consumed);
+            if (cp_) |cp2| {
+                const v = isBreak(cp1, @intCast(cp2), &state);
+                buf[0] = @intCast(@intFromBool(v));
+                cp1 = cp2;
+            }
+        }
+    }
+}
+
 test GraphemeBreak {
    const testing = std.testing;
    const alloc = testing.allocator;
--- a/src/build/UnicodeTables.zig
+++ b/src/build/UnicodeTables.zig
@@ -24,14 +24,16 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables
    if (b.lazyDependency("uucode", .{
        .target = b.graph.host,
        .@"tables.zig" = uucode_tables_zig,
+        .build_config_path = b.path("src/build/uucode_config.zig"),
    })) |dep| {
        exe.root_module.addImport("uucode", dep.module("uucode"));
    }

    const run = b.addRunArtifact(exe);
+    const output = run.addOutputFileArg("tables.zig");
    return .{
        .exe = exe,
-        .output = run.captureStdOut(),
+        .output = output,
    };
 }

--- a/src/build/uucode_config.zig
+++ b/src/build/uucode_config.zig
@@ -3,6 +3,93 @@ const config_x = @import("config.x.zig");
 const d = config.default;
 const wcwidth = config_x.wcwidth;

+pub const log_level = .debug;
+
+fn computeWidth(cp: u21, data: anytype, backing: anytype, tracking: anytype) void {
+    _ = cp;
+    _ = backing;
+    _ = tracking;
+    if (data.wcwidth < 0) {
+        data.width = 0;
+    } else if (data.wcwidth > 2) {
+        data.width = 2;
+    } else {
+        data.width = @intCast(data.wcwidth);
+    }
+}
+
+const width = config.Extension{ .inputs = &.{"wcwidth"}, .compute = &computeWidth, .fields = &.{
+    .{ .name = "width", .type = u2 },
+} };
+
+pub const GraphemeBoundaryClass = enum(u4) {
+    invalid,
+    L,
+    V,
+    T,
+    LV,
+    LVT,
+    prepend,
+    extend,
+    zwj,
+    spacing_mark,
+    regional_indicator,
+    extended_pictographic,
+    extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
+    emoji_modifier, // \p{Emoji_Modifier}
+};
+
+fn computeGraphemeBoundaryClass(cp: u21, data: anytype, backing: anytype, tracking: anytype) void {
+    _ = cp;
+    _ = backing;
+    _ = tracking;
+    if (data.is_emoji_modifier) {
+        data.grapheme_boundary_class = .emoji_modifier;
+    } else if (data.is_emoji_modifier_base) {
+        data.grapheme_boundary_class = .extended_pictographic_base;
+    } else {
+        data.grapheme_boundary_class = switch (data.grapheme_break) {
+            .extended_pictographic => .extended_pictographic,
+            .l => .L,
+            .v => .V,
+            .t => .T,
+            .lv => .LV,
+            .lvt => .LVT,
+            .prepend => .prepend,
+            .zwj => .zwj,
+            .spacing_mark => .spacing_mark,
+            .regional_indicator => .regional_indicator,
+
+            .zwnj,
+            .indic_conjunct_break_extend,
+            .indic_conjunct_break_linker,
+            => .extend,
+
+            // This is obviously not INVALID invalid, there is SOME grapheme
+            // boundary class for every codepoint. But we don't care about
+            // anything that doesn't fit into the above categories.
+            .other,
+            .indic_conjunct_break_consonant,
+            .cr,
+            .lf,
+            .control,
+            => .invalid,
+        };
+    }
+}
+
+const grapheme_boundary_class = config.Extension{
+    .inputs = &.{
+        "grapheme_break",
+        "is_emoji_modifier",
+        "is_emoji_modifier_base",
+    },
+    .compute = &computeGraphemeBoundaryClass,
+    .fields = &.{
+        .{ .name = "grapheme_boundary_class", .type = GraphemeBoundaryClass },
+    },
+};
+
 pub const tables = [_]config.Table{
    .{
        .extensions = &.{wcwidth},
@@ -14,9 +101,16 @@ pub const tables = [_]config.Table{
            d.field("case_folding_full"),
            // Alternative:
            // d.field("case_folding_simple"),
-            d.field("grapheme_break"),
            d.field("is_emoji_modifier"),
            d.field("is_emoji_modifier_base"),
+            d.field("grapheme_break"),
+        },
+    },
+    .{
+        .extensions = &.{ wcwidth, width, grapheme_boundary_class },
+        .fields = &.{
+            width.field("width"),
+            grapheme_boundary_class.field("grapheme_boundary_class"),
        },
    },
 };
--- a/src/simd/codepoint_width.zig
+++ b/src/simd/codepoint_width.zig
@@ -29,7 +29,8 @@ test "codepointWidth basic" {
 //     const uucode = @import("uucode");
 //
 //     const min = 0xFF + 1; // start outside ascii
-//     for (min..uucode.code_point_range_end) |cp| {
+//     const max = std.math.maxInt(u21) + 1;
+//     for (min..max) |cp| {
 //         const simd = codepointWidth(@intCast(cp));
 //         const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
 //         if (simd != uu) mismatch: {
--- a/src/terminal/Terminal.zig
+++ b/src/terminal/Terminal.zig
@@ -345,7 +345,7 @@ pub fn print(self: *Terminal, c: u21) !void {
            if (c == 0xFE0F or c == 0xFE0E) {
                // This only applies to emoji
                const prev_props = unicode.getProperties(prev.cell.content.codepoint);
-                const emoji = prev_props.grapheme_boundary_class.isExtendedPictographic();
+                const emoji = unicode.isExtendedPictographic(prev_props.grapheme_boundary_class);
                if (!emoji) return;

                switch (c) {
--- a/src/unicode/grapheme.zig
+++ b/src/unicode/grapheme.zig
@@ -2,6 +2,7 @@ const std = @import("std");
 const props = @import("props.zig");
 const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
 const table = props.table;
+const isExtendedPictographic = props.isExtendedPictographic;

 /// Determines if there is a grapheme break between two codepoints. This
 /// must be called sequentially maintaining the state between calls.
@@ -80,7 +81,7 @@ fn graphemeBreakClass(
    state: *BreakState,
 ) bool {
    // GB11: Emoji Extend* ZWJ x Emoji
-    if (!state.extended_pictographic and gbc1.isExtendedPictographic()) {
+    if (!state.extended_pictographic and isExtendedPictographic(gbc1)) {
        state.extended_pictographic = true;
    }

@@ -131,7 +132,7 @@ fn graphemeBreakClass(
    // GB11: Emoji Extend* ZWJ x Emoji
    if (state.extended_pictographic and
        gbc1 == .zwj and
-        gbc2.isExtendedPictographic())
+        isExtendedPictographic(gbc2))
    {
        state.extended_pictographic = false;
        return false;
--- a/src/unicode/main.zig
+++ b/src/unicode/main.zig
@@ -7,6 +7,7 @@ pub const Properties = props.Properties;
 pub const getProperties = props.get;
 pub const graphemeBreak = grapheme.graphemeBreak;
 pub const GraphemeBreakState = grapheme.BreakState;
+pub const isExtendedPictographic = props.isExtendedPictographic;

 test {
    @import("std").testing.refAllDecls(@This());
--- a/src/unicode/props.zig
+++ b/src/unicode/props.zig
@@ -6,10 +6,11 @@ const lut = @import("lut.zig");

 /// The lookup tables for Ghostty.
 pub const table = table: {
+    const Props = uucode.PackedTypeOf("1");
    // This is only available after running main() below as part of the Ghostty
    // build.zig, but due to Zig's lazy analysis we can still reference it here.
-    const generated = @import("unicode_tables").Tables(Properties);
-    const Tables = lut.Tables(Properties);
+    const generated = @import("unicode_tables").Tables(Props);
+    const Tables = lut.Tables(Props);
    break :table Tables{
        .stage1 = &generated.stage1,
        .stage2 = &generated.stage2,
@@ -61,81 +62,62 @@ pub const Properties = struct {
 /// Possible grapheme boundary classes. This isn't an exhaustive list:
 /// we omit control, CR, LF, etc. because in Ghostty's usage that are
 /// impossible because they're handled by the terminal.
-pub const GraphemeBoundaryClass = enum(u4) {
-    invalid,
-    L,
-    V,
-    T,
-    LV,
-    LVT,
-    prepend,
-    extend,
-    zwj,
-    spacing_mark,
-    regional_indicator,
-    extended_pictographic,
-    extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
-    emoji_modifier, // \p{Emoji_Modifier}
+pub const GraphemeBoundaryClass = uucode.TypeOfX(.grapheme_boundary_class);

-    /// Gets the grapheme boundary class for a codepoint.
-    /// The use case for this is only in generating lookup tables.
-    pub fn init(cp: u21) GraphemeBoundaryClass {
-        if (cp < uucode.code_point_range_end) {
-            if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
-            if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
+/// Gets the grapheme boundary class for a codepoint.
+/// The use case for this is only in generating lookup tables.
+pub fn computeGraphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
+    if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
+    if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;

-            return switch (uucode.get(.grapheme_break, cp)) {
-                .extended_pictographic => .extended_pictographic,
-                .l => .L,
-                .v => .V,
-                .t => .T,
-                .lv => .LV,
-                .lvt => .LVT,
-                .prepend => .prepend,
-                .zwj => .zwj,
-                .spacing_mark => .spacing_mark,
-                .regional_indicator => .regional_indicator,
+    return switch (uucode.get(.grapheme_break, cp)) {
+        .extended_pictographic => .extended_pictographic,
+        .l => .L,
+        .v => .V,
+        .t => .T,
+        .lv => .LV,
+        .lvt => .LVT,
+        .prepend => .prepend,
+        .zwj => .zwj,
+        .spacing_mark => .spacing_mark,
+        .regional_indicator => .regional_indicator,

-                .zwnj,
-                .indic_conjunct_break_extend,
-                .indic_conjunct_break_linker,
-                => .extend,
+        .zwnj,
+        .indic_conjunct_break_extend,
+        .indic_conjunct_break_linker,
+        => .extend,

-                // This is obviously not INVALID invalid, there is SOME grapheme
-                // boundary class for every codepoint. But we don't care about
-                // anything that doesn't fit into the above categories.
-                .other,
-                .indic_conjunct_break_consonant,
-                .cr,
-                .lf,
-                .control,
-                => .invalid,
-            };
-        } else {
-            return .invalid;
-        }
-    }
+        // This is obviously not INVALID invalid, there is SOME grapheme
+        // boundary class for every codepoint. But we don't care about
+        // anything that doesn't fit into the above categories.
+        .other,
+        .indic_conjunct_break_consonant,
+        .cr,
+        .lf,
+        .control,
+        => .invalid,
+    };
+}

-    /// Returns true if this is an extended pictographic type. This
-    /// should be used instead of comparing the enum value directly
-    /// because we classify multiple.
-    pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
-        return switch (self) {
-            .extended_pictographic,
-            .extended_pictographic_base,
-            => true,
+/// Returns true if this is an extended pictographic type. This
+/// should be used instead of comparing the enum value directly
+/// because we classify multiple classes as extended pictographic.
+pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
+    return switch (self) {
+        .extended_pictographic,
+        .extended_pictographic_base,
+        => true,

-            else => false,
-        };
-    }
-};
+        else => false,
+    };
+}

 pub fn get(cp: u21) Properties {
-    const wcwidth = if (cp < uucode.code_point_range_end) uucode.get(.wcwidth, cp) else 0;
+    const wcwidth = uucode.get(.wcwidth, cp);

    return .{
        .width = @intCast(@min(2, @max(0, wcwidth))),
-        .grapheme_boundary_class = .init(cp),
+        .grapheme_boundary_class = computeGraphemeBoundaryClass(cp),
    };
 }

@@ -145,6 +127,13 @@ pub fn main() !void {
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

+    var args_iter = try std.process.argsWithAllocator(alloc);
+    defer args_iter.deinit();
+    _ = args_iter.skip(); // Skip program name
+
+    const output_path = args_iter.next() orelse std.debug.panic("No output file arg!", .{});
+    std.debug.print("Unicode tables output_path = {s}\n", .{output_path});
+
    const gen: lut.Generator(
        Properties,
        struct {
@@ -164,7 +153,10 @@ pub fn main() !void {
    defer alloc.free(t.stage1);
    defer alloc.free(t.stage2);
    defer alloc.free(t.stage3);
-    try t.writeZig(std.io.getStdOut().writer());
+    var out_file = try std.fs.cwd().createFile(output_path, .{});
+    defer out_file.close();
+    const writer = out_file.writer();
+    try t.writeZig(writer);

    // Uncomment when manually debugging to see our table sizes.
    // std.log.warn("stage1={} stage2={} stage3={}", .{
@@ -180,7 +172,8 @@ pub fn main() !void {
 //    const testing = std.testing;
 //
 //    const min = 0xFF + 1; // start outside ascii
-//    for (min..uucode.code_point_range_end) |cp| {
+//    const max = std.math.maxInt(u21) + 1;
+//    for (min..max) |cp| {
 //        const t = table.get(@intCast(cp));
 //        const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
 //        if (t.width != uu) {