trying a bunch of things to get performance to match

2025-10-01 15:38:35 +00:00 · 2025-09-06 10:42:02 -04:00
parent c67f51f3ee
commit 2af08bdbe3
10 changed files with 367 additions and 76 deletions
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -37,8 +37,8 @@
            .lazy = true,
        },
        .uucode = .{
-            .url = "https://github.com/jacobsandlund/uucode/archive/38b82297e69a3b2dc55dc8df25f3851be37f9327.tar.gz",
+            .url = "https://github.com/jacobsandlund/uucode/archive/69782fbe79e06a34ee177978d3479ed5801ce0af.tar.gz",
-            .hash = "uucode-0.0.0-ZZjBPiqdPwB-rG3ieaq3c6tMpnksWYs4_rGj2IvFGjjB",
+            .hash = "uucode-0.0.0-ZZjBPl_dPwC-BPhSJLID4Hs9O0zw-vZKGXdaOBFch8c8",
        },
        .zig_wayland = .{
            // codeberg ifreund/zig-wayland
--- a/src/benchmark/CodepointWidth.zig
+++ b/src/benchmark/CodepointWidth.zig
@@ -10,6 +10,7 @@ const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 const Benchmark = @import("Benchmark.zig");
 const options = @import("options.zig");
 const uucode = @import("uucode");
 const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
 const simd = @import("../simd/main.zig");
 const table = @import("../unicode/main.zig").table;
@@ -47,6 +48,9 @@ pub const Mode = enum {
    /// Test our lookup table implementation.
    table,
    /// Using uucode, with custom `width` extension based on `wcwidth`.
    uucode,
 };
 /// Create a new terminal stream handler for the given arguments.
@@ -71,6 +75,7 @@ pub fn benchmark(self: *CodepointWidth) Benchmark {
            .wcwidth => stepWcwidth,
            .table => stepTable,
            .simd => stepSimd,
            .uucode => stepUucode,
        },
        .setupFn = setup,
        .teardownFn = teardown,
@@ -192,6 +197,41 @@ fn stepSimd(ptr: *anyopaque) Benchmark.Error!void {
    }
 }
 /// Benchmark step for the `.uucode` mode: reads the data file in 4 KiB
 /// chunks, decodes it byte-by-byte with `UTF8Decoder`, and looks up the
 /// width of every decoded codepoint through uucode.
 ///
 /// Returns without doing anything when no data file is set, and returns
 /// `error.BenchmarkFailed` (after logging) when reading the file fails.
 fn stepUucode(ptr: *anyopaque) Benchmark.Error!void {
    const self: *CodepointWidth = @ptrCast(@alignCast(ptr));
    // No data file configured: nothing to measure.
    const f = self.data_f orelse return;
    var r = std.io.bufferedReader(f.reader());
    var d: UTF8Decoder = .{};
    var buf: [4096]u8 = undefined;
    while (true) {
        const n = r.read(&buf) catch |err| {
            log.warn("error reading data file err={}", .{err});
            return error.BenchmarkFailed;
        };
        if (n == 0) break; // EOF reached
        for (buf[0..n]) |c| {
            const cp_, const consumed = d.next(c);
            // NOTE(review): presumably `consumed` is only false for invalid
            // UTF-8, i.e. the data file is expected to be well-formed --
            // confirm against UTF8Decoder.
            assert(consumed);
            if (cp_) |cp| {
                // This is the same trick we do in terminal.zig so we
                // keep it here.
                const width = if (cp <= 0xFF)
                    1
                else
                    // The two commented-out calls are alternative uucode
                    // lookups left in place for comparison; only
                    // `getSpecial` is active.
                    //uucode.getX(.width, @intCast(cp));
                    //uucode.getWidth(@intCast(cp));
                    uucode.getSpecial(@intCast(cp)).width;
                // Write the width to the buffer to avoid it being compiled
                // away
                buf[0] = @intCast(width);
            }
        }
    }
 }
 test CodepointWidth {
    const testing = std.testing;
    const alloc = testing.allocator;
--- a/src/benchmark/GraphemeBreak.zig
+++ b/src/benchmark/GraphemeBreak.zig
@@ -8,6 +8,7 @@ const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 const Benchmark = @import("Benchmark.zig");
 const options = @import("options.zig");
 const uucode = @import("uucode");
 const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
 const unicode = @import("../unicode/main.zig");
@@ -38,6 +39,9 @@ pub const Mode = enum {
    /// Ghostty's table-based approach.
    table,
    /// Uucode
    uucode,
 };
 /// Create a new terminal stream handler for the given arguments.
@@ -60,6 +64,7 @@ pub fn benchmark(self: *GraphemeBreak) Benchmark {
        .stepFn = switch (self.opts.mode) {
            .noop => stepNoop,
            .table => stepTable,
            .uucode => stepUucode,
        },
        .setupFn = setup,
        .teardownFn = teardown,
@@ -134,6 +139,160 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
    }
 }
 const GraphemeBoundaryClass = uucode.TypeOfX(.grapheme_boundary_class);
 /// Decides whether there is a grapheme cluster break between two adjacent
 /// boundary classes `gb1` and `gb2`, updating `state` along the way.
 ///
 /// Returns `true` when there is a break between the two classes and `false`
 /// when they belong to the same cluster. `state` tracks in-progress
 /// regional-indicator pairs and emoji ZWJ sequences across calls.
 ///
 /// This function is handed to `uucode.grapheme.precomputeGraphemeBreak` in
 /// `isBreak`, so it may be invoked with every (gb1, gb2, state) combination,
 /// not only those that occur in real text.
 ///
 /// NOTE(review): despite the name, this returns a break decision (bool),
 /// not a boundary class.
 ///
 /// NOTE(review): if `GraphemeBoundaryClass` resolves to the enum declared in
 /// uucode_config.zig, it also has `extended_pictographic_base` and
 /// `emoji_modifier` members. No rule below mentions either of them, so they
 /// fall through to GB9/GB999 -- confirm this is intended for the benchmark.
 pub fn computeGraphemeBoundaryClass(
    gb1: GraphemeBoundaryClass,
    gb2: GraphemeBoundaryClass,
    state: *uucode.grapheme.BreakState,
 ) bool {
    // Set state back to default when `gb1` or `gb2` is not expected in sequence.
    switch (state.*) {
        .regional_indicator => {
            if (gb1 != .regional_indicator or gb2 != .regional_indicator) {
                state.* = .default;
            }
        },
        .extended_pictographic => {
            switch (gb1) {
                .extend,
                .zwj,
                .extended_pictographic,
                => {},
                else => state.* = .default,
            }
            switch (gb2) {
                .extend,
                .zwj,
                .extended_pictographic,
                => {},
                else => state.* = .default,
            }
        },
        // The indic states are left untouched; no rule in this function
        // sets or consumes them.
        .default, .indic_conjunct_break_consonant, .indic_conjunct_break_linker => {},
    }
    // GB6: L x (L | V | LV | LVT)
    if (gb1 == .L) {
        if (gb2 == .L or
            gb2 == .V or
            gb2 == .LV or
            gb2 == .LVT) return false;
    }
    // GB7: (LV | V) x (V | T)
    if (gb1 == .LV or gb1 == .V) {
        if (gb2 == .V or gb2 == .T) return false;
    }
    // GB8: (LVT | T) x T
    if (gb1 == .LVT or gb1 == .T) {
        if (gb2 == .T) return false;
    }
    // Handle GB9 (Extend | ZWJ) later, since it can also match the start of
    // GB9c (Indic) and GB11 (Emoji ZWJ)
    // GB9a: SpacingMark
    if (gb2 == .spacing_mark) return false;
    // GB9b: Prepend
    if (gb1 == .prepend) return false;
    // GB11: Emoji ZWJ sequence
    if (gb1 == .extended_pictographic) {
        // start of sequence:
        // In normal operation, we'll be in this state, but
        // precomputeGraphemeBreak iterates all states.
        // std.debug.assert(state.* == .default);
        if (gb2 == .extend or gb2 == .zwj) {
            state.* = .extended_pictographic;
            return false;
        }
        // else, not an Emoji ZWJ sequence
    } else if (state.* == .extended_pictographic) {
        // continue or end sequence:
        if (gb1 == .extend and (gb2 == .extend or gb2 == .zwj)) {
            // continue extend* ZWJ sequence
            return false;
        } else if (gb1 == .zwj and gb2 == .extended_pictographic) {
            // ZWJ -> end of sequence
            state.* = .default;
            return false;
        } else {
            // Not a valid Emoji ZWJ sequence
            state.* = .default;
        }
    }
    // GB12 and GB13: Regional Indicator
    if (gb1 == .regional_indicator and gb2 == .regional_indicator) {
        if (state.* == .default) {
            // First pair of indicators: keep them together and remember it.
            state.* = .regional_indicator;
            return false;
        } else {
            // A pair was already joined: break before starting the next one.
            state.* = .default;
            return true;
        }
    }
    // GB9: x (Extend | ZWJ)
    if (gb2 == .extend or gb2 == .zwj) return false;
    // GB999: Otherwise, break everywhere
    return true;
 }
 /// Reports whether there is a grapheme break between `cp1` and `cp2`.
 ///
 /// The decision comes from a table precomputed at comptime from
 /// `computeGraphemeBoundaryClass`; `state` is read as the current break
 /// state and overwritten with the state stored in the looked-up entry.
 pub fn isBreak(
    cp1: u21,
    cp2: u21,
    state: *uucode.grapheme.BreakState,
 ) bool {
    // Built once at compile time; lookups at runtime are plain table reads.
    const lookup = comptime uucode.grapheme.precomputeGraphemeBreak(
        GraphemeBoundaryClass,
        computeGraphemeBoundaryClass,
    );

    const entry = lookup.get(
        uucode.getX(.grapheme_boundary_class, cp1),
        uucode.getX(.grapheme_boundary_class, cp2),
        state.*,
    );

    // Carry the new state forward for the next pair of codepoints.
    state.* = entry.state;
    return entry.result;
 }
 /// Benchmark step for the `.uucode` mode: reads the data file in 4 KiB
 /// chunks, decodes it byte-by-byte with `UTF8Decoder`, and runs `isBreak`
 /// on every pair of consecutive codepoints.
 ///
 /// Returns without doing anything when no data file is set, and returns
 /// `error.BenchmarkFailed` (after logging) when reading the file fails.
 fn stepUucode(ptr: *anyopaque) Benchmark.Error!void {
    const self: *GraphemeBreak = @ptrCast(@alignCast(ptr));
    // No data file configured: nothing to measure.
    const f = self.data_f orelse return;
    var r = std.io.bufferedReader(f.reader());
    var d: UTF8Decoder = .{};
    // Break state and previous codepoint persist across reads so that
    // sequences spanning a chunk boundary are handled the same way.
    var state: uucode.grapheme.BreakState = .default;
    // Starts at 0, so the very first call compares U+0000 with the first
    // decoded codepoint.
    var cp1: u21 = 0;
    var buf: [4096]u8 = undefined;
    while (true) {
        const n = r.read(&buf) catch |err| {
            log.warn("error reading data file err={}", .{err});
            return error.BenchmarkFailed;
        };
        if (n == 0) break; // EOF reached
        for (buf[0..n]) |c| {
            const cp_, const consumed = d.next(c);
            // NOTE(review): presumably `consumed` is only false for invalid
            // UTF-8, i.e. the data file is expected to be well-formed --
            // confirm against UTF8Decoder.
            assert(consumed);
            if (cp_) |cp2| {
                const v = isBreak(cp1, @intCast(cp2), &state);
                // Store the result so the lookup is not optimized away.
                buf[0] = @intCast(@intFromBool(v));
                cp1 = cp2;
            }
        }
    }
 }
 test GraphemeBreak {
    const testing = std.testing;
    const alloc = testing.allocator;
--- a/src/build/UnicodeTables.zig
+++ b/src/build/UnicodeTables.zig
@@ -24,14 +24,16 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables
    if (b.lazyDependency("uucode", .{
        .target = b.graph.host,
        .@"tables.zig" = uucode_tables_zig,
        .build_config_path = b.path("src/build/uucode_config.zig"),
    })) |dep| {
        exe.root_module.addImport("uucode", dep.module("uucode"));
    }
    const run = b.addRunArtifact(exe);
    const output = run.addOutputFileArg("tables.zig");
    return .{
        .exe = exe,
-        .output = run.captureStdOut(),
+        .output = output,
    };
 }
--- a/src/build/uucode_config.zig
+++ b/src/build/uucode_config.zig
@@ -3,6 +3,93 @@ const config_x = @import("config.x.zig");
 const d = config.default;
 const wcwidth = config_x.wcwidth;
 pub const log_level = .debug;
 /// Extension compute callback: derives `data.width` from `data.wcwidth`
 /// by clamping it into the range 0..2 (negative values become 0, values
 /// above 2 become 2). The other parameters are unused.
 fn computeWidth(cp: u21, data: anytype, backing: anytype, tracking: anytype) void {
    _ = cp;
    _ = backing;
    _ = tracking;

    // Clamp first so the narrowing cast is always in range.
    data.width = @intCast(@min(2, @max(0, data.wcwidth)));
 }
 const width = config.Extension{ .inputs = &.{"wcwidth"}, .compute = &computeWidth, .fields = &.{
    .{ .name = "width", .type = u2 },
 } };
 /// Grapheme boundary class stored by the `grapheme_boundary_class`
 /// extension declared in this file. This is not an exhaustive list of
 /// break properties: values the mapping in this file does not care about
 /// (other, CR, LF, control, ...) are collapsed into `invalid`.
 pub const GraphemeBoundaryClass = enum(u4) {
    invalid,
    L,
    V,
    T,
    LV,
    LVT,
    prepend,
    extend,
    zwj,
    spacing_mark,
    regional_indicator,
    extended_pictographic,
    extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
    emoji_modifier, // \p{Emoji_Modifier}
 };
 /// Extension compute callback: fills `data.grapheme_boundary_class` from
 /// the `is_emoji_modifier`, `is_emoji_modifier_base` and `grapheme_break`
 /// inputs. The emoji modifier flags are checked first and win over the
 /// raw break property. The other parameters are unused.
 fn computeGraphemeBoundaryClass(cp: u21, data: anytype, backing: anytype, tracking: anytype) void {
    _ = cp;
    _ = backing;
    _ = tracking;

    if (data.is_emoji_modifier) {
        data.grapheme_boundary_class = .emoji_modifier;
        return;
    }

    if (data.is_emoji_modifier_base) {
        data.grapheme_boundary_class = .extended_pictographic_base;
        return;
    }

    data.grapheme_boundary_class = switch (data.grapheme_break) {
        // Hangul syllable types.
        .l => .L,
        .v => .V,
        .t => .T,
        .lv => .LV,
        .lvt => .LVT,

        // Classes that map one-to-one.
        .prepend => .prepend,
        .zwj => .zwj,
        .spacing_mark => .spacing_mark,
        .regional_indicator => .regional_indicator,
        .extended_pictographic => .extended_pictographic,

        // Everything that behaves like Extend.
        .zwnj,
        .indic_conjunct_break_extend,
        .indic_conjunct_break_linker,
        => .extend,

        // Not truly invalid: every codepoint has SOME grapheme boundary
        // class, but nothing outside the categories above matters here.
        .other,
        .indic_conjunct_break_consonant,
        .cr,
        .lf,
        .control,
        => .invalid,
    };
 }
 const grapheme_boundary_class = config.Extension{
    .inputs = &.{
        "grapheme_break",
        "is_emoji_modifier",
        "is_emoji_modifier_base",
    },
    .compute = &computeGraphemeBoundaryClass,
    .fields = &.{
        .{ .name = "grapheme_boundary_class", .type = GraphemeBoundaryClass },
    },
 };
 pub const tables = [_]config.Table{
    .{
        .extensions = &.{wcwidth},
@@ -14,9 +101,16 @@ pub const tables = [_]config.Table{
            d.field("case_folding_full"),
            // Alternative:
            // d.field("case_folding_simple"),
            d.field("grapheme_break"),
            d.field("is_emoji_modifier"),
            d.field("is_emoji_modifier_base"),
            d.field("grapheme_break"),
        },
    },
    .{
        .extensions = &.{ wcwidth, width, grapheme_boundary_class },
        .fields = &.{
            width.field("width"),
            grapheme_boundary_class.field("grapheme_boundary_class"),
        },
    },
 };
--- a/src/simd/codepoint_width.zig
+++ b/src/simd/codepoint_width.zig
@@ -29,7 +29,8 @@ test "codepointWidth basic" {
 //     const uucode = @import("uucode");
 //
 //     const min = 0xFF + 1; // start outside ascii
-//     for (min..uucode.code_point_range_end) |cp| {
+//     const max = std.math.maxInt(u21) + 1;
 //     for (min..max) |cp| {
 //         const simd = codepointWidth(@intCast(cp));
 //         const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
 //         if (simd != uu) mismatch: {
--- a/src/terminal/Terminal.zig
+++ b/src/terminal/Terminal.zig
@@ -345,7 +345,7 @@ pub fn print(self: *Terminal, c: u21) !void {
            if (c == 0xFE0F or c == 0xFE0E) {
                // This only applies to emoji
                const prev_props = unicode.getProperties(prev.cell.content.codepoint);
-                const emoji = prev_props.grapheme_boundary_class.isExtendedPictographic();
+                const emoji = unicode.isExtendedPictographic(prev_props.grapheme_boundary_class);
                if (!emoji) return;
                switch (c) {
--- a/src/unicode/grapheme.zig
+++ b/src/unicode/grapheme.zig
@@ -2,6 +2,7 @@ const std = @import("std");
 const props = @import("props.zig");
 const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
 const table = props.table;
 const isExtendedPictographic = props.isExtendedPictographic;
 /// Determines if there is a grapheme break between two codepoints. This
 /// must be called sequentially maintaining the state between calls.
@@ -80,7 +81,7 @@ fn graphemeBreakClass(
    state: *BreakState,
 ) bool {
    // GB11: Emoji Extend* ZWJ x Emoji
-    if (!state.extended_pictographic and gbc1.isExtendedPictographic()) {
+    if (!state.extended_pictographic and isExtendedPictographic(gbc1)) {
        state.extended_pictographic = true;
    }
@@ -131,7 +132,7 @@ fn graphemeBreakClass(
    // GB11: Emoji Extend* ZWJ x Emoji
    if (state.extended_pictographic and
        gbc1 == .zwj and
-        gbc2.isExtendedPictographic())
+        isExtendedPictographic(gbc2))
    {
        state.extended_pictographic = false;
        return false;
--- a/src/unicode/main.zig
+++ b/src/unicode/main.zig
@@ -7,6 +7,7 @@ pub const Properties = props.Properties;
 pub const getProperties = props.get;
 pub const graphemeBreak = grapheme.graphemeBreak;
 pub const GraphemeBreakState = grapheme.BreakState;
 pub const isExtendedPictographic = props.isExtendedPictographic;
 test {
    @import("std").testing.refAllDecls(@This());
--- a/src/unicode/props.zig
+++ b/src/unicode/props.zig
@@ -6,10 +6,11 @@ const lut = @import("lut.zig");
 /// The lookup tables for Ghostty.
 pub const table = table: {
    const Props = uucode.PackedTypeOf("1");
    // This is only available after running main() below as part of the Ghostty
    // build.zig, but due to Zig's lazy analysis we can still reference it here.
-    const generated = @import("unicode_tables").Tables(Properties);
+    const generated = @import("unicode_tables").Tables(Props);
-    const Tables = lut.Tables(Properties);
+    const Tables = lut.Tables(Props);
    break :table Tables{
        .stage1 = &generated.stage1,
        .stage2 = &generated.stage2,
@@ -61,81 +62,62 @@ pub const Properties = struct {
 /// Possible grapheme boundary classes. This isn't an exhaustive list:
 /// we omit control, CR, LF, etc. because in Ghostty's usage that are
 /// impossible because they're handled by the terminal.
-pub const GraphemeBoundaryClass = enum(u4) {
+pub const GraphemeBoundaryClass = uucode.TypeOfX(.grapheme_boundary_class);
    invalid,
    L,
    V,
    T,
    LV,
    LVT,
    prepend,
    extend,
    zwj,
    spacing_mark,
    regional_indicator,
    extended_pictographic,
    extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
    emoji_modifier, // \p{Emoji_Modifier}
-    /// Gets the grapheme boundary class for a codepoint.
+/// Gets the grapheme boundary class for a codepoint.
-    /// The use case for this is only in generating lookup tables.
+/// The use case for this is only in generating lookup tables.
-    pub fn init(cp: u21) GraphemeBoundaryClass {
+pub fn computeGraphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
-        if (cp < uucode.code_point_range_end) {
+    if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
-            if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
+    if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
            if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
-            return switch (uucode.get(.grapheme_break, cp)) {
+    return switch (uucode.get(.grapheme_break, cp)) {
-                .extended_pictographic => .extended_pictographic,
+        .extended_pictographic => .extended_pictographic,
-                .l => .L,
+        .l => .L,
-                .v => .V,
+        .v => .V,
-                .t => .T,
+        .t => .T,
-                .lv => .LV,
+        .lv => .LV,
-                .lvt => .LVT,
+        .lvt => .LVT,
-                .prepend => .prepend,
+        .prepend => .prepend,
-                .zwj => .zwj,
+        .zwj => .zwj,
-                .spacing_mark => .spacing_mark,
+        .spacing_mark => .spacing_mark,
-                .regional_indicator => .regional_indicator,
+        .regional_indicator => .regional_indicator,
-                .zwnj,
+        .zwnj,
-                .indic_conjunct_break_extend,
+        .indic_conjunct_break_extend,
-                .indic_conjunct_break_linker,
+        .indic_conjunct_break_linker,
-                => .extend,
+        => .extend,
-                // This is obviously not INVALID invalid, there is SOME grapheme
+        // This is obviously not INVALID invalid, there is SOME grapheme
-                // boundary class for every codepoint. But we don't care about
+        // boundary class for every codepoint. But we don't care about
-                // anything that doesn't fit into the above categories.
+        // anything that doesn't fit into the above categories.
-                .other,
+        .other,
-                .indic_conjunct_break_consonant,
+        .indic_conjunct_break_consonant,
-                .cr,
+        .cr,
-                .lf,
+        .lf,
-                .control,
+        .control,
-                => .invalid,
+        => .invalid,
-            };
+    };
-        } else {
+}
            return .invalid;
        }
    }
-    /// Returns true if this is an extended pictographic type. This
+/// Returns true if this is an extended pictographic type. This
-    /// should be used instead of comparing the enum value directly
+/// should be used instead of comparing the enum value directly
-    /// because we classify multiple.
+/// because we classify multiple.
-    pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
+pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
-        return switch (self) {
+    return switch (self) {
-            .extended_pictographic,
+        .extended_pictographic,
-            .extended_pictographic_base,
+        .extended_pictographic_base,
-            => true,
+        => true,
-            else => false,
+        else => false,
-        };
+    };
-    }
+}
 };
 pub fn get(cp: u21) Properties {
-    const wcwidth = if (cp < uucode.code_point_range_end) uucode.get(.wcwidth, cp) else 0;
+    const wcwidth = uucode.get(.wcwidth, cp);
    return .{
        .width = @intCast(@min(2, @max(0, wcwidth))),
-        .grapheme_boundary_class = .init(cp),
+        .grapheme_boundary_class = computeGraphemeBoundaryClass(cp),
    };
 }
@@ -145,6 +127,13 @@ pub fn main() !void {
    defer arena_state.deinit();
    const alloc = arena_state.allocator();
    var args_iter = try std.process.argsWithAllocator(alloc);
    defer args_iter.deinit();
    _ = args_iter.skip(); // Skip program name
    const output_path = args_iter.next() orelse std.debug.panic("No output file arg!", .{});
    std.debug.print("Unicode tables output_path = {s}\n", .{output_path});
    const gen: lut.Generator(
        Properties,
        struct {
@@ -164,7 +153,10 @@ pub fn main() !void {
    defer alloc.free(t.stage1);
    defer alloc.free(t.stage2);
    defer alloc.free(t.stage3);
-    try t.writeZig(std.io.getStdOut().writer());
+    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    const writer = out_file.writer();
    try t.writeZig(writer);
    // Uncomment when manually debugging to see our table sizes.
    // std.log.warn("stage1={} stage2={} stage3={}", .{
@@ -180,7 +172,8 @@ pub fn main() !void {
 //    const testing = std.testing;
 //
 //    const min = 0xFF + 1; // start outside ascii
-//    for (min..uucode.code_point_range_end) |cp| {
+//    const max = std.math.maxInt(u21) + 1;
 //    for (min..max) |cp| {
 //        const t = table.get(@intCast(cp));
 //        const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
 //        if (t.width != uu) {