From c3994347c079151fa22ce28e7e60bdfd9dcd1f44 Mon Sep 17 00:00:00 2001 From: Jacob Sandlund Date: Sat, 6 Sep 2025 14:55:21 -0400 Subject: [PATCH] doNotOptimizeAway --- build.zig.zon | 4 ++-- src/benchmark/CodepointWidth.zig | 28 ++++++---------------------- src/benchmark/GraphemeBreak.zig | 25 +++++++++++++++---------- src/benchmark/IsSymbol.zig | 10 +++++----- src/build/UnicodeTables.zig | 5 ++--- 5 files changed, 30 insertions(+), 42 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index ba128f853..4b2ef813a 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -37,8 +37,8 @@ .lazy = true, }, .uucode = .{ - .url = "https://github.com/jacobsandlund/uucode/archive/69782fbe79e06a34ee177978d3479ed5801ce0af.tar.gz", - .hash = "uucode-0.0.0-ZZjBPl_dPwC-BPhSJLID4Hs9O0zw-vZKGXdaOBFch8c8", + .url = "https://github.com/jacobsandlund/uucode/archive/8a4e07adbcb70bd45fbb70520dbbca6df44ec083.tar.gz", + .hash = "uucode-0.0.0-ZZjBPuTdPwBOU3VAvAT6XMbmj1QL1IA7OtMraVMB5j_0", }, .zig_wayland = .{ // codeberg ifreund/zig-wayland diff --git a/src/benchmark/CodepointWidth.zig b/src/benchmark/CodepointWidth.zig index b6c719184..d175b69e9 100644 --- a/src/benchmark/CodepointWidth.zig +++ b/src/benchmark/CodepointWidth.zig @@ -126,11 +126,7 @@ fn stepWcwidth(ptr: *anyopaque) Benchmark.Error!void { const cp_, const consumed = d.next(c); assert(consumed); if (cp_) |cp| { - const width = wcwidth(cp); - - // Write the width to the buffer to avoid it being compiled - // away - buf[0] = @intCast(width); + std.mem.doNotOptimizeAway(wcwidth(cp)); } } } @@ -156,14 +152,10 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void { if (cp_) |cp| { // This is the same trick we do in terminal.zig so we // keep it here. - const width = if (cp <= 0xFF) + std.mem.doNotOptimizeAway(if (cp <= 0xFF) 1 else - table.get(@intCast(cp)).width; - - // Write the width to the buffer to avoid it being compiled - // away - buf[0] = @intCast(width); + table.get(@intCast(cp)).width); } } } @@ -187,11 +179,7 @@ fn stepSimd(ptr: *anyopaque) Benchmark.Error!void { const cp_, const consumed = d.next(c); assert(consumed); if (cp_) |cp| { - const width = simd.codepointWidth(cp); - - // Write the width to the buffer to avoid it being compiled - // away - buf[0] = @intCast(width); + std.mem.doNotOptimizeAway(simd.codepointWidth(cp)); } } } @@ -217,16 +205,12 @@ fn stepUucode(ptr: *anyopaque) Benchmark.Error!void { if (cp_) |cp| { // This is the same trick we do in terminal.zig so we // keep it here. - const width = if (cp <= 0xFF) + std.mem.doNotOptimizeAway(if (cp <= 0xFF) 1 else //uucode.getX(.width, @intCast(cp)); //uucode.getWidth(@intCast(cp)); - uucode.getSpecial(@intCast(cp)).width; - - // Write the width to the buffer to avoid it being compiled - // away - buf[0] = @intCast(width); + uucode.getSpecial(@intCast(cp)).width); } } } diff --git a/src/benchmark/GraphemeBreak.zig b/src/benchmark/GraphemeBreak.zig index 105371ea5..9bbfc469c 100644 --- a/src/benchmark/GraphemeBreak.zig +++ b/src/benchmark/GraphemeBreak.zig @@ -21,7 +21,7 @@ data_f: ?std.fs.File = null, pub const Options = struct { /// The type of codepoint width calculation to use. - mode: Mode = .table, + mode: Mode = .noop, /// The data to read as a filepath. If this is "-" then /// we will read stdin. If this is unset, then we will @@ -40,7 +40,7 @@ pub const Mode = enum { /// Ghostty's table-based approach. table, - /// Uucode + /// uucode implementation uucode, }; @@ -131,8 +131,7 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void { const cp_, const consumed = d.next(c); assert(consumed); if (cp_) |cp2| { - const v = unicode.graphemeBreak(cp1, @intCast(cp2), &state); - buf[0] = @intCast(@intFromBool(v)); + std.mem.doNotOptimizeAway(unicode.graphemeBreak(cp1, @intCast(cp2), &state)); cp1 = cp2; } } @@ -141,10 +140,16 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void { const GraphemeBoundaryClass = uucode.TypeOfX(.grapheme_boundary_class); +const BreakState = enum(u3) { + default, + regional_indicator, + extended_pictographic, +}; + pub fn computeGraphemeBoundaryClass( gb1: GraphemeBoundaryClass, gb2: GraphemeBoundaryClass, - state: *uucode.grapheme.BreakState, + state: *BreakState, ) bool { // Set state back to default when `gb1` or `gb2` is not expected in sequence. switch (state.*) { @@ -172,7 +177,7 @@ pub fn computeGraphemeBoundaryClass( else => state.* = .default, } }, - .default, .indic_conjunct_break_consonant, .indic_conjunct_break_linker => {}, + .default => {}, } // GB6: L x (L | V | LV | VT) @@ -252,10 +257,11 @@ pub fn computeGraphemeBoundaryClass( pub fn isBreak( cp1: u21, cp2: u21, - state: *uucode.grapheme.BreakState, + state: *BreakState, ) bool { const table = comptime uucode.grapheme.precomputeGraphemeBreak( GraphemeBoundaryClass, + BreakState, computeGraphemeBoundaryClass, ); const gb1 = uucode.getX(.grapheme_boundary_class, cp1); @@ -271,7 +277,7 @@ fn stepUucode(ptr: *anyopaque) Benchmark.Error!void { const f = self.data_f orelse return; var r = std.io.bufferedReader(f.reader()); var d: UTF8Decoder = .{}; - var state: uucode.grapheme.BreakState = .default; + var state: BreakState = .default; var cp1: u21 = 0; var buf: [4096]u8 = undefined; while (true) { @@ -285,8 +291,7 @@ fn stepUucode(ptr: *anyopaque) Benchmark.Error!void { const cp_, const consumed = d.next(c); assert(consumed); if (cp_) |cp2| { - const v = isBreak(cp1, @intCast(cp2), &state); - buf[0] = @intCast(@intFromBool(v)); + std.mem.doNotOptimizeAway(isBreak(cp1, @intCast(cp2), &state)); cp1 = cp2; } } diff --git a/src/benchmark/IsSymbol.zig b/src/benchmark/IsSymbol.zig index 940207619..368a0570e 100644 --- a/src/benchmark/IsSymbol.zig +++ b/src/benchmark/IsSymbol.zig @@ -21,7 +21,7 @@ data_f: ?std.fs.File = null, pub const Options = struct { /// Which test to run. - mode: Mode = .ziglyph, + mode: Mode = .uucode, /// The data to read as a filepath. If this is "-" then /// we will read stdin. If this is unset, then we will @@ -32,8 +32,8 @@ pub const Options = struct { }; pub const Mode = enum { - /// "Naive" ziglyph implementation. - ziglyph, + /// uucode implementation + uucode, /// Ghostty's table-based approach. table, @@ -57,7 +57,7 @@ pub fn destroy(self: *IsSymbol, alloc: Allocator) void { pub fn benchmark(self: *IsSymbol) Benchmark { return .init(self, .{ .stepFn = switch (self.opts.mode) { - .ziglyph => stepZiglyph, + .uucode => stepUucode, .table => stepTable, }, .setupFn = setup, @@ -85,7 +85,7 @@ fn teardown(ptr: *anyopaque) void { } } -fn stepZiglyph(ptr: *anyopaque) Benchmark.Error!void { +fn stepUucode(ptr: *anyopaque) Benchmark.Error!void { const self: *IsSymbol = @ptrCast(@alignCast(ptr)); const f = self.data_f orelse return; diff --git a/src/build/UnicodeTables.zig b/src/build/UnicodeTables.zig index d71c5ca95..dc3fa2cb3 100644 --- a/src/build/UnicodeTables.zig +++ b/src/build/UnicodeTables.zig @@ -46,14 +46,13 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables const props_run = b.addRunArtifact(props_exe); const symbols_run = b.addRunArtifact(symbols_exe); - const props_output = props_run.addOutputFileArg("tables.zig"); - const symbols_output = symbols_run.addOutputFileArg("tables.zig"); + const props_output = props_run.addOutputFileArg("props_table.zig"); return .{ .props_exe = props_exe, .symbols_exe = symbols_exe, .props_output = props_output, - .symbols_output = symbols_output, + .symbols_output = symbols_run.captureStdOut(), }; }