From a7da96faeea305d5ff1758a98557a42afe0fed32 Mon Sep 17 00:00:00 2001
From: "Jeffrey C. Ollie" <jeff@ocjtech.us>
Date: Thu, 4 Sep 2025 23:04:08 -0500
Subject: [PATCH] add two LUT-based implementations of isSymbol

---
 src/benchmark/IsSymbol.zig  | 172 +++++++++++++++++++++++++++++++++
 src/benchmark/cli.zig       |   2 +
 src/benchmark/main.zig      |   1 +
 src/build/Config.zig        |   7 ++
 src/build/SharedDeps.zig    |   1 +
 src/build/UnicodeTables.zig |  73 +++++++++++---
 src/renderer/cell.zig       |  12 +--
 src/unicode/lut.zig         |  26 +++++
 src/unicode/lut2.zig        | 183 ++++++++++++++++++++++++++++++++++++
 src/unicode/main.zig        |   2 +
 src/unicode/props.zig       |   2 +-
 src/unicode/symbols1.zig    |  93 ++++++++++++++++++
 src/unicode/symbols2.zig    |  85 +++++++++++++++++
 13 files changed, 634 insertions(+), 25 deletions(-)
 create mode 100644 src/benchmark/IsSymbol.zig
 create mode 100644 src/unicode/lut2.zig
 create mode 100644 src/unicode/symbols1.zig
 create mode 100644 src/unicode/symbols2.zig

diff --git a/src/benchmark/IsSymbol.zig b/src/benchmark/IsSymbol.zig
new file mode 100644
index 000000000..46ebb8c66
--- /dev/null
+++ b/src/benchmark/IsSymbol.zig
@@ -0,0 +1,172 @@
+//! This benchmark tests the throughput of the "is this codepoint a symbol"
+//! check, comparing the direct ziglyph-based predicate against the two
+//! generated lookup-table implementations.
+const IsSymbol = @This();
+
+const std = @import("std");
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const Benchmark = @import("Benchmark.zig");
+const options = @import("options.zig");
+const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
+const symbols1 = @import("../unicode/symbols1.zig");
+const symbols2 = @import("../unicode/symbols2.zig");
+
+const log = std.log.scoped(.@"is-symbol-bench");
+
+opts: Options,
+
+/// The file, opened in the setup function.
+data_f: ?std.fs.File = null,
+
+pub const Options = struct {
+    /// Which test to run.
+    mode: Mode = .ziglyph,
+
+    /// The data to read as a filepath. If this is "-" then
+    /// we will read stdin. If this is unset, then we will
+    /// do nothing (benchmark is a noop). It'd be more unixy to
+    /// use stdin by default but I find that a hanging CLI command
+    /// with no interaction is a bit annoying.
+    data: ?[]const u8 = null,
+};
+
+pub const Mode = enum {
+    /// "Naive" ziglyph implementation.
+    ziglyph,
+
+    /// Ghostty's table-based approach.
+    table1,
+    table2,
+};
+
+/// Create a new IsSymbol benchmark with the given options; free with `destroy`.
+pub fn create(
+    alloc: Allocator,
+    opts: Options,
+) !*IsSymbol {
+    const ptr = try alloc.create(IsSymbol);
+    errdefer alloc.destroy(ptr);
+    ptr.* = .{ .opts = opts };
+    return ptr;
+}
+
+pub fn destroy(self: *IsSymbol, alloc: Allocator) void {
+    alloc.destroy(self);
+}
+
+pub fn benchmark(self: *IsSymbol) Benchmark {
+    return .init(self, .{
+        .stepFn = switch (self.opts.mode) {
+            .ziglyph => stepZiglyph, // symbols1.isSymbol (direct ziglyph calls)
+            .table1 => stepTable1, // symbols1.table (lut.zig)
+            .table2 => stepTable2, // symbols2.table (lut2.zig)
+        },
+        .setupFn = setup,
+        .teardownFn = teardown,
+    });
+}
+
+fn setup(ptr: *anyopaque) Benchmark.Error!void {
+    const self: *IsSymbol = @ptrCast(@alignCast(ptr));
+
+    // Open our data file to prepare for reading. We can do more
+    // validation here eventually.
+    assert(self.data_f == null);
+    self.data_f = options.dataFile(self.opts.data) catch |err| {
+        log.warn("error opening data file err={}", .{err});
+        return error.BenchmarkFailed;
+    };
+}
+
+fn teardown(ptr: *anyopaque) void {
+    const self: *IsSymbol = @ptrCast(@alignCast(ptr));
+    if (self.data_f) |f| {
+        f.close();
+        self.data_f = null;
+    }
+}
+
+fn stepZiglyph(ptr: *anyopaque) Benchmark.Error!void {
+    const self: *IsSymbol = @ptrCast(@alignCast(ptr));
+
+    const f = self.data_f orelse return;
+    var r = std.io.bufferedReader(f.reader());
+    var d: UTF8Decoder = .{};
+    var buf: [4096]u8 = undefined;
+    while (true) {
+        const n = r.read(&buf) catch |err| {
+            log.warn("error reading data file err={}", .{err});
+            return error.BenchmarkFailed;
+        };
+        if (n == 0) break; // EOF reached
+
+        for (buf[0..n]) |c| {
+            const cp_, const consumed = d.next(c);
+            assert(consumed);
+            if (cp_) |cp| {
+                std.mem.doNotOptimizeAway(symbols1.isSymbol(cp));
+            }
+        }
+    }
+}
+
+fn stepTable1(ptr: *anyopaque) Benchmark.Error!void {
+    const self: *IsSymbol = @ptrCast(@alignCast(ptr));
+
+    const f = self.data_f orelse return;
+    var r = std.io.bufferedReader(f.reader());
+    var d: UTF8Decoder = .{};
+    var buf: [4096]u8 = undefined;
+    while (true) {
+        const n = r.read(&buf) catch |err| {
+            log.warn("error reading data file err={}", .{err});
+            return error.BenchmarkFailed;
+        };
+        if (n == 0) break; // EOF reached
+
+        for (buf[0..n]) |c| {
+            const cp_, const consumed = d.next(c);
+            assert(consumed);
+            if (cp_) |cp| {
+                std.mem.doNotOptimizeAway(symbols1.table.get(cp));
+            }
+        }
+    }
+}
+
+fn stepTable2(ptr: *anyopaque) Benchmark.Error!void {
+    const self: *IsSymbol = @ptrCast(@alignCast(ptr));
+
+    const f = self.data_f orelse return;
+    var r = std.io.bufferedReader(f.reader());
+    var d: UTF8Decoder = .{};
+    var buf: [4096]u8 = undefined;
+    while (true) {
+        const n = r.read(&buf) catch |err| {
+            log.warn("error reading data file err={}", .{err});
+            return error.BenchmarkFailed;
+        };
+        if (n == 0) break; // EOF reached
+
+        for (buf[0..n]) |c| {
+            const cp_, const consumed = d.next(c);
+            assert(consumed);
+            if (cp_) |cp| {
+                std.mem.doNotOptimizeAway(symbols2.table.get(cp));
+            }
+        }
+    }
+}
+
+test IsSymbol {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    const impl: *IsSymbol = try .create(alloc, .{});
+    defer impl.destroy(alloc);
+
+    const bench = impl.benchmark();
+    _ = try bench.run(.once);
+}
diff --git a/src/benchmark/cli.zig b/src/benchmark/cli.zig
index 97bb9c683..3b1c905eb 100644
--- a/src/benchmark/cli.zig
+++ b/src/benchmark/cli.zig
@@ -10,6 +10,7 @@ pub const Action = enum {
     @"grapheme-break",
     @"terminal-parser",
     @"terminal-stream",
+    @"is-symbol",
 
     /// Returns the struct associated with the action. The struct
     /// should have a few decls:
@@ -25,6 +26,7 @@ pub const Action = enum {
             .@"codepoint-width" => @import("CodepointWidth.zig"),
             .@"grapheme-break" => @import("GraphemeBreak.zig"),
             .@"terminal-parser" => @import("TerminalParser.zig"),
+            .@"is-symbol" => @import("IsSymbol.zig"),
         };
     }
 };
diff --git a/src/benchmark/main.zig b/src/benchmark/main.zig
index 49bb17289..3a59125fc 100644
--- a/src/benchmark/main.zig
+++ b/src/benchmark/main.zig
@@ -5,6 +5,7 @@ pub const TerminalStream = @import("TerminalStream.zig");
 pub const CodepointWidth = @import("CodepointWidth.zig");
 pub const GraphemeBreak = @import("GraphemeBreak.zig");
 pub const TerminalParser = @import("TerminalParser.zig");
+pub const IsSymbol = @import("IsSymbol.zig");
 
 test {
     @import("std").testing.refAllDecls(@This());
diff --git a/src/build/Config.zig b/src/build/Config.zig
index fd892f16c..b11e8850d 100644
--- a/src/build/Config.zig
+++ b/src/build/Config.zig
@@ -61,6 +61,7 @@ emit_termcap: bool = false,
 emit_test_exe: bool = false,
 emit_xcframework: bool = false,
 emit_webdata: bool = false,
+emit_unicode_table_gen: bool = false,
 
 /// Environmental properties
 env: std.process.EnvMap,
@@ -299,6 +300,12 @@ pub fn init(b: *std.Build) !Config {
         "Build and install test executables with 'build'",
     ) orelse false;
 
+    config.emit_unicode_table_gen = b.option(
+        bool,
+        "emit-unicode-table-gen",
+        "Build and install executables that generate unicode tables with 'build'",
+    ) orelse false;
+
     config.emit_bench = b.option(
         bool,
         "emit-bench",
diff --git a/src/build/SharedDeps.zig b/src/build/SharedDeps.zig
index 86390a496..af826d964 100644
--- a/src/build/SharedDeps.zig
+++ b/src/build/SharedDeps.zig
@@ -31,6 +31,7 @@ pub fn init(b: *std.Build, cfg: *const Config) !SharedDeps {
         .metallib = undefined,
     };
     try result.initTarget(b, cfg.target);
+    if (cfg.emit_unicode_table_gen) result.unicode_tables.install(b);
     return result;
 }
 
diff --git a/src/build/UnicodeTables.zig b/src/build/UnicodeTables.zig
index 5bba2341b..dd9a6bdf2 100644
--- a/src/build/UnicodeTables.zig
+++ b/src/build/UnicodeTables.zig
@@ -4,14 +4,18 @@ const std = @import("std");
 const Config = @import("Config.zig");
 
 /// The exe.
-exe: *std.Build.Step.Compile,
+props_exe: *std.Build.Step.Compile,
+symbols1_exe: *std.Build.Step.Compile,
+symbols2_exe: *std.Build.Step.Compile,
 
 /// The output path for the unicode tables
-output: std.Build.LazyPath,
+props_output: std.Build.LazyPath,
+symbols1_output: std.Build.LazyPath,
+symbols2_output: std.Build.LazyPath,
 
 pub fn init(b: *std.Build) !UnicodeTables {
-    const exe = b.addExecutable(.{
-        .name = "unigen",
+    const props_exe = b.addExecutable(.{
+        .name = "props-unigen",
         .root_module = b.createModule(.{
             .root_source_file = b.path("src/unicode/props.zig"),
             .target = b.graph.host,
@@ -21,31 +25,72 @@ pub fn init(b: *std.Build) !UnicodeTables {
         }),
     });
 
+    const symbols1_exe = b.addExecutable(.{
+        .name = "symbols1-unigen",
+        .root_module = b.createModule(.{
+            .root_source_file = b.path("src/unicode/symbols1.zig"),
+            .target = b.graph.host,
+            .strip = false,
+            .omit_frame_pointer = false,
+            .unwind_tables = .sync,
+        }),
+    });
+
+    const symbols2_exe = b.addExecutable(.{
+        .name = "symbols2-unigen",
+        .root_module = b.createModule(.{
+            .root_source_file = b.path("src/unicode/symbols2.zig"),
+            .target = b.graph.host,
+            .strip = false,
+            .omit_frame_pointer = false,
+            .unwind_tables = .sync,
+        }),
+    });
+
     if (b.lazyDependency("ziglyph", .{
         .target = b.graph.host,
     })) |ziglyph_dep| {
-        exe.root_module.addImport(
-            "ziglyph",
-            ziglyph_dep.module("ziglyph"),
-        );
+        inline for (&.{ props_exe, symbols1_exe, symbols2_exe }) |exe| {
+            exe.root_module.addImport(
+                "ziglyph",
+                ziglyph_dep.module("ziglyph"),
+            );
+        }
     }
 
-    const run = b.addRunArtifact(exe);
+    const props_run = b.addRunArtifact(props_exe);
+    const symbols1_run = b.addRunArtifact(symbols1_exe);
+    const symbols2_run = b.addRunArtifact(symbols2_exe);
+
     return .{
-        .exe = exe,
-        .output = run.captureStdOut(),
+        .props_exe = props_exe,
+        .symbols1_exe = symbols1_exe,
+        .symbols2_exe = symbols2_exe,
+        .props_output = props_run.captureStdOut(),
+        .symbols1_output = symbols1_run.captureStdOut(),
+        .symbols2_output = symbols2_run.captureStdOut(),
     };
 }
 
 /// Add the "unicode_tables" import.
 pub fn addImport(self: *const UnicodeTables, step: *std.Build.Step.Compile) void {
-    self.output.addStepDependencies(&step.step);
+    self.props_output.addStepDependencies(&step.step);
     step.root_module.addAnonymousImport("unicode_tables", .{
-        .root_source_file = self.output,
+        .root_source_file = self.props_output,
+    });
+    self.symbols1_output.addStepDependencies(&step.step);
+    step.root_module.addAnonymousImport("symbols1_tables", .{
+        .root_source_file = self.symbols1_output,
+    });
+    self.symbols2_output.addStepDependencies(&step.step);
+    step.root_module.addAnonymousImport("symbols2_tables", .{
+        .root_source_file = self.symbols2_output,
     });
 }
 
 /// Install the exe
 pub fn install(self: *const UnicodeTables, b: *std.Build) void {
-    b.installArtifact(self.exe);
+    b.installArtifact(self.props_exe);
+    b.installArtifact(self.symbols1_exe);
+    b.installArtifact(self.symbols2_exe);
 }
diff --git a/src/renderer/cell.zig b/src/renderer/cell.zig
index ec13b8953..a75fddf52 100644
--- a/src/renderer/cell.zig
+++ b/src/renderer/cell.zig
@@ -1,12 +1,12 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
-const ziglyph = @import("ziglyph");
 const font = @import("../font/main.zig");
 const terminal = @import("../terminal/main.zig");
 const renderer = @import("../renderer.zig");
 const shaderpkg = renderer.Renderer.API.shaders;
 const ArrayListCollection = @import("../datastruct/array_list_collection.zig").ArrayListCollection;
+const symbols = @import("../unicode/symbols1.zig").table;
 
 /// The possible cell content keys that exist.
 pub const Key = enum {
@@ -249,15 +249,7 @@ pub fn isCovering(cp: u21) bool {
 /// In the future it may be prudent to expand this to encompass more
 /// symbol-like characters, and/or exclude some PUA sections.
 pub fn isSymbol(cp: u21) bool {
-    // TODO: This should probably become a codegen'd LUT
-    return ziglyph.general_category.isPrivateUse(cp) or
-        ziglyph.blocks.isDingbats(cp) or
-        ziglyph.blocks.isEmoticons(cp) or
-        ziglyph.blocks.isMiscellaneousSymbols(cp) or
-        ziglyph.blocks.isEnclosedAlphanumerics(cp) or
-        ziglyph.blocks.isEnclosedAlphanumericSupplement(cp) or
-        ziglyph.blocks.isMiscellaneousSymbolsAndPictographs(cp) or
-        ziglyph.blocks.isTransportAndMapSymbols(cp);
+    return symbols.get(cp);
 }
 
 /// Returns the appropriate `constraint_width` for
diff --git a/src/unicode/lut.zig b/src/unicode/lut.zig
index 95c6a3688..e709bf1fe 100644
--- a/src/unicode/lut.zig
+++ b/src/unicode/lut.zig
@@ -142,6 +142,32 @@ pub fn Tables(comptime Elem: type) type {
             return self.stage3[self.stage2[self.stage1[high] + low]];
         }
 
+        pub inline fn getInline(self: *const Self, cp: u21) Elem {
+            const high = cp >> 8;
+            const low = cp & 0xFF;
+            return self.stage3[self.stage2[self.stage1[high] + low]];
+        }
+
+        pub fn getBool(self: *const Self, cp: u21) bool {
+            assert(Elem == bool);
+            assert(self.stage3.len == 2);
+            assert(self.stage3[0] == false);
+            assert(self.stage3[1] == true);
+            const high = cp >> 8;
+            const low = cp & 0xFF;
+            return self.stage2[self.stage1[high] + low] != 0;
+        }
+
+        pub inline fn getBoolInline(self: *const Self, cp: u21) bool {
+            assert(Elem == bool);
+            assert(self.stage3.len == 2);
+            assert(self.stage3[0] == false);
+            assert(self.stage3[1] == true);
+            const high = cp >> 8;
+            const low = cp & 0xFF;
+            return self.stage2[self.stage1[high] + low] != 0;
+        }
+
         /// Writes the lookup table as Zig to the given writer. The
         /// written file exports three constants: stage1, stage2, and
         /// stage3. These can be used to rebuild the lookup table in Zig.
diff --git a/src/unicode/lut2.zig b/src/unicode/lut2.zig
new file mode 100644
index 000000000..ef5c886a2
--- /dev/null
+++ b/src/unicode/lut2.zig
@@ -0,0 +1,183 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+
+// This whole file is based on the algorithm described here:
+// https://here-be-braces.com/fast-lookup-of-unicode-properties/
+
+const set_size = @typeInfo(usize).int.bits;
+// const Set = std.bit_set.ArrayBitSet(usize, set_size);
+const Set = std.bit_set.IntegerBitSet(set_size);
+const cp_shift = std.math.log2_int(u21, set_size);
+const cp_mask = set_size - 1;
+
+/// Creates a type that is able to generate a 2-level lookup table
+/// from a Unicode codepoint to a mapping of type bool. The lookup table
+/// generally is expected to be codegen'd and then reloaded, although it
+/// can in theory be generated at runtime.
+///
+/// Context must have one function:
+///   - `get(Context, u21) bool`: returns the mapping for a given codepoint
+///
+pub fn Generator(
+    comptime Context: type,
+) type {
+    return struct {
+        const Self = @This();
+
+        /// Mapping of a block to its index in the stage2 array.
+        const SetMap = std.HashMap(
+            Set,
+            u16,
+            struct {
+                pub fn hash(ctx: @This(), k: Set) u64 {
+                    _ = ctx;
+                    var hasher = std.hash.Wyhash.init(0);
+                    std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
+                    return hasher.final();
+                }
+
+                pub fn eql(ctx: @This(), a: Set, b: Set) bool {
+                    _ = ctx;
+                    return a.eql(b);
+                }
+            },
+            std.hash_map.default_max_load_percentage,
+        );
+
+        ctx: Context = undefined,
+
+        /// Generate the lookup tables. The arrays in the return value
+        /// are owned by the caller and must be freed.
+        pub fn generate(self: *const Self, alloc: Allocator) !Tables {
+            var min: u21 = std.math.maxInt(u21);
+            var max: u21 = std.math.minInt(u21);
+
+            // Maps block => stage2 index
+            var set_map = SetMap.init(alloc);
+            defer set_map.deinit();
+
+            // Our stages
+            var stage1 = std.ArrayList(u16).init(alloc);
+            defer stage1.deinit();
+            var stage2 = std.ArrayList(Set).init(alloc);
+            defer stage2.deinit();
+
+            var set: Set = .initEmpty();
+
+            // ensure that the 1st entry is always all false
+            try stage2.append(set);
+            try set_map.putNoClobber(set, 0);
+
+            for (0..std.math.maxInt(u21) + 1) |cp_| {
+                const cp: u21 = @intCast(cp_);
+                const high = cp >> cp_shift;
+                const low = cp & cp_mask;
+
+                if (self.ctx.get(cp)) {
+                    if (cp < min) min = cp;
+                    if (cp > max) max = cp;
+                    set.set(low);
+                }
+
+                // If we still have space and we're not done with codepoints,
+                // we keep building up the block. Conversely: we finalize this
+                // block if we've filled it or are out of codepoints.
+                if (low + 1 < set_size and cp != std.math.maxInt(u21)) continue;
+
+                // Look for the stage2 index for this block. If it doesn't exist
+                // we add it to stage2 and update the mapping.
+                const gop = try set_map.getOrPut(set);
+                if (!gop.found_existing) {
+                    gop.value_ptr.* = std.math.cast(
+                        u16,
+                        stage2.items.len,
+                    ) orelse return error.Stage2TooLarge;
+                    try stage2.append(set);
+                }
+
+                // Map stage1 => stage2 and reset our block
+                try stage1.append(gop.value_ptr.*);
+                set = .initEmpty();
+                assert(stage1.items.len - 1 == high);
+            }
+
+            // All of our lengths must fit in a u16 for this to work
+            assert(stage1.items.len <= std.math.maxInt(u16));
+            assert(stage2.items.len <= std.math.maxInt(u16));
+
+            const stage1_owned = try stage1.toOwnedSlice();
+            errdefer alloc.free(stage1_owned);
+            const stage2_owned = try stage2.toOwnedSlice();
+            errdefer alloc.free(stage2_owned);
+
+            return .{
+                .min = min,
+                .max = max,
+                .stage1 = stage1_owned,
+                .stage2 = stage2_owned,
+            };
+        }
+    };
+}
+
+/// A 2-level lookup table (stage1 index + stage2 bit sets) that can be used
+/// to look up the bool mapping for a given codepoint, encode it out to Zig, etc.
+pub const Tables = struct {
+    const Self = @This();
+
+    min: u21,
+    max: u21,
+    stage1: []const u16,
+    stage2: []const Set,
+
+    /// Given a codepoint, returns the mapping for that codepoint.
+    pub fn get(self: *const Self, cp: u21) bool {
+        if (cp < self.min) return false;
+        if (cp > self.max) return false;
+        const high = cp >> cp_shift;
+        const stage2 = self.stage1[high];
+        // take advantage of the fact that the first entry is always all false
+        if (stage2 == 0) return false;
+        const low = cp & cp_mask;
+        return self.stage2[stage2].isSet(low);
+    }
+
+    /// Writes the lookup table as Zig to the given writer. The
+    /// written file exports the constants min, max, stage1, and stage2 (plus
+    /// the `Set` type). These can be used to rebuild the lookup table in Zig.
+    pub fn writeZig(self: *const Self, writer: anytype) !void {
+        try writer.print(
+            \\//! This file is auto-generated. Do not edit.
+            \\const std = @import("std");
+            \\
+            \\pub const min: u21 = {};
+            \\pub const max: u21 = {};
+            \\
+            \\pub const stage1: [{}]u16 = .{{
+        , .{ self.min, self.max, self.stage1.len });
+        for (self.stage1) |entry| try writer.print("{},", .{entry});
+
+        try writer.print(
+            \\
+            \\}};
+            \\
+            \\pub const Set = std.bit_set.IntegerBitSet({d});
+            \\pub const stage2: [{d}]Set = .{{
+            \\
+        , .{ set_size, self.stage2.len });
+        // for (self.stage2) |entry| {
+        //     try writer.print("    .{{\n", .{});
+        //     try writer.print("        .masks = [{d}]{s}{{\n", .{ entry.masks.len, @typeName(Set.MaskInt) });
+        //     for (entry.masks) |mask| {
+        //         try writer.print("            {d},\n", .{mask});
+        //     }
+        //     try writer.print("        }},\n", .{});
+        //     try writer.print("    }},\n", .{});
+        // }
+        for (self.stage2) |entry| {
+            try writer.print("    .{{ .mask = {d} }},\n", .{entry.mask});
+        }
+        try writer.writeAll("};\n");
+    }
+};
diff --git a/src/unicode/main.zig b/src/unicode/main.zig
index f5b911948..91dfd482c 100644
--- a/src/unicode/main.zig
+++ b/src/unicode/main.zig
@@ -9,5 +9,7 @@ pub const graphemeBreak = grapheme.graphemeBreak;
 pub const GraphemeBreakState = grapheme.BreakState;
 
 test {
+    _ = @import("symbols1.zig");
+    _ = @import("symbols2.zig");
     @import("std").testing.refAllDecls(@This());
 }
diff --git a/src/unicode/props.zig b/src/unicode/props.zig
index 99c57aa0a..7edb3761c 100644
--- a/src/unicode/props.zig
+++ b/src/unicode/props.zig
@@ -166,7 +166,7 @@ pub fn main() !void {
 
 // This is not very fast in debug modes, so its commented by default.
 // IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
-// test "tables match ziglyph" {
+// test "unicode props: tables match ziglyph" {
 //     const testing = std.testing;
 //
 //     const min = 0xFF + 1; // start outside ascii
diff --git a/src/unicode/symbols1.zig b/src/unicode/symbols1.zig
new file mode 100644
index 000000000..e5b8cc22a
--- /dev/null
+++ b/src/unicode/symbols1.zig
@@ -0,0 +1,93 @@
+const props = @This();
+const std = @import("std");
+const assert = std.debug.assert;
+const ziglyph = @import("ziglyph");
+const lut = @import("lut.zig");
+
+/// The lookup tables for Ghostty.
+pub const table = table: {
+    // This is only available after running main() below as part of the Ghostty
+    // build.zig, but due to Zig's lazy analysis we can still reference it here.
+    const generated = @import("symbols1_tables").Tables(bool);
+    const Tables = lut.Tables(bool);
+    break :table Tables{
+        .stage1 = &generated.stage1,
+        .stage2 = &generated.stage2,
+        .stage3 = &generated.stage3,
+    };
+};
+
+/// Returns true if the codepoint is a "symbol-like" character, which
+/// for now we define as anything in a private use area and anything
+/// in several unicode blocks:
+/// - Dingbats
+/// - Emoticons
+/// - Miscellaneous Symbols
+/// - Enclosed Alphanumerics
+/// - Enclosed Alphanumeric Supplement
+/// - Miscellaneous Symbols and Pictographs
+/// - Transport and Map Symbols
+///
+/// In the future it may be prudent to expand this to encompass more
+/// symbol-like characters, and/or exclude some PUA sections.
+pub fn isSymbol(cp: u21) bool {
+    return ziglyph.general_category.isPrivateUse(cp) or
+        ziglyph.blocks.isDingbats(cp) or
+        ziglyph.blocks.isEmoticons(cp) or
+        ziglyph.blocks.isMiscellaneousSymbols(cp) or
+        ziglyph.blocks.isEnclosedAlphanumerics(cp) or
+        ziglyph.blocks.isEnclosedAlphanumericSupplement(cp) or
+        ziglyph.blocks.isMiscellaneousSymbolsAndPictographs(cp) or
+        ziglyph.blocks.isTransportAndMapSymbols(cp);
+}
+
+/// Runnable binary to generate the lookup tables and output to stdout.
+pub fn main() !void {
+    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena_state.deinit();
+    const alloc = arena_state.allocator();
+
+    const gen: lut.Generator(
+        bool,
+        struct {
+            pub fn get(ctx: @This(), cp: u21) !bool {
+                _ = ctx;
+                return isSymbol(cp);
+            }
+
+            pub fn eql(ctx: @This(), a: bool, b: bool) bool {
+                _ = ctx;
+                return a == b;
+            }
+        },
+    ) = .{};
+
+    const t = try gen.generate(alloc);
+    defer alloc.free(t.stage1);
+    defer alloc.free(t.stage2);
+    defer alloc.free(t.stage3);
+    try t.writeZig(std.io.getStdOut().writer());
+
+    // Uncomment when manually debugging to see our table sizes.
+    // std.log.warn("stage1={} stage2={} stage3={}", .{
+    //     t.stage1.len,
+    //     t.stage2.len,
+    //     t.stage3.len,
+    // });
+}
+
+// Checks every u21 value (inclusive of maxInt), so this is slow in debug modes.
+// IMPORTANT: KEEP THIS ENABLED WHENEVER MAKING CHANGES TO THE TABLES.
+test "unicode symbols1: tables match ziglyph" {
+    const testing = std.testing;
+
+    for (0..std.math.maxInt(u21) + 1) |cp| {
+        const t = table.get(@intCast(cp));
+        const zg = isSymbol(@intCast(cp));
+
+        if (t != zg) {
+            std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg });
+            try testing.expect(false);
+        }
+    }
+}
diff --git a/src/unicode/symbols2.zig b/src/unicode/symbols2.zig
new file mode 100644
index 000000000..1d23c51be
--- /dev/null
+++ b/src/unicode/symbols2.zig
@@ -0,0 +1,85 @@
+const props = @This();
+const std = @import("std");
+const assert = std.debug.assert;
+const ziglyph = @import("ziglyph");
+const lut2 = @import("lut2.zig");
+
+/// The lookup tables for Ghostty.
+pub const table = table: {
+    // This is only available after running main() below as part of the Ghostty
+    // build.zig, but due to Zig's lazy analysis we can still reference it here.
+    const generated = @import("symbols2_tables");
+    break :table lut2.Tables{
+        .min = generated.min,
+        .max = generated.max,
+        .stage1 = &generated.stage1,
+        .stage2 = &generated.stage2,
+    };
+};
+
+/// Returns true if the codepoint is a "symbol-like" character, which
+/// for now we define as anything in a private use area and anything
+/// in several unicode blocks:
+/// - Dingbats
+/// - Emoticons
+/// - Miscellaneous Symbols
+/// - Enclosed Alphanumerics
+/// - Enclosed Alphanumeric Supplement
+/// - Miscellaneous Symbols and Pictographs
+/// - Transport and Map Symbols
+///
+/// In the future it may be prudent to expand this to encompass more
+/// symbol-like characters, and/or exclude some PUA sections.
+pub fn isSymbol(cp: u21) bool {
+    return ziglyph.general_category.isPrivateUse(cp) or
+        ziglyph.blocks.isDingbats(cp) or
+        ziglyph.blocks.isEmoticons(cp) or
+        ziglyph.blocks.isMiscellaneousSymbols(cp) or
+        ziglyph.blocks.isEnclosedAlphanumerics(cp) or
+        ziglyph.blocks.isEnclosedAlphanumericSupplement(cp) or
+        ziglyph.blocks.isMiscellaneousSymbolsAndPictographs(cp) or
+        ziglyph.blocks.isTransportAndMapSymbols(cp);
+}
+
+/// Runnable binary to generate the lookup tables and output to stdout.
+pub fn main() !void {
+    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena_state.deinit();
+    const alloc = arena_state.allocator();
+
+    const gen: lut2.Generator(
+        struct {
+            pub fn get(ctx: @This(), cp: u21) bool {
+                _ = ctx;
+                return isSymbol(cp);
+            }
+        },
+    ) = .{};
+
+    const t = try gen.generate(alloc);
+    defer alloc.free(t.stage1);
+    defer alloc.free(t.stage2);
+    try t.writeZig(std.io.getStdOut().writer());
+
+    // Uncomment when manually debugging to see our table sizes.
+    // std.log.warn("stage1={} stage2={}", .{
+    //     t.stage1.len,
+    //     t.stage2.len,
+    // });
+}
+
+// Checks every u21 value (inclusive of maxInt), so this is slow in debug modes.
+// IMPORTANT: KEEP THIS ENABLED WHENEVER MAKING CHANGES TO THE TABLES.
+test "unicode symbols2: tables match ziglyph" {
+    const testing = std.testing;
+
+    for (0..std.math.maxInt(u21) + 1) |cp| {
+        const t1 = table.get(@intCast(cp));
+        const zg = isSymbol(@intCast(cp));
+
+        if (t1 != zg) {
+            std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t1, zg });
+            try testing.expect(false);
+        }
+    }
+}