From b0db51c45e29beb703a711922e44cfd4a621efd2 Mon Sep 17 00:00:00 2001 From: Jacob Sandlund Date: Sat, 6 Sep 2025 15:01:29 -0400 Subject: [PATCH] fast getX(.is_symbol) --- src/benchmark/IsSymbol.zig | 3 ++- src/build/uucode_config.zig | 29 +++++++++++++++++++++++++++++ src/unicode/symbols.zig | 11 ++--------- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/benchmark/IsSymbol.zig b/src/benchmark/IsSymbol.zig index 368a0570e..7ec9137d2 100644 --- a/src/benchmark/IsSymbol.zig +++ b/src/benchmark/IsSymbol.zig @@ -11,6 +11,7 @@ const Benchmark = @import("Benchmark.zig"); const options = @import("options.zig"); const UTF8Decoder = @import("../terminal/UTF8Decoder.zig"); const symbols = @import("../unicode/symbols.zig"); +const uucode = @import("uucode"); const log = std.log.scoped(.@"is-symbol-bench"); @@ -103,7 +104,7 @@ fn stepUucode(ptr: *anyopaque) Benchmark.Error!void { const cp_, const consumed = d.next(c); assert(consumed); if (cp_) |cp| { - std.mem.doNotOptimizeAway(symbols.isSymbol(cp)); + std.mem.doNotOptimizeAway(uucode.getX(.is_symbol, cp)); } } } diff --git a/src/build/uucode_config.zig b/src/build/uucode_config.zig index e2e3c9163..c349216d7 100644 --- a/src/build/uucode_config.zig +++ b/src/build/uucode_config.zig @@ -90,6 +90,29 @@ const grapheme_boundary_class = config.Extension{ }, }; +fn computeIsSymbol(cp: u21, data: anytype, backing: anytype, tracking: anytype) void { + _ = cp; + _ = backing; + _ = tracking; + const block = data.block; + data.is_symbol = data.general_category == .other_private_use or + block == .dingbats or + block == .emoticons or + block == .miscellaneous_symbols or + block == .enclosed_alphanumerics or + block == .enclosed_alphanumeric_supplement or + block == .miscellaneous_symbols_and_pictographs or + block == .transport_and_map_symbols; +} + +const is_symbol = config.Extension{ + .inputs = &.{ "block", "general_category" }, + .compute = &computeIsSymbol, + .fields = &.{ + .{ .name = "is_symbol", .type = bool }, + }, +}; + pub const tables = [_]config.Table{ .{ .extensions = &.{wcwidth}, @@ -113,4 +136,10 @@ pub const tables = [_]config.Table{ grapheme_boundary_class.field("grapheme_boundary_class"), }, }, + .{ + .extensions = &.{is_symbol}, + .fields = &.{ + is_symbol.field("is_symbol"), + }, + }, }; diff --git a/src/unicode/symbols.zig b/src/unicode/symbols.zig index 20749bf91..b03f82cf8 100644 --- a/src/unicode/symbols.zig +++ b/src/unicode/symbols.zig @@ -31,15 +31,8 @@ pub const table = table: { /// In the future it may be prudent to expand this to encompass more /// symbol-like characters, and/or exclude some PUA sections. pub fn isSymbol(cp: u21) bool { - const block = uucode.get(.block, cp); - return uucode.get(.general_category, cp) == .other_private_use or - block == .dingbats or - block == .emoticons or - block == .miscellaneous_symbols or - block == .enclosed_alphanumerics or - block == .enclosed_alphanumeric_supplement or - block == .miscellaneous_symbols_and_pictographs or - block == .transport_and_map_symbols; + // TODO: probably can remove this method and just call uucode directly + return uucode.getX(.is_symbol, cp); } /// Runnable binary to generate the lookup tables and output to stdout.