mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-09-06 19:38:23 +00:00
renderer: add LUT-based implementation of isSymbol (#8528)
The LUT-based lookup gives a ~20%-30% speedup over the "naive" isSymbol implementation. <img width="1206" height="730" alt="Screenshot From 2025-09-04 22-45-10" src="https://github.com/user-attachments/assets/09a8ef3a-8b4b-43ba-963a-849338307251" /> <img width="1206" height="730" alt="Screenshot From 2025-09-04 22-41-54" src="https://github.com/user-attachments/assets/27962a88-f99c-446d-b986-30f526239ba3" /> Fixes #8523
This commit is contained in:
145
src/benchmark/IsSymbol.zig
Normal file
145
src/benchmark/IsSymbol.zig
Normal file
@@ -0,0 +1,145 @@
|
||||
//! This benchmark tests the throughput of the `isSymbol` codepoint check.
|
||||
//! It decodes UTF-8 input and classifies every decoded codepoint using
|
||||
//! either the ziglyph-based implementation or Ghostty's lookup table.
|
||||
const IsSymbol = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const assert = std.debug.assert;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const Benchmark = @import("Benchmark.zig");
|
||||
const options = @import("options.zig");
|
||||
const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
|
||||
const symbols = @import("../unicode/symbols.zig");
|
||||
|
||||
const log = std.log.scoped(.@"is-symbol-bench");
|
||||
|
||||
opts: Options,
|
||||
|
||||
/// The file, opened in the setup function.
|
||||
data_f: ?std.fs.File = null,
|
||||
|
||||
/// User-facing configuration for the `IsSymbol` benchmark.
pub const Options = struct {
    /// Selects which `isSymbol` implementation the step function exercises.
    mode: Mode = .ziglyph,

    /// Filepath of the input data to decode. The special value "-" reads
    /// from stdin. When left unset the benchmark does nothing (noop).
    /// Defaulting to stdin would be more unixy, but a CLI command that
    /// silently hangs waiting for input is annoying, so it is opt-in.
    data: ?[]const u8 = null,
};
|
||||
|
||||
/// The implementations of the symbol check that can be benchmarked.
pub const Mode = enum {
    /// The "naive" implementation that queries ziglyph directly.
    ziglyph,

    /// Ghostty's lookup-table-based implementation.
    table,
};
|
||||
|
||||
/// Allocate a new `IsSymbol` benchmark with `alloc` and initialize it with
/// the given options. The data file is not opened here; that happens in
/// `setup`. The returned pointer must be released with `destroy` using the
/// same allocator.
///
/// Fails only if the allocation fails.
pub fn create(
    alloc: Allocator,
    opts: Options,
) !*IsSymbol {
    const ptr = try alloc.create(IsSymbol);
    // Nothing below can fail, so no errdefer cleanup is needed.
    ptr.* = .{ .opts = opts };
    return ptr;
}
|
||||
|
||||
/// Release the memory of a benchmark instance obtained from `create`.
/// `alloc` must be the allocator that was used to create it. This only
/// frees the struct; closing the data file is handled by `teardown`.
pub fn destroy(self: *IsSymbol, alloc: Allocator) void {
    alloc.destroy(self);
}
|
||||
|
||||
/// Wrap this instance in a `Benchmark`, wiring up the setup and teardown
/// callbacks and choosing the step function that matches `opts.mode`.
pub fn benchmark(self: *IsSymbol) Benchmark {
    return Benchmark.init(self, .{
        .setupFn = setup,
        .teardownFn = teardown,
        .stepFn = switch (self.opts.mode) {
            .table => stepTable,
            .ziglyph => stepZiglyph,
        },
    });
}
|
||||
|
||||
/// Benchmark setup callback: opens the data file named by `opts.data` and
/// stores it in `data_f`. Must only run while no file is open. Any failure
/// to open the file is logged and reported as `error.BenchmarkFailed`.
fn setup(ptr: *anyopaque) Benchmark.Error!void {
    const self: *IsSymbol = @ptrCast(@alignCast(ptr));

    // A previous setup must have been paired with a teardown.
    assert(self.data_f == null);

    // Only the open happens here; more validation could be added later.
    const opened = options.dataFile(self.opts.data) catch |err| {
        log.warn("error opening data file err={}", .{err});
        return error.BenchmarkFailed;
    };
    self.data_f = opened;
}
|
||||
|
||||
/// Benchmark teardown callback: closes the data file if one is open and
/// clears `data_f` so that `setup` may run again.
fn teardown(ptr: *anyopaque) void {
    const self: *IsSymbol = @ptrCast(@alignCast(ptr));

    // Nothing to do when no file was opened.
    const file = self.data_f orelse return;
    file.close();
    self.data_f = null;
}
|
||||
|
||||
/// Step function for `Mode.ziglyph`: classifies every codepoint in the data
/// file with the ziglyph-backed `symbols.isSymbol`.
fn stepZiglyph(ptr: *anyopaque) Benchmark.Error!void {
    return stepMode(ptr, .ziglyph);
}

/// Step function for `Mode.table`: classifies every codepoint in the data
/// file with the lookup table `symbols.table`.
fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
    return stepMode(ptr, .table);
}

/// Shared body of the step functions. Reads the data file until a read
/// returns zero bytes, feeds each byte to the UTF-8 decoder and runs the
/// symbol check selected by `mode` on every codepoint the decoder yields.
///
/// `mode` is comptime so that each entry point gets its own specialized
/// loop with no per-codepoint branch on the mode.
///
/// Does nothing when no data file is open. A read failure is logged and
/// reported as `error.BenchmarkFailed`.
fn stepMode(ptr: *anyopaque, comptime mode: Mode) Benchmark.Error!void {
    const self: *IsSymbol = @ptrCast(@alignCast(ptr));

    const f = self.data_f orelse return;
    var r = std.io.bufferedReader(f.reader());
    var d: UTF8Decoder = .{};
    var buf: [4096]u8 = undefined;
    while (true) {
        const n = r.read(&buf) catch |err| {
            log.warn("error reading data file err={}", .{err});
            return error.BenchmarkFailed;
        };
        if (n == 0) break; // EOF reached

        for (buf[0..n]) |c| {
            const cp_, const consumed = d.next(c);
            // NOTE(review): this asserts the decoder consumed the byte;
            // presumably that can fail on malformed UTF-8 input — confirm
            // against UTF8Decoder before feeding arbitrary data.
            assert(consumed);
            const cp = cp_ orelse continue;

            // Keep the result observable so the call isn't optimized out.
            std.mem.doNotOptimizeAway(switch (mode) {
                .ziglyph => symbols.isSymbol(cp),
                .table => symbols.table.get(cp),
            });
        }
    }
}
|
||||
|
||||
// Smoke test: with default options (no data file) a single benchmark run
// must complete without error and without leaking the instance.
test IsSymbol {
    const gpa = std.testing.allocator;

    const instance = try IsSymbol.create(gpa, .{});
    defer instance.destroy(gpa);

    const b = instance.benchmark();
    _ = try b.run(.once);
}
|
@@ -10,6 +10,7 @@ pub const Action = enum {
|
||||
@"grapheme-break",
|
||||
@"terminal-parser",
|
||||
@"terminal-stream",
|
||||
@"is-symbol",
|
||||
|
||||
/// Returns the struct associated with the action. The struct
|
||||
/// should have a few decls:
|
||||
@@ -25,6 +26,7 @@ pub const Action = enum {
|
||||
.@"codepoint-width" => @import("CodepointWidth.zig"),
|
||||
.@"grapheme-break" => @import("GraphemeBreak.zig"),
|
||||
.@"terminal-parser" => @import("TerminalParser.zig"),
|
||||
.@"is-symbol" => @import("IsSymbol.zig"),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
@@ -5,6 +5,7 @@ pub const TerminalStream = @import("TerminalStream.zig");
|
||||
pub const CodepointWidth = @import("CodepointWidth.zig");
|
||||
pub const GraphemeBreak = @import("GraphemeBreak.zig");
|
||||
pub const TerminalParser = @import("TerminalParser.zig");
|
||||
pub const IsSymbol = @import("IsSymbol.zig");
|
||||
|
||||
test {
|
||||
@import("std").testing.refAllDecls(@This());
|
||||
|
@@ -61,6 +61,7 @@ emit_termcap: bool = false,
|
||||
emit_test_exe: bool = false,
|
||||
emit_xcframework: bool = false,
|
||||
emit_webdata: bool = false,
|
||||
emit_unicode_table_gen: bool = false,
|
||||
|
||||
/// Environmental properties
|
||||
env: std.process.EnvMap,
|
||||
@@ -299,6 +300,12 @@ pub fn init(b: *std.Build) !Config {
|
||||
"Build and install test executables with 'build'",
|
||||
) orelse false;
|
||||
|
||||
config.emit_unicode_table_gen = b.option(
|
||||
bool,
|
||||
"emit-unicode-table-gen",
|
||||
"Build and install executables that generate unicode tables with 'build'",
|
||||
) orelse false;
|
||||
|
||||
config.emit_bench = b.option(
|
||||
bool,
|
||||
"emit-bench",
|
||||
|
@@ -31,6 +31,7 @@ pub fn init(b: *std.Build, cfg: *const Config) !SharedDeps {
|
||||
.metallib = undefined,
|
||||
};
|
||||
try result.initTarget(b, cfg.target);
|
||||
if (cfg.emit_unicode_table_gen) result.unicode_tables.install(b);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@@ -4,14 +4,16 @@ const std = @import("std");
|
||||
const Config = @import("Config.zig");
|
||||
|
||||
/// The exe.
|
||||
exe: *std.Build.Step.Compile,
|
||||
props_exe: *std.Build.Step.Compile,
|
||||
symbols_exe: *std.Build.Step.Compile,
|
||||
|
||||
/// The output path for the unicode tables
|
||||
output: std.Build.LazyPath,
|
||||
props_output: std.Build.LazyPath,
|
||||
symbols_output: std.Build.LazyPath,
|
||||
|
||||
pub fn init(b: *std.Build) !UnicodeTables {
|
||||
const exe = b.addExecutable(.{
|
||||
.name = "unigen",
|
||||
const props_exe = b.addExecutable(.{
|
||||
.name = "props-unigen",
|
||||
.root_module = b.createModule(.{
|
||||
.root_source_file = b.path("src/unicode/props.zig"),
|
||||
.target = b.graph.host,
|
||||
@@ -21,31 +23,53 @@ pub fn init(b: *std.Build) !UnicodeTables {
|
||||
}),
|
||||
});
|
||||
|
||||
const symbols_exe = b.addExecutable(.{
|
||||
.name = "symbols-unigen",
|
||||
.root_module = b.createModule(.{
|
||||
.root_source_file = b.path("src/unicode/symbols.zig"),
|
||||
.target = b.graph.host,
|
||||
.strip = false,
|
||||
.omit_frame_pointer = false,
|
||||
.unwind_tables = .sync,
|
||||
}),
|
||||
});
|
||||
|
||||
if (b.lazyDependency("ziglyph", .{
|
||||
.target = b.graph.host,
|
||||
})) |ziglyph_dep| {
|
||||
exe.root_module.addImport(
|
||||
"ziglyph",
|
||||
ziglyph_dep.module("ziglyph"),
|
||||
);
|
||||
inline for (&.{ props_exe, symbols_exe }) |exe| {
|
||||
exe.root_module.addImport(
|
||||
"ziglyph",
|
||||
ziglyph_dep.module("ziglyph"),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const run = b.addRunArtifact(exe);
|
||||
const props_run = b.addRunArtifact(props_exe);
|
||||
const symbols_run = b.addRunArtifact(symbols_exe);
|
||||
|
||||
return .{
|
||||
.exe = exe,
|
||||
.output = run.captureStdOut(),
|
||||
.props_exe = props_exe,
|
||||
.symbols_exe = symbols_exe,
|
||||
.props_output = props_run.captureStdOut(),
|
||||
.symbols_output = symbols_run.captureStdOut(),
|
||||
};
|
||||
}
|
||||
|
||||
/// Add the generated table modules to `step` as anonymous imports:
/// "unicode_tables" is backed by the captured output of the props
/// generator and "symbols_tables" by the captured output of the symbols
/// generator. The step is also made to depend on both generator runs.
pub fn addImport(self: *const UnicodeTables, step: *std.Build.Step.Compile) void {
    self.props_output.addStepDependencies(&step.step);
    step.root_module.addAnonymousImport("unicode_tables", .{
        .root_source_file = self.props_output,
    });

    self.symbols_output.addStepDependencies(&step.step);
    step.root_module.addAnonymousImport("symbols_tables", .{
        .root_source_file = self.symbols_output,
    });
}
|
||||
|
||||
/// Install both table generator executables (props and symbols).
pub fn install(self: *const UnicodeTables, b: *std.Build) void {
    b.installArtifact(self.props_exe);
    b.installArtifact(self.symbols_exe);
}
|
||||
|
@@ -1,12 +1,12 @@
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const assert = std.debug.assert;
|
||||
const ziglyph = @import("ziglyph");
|
||||
const font = @import("../font/main.zig");
|
||||
const terminal = @import("../terminal/main.zig");
|
||||
const renderer = @import("../renderer.zig");
|
||||
const shaderpkg = renderer.Renderer.API.shaders;
|
||||
const ArrayListCollection = @import("../datastruct/array_list_collection.zig").ArrayListCollection;
|
||||
const symbols = @import("../unicode/symbols.zig").table;
|
||||
|
||||
/// The possible cell content keys that exist.
|
||||
pub const Key = enum {
|
||||
@@ -249,15 +249,7 @@ pub fn isCovering(cp: u21) bool {
|
||||
/// In the future it may be prudent to expand this to encompass more
|
||||
/// symbol-like characters, and/or exclude some PUA sections.
|
||||
pub fn isSymbol(cp: u21) bool {
    // Lookup in the generated table imported from unicode/symbols.zig,
    // which replaces the former chain of ziglyph block checks.
    return symbols.get(cp);
}
|
||||
|
||||
/// Returns the appropriate `constraint_width` for
|
||||
|
@@ -83,7 +83,7 @@ pub fn Generator(
|
||||
block_len += 1;
|
||||
|
||||
// If we still have space and we're not done with codepoints,
|
||||
// we keep building up the bock. Conversely: we finalize this
|
||||
// we keep building up the block. Conversely: we finalize this
|
||||
// block if we've filled it or are out of codepoints.
|
||||
if (block_len < block_size and cp != std.math.maxInt(u21)) continue;
|
||||
if (block_len < block_size) @memset(block[block_len..block_size], 0);
|
||||
@@ -136,7 +136,7 @@ pub fn Tables(comptime Elem: type) type {
|
||||
stage3: []const Elem,
|
||||
|
||||
/// Given a codepoint, returns the mapping for that codepoint.
|
||||
pub fn get(self: *const Self, cp: u21) Elem {
|
||||
pub inline fn get(self: *const Self, cp: u21) Elem {
|
||||
const high = cp >> 8;
|
||||
const low = cp & 0xFF;
|
||||
return self.stage3[self.stage2[self.stage1[high] + low]];
|
||||
@@ -173,6 +173,7 @@ pub fn Tables(comptime Elem: type) type {
|
||||
\\};
|
||||
\\ };
|
||||
\\}
|
||||
\\
|
||||
);
|
||||
}
|
||||
};
|
||||
|
@@ -9,5 +9,6 @@ pub const graphemeBreak = grapheme.graphemeBreak;
|
||||
pub const GraphemeBreakState = grapheme.BreakState;
|
||||
|
||||
test {
    // Reference every public declaration of this file so that it is
    // analyzed as part of the test build.
    @import("std").testing.refAllDecls(@This());

    // symbols.zig is imported explicitly so that it is pulled in as well.
    _ = @import("symbols.zig");
}
|
||||
|
@@ -166,7 +166,7 @@ pub fn main() !void {
|
||||
|
||||
// This is not very fast in debug modes, so its commented by default.
|
||||
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
|
||||
// test "tables match ziglyph" {
|
||||
// test "unicode props: tables match ziglyph" {
|
||||
// const testing = std.testing;
|
||||
//
|
||||
// const min = 0xFF + 1; // start outside ascii
|
||||
|
95
src/unicode/symbols.zig
Normal file
95
src/unicode/symbols.zig
Normal file
@@ -0,0 +1,95 @@
|
||||
const props = @This();
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const ziglyph = @import("ziglyph");
|
||||
const lut = @import("lut.zig");
|
||||
|
||||
/// The symbol lookup tables for Ghostty, mapping a codepoint to whether it
/// is considered a symbol.
pub const table: lut.Tables(bool) = tbl: {
    // The "symbols_tables" module only exists once main() below has been
    // run as part of the Ghostty build.zig. Thanks to Zig's lazy analysis
    // it can still be referenced from here.
    const gen = @import("symbols_tables").Tables(bool);
    break :tbl .{
        .stage1 = &gen.stage1,
        .stage2 = &gen.stage2,
        .stage3 = &gen.stage3,
    };
};
|
||||
|
||||
/// Returns true if the codepoint is a "symbol-like" character. For now
/// that means any private use codepoint, plus every codepoint in one of
/// these unicode blocks:
///   - Dingbats
///   - Emoticons
///   - Miscellaneous Symbols
///   - Enclosed Alphanumerics
///   - Enclosed Alphanumeric Supplement
///   - Miscellaneous Symbols and Pictographs
///   - Transport and Map Symbols
///
/// In the future it may be prudent to expand this to cover more
/// symbol-like characters, and/or to exclude some PUA sections.
pub fn isSymbol(cp: u21) bool {
    if (ziglyph.general_category.isPrivateUse(cp)) return true;

    const blocks = ziglyph.blocks;
    return blocks.isDingbats(cp) or
        blocks.isEmoticons(cp) or
        blocks.isMiscellaneousSymbols(cp) or
        blocks.isEnclosedAlphanumerics(cp) or
        blocks.isEnclosedAlphanumericSupplement(cp) or
        blocks.isMiscellaneousSymbolsAndPictographs(cp) or
        blocks.isTransportAndMapSymbols(cp);
}
|
||||
|
||||
/// Entry point of the generator binary: builds the symbol lookup tables
/// from `isSymbol` and writes them to stdout as Zig source.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // Callbacks handed to the generator: `get` yields the value to store
    // for a codepoint, `eql` compares two stored values.
    const Context = struct {
        pub fn get(_: @This(), cp: u21) !bool {
            return isSymbol(cp);
        }

        pub fn eql(_: @This(), a: bool, b: bool) bool {
            return a == b;
        }
    };
    const gen: lut.Generator(bool, Context) = .{};

    const tables = try gen.generate(alloc);
    defer alloc.free(tables.stage1);
    defer alloc.free(tables.stage2);
    defer alloc.free(tables.stage3);

    const stdout = std.io.getStdOut().writer();
    try tables.writeZig(stdout);

    // Uncomment when manually debugging to see our table sizes.
    // std.log.warn("stage1={} stage2={} stage3={}", .{
    //     tables.stage1.len,
    //     tables.stage2.len,
    //     tables.stage3.len,
    // });
}
|
||||
|
||||
// Exhaustively compares the generated table against the ziglyph-based
// `isSymbol` for every u21 value. This is not very fast in debug modes and
// is skipped when running under Valgrind.
// IMPORTANT: KEEP THIS ENABLED WHENEVER MAKING CHANGES.
test "unicode symbols: tables match ziglyph" {
    if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest;

    // The range end is exclusive, so add one to also cover maxInt(u21).
    for (0..std.math.maxInt(u21) + 1) |i| {
        const cp: u21 = @intCast(i);
        const t = table.get(cp);
        const zg = isSymbol(cp);

        if (t != zg) {
            std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg });
            return error.TestUnexpectedResult;
        }
    }
}
|
Reference in New Issue
Block a user