trying a bunch of things to get performance to match

This commit is contained in:
Jacob Sandlund
2025-09-06 10:42:02 -04:00
parent c67f51f3ee
commit 2af08bdbe3
10 changed files with 367 additions and 76 deletions

View File

@@ -2,6 +2,7 @@ const std = @import("std");
const props = @import("props.zig");
const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
const table = props.table;
const isExtendedPictographic = props.isExtendedPictographic;
/// Determines if there is a grapheme break between two codepoints. This
/// must be called sequentially maintaining the state between calls.
@@ -80,7 +81,7 @@ fn graphemeBreakClass(
state: *BreakState,
) bool {
// GB11: Emoji Extend* ZWJ x Emoji
if (!state.extended_pictographic and gbc1.isExtendedPictographic()) {
if (!state.extended_pictographic and isExtendedPictographic(gbc1)) {
state.extended_pictographic = true;
}
@@ -131,7 +132,7 @@ fn graphemeBreakClass(
// GB11: Emoji Extend* ZWJ x Emoji
if (state.extended_pictographic and
gbc1 == .zwj and
gbc2.isExtendedPictographic())
isExtendedPictographic(gbc2))
{
state.extended_pictographic = false;
return false;

View File

@@ -7,6 +7,7 @@ pub const Properties = props.Properties;
pub const getProperties = props.get;
pub const graphemeBreak = grapheme.graphemeBreak;
pub const GraphemeBreakState = grapheme.BreakState;
pub const isExtendedPictographic = props.isExtendedPictographic;
test {
@import("std").testing.refAllDecls(@This());

View File

@@ -6,10 +6,11 @@ const lut = @import("lut.zig");
/// The lookup tables for Ghostty.
pub const table = table: {
const Props = uucode.PackedTypeOf("1");
// This is only available after running main() below as part of the Ghostty
// build.zig, but due to Zig's lazy analysis we can still reference it here.
const generated = @import("unicode_tables").Tables(Properties);
const Tables = lut.Tables(Properties);
const generated = @import("unicode_tables").Tables(Props);
const Tables = lut.Tables(Props);
break :table Tables{
.stage1 = &generated.stage1,
.stage2 = &generated.stage2,
@@ -61,81 +62,62 @@ pub const Properties = struct {
/// Possible grapheme boundary classes. This isn't an exhaustive list:
/// we omit control, CR, LF, etc. because in Ghostty's usage that are
/// impossible because they're handled by the terminal.
pub const GraphemeBoundaryClass = enum(u4) {
invalid,
L,
V,
T,
LV,
LVT,
prepend,
extend,
zwj,
spacing_mark,
regional_indicator,
extended_pictographic,
extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
emoji_modifier, // \p{Emoji_Modifier}
pub const GraphemeBoundaryClass = uucode.TypeOfX(.grapheme_boundary_class);
/// Gets the grapheme boundary class for a codepoint.
/// The use case for this is only in generating lookup tables.
pub fn init(cp: u21) GraphemeBoundaryClass {
if (cp < uucode.code_point_range_end) {
if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
/// Gets the grapheme boundary class for a codepoint.
/// The use case for this is only in generating lookup tables.
pub fn computeGraphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
return switch (uucode.get(.grapheme_break, cp)) {
.extended_pictographic => .extended_pictographic,
.l => .L,
.v => .V,
.t => .T,
.lv => .LV,
.lvt => .LVT,
.prepend => .prepend,
.zwj => .zwj,
.spacing_mark => .spacing_mark,
.regional_indicator => .regional_indicator,
return switch (uucode.get(.grapheme_break, cp)) {
.extended_pictographic => .extended_pictographic,
.l => .L,
.v => .V,
.t => .T,
.lv => .LV,
.lvt => .LVT,
.prepend => .prepend,
.zwj => .zwj,
.spacing_mark => .spacing_mark,
.regional_indicator => .regional_indicator,
.zwnj,
.indic_conjunct_break_extend,
.indic_conjunct_break_linker,
=> .extend,
.zwnj,
.indic_conjunct_break_extend,
.indic_conjunct_break_linker,
=> .extend,
// This is obviously not INVALID invalid, there is SOME grapheme
// boundary class for every codepoint. But we don't care about
// anything that doesn't fit into the above categories.
.other,
.indic_conjunct_break_consonant,
.cr,
.lf,
.control,
=> .invalid,
};
} else {
return .invalid;
}
}
// This is obviously not INVALID invalid, there is SOME grapheme
// boundary class for every codepoint. But we don't care about
// anything that doesn't fit into the above categories.
.other,
.indic_conjunct_break_consonant,
.cr,
.lf,
.control,
=> .invalid,
};
}
/// Returns true if this is an extended pictographic type. This
/// should be used instead of comparing the enum value directly
/// because we classify multiple.
pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
return switch (self) {
.extended_pictographic,
.extended_pictographic_base,
=> true,
/// Returns true if this is an extended pictographic type. This
/// should be used instead of comparing the enum value directly
/// because we classify multiple.
pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
return switch (self) {
.extended_pictographic,
.extended_pictographic_base,
=> true,
else => false,
};
}
};
else => false,
};
}
pub fn get(cp: u21) Properties {
const wcwidth = if (cp < uucode.code_point_range_end) uucode.get(.wcwidth, cp) else 0;
const wcwidth = uucode.get(.wcwidth, cp);
return .{
.width = @intCast(@min(2, @max(0, wcwidth))),
.grapheme_boundary_class = .init(cp),
.grapheme_boundary_class = computeGraphemeBoundaryClass(cp),
};
}
@@ -145,6 +127,13 @@ pub fn main() !void {
defer arena_state.deinit();
const alloc = arena_state.allocator();
var args_iter = try std.process.argsWithAllocator(alloc);
defer args_iter.deinit();
_ = args_iter.skip(); // Skip program name
const output_path = args_iter.next() orelse std.debug.panic("No output file arg!", .{});
std.debug.print("Unicode tables output_path = {s}\n", .{output_path});
const gen: lut.Generator(
Properties,
struct {
@@ -164,7 +153,10 @@ pub fn main() !void {
defer alloc.free(t.stage1);
defer alloc.free(t.stage2);
defer alloc.free(t.stage3);
try t.writeZig(std.io.getStdOut().writer());
var out_file = try std.fs.cwd().createFile(output_path, .{});
defer out_file.close();
const writer = out_file.writer();
try t.writeZig(writer);
// Uncomment when manually debugging to see our table sizes.
// std.log.warn("stage1={} stage2={} stage3={}", .{
@@ -180,7 +172,8 @@ pub fn main() !void {
// const testing = std.testing;
//
// const min = 0xFF + 1; // start outside ascii
// for (min..uucode.code_point_range_end) |cp| {
// const max = std.math.maxInt(u21) + 1;
// for (min..max) |cp| {
// const t = table.get(@intCast(cp));
// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
// if (t.width != uu) {