mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-10-01 15:38:35 +00:00
trying a bunch of things to get performance to match
This commit is contained in:
@@ -37,8 +37,8 @@
|
|||||||
.lazy = true,
|
.lazy = true,
|
||||||
},
|
},
|
||||||
.uucode = .{
|
.uucode = .{
|
||||||
.url = "https://github.com/jacobsandlund/uucode/archive/38b82297e69a3b2dc55dc8df25f3851be37f9327.tar.gz",
|
.url = "https://github.com/jacobsandlund/uucode/archive/69782fbe79e06a34ee177978d3479ed5801ce0af.tar.gz",
|
||||||
.hash = "uucode-0.0.0-ZZjBPiqdPwB-rG3ieaq3c6tMpnksWYs4_rGj2IvFGjjB",
|
.hash = "uucode-0.0.0-ZZjBPl_dPwC-BPhSJLID4Hs9O0zw-vZKGXdaOBFch8c8",
|
||||||
},
|
},
|
||||||
.zig_wayland = .{
|
.zig_wayland = .{
|
||||||
// codeberg ifreund/zig-wayland
|
// codeberg ifreund/zig-wayland
|
||||||
|
@@ -10,6 +10,7 @@ const assert = std.debug.assert;
|
|||||||
const Allocator = std.mem.Allocator;
|
const Allocator = std.mem.Allocator;
|
||||||
const Benchmark = @import("Benchmark.zig");
|
const Benchmark = @import("Benchmark.zig");
|
||||||
const options = @import("options.zig");
|
const options = @import("options.zig");
|
||||||
|
const uucode = @import("uucode");
|
||||||
const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
|
const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
|
||||||
const simd = @import("../simd/main.zig");
|
const simd = @import("../simd/main.zig");
|
||||||
const table = @import("../unicode/main.zig").table;
|
const table = @import("../unicode/main.zig").table;
|
||||||
@@ -47,6 +48,9 @@ pub const Mode = enum {
|
|||||||
|
|
||||||
/// Test our lookup table implementation.
|
/// Test our lookup table implementation.
|
||||||
table,
|
table,
|
||||||
|
|
||||||
|
/// Using uucode, with custom `width` extension based on `wcwidth`.
|
||||||
|
uucode,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Create a new terminal stream handler for the given arguments.
|
/// Create a new terminal stream handler for the given arguments.
|
||||||
@@ -71,6 +75,7 @@ pub fn benchmark(self: *CodepointWidth) Benchmark {
|
|||||||
.wcwidth => stepWcwidth,
|
.wcwidth => stepWcwidth,
|
||||||
.table => stepTable,
|
.table => stepTable,
|
||||||
.simd => stepSimd,
|
.simd => stepSimd,
|
||||||
|
.uucode => stepUucode,
|
||||||
},
|
},
|
||||||
.setupFn = setup,
|
.setupFn = setup,
|
||||||
.teardownFn = teardown,
|
.teardownFn = teardown,
|
||||||
@@ -192,6 +197,41 @@ fn stepSimd(ptr: *anyopaque) Benchmark.Error!void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Benchmark step for the `.uucode` mode: streams the data file through the
/// UTF-8 decoder and looks up the width of every decoded codepoint via
/// uucode.
///
/// Returns immediately when no data file is set. Fails with
/// `error.BenchmarkFailed` (after logging a warning) if a read fails.
fn stepUucode(ptr: *anyopaque) Benchmark.Error!void {
    const self: *CodepointWidth = @ptrCast(@alignCast(ptr));
    const file = self.data_f orelse return;

    var reader = std.io.bufferedReader(file.reader());
    var decoder: UTF8Decoder = .{};
    var buf: [4096]u8 = undefined;

    while (true) {
        const len = reader.read(&buf) catch |err| {
            log.warn("error reading data file err={}", .{err});
            return error.BenchmarkFailed;
        };
        if (len == 0) return; // EOF reached

        for (buf[0..len]) |byte| {
            const maybe_cp, const consumed = decoder.next(byte);
            assert(consumed);

            // The decoder has not produced a full codepoint yet.
            const cp = maybe_cp orelse continue;

            // Codepoints up to 0xFF are treated as width 1 without a lookup;
            // the original comment notes that terminal.zig uses the same
            // shortcut, so the benchmark mirrors it.
            const width = if (cp > 0xFF)
                uucode.getSpecial(@intCast(cp)).width
            else
                1;

            // Store the result somewhere observable so the lookup is not
            // compiled away.
            buf[0] = @intCast(width);
        }
    }
}
|
||||||
|
|
||||||
test CodepointWidth {
|
test CodepointWidth {
|
||||||
const testing = std.testing;
|
const testing = std.testing;
|
||||||
const alloc = testing.allocator;
|
const alloc = testing.allocator;
|
||||||
|
@@ -8,6 +8,7 @@ const assert = std.debug.assert;
|
|||||||
const Allocator = std.mem.Allocator;
|
const Allocator = std.mem.Allocator;
|
||||||
const Benchmark = @import("Benchmark.zig");
|
const Benchmark = @import("Benchmark.zig");
|
||||||
const options = @import("options.zig");
|
const options = @import("options.zig");
|
||||||
|
const uucode = @import("uucode");
|
||||||
const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
|
const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
|
||||||
const unicode = @import("../unicode/main.zig");
|
const unicode = @import("../unicode/main.zig");
|
||||||
|
|
||||||
@@ -38,6 +39,9 @@ pub const Mode = enum {
|
|||||||
|
|
||||||
/// Ghostty's table-based approach.
|
/// Ghostty's table-based approach.
|
||||||
table,
|
table,
|
||||||
|
|
||||||
|
/// Uucode
|
||||||
|
uucode,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Create a new terminal stream handler for the given arguments.
|
/// Create a new terminal stream handler for the given arguments.
|
||||||
@@ -60,6 +64,7 @@ pub fn benchmark(self: *GraphemeBreak) Benchmark {
|
|||||||
.stepFn = switch (self.opts.mode) {
|
.stepFn = switch (self.opts.mode) {
|
||||||
.noop => stepNoop,
|
.noop => stepNoop,
|
||||||
.table => stepTable,
|
.table => stepTable,
|
||||||
|
.uucode => stepUucode,
|
||||||
},
|
},
|
||||||
.setupFn = setup,
|
.setupFn = setup,
|
||||||
.teardownFn = teardown,
|
.teardownFn = teardown,
|
||||||
@@ -134,6 +139,160 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const GraphemeBoundaryClass = uucode.TypeOfX(.grapheme_boundary_class);

/// Returns true for both classes that denote a pictographic emoji. The enum
/// classifies pictographic codepoints as either `.extended_pictographic` or
/// `.extended_pictographic_base`, so comparing against only one of the two
/// misses modifier-base emoji.
fn isPictographicClass(gb: GraphemeBoundaryClass) bool {
    return gb == .extended_pictographic or gb == .extended_pictographic_base;
}

/// Decides whether there is a grapheme cluster break between two adjacent
/// codepoints with boundary classes `gb1` and `gb2`. (Despite its name, this
/// function does not compute a class; it computes a break decision.)
///
/// `state` carries sequence context (regional-indicator pairs, emoji ZWJ
/// sequences) between calls and is updated in place. Returns `true` when a
/// break is allowed between the two codepoints, `false` otherwise.
///
/// NOTE(review): `.emoji_modifier` has no dedicated rule here, so a modifier
/// following a pictographic base falls through to GB999 and breaks. Confirm
/// whether that is intended for this benchmark.
pub fn computeGraphemeBoundaryClass(
    gb1: GraphemeBoundaryClass,
    gb2: GraphemeBoundaryClass,
    state: *uucode.grapheme.BreakState,
) bool {
    // Set state back to default when `gb1` or `gb2` is not expected in sequence.
    switch (state.*) {
        .regional_indicator => {
            if (gb1 != .regional_indicator or gb2 != .regional_indicator) {
                state.* = .default;
            }
        },
        .extended_pictographic => {
            const gb1_in_sequence = gb1 == .extend or gb1 == .zwj or isPictographicClass(gb1);
            const gb2_in_sequence = gb2 == .extend or gb2 == .zwj or isPictographicClass(gb2);
            if (!gb1_in_sequence or !gb2_in_sequence) state.* = .default;
        },
        .default, .indic_conjunct_break_consonant, .indic_conjunct_break_linker => {},
    }

    // GB6: L x (L | V | LV | LVT)
    if (gb1 == .L) {
        if (gb2 == .L or
            gb2 == .V or
            gb2 == .LV or
            gb2 == .LVT) return false;
    }

    // GB7: (LV | V) x (V | T)
    if (gb1 == .LV or gb1 == .V) {
        if (gb2 == .V or gb2 == .T) return false;
    }

    // GB8: (LVT | T) x T
    if (gb1 == .LVT or gb1 == .T) {
        if (gb2 == .T) return false;
    }

    // Handle GB9 (Extend | ZWJ) later, since it can also match the start of
    // GB9c (Indic) and GB11 (Emoji ZWJ)

    // GB9a: SpacingMark
    if (gb2 == .spacing_mark) return false;

    // GB9b: Prepend
    if (gb1 == .prepend) return false;

    // GB11: Emoji ZWJ sequence
    if (isPictographicClass(gb1)) {
        // Start of sequence. In normal operation the state is `.default`
        // here, but precomputeGraphemeBreak iterates all states, so that is
        // not asserted.
        if (gb2 == .extend or gb2 == .zwj) {
            state.* = .extended_pictographic;
            return false;
        }
        // else, not an Emoji ZWJ sequence
    } else if (state.* == .extended_pictographic) {
        // Continue or end sequence.
        if (gb1 == .extend and (gb2 == .extend or gb2 == .zwj)) {
            // continue extend* ZWJ sequence
            return false;
        } else if (gb1 == .zwj and isPictographicClass(gb2)) {
            // ZWJ -> end of sequence
            state.* = .default;
            return false;
        } else {
            // Not a valid Emoji ZWJ sequence
            state.* = .default;
        }
    }

    // GB12 and GB13: Regional Indicator
    if (gb1 == .regional_indicator and gb2 == .regional_indicator) {
        if (state.* == .default) {
            state.* = .regional_indicator;
            return false;
        } else {
            state.* = .default;
            return true;
        }
    }

    // GB9: x (Extend | ZWJ)
    if (gb2 == .extend or gb2 == .zwj) return false;

    // GB999: Otherwise, break everywhere
    return true;
}
|
||||||
|
|
||||||
|
/// Reports whether there is a grapheme break between `cp1` and `cp2`.
///
/// Looks up the boundary class of each codepoint through uucode and consults
/// a table that is precomputed at compile time from
/// `computeGraphemeBoundaryClass`. `state` is read for the lookup and then
/// overwritten with the state stored in the matching table entry.
pub fn isBreak(
    cp1: u21,
    cp2: u21,
    state: *uucode.grapheme.BreakState,
) bool {
    const break_table = comptime uucode.grapheme.precomputeGraphemeBreak(
        GraphemeBoundaryClass,
        computeGraphemeBoundaryClass,
    );

    const entry = break_table.get(
        uucode.getX(.grapheme_boundary_class, cp1),
        uucode.getX(.grapheme_boundary_class, cp2),
        state.*,
    );
    state.* = entry.state;
    return entry.result;
}
|
||||||
|
|
||||||
|
/// Benchmark step for the `.uucode` mode: streams the data file through the
/// UTF-8 decoder and runs `isBreak` on each pair of consecutive codepoints.
/// The first decoded codepoint is paired with codepoint 0.
///
/// Returns immediately when no data file is set. Fails with
/// `error.BenchmarkFailed` (after logging a warning) if a read fails.
fn stepUucode(ptr: *anyopaque) Benchmark.Error!void {
    const self: *GraphemeBreak = @ptrCast(@alignCast(ptr));
    const file = self.data_f orelse return;

    var reader = std.io.bufferedReader(file.reader());
    var decoder: UTF8Decoder = .{};
    var break_state: uucode.grapheme.BreakState = .default;
    var prev_cp: u21 = 0;
    var buf: [4096]u8 = undefined;

    while (true) {
        const len = reader.read(&buf) catch |err| {
            log.warn("error reading data file err={}", .{err});
            return error.BenchmarkFailed;
        };
        if (len == 0) return; // EOF reached

        for (buf[0..len]) |byte| {
            const maybe_cp, const consumed = decoder.next(byte);
            assert(consumed);

            // The decoder has not produced a full codepoint yet.
            const next_cp: u21 = @intCast(maybe_cp orelse continue);

            // Store the result somewhere observable so the check is not
            // compiled away.
            const is_break = isBreak(prev_cp, next_cp, &break_state);
            buf[0] = @intFromBool(is_break);
            prev_cp = next_cp;
        }
    }
}
|
||||||
|
|
||||||
test GraphemeBreak {
|
test GraphemeBreak {
|
||||||
const testing = std.testing;
|
const testing = std.testing;
|
||||||
const alloc = testing.allocator;
|
const alloc = testing.allocator;
|
||||||
|
@@ -24,14 +24,16 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables
|
|||||||
if (b.lazyDependency("uucode", .{
|
if (b.lazyDependency("uucode", .{
|
||||||
.target = b.graph.host,
|
.target = b.graph.host,
|
||||||
.@"tables.zig" = uucode_tables_zig,
|
.@"tables.zig" = uucode_tables_zig,
|
||||||
|
.build_config_path = b.path("src/build/uucode_config.zig"),
|
||||||
})) |dep| {
|
})) |dep| {
|
||||||
exe.root_module.addImport("uucode", dep.module("uucode"));
|
exe.root_module.addImport("uucode", dep.module("uucode"));
|
||||||
}
|
}
|
||||||
|
|
||||||
const run = b.addRunArtifact(exe);
|
const run = b.addRunArtifact(exe);
|
||||||
|
const output = run.addOutputFileArg("tables.zig");
|
||||||
return .{
|
return .{
|
||||||
.exe = exe,
|
.exe = exe,
|
||||||
.output = run.captureStdOut(),
|
.output = output,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -3,6 +3,93 @@ const config_x = @import("config.x.zig");
|
|||||||
const d = config.default;
|
const d = config.default;
|
||||||
const wcwidth = config_x.wcwidth;
|
const wcwidth = config_x.wcwidth;
|
||||||
|
|
||||||
|
pub const log_level = .debug;
|
||||||
|
|
||||||
|
/// Extension compute callback: derives `data.width` from `data.wcwidth` by
/// clamping it into the range 0..2 (negative values become 0, values above 2
/// become 2). `cp`, `backing` and `tracking` are unused.
fn computeWidth(cp: u21, data: anytype, backing: anytype, tracking: anytype) void {
    _ = cp;
    _ = backing;
    _ = tracking;
    data.width = @intCast(@min(2, @max(0, data.wcwidth)));
}
|
||||||
|
|
||||||
|
/// Extension that adds a `width` field (a `u2`) computed from the `wcwidth`
/// input by `computeWidth`.
const width = config.Extension{
    .inputs = &.{"wcwidth"},
    .compute = &computeWidth,
    .fields = &.{
        .{ .name = "width", .type = u2 },
    },
};
|
||||||
|
|
||||||
|
/// Grapheme boundary classes stored in the generated tables. The list is
/// deliberately not exhaustive: every grapheme-break value without a
/// dedicated member here is mapped to `invalid` by
/// `computeGraphemeBoundaryClass`.
///
/// The member order defines the underlying `u4` values; do not reorder.
pub const GraphemeBoundaryClass = enum(u4) {
    /// Catch-all for classes this enum does not distinguish.
    invalid,

    // Hangul syllable classes.
    L,
    V,
    T,
    LV,
    LVT,

    prepend,
    extend,
    zwj,
    spacing_mark,
    regional_indicator,

    extended_pictographic,

    /// \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
    extended_pictographic_base,

    /// \p{Emoji_Modifier}
    emoji_modifier,
};
|
||||||
|
|
||||||
|
/// Extension compute callback: fills `data.grapheme_boundary_class` from the
/// `grapheme_break`, `is_emoji_modifier` and `is_emoji_modifier_base` inputs.
///
/// The emoji-modifier flags take priority over `grapheme_break`, and the
/// modifier flag wins over the modifier-base flag. `cp`, `backing` and
/// `tracking` are unused.
fn computeGraphemeBoundaryClass(cp: u21, data: anytype, backing: anytype, tracking: anytype) void {
    _ = cp;
    _ = backing;
    _ = tracking;

    if (data.is_emoji_modifier) {
        data.grapheme_boundary_class = .emoji_modifier;
        return;
    }

    if (data.is_emoji_modifier_base) {
        data.grapheme_boundary_class = .extended_pictographic_base;
        return;
    }

    data.grapheme_boundary_class = switch (data.grapheme_break) {
        // Classes carried over one-to-one.
        .l => .L,
        .v => .V,
        .t => .T,
        .lv => .LV,
        .lvt => .LVT,
        .prepend => .prepend,
        .zwj => .zwj,
        .spacing_mark => .spacing_mark,
        .regional_indicator => .regional_indicator,
        .extended_pictographic => .extended_pictographic,

        // Everything that behaves like Extend is collapsed into one class.
        .zwnj,
        .indic_conjunct_break_extend,
        .indic_conjunct_break_linker,
        => .extend,

        // Not literally invalid: every codepoint has some grapheme boundary
        // class, but nothing outside the categories above matters here.
        .other,
        .indic_conjunct_break_consonant,
        .cr,
        .lf,
        .control,
        => .invalid,
    };
}
|
||||||
|
|
||||||
|
/// Extension that adds a `grapheme_boundary_class` field of type
/// `GraphemeBoundaryClass`, computed by `computeGraphemeBoundaryClass` from
/// the three listed inputs.
const grapheme_boundary_class = config.Extension{
    .inputs = &.{ "grapheme_break", "is_emoji_modifier", "is_emoji_modifier_base" },
    .compute = &computeGraphemeBoundaryClass,
    .fields = &.{.{ .name = "grapheme_boundary_class", .type = GraphemeBoundaryClass }},
};
|
||||||
|
|
||||||
pub const tables = [_]config.Table{
|
pub const tables = [_]config.Table{
|
||||||
.{
|
.{
|
||||||
.extensions = &.{wcwidth},
|
.extensions = &.{wcwidth},
|
||||||
@@ -14,9 +101,16 @@ pub const tables = [_]config.Table{
|
|||||||
d.field("case_folding_full"),
|
d.field("case_folding_full"),
|
||||||
// Alternative:
|
// Alternative:
|
||||||
// d.field("case_folding_simple"),
|
// d.field("case_folding_simple"),
|
||||||
d.field("grapheme_break"),
|
|
||||||
d.field("is_emoji_modifier"),
|
d.field("is_emoji_modifier"),
|
||||||
d.field("is_emoji_modifier_base"),
|
d.field("is_emoji_modifier_base"),
|
||||||
|
d.field("grapheme_break"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
.{
|
||||||
|
.extensions = &.{ wcwidth, width, grapheme_boundary_class },
|
||||||
|
.fields = &.{
|
||||||
|
width.field("width"),
|
||||||
|
grapheme_boundary_class.field("grapheme_boundary_class"),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
@@ -29,7 +29,8 @@ test "codepointWidth basic" {
|
|||||||
// const uucode = @import("uucode");
|
// const uucode = @import("uucode");
|
||||||
//
|
//
|
||||||
// const min = 0xFF + 1; // start outside ascii
|
// const min = 0xFF + 1; // start outside ascii
|
||||||
// for (min..uucode.code_point_range_end) |cp| {
|
// const max = std.math.maxInt(u21) + 1;
|
||||||
|
// for (min..max) |cp| {
|
||||||
// const simd = codepointWidth(@intCast(cp));
|
// const simd = codepointWidth(@intCast(cp));
|
||||||
// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
|
// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
|
||||||
// if (simd != uu) mismatch: {
|
// if (simd != uu) mismatch: {
|
||||||
|
@@ -345,7 +345,7 @@ pub fn print(self: *Terminal, c: u21) !void {
|
|||||||
if (c == 0xFE0F or c == 0xFE0E) {
|
if (c == 0xFE0F or c == 0xFE0E) {
|
||||||
// This only applies to emoji
|
// This only applies to emoji
|
||||||
const prev_props = unicode.getProperties(prev.cell.content.codepoint);
|
const prev_props = unicode.getProperties(prev.cell.content.codepoint);
|
||||||
const emoji = prev_props.grapheme_boundary_class.isExtendedPictographic();
|
const emoji = unicode.isExtendedPictographic(prev_props.grapheme_boundary_class);
|
||||||
if (!emoji) return;
|
if (!emoji) return;
|
||||||
|
|
||||||
switch (c) {
|
switch (c) {
|
||||||
|
@@ -2,6 +2,7 @@ const std = @import("std");
|
|||||||
const props = @import("props.zig");
|
const props = @import("props.zig");
|
||||||
const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
|
const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
|
||||||
const table = props.table;
|
const table = props.table;
|
||||||
|
const isExtendedPictographic = props.isExtendedPictographic;
|
||||||
|
|
||||||
/// Determines if there is a grapheme break between two codepoints. This
|
/// Determines if there is a grapheme break between two codepoints. This
|
||||||
/// must be called sequentially maintaining the state between calls.
|
/// must be called sequentially maintaining the state between calls.
|
||||||
@@ -80,7 +81,7 @@ fn graphemeBreakClass(
|
|||||||
state: *BreakState,
|
state: *BreakState,
|
||||||
) bool {
|
) bool {
|
||||||
// GB11: Emoji Extend* ZWJ x Emoji
|
// GB11: Emoji Extend* ZWJ x Emoji
|
||||||
if (!state.extended_pictographic and gbc1.isExtendedPictographic()) {
|
if (!state.extended_pictographic and isExtendedPictographic(gbc1)) {
|
||||||
state.extended_pictographic = true;
|
state.extended_pictographic = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -131,7 +132,7 @@ fn graphemeBreakClass(
|
|||||||
// GB11: Emoji Extend* ZWJ x Emoji
|
// GB11: Emoji Extend* ZWJ x Emoji
|
||||||
if (state.extended_pictographic and
|
if (state.extended_pictographic and
|
||||||
gbc1 == .zwj and
|
gbc1 == .zwj and
|
||||||
gbc2.isExtendedPictographic())
|
isExtendedPictographic(gbc2))
|
||||||
{
|
{
|
||||||
state.extended_pictographic = false;
|
state.extended_pictographic = false;
|
||||||
return false;
|
return false;
|
||||||
|
@@ -7,6 +7,7 @@ pub const Properties = props.Properties;
|
|||||||
pub const getProperties = props.get;
|
pub const getProperties = props.get;
|
||||||
pub const graphemeBreak = grapheme.graphemeBreak;
|
pub const graphemeBreak = grapheme.graphemeBreak;
|
||||||
pub const GraphemeBreakState = grapheme.BreakState;
|
pub const GraphemeBreakState = grapheme.BreakState;
|
||||||
|
pub const isExtendedPictographic = props.isExtendedPictographic;
|
||||||
|
|
||||||
test {
|
test {
|
||||||
@import("std").testing.refAllDecls(@This());
|
@import("std").testing.refAllDecls(@This());
|
||||||
|
@@ -6,10 +6,11 @@ const lut = @import("lut.zig");
|
|||||||
|
|
||||||
/// The lookup tables for Ghostty.
|
/// The lookup tables for Ghostty.
|
||||||
pub const table = table: {
|
pub const table = table: {
|
||||||
|
const Props = uucode.PackedTypeOf("1");
|
||||||
// This is only available after running main() below as part of the Ghostty
|
// This is only available after running main() below as part of the Ghostty
|
||||||
// build.zig, but due to Zig's lazy analysis we can still reference it here.
|
// build.zig, but due to Zig's lazy analysis we can still reference it here.
|
||||||
const generated = @import("unicode_tables").Tables(Properties);
|
const generated = @import("unicode_tables").Tables(Props);
|
||||||
const Tables = lut.Tables(Properties);
|
const Tables = lut.Tables(Props);
|
||||||
break :table Tables{
|
break :table Tables{
|
||||||
.stage1 = &generated.stage1,
|
.stage1 = &generated.stage1,
|
||||||
.stage2 = &generated.stage2,
|
.stage2 = &generated.stage2,
|
||||||
@@ -61,81 +62,62 @@ pub const Properties = struct {
|
|||||||
/// Possible grapheme boundary classes. This isn't an exhaustive list:
|
/// Possible grapheme boundary classes. This isn't an exhaustive list:
|
||||||
/// we omit control, CR, LF, etc. because in Ghostty's usage that are
|
/// we omit control, CR, LF, etc. because in Ghostty's usage they are
|
||||||
/// impossible because they're handled by the terminal.
|
/// impossible because they're handled by the terminal.
|
||||||
pub const GraphemeBoundaryClass = enum(u4) {
|
pub const GraphemeBoundaryClass = uucode.TypeOfX(.grapheme_boundary_class);
|
||||||
invalid,
|
|
||||||
L,
|
|
||||||
V,
|
|
||||||
T,
|
|
||||||
LV,
|
|
||||||
LVT,
|
|
||||||
prepend,
|
|
||||||
extend,
|
|
||||||
zwj,
|
|
||||||
spacing_mark,
|
|
||||||
regional_indicator,
|
|
||||||
extended_pictographic,
|
|
||||||
extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
|
|
||||||
emoji_modifier, // \p{Emoji_Modifier}
|
|
||||||
|
|
||||||
/// Gets the grapheme boundary class for a codepoint.
|
/// Gets the grapheme boundary class for a codepoint.
|
||||||
/// The use case for this is only in generating lookup tables.
|
/// The use case for this is only in generating lookup tables.
|
||||||
pub fn init(cp: u21) GraphemeBoundaryClass {
|
pub fn computeGraphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
|
||||||
if (cp < uucode.code_point_range_end) {
|
if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
|
||||||
if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
|
if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
|
||||||
if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
|
|
||||||
|
|
||||||
return switch (uucode.get(.grapheme_break, cp)) {
|
return switch (uucode.get(.grapheme_break, cp)) {
|
||||||
.extended_pictographic => .extended_pictographic,
|
.extended_pictographic => .extended_pictographic,
|
||||||
.l => .L,
|
.l => .L,
|
||||||
.v => .V,
|
.v => .V,
|
||||||
.t => .T,
|
.t => .T,
|
||||||
.lv => .LV,
|
.lv => .LV,
|
||||||
.lvt => .LVT,
|
.lvt => .LVT,
|
||||||
.prepend => .prepend,
|
.prepend => .prepend,
|
||||||
.zwj => .zwj,
|
.zwj => .zwj,
|
||||||
.spacing_mark => .spacing_mark,
|
.spacing_mark => .spacing_mark,
|
||||||
.regional_indicator => .regional_indicator,
|
.regional_indicator => .regional_indicator,
|
||||||
|
|
||||||
.zwnj,
|
.zwnj,
|
||||||
.indic_conjunct_break_extend,
|
.indic_conjunct_break_extend,
|
||||||
.indic_conjunct_break_linker,
|
.indic_conjunct_break_linker,
|
||||||
=> .extend,
|
=> .extend,
|
||||||
|
|
||||||
// This is obviously not INVALID invalid, there is SOME grapheme
|
// This is obviously not INVALID invalid, there is SOME grapheme
|
||||||
// boundary class for every codepoint. But we don't care about
|
// boundary class for every codepoint. But we don't care about
|
||||||
// anything that doesn't fit into the above categories.
|
// anything that doesn't fit into the above categories.
|
||||||
.other,
|
.other,
|
||||||
.indic_conjunct_break_consonant,
|
.indic_conjunct_break_consonant,
|
||||||
.cr,
|
.cr,
|
||||||
.lf,
|
.lf,
|
||||||
.control,
|
.control,
|
||||||
=> .invalid,
|
=> .invalid,
|
||||||
};
|
};
|
||||||
} else {
|
}
|
||||||
return .invalid;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns true if this is an extended pictographic type. This
|
/// Returns true if this is an extended pictographic type. This
|
||||||
/// should be used instead of comparing the enum value directly
|
/// should be used instead of comparing the enum value directly
|
||||||
/// because we classify multiple.
|
/// because we classify multiple.
|
||||||
pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
|
pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
|
||||||
return switch (self) {
|
return switch (self) {
|
||||||
.extended_pictographic,
|
.extended_pictographic,
|
||||||
.extended_pictographic_base,
|
.extended_pictographic_base,
|
||||||
=> true,
|
=> true,
|
||||||
|
|
||||||
else => false,
|
else => false,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
pub fn get(cp: u21) Properties {
|
pub fn get(cp: u21) Properties {
|
||||||
const wcwidth = if (cp < uucode.code_point_range_end) uucode.get(.wcwidth, cp) else 0;
|
const wcwidth = uucode.get(.wcwidth, cp);
|
||||||
|
|
||||||
return .{
|
return .{
|
||||||
.width = @intCast(@min(2, @max(0, wcwidth))),
|
.width = @intCast(@min(2, @max(0, wcwidth))),
|
||||||
.grapheme_boundary_class = .init(cp),
|
.grapheme_boundary_class = computeGraphemeBoundaryClass(cp),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -145,6 +127,13 @@ pub fn main() !void {
|
|||||||
defer arena_state.deinit();
|
defer arena_state.deinit();
|
||||||
const alloc = arena_state.allocator();
|
const alloc = arena_state.allocator();
|
||||||
|
|
||||||
|
var args_iter = try std.process.argsWithAllocator(alloc);
|
||||||
|
defer args_iter.deinit();
|
||||||
|
_ = args_iter.skip(); // Skip program name
|
||||||
|
|
||||||
|
const output_path = args_iter.next() orelse std.debug.panic("No output file arg!", .{});
|
||||||
|
std.debug.print("Unicode tables output_path = {s}\n", .{output_path});
|
||||||
|
|
||||||
const gen: lut.Generator(
|
const gen: lut.Generator(
|
||||||
Properties,
|
Properties,
|
||||||
struct {
|
struct {
|
||||||
@@ -164,7 +153,10 @@ pub fn main() !void {
|
|||||||
defer alloc.free(t.stage1);
|
defer alloc.free(t.stage1);
|
||||||
defer alloc.free(t.stage2);
|
defer alloc.free(t.stage2);
|
||||||
defer alloc.free(t.stage3);
|
defer alloc.free(t.stage3);
|
||||||
try t.writeZig(std.io.getStdOut().writer());
|
var out_file = try std.fs.cwd().createFile(output_path, .{});
|
||||||
|
defer out_file.close();
|
||||||
|
const writer = out_file.writer();
|
||||||
|
try t.writeZig(writer);
|
||||||
|
|
||||||
// Uncomment when manually debugging to see our table sizes.
|
// Uncomment when manually debugging to see our table sizes.
|
||||||
// std.log.warn("stage1={} stage2={} stage3={}", .{
|
// std.log.warn("stage1={} stage2={} stage3={}", .{
|
||||||
@@ -180,7 +172,8 @@ pub fn main() !void {
|
|||||||
// const testing = std.testing;
|
// const testing = std.testing;
|
||||||
//
|
//
|
||||||
// const min = 0xFF + 1; // start outside ascii
|
// const min = 0xFF + 1; // start outside ascii
|
||||||
// for (min..uucode.code_point_range_end) |cp| {
|
// const max = std.math.maxInt(u21) + 1;
|
||||||
|
// for (min..max) |cp| {
|
||||||
// const t = table.get(@intCast(cp));
|
// const t = table.get(@intCast(cp));
|
||||||
// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
|
// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
|
||||||
// if (t.width != uu) {
|
// if (t.width != uu) {
|
||||||
|
Reference in New Issue
Block a user