font/shaper/harfbuzz: Update position logic to match recent CoreText changes (#10332)

This PR incorporates the recent changes made to CoreText in the
following PRs:

* https://github.com/ghostty-org/ghostty/pull/9883
* https://github.com/ghostty-org/ghostty/pull/10179
* https://github.com/ghostty-org/ghostty/pull/10295

Tests have been added verifying the behavior. Below are all the changes
in shaping from the above PRs but now using HarfBuzz.

## Tai Tham vowels (https://github.com/ghostty-org/ghostty/pull/9883)

Browser:

ᨯᩰ

Before:

(Note that this worked correctly with the old logic for HarfBuzz in
contrast to CoreText)

<img width="498" height="96" alt="CleanShot 2026-01-15 at 09 50 50@2x"
src="https://github.com/user-attachments/assets/a87abb4e-7829-4df0-9cb6-9fc254c10ba2"
/>

After:

<img width="496" height="96" alt="CleanShot 2026-01-15 at 09 51 07@2x"
src="https://github.com/user-attachments/assets/64cb8b12-0b99-4e9f-8189-d793469048f6"
/>


## Tibetan characters (discussion
https://github.com/ghostty-org/ghostty/discussions/8054)

Browser:

ༀ

Before (with Noto Serif Tibetan):

<img width="586" height="154" alt="CleanShot 2026-01-16 at 09 28 32@2x"
src="https://github.com/user-attachments/assets/5b7df9c1-13ff-45fd-977e-9e152063517c"
/>

After:

<img width="604" height="202" alt="CleanShot 2026-01-16 at 09 28 46@2x"
src="https://github.com/user-attachments/assets/3832b8b0-1ce6-448d-a063-622127cfb213"
/>


## Tai Tham ligature (https://github.com/ghostty-org/ghostty/pull/10179
and https://github.com/ghostty-org/ghostty/pull/10295)

Browser:

ᩉ᩠ᨿᩩ

Before:

<img width="860" height="144" alt="CleanShot 2026-01-15 at 09 57 30@2x"
src="https://github.com/user-attachments/assets/56c85a06-1853-4f88-992b-568b7f1d4b4e"
/>

After:

<img width="860" height="108" alt="CleanShot 2026-01-15 at 09 57 44@2x"
src="https://github.com/user-attachments/assets/fe236aad-ac2c-4665-aef4-bc996e3b2938"
/>

## Javanese ligature (https://github.com/ghostty-org/ghostty/pull/10295)

Browser:

ꦤ꧀ꦲꦸ

Before:

<img width="856" height="128" alt="CleanShot 2026-01-15 at 09 59 04@2x"
src="https://github.com/user-attachments/assets/73c60445-a80d-4003-ae2b-c8d53f3c9cf9"
/>

After:

<img width="866" height="114" alt="CleanShot 2026-01-15 at 09 59 23@2x"
src="https://github.com/user-attachments/assets/fce80429-096a-455e-aabd-00bf33fdae54"
/>

## Chakma ligature (https://github.com/ghostty-org/ghostty/pull/10295)

Browser:
𑄝𑄖𑄳𑄠𑄬

Before:

<img width="1298" height="104" alt="CleanShot 2026-01-15 at 10 00 31@2x"
src="https://github.com/user-attachments/assets/72e2be85-fb4b-4ca0-98d7-30279fd2613f"
/>

After:

<img width="1310" height="84" alt="CleanShot 2026-01-15 at 10 00 52@2x"
src="https://github.com/user-attachments/assets/a5e2a89c-7f1e-474c-9d61-b90c6a6ffedd"
/>

## Bengali ligature (https://github.com/ghostty-org/ghostty/pull/10295)

Browser:
রাষ্ট্রে

Before:
<img width="1268" height="94" alt="CleanShot 2026-01-15 at 10 02 16@2x"
src="https://github.com/user-attachments/assets/2946701c-a41b-45c5-b442-915dad45d380"
/>

After:
<img width="1274" height="110" alt="CleanShot 2026-01-15 at 10 02 32@2x"
src="https://github.com/user-attachments/assets/c1684149-cca4-43a7-b384-13e3d4854765"
/>

## Devanagari string (test changed in
https://github.com/ghostty-org/ghostty/pull/10295)

Browser:
अपार्टमेंट

Before:

<img width="560" height="98" alt="CleanShot 2026-01-16 at 09 31 45@2x"
src="https://github.com/user-attachments/assets/8df36427-c1fe-4f3c-9e2a-7e9b798411b4"
/>

After:

<img width="570" height="88" alt="CleanShot 2026-01-16 at 09 32 06@2x"
src="https://github.com/user-attachments/assets/972ed525-f341-400b-a09c-29b0a9135502"
/>


## AI disclaimer

I used Amp to copy the logic from CoreText, but had to help it along. I
took over after I noticed the different behavior with HarfBuzz's default
cluster level.

[Adopt CoreText changes to HarfBuzz
shaper](https://ampcode.com/threads/T-019bbccd-74d3-76c8-add4-6270f0f5375f)
[Rename debug_codepoints, explore HarfBuzz
alternatives](https://ampcode.com/threads/T-019bbce3-bed6-70fa-9eec-b0d1ee448ee9)
[Fix cluster indexing in codepoints
array](https://ampcode.com/threads/T-019bbced-d67b-7239-b507-9b3bd027faeb)
This commit is contained in:
Mitchell Hashimoto
2026-01-26 20:20:26 -08:00
committed by GitHub
3 changed files with 791 additions and 21 deletions

View File

@@ -238,6 +238,12 @@ pub const Buffer = struct {
/// Guesses the segment properties (direction, script, language) of the
/// buffer from its current contents. Wraps
/// `hb_buffer_guess_segment_properties`.
pub fn guessSegmentProperties(self: Buffer) void {
    c.hb_buffer_guess_segment_properties(self.handle);
}
/// Sets the cluster level of a buffer. The `ClusterLevel` dictates one
/// aspect of how HarfBuzz will treat non-base characters during shaping.
/// Per HarfBuzz docs this should be set before shaping the buffer. Wraps
/// `hb_buffer_set_cluster_level`.
pub fn setClusterLevel(self: Buffer, level: ClusterLevel) void {
    c.hb_buffer_set_cluster_level(self.handle, @intFromEnum(level));
}
};
/// The type of hb_buffer_t contents.
@@ -252,6 +258,40 @@ pub const ContentType = enum(u2) {
glyphs = c.HB_BUFFER_CONTENT_TYPE_GLYPHS,
};
/// Data type for holding HarfBuzz's clustering behavior options. The cluster
/// level dictates one aspect of how HarfBuzz will treat non-base characters
/// during shaping.
///
/// The enum values mirror HarfBuzz's `hb_buffer_cluster_level_t` constants.
pub const ClusterLevel = enum(u2) {
    /// In `monotone_graphemes`, non-base characters are merged into the
    /// cluster of the base character that precedes them. There is also cluster
    /// merging every time the clusters will otherwise become non-monotone.
    /// This is the default cluster level.
    monotone_graphemes = c.HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES,

    /// In `monotone_characters`, non-base characters are initially assigned
    /// their own cluster values, which are not merged into preceding base
    /// clusters. This allows HarfBuzz to perform additional operations like
    /// reorder sequences of adjacent marks. The output is still monotone, but
    /// the cluster values are more granular.
    monotone_characters = c.HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS,

    /// In `characters`, non-base characters are assigned their own cluster
    /// values, which are not merged into preceding base clusters. Moreover,
    /// the cluster values are not merged into monotone order. This is the most
    /// granular cluster level, and it is useful for clients that need to know
    /// the exact cluster values of each character, but is harder to use for
    /// clients, since clusters might appear in any order.
    characters = c.HB_BUFFER_CLUSTER_LEVEL_CHARACTERS,

    /// In `graphemes`, non-base characters are merged into the cluster of the
    /// base character that precedes them. This is similar to the Unicode
    /// Grapheme Cluster algorithm, but it is not exactly the same. The output
    /// is not forced to be monotone. This is useful for clients that want to
    /// use HarfBuzz as a cheap implementation of the Unicode Grapheme Cluster
    /// algorithm.
    graphemes = c.HB_BUFFER_CLUSTER_LEVEL_GRAPHEMES,
};
/// The hb_glyph_info_t is the structure that holds information about the
/// glyphs and their relation to input text.
pub const GlyphInfo = extern struct {

View File

@@ -13,6 +13,7 @@ pub const coretext = @import("coretext.zig");
pub const MemoryMode = blob.MemoryMode;
pub const Blob = blob.Blob;
pub const Buffer = buffer.Buffer;
pub const GlyphPosition = buffer.GlyphPosition;
pub const Direction = common.Direction;
pub const Script = common.Script;
pub const Language = common.Language;

View File

@@ -4,6 +4,7 @@ const Allocator = std.mem.Allocator;
const harfbuzz = @import("harfbuzz");
const font = @import("../main.zig");
const terminal = @import("../../terminal/main.zig");
const unicode = @import("../../unicode/main.zig");
const Feature = font.shape.Feature;
const FeatureList = font.shape.FeatureList;
const default_features = font.shape.default_features;
@@ -19,7 +20,7 @@ const log = std.log.scoped(.font_shaper);
/// Shaper that uses Harfbuzz.
pub const Shaper = struct {
/// The allocated used for the feature list and cell buf.
/// The allocated used for the feature list, cell buf, and codepoints.
alloc: Allocator,
/// The buffer used for text shaping. We reuse it across multiple shaping
@@ -32,8 +33,29 @@ pub const Shaper = struct {
/// The features to use for shaping.
hb_feats: []harfbuzz.Feature,
/// The codepoints added to the buffer before shaping. We need to keep
/// these separately because after shaping, HarfBuzz replaces codepoints
/// with glyph indices in the buffer.
codepoints: std.ArrayListUnmanaged(Codepoint) = .{},

/// One input codepoint paired with the original cluster value it was added
/// with. `addCodepoint` passes the index into this list (not the cluster)
/// to HarfBuzz, so post-shaping `info.cluster` indexes back into this list.
const Codepoint = struct {
    // The original cluster value (the cell X position within the run).
    cluster: u32,
    // The Unicode codepoint value.
    codepoint: u32,
};

// Backing storage type for shaped cells.
const CellBuf = std.ArrayListUnmanaged(font.shape.Cell);

/// Running totals across the whole run: x/y are sums of glyph advances and
/// cluster is the maximum cluster seen so far (output can be non-monotonic
/// with cluster level `characters`).
const RunOffset = struct {
    cluster: u32 = 0,
    x: i32 = 0,
    y: i32 = 0,
};

/// The cluster and starting x (in run space) of the cell currently being
/// populated.
const CellOffset = struct {
    cluster: u32 = 0,
    x: i32 = 0,
};
/// The cell_buf argument is the buffer to use for storing shaped results.
/// This should be at least the number of columns in the terminal.
pub fn init(alloc: Allocator, opts: font.shape.Options) !Shaper {
@@ -74,6 +96,7 @@ pub const Shaper = struct {
self.hb_buf.destroy();
self.cell_buf.deinit(self.alloc);
self.alloc.free(self.hb_feats);
self.codepoints.deinit(self.alloc);
}
pub fn endFrame(self: *const Shaper) void {
@@ -135,33 +158,97 @@ pub const Shaper = struct {
// If it isn't true, I'd like to catch it and learn more.
assert(info.len == pos.len);
// This keeps track of the current offsets within a single cell.
var cell_offset: struct {
cluster: u32 = 0,
x: i32 = 0,
y: i32 = 0,
} = .{};
// This keeps track of the current x and y offsets (sum of advances)
// and the furthest cluster we've seen so far (max).
var run_offset: RunOffset = .{};
// This keeps track of the cell starting x and cluster.
var cell_offset: CellOffset = .{};
// Convert all our info/pos to cells and set it.
self.cell_buf.clearRetainingCapacity();
for (info, pos) |info_v, pos_v| {
// If our cluster changed then we've moved to a new cell.
if (info_v.cluster != cell_offset.cluster) cell_offset = .{
.cluster = info_v.cluster,
};
// info_v.cluster is the index into our codepoints array. We use it
// to get the original cluster.
const index = info_v.cluster;
// Our cluster is also our cell X position. If the cluster changes
// then we need to reset our current cell offsets.
const cluster = self.codepoints.items[index].cluster;
if (cell_offset.cluster != cluster) {
const is_after_glyph_from_current_or_next_clusters =
cluster <= run_offset.cluster;
try self.cell_buf.append(self.alloc, .{
.x = @intCast(info_v.cluster),
.x_offset = @intCast(cell_offset.x),
.y_offset = @intCast(cell_offset.y),
.glyph_index = info_v.codepoint,
});
const is_first_codepoint_in_cluster = blk: {
var i = index;
while (i > 0) {
i -= 1;
const codepoint = self.codepoints.items[i];
break :blk codepoint.cluster != cluster;
} else break :blk true;
};
// We need to reset the `cell_offset` at the start of a new
// cluster, but we do that conditionally if the codepoint
// `is_first_codepoint_in_cluster` and the cluster is not
// `is_after_glyph_from_current_or_next_clusters`, which is
// a heuristic to detect ligatures and avoid positioning
// glyphs that mark ligatures incorrectly. The idea is that
// if the first codepoint in a cluster doesn't appear in
// the stream, it's very likely that it combined with
// codepoints from a previous cluster into a ligature.
// Then, the subsequent codepoints are very likely marking
// glyphs that are placed relative to that ligature, so if
// we were to reset the `cell_offset` to align it with the
// grid, the positions would be off. The
// `!is_after_glyph_from_current_or_next_clusters` check is
// needed in case these marking glyphs come from a later
// cluster but are rendered first (see the Chakma and
// Bengali tests). In that case when we get to the
// codepoint that `is_first_codepoint_in_cluster`, but in a
// cluster that
// `is_after_glyph_from_current_or_next_clusters`, we don't
// want to reset to the grid and cause the positions to be
// off. (Note that we could go back and align the cells to
// the grid starting from the one from the cluster that
// rendered out of order, but that is more complicated so
// we don't do that for now. Also, it's TBD if there are
// exceptions to this heuristic for detecting ligatures,
// but using the logging below seems to show it works
// well.)
if (is_first_codepoint_in_cluster and
!is_after_glyph_from_current_or_next_clusters)
{
cell_offset = .{
.cluster = cluster,
.x = run_offset.x,
};
}
}
// Under both FreeType and CoreText the harfbuzz scale is
// in 26.6 fixed point units, so we round to the nearest
// whole value here.
cell_offset.x += (pos_v.x_advance + 0b100_000) >> 6;
cell_offset.y += (pos_v.y_advance + 0b100_000) >> 6;
const x_offset = run_offset.x - cell_offset.x + ((pos_v.x_offset + 0b100_000) >> 6);
const y_offset = run_offset.y + ((pos_v.y_offset + 0b100_000) >> 6);
// For debugging positions, turn this on:
//try self.debugPositions(run_offset, cell_offset, pos_v, index);
try self.cell_buf.append(self.alloc, .{
.x = @intCast(cell_offset.cluster),
.x_offset = @intCast(x_offset),
.y_offset = @intCast(y_offset),
.glyph_index = info_v.codepoint,
});
// Add our advances to keep track of our run offsets.
// Advances apply to the NEXT cell.
// Under both FreeType and CoreText the harfbuzz scale is
// in 26.6 fixed point units, so we round to the nearest
// whole value here.
run_offset.x += (pos_v.x_advance + 0b100_000) >> 6;
run_offset.y += (pos_v.y_advance + 0b100_000) >> 6;
run_offset.cluster = @max(run_offset.cluster, cluster);
// const i = self.cell_buf.items.len - 1;
// log.warn("i={} info={} pos={} cell={}", .{ i, info_v, pos_v, self.cell_buf.items[i] });
@@ -180,6 +267,13 @@ pub const Shaper = struct {
self.shaper.hb_buf.reset();
self.shaper.hb_buf.setContentType(.unicode);
// We set the cluster level to `characters` to give us the most
// granularity, matching the CoreText shaper, and allowing us
// to use our same ligature detection heuristics.
self.shaper.hb_buf.setClusterLevel(.characters);
self.shaper.codepoints.clearRetainingCapacity();
// We don't support RTL text because RTL in terminals is messy.
// Its something we want to improve. For now, we force LTR because
// our renderers assume a strictly increasing X value.
@@ -188,13 +282,156 @@ pub const Shaper = struct {
pub fn addCodepoint(self: RunIteratorHook, cp: u32, cluster: u32) !void {
// log.warn("cluster={} cp={x}", .{ cluster, cp });
self.shaper.hb_buf.add(cp, cluster);
// We pass the index into codepoints as the cluster value to HarfBuzz.
// After shaping, we use info.cluster to get back the index, which
// lets us look up the original cluster value from codepoints.
const index: u32 = @intCast(self.shaper.codepoints.items.len);
self.shaper.hb_buf.add(cp, index);
try self.shaper.codepoints.append(self.shaper.alloc, .{
.cluster = cluster,
.codepoint = cp,
});
}
/// Called when the run iterator finishes adding codepoints; asks HarfBuzz
/// to infer the buffer's segment properties (direction, script, language)
/// from its contents.
pub fn finalize(self: RunIteratorHook) void {
    self.shaper.hb_buf.guessSegmentProperties();
}
};
/// Debug-only helper that logs interesting positioning events while
/// converting HarfBuzz output to cells: when a glyph's position differs
/// from its accumulated advance, when the run's y offset is nonzero, and
/// when the glyph's original cluster differs from the current cell's
/// cluster (a potential ligature). Enabled by uncommenting the call site
/// in `shape`.
fn debugPositions(
    self: *Shaper,
    run_offset: RunOffset,
    cell_offset: CellOffset,
    pos_v: harfbuzz.GlyphPosition,
    index: u32,
) !void {
    // Mirror the offset math from the shape loop (26.6 fixed point,
    // rounded to the nearest whole pixel).
    const x_offset = run_offset.x - cell_offset.x + ((pos_v.x_offset + 0b100_000) >> 6);
    const y_offset = run_offset.y + ((pos_v.y_offset + 0b100_000) >> 6);

    // What the offsets would be from advances alone (no glyph offsets).
    const advance_x_offset = run_offset.x - cell_offset.x;
    const advance_y_offset = run_offset.y;
    const x_offset_diff = x_offset - advance_x_offset;
    const y_offset_diff = y_offset - advance_y_offset;
    const positions_differ = @abs(x_offset_diff) > 0 or @abs(y_offset_diff) > 0;
    const y_offset_differs = run_offset.y != 0;

    // Map the HarfBuzz cluster (an index into codepoints) back to the
    // original cluster value.
    const cluster = self.codepoints.items[index].cluster;
    const cluster_differs = cluster != cell_offset.cluster;

    // To debug every loop, flip this to true:
    const extra_debugging = false;

    // Whether the codepoint immediately before this one in the input
    // stream has the Prepend grapheme boundary class. Only computed when
    // we're going to log.
    const is_previous_codepoint_prepend = if (cluster_differs or
        extra_debugging)
    blk: {
        var i = index;
        while (i > 0) {
            i -= 1;
            const codepoint = self.codepoints.items[i];
            // Intentionally breaks on the first (previous) codepoint.
            break :blk unicode.table.get(@intCast(codepoint.codepoint)).grapheme_boundary_class == .prepend;
        }
        break :blk false;
    } else false;

    // Format the nearby codepoints (clusters within +/-1 of the current
    // window) as Python-style escapes followed by the rendered text.
    // Caller-owned slice, freed by the defer below.
    const formatted_cps: ?[]u8 = if (positions_differ or
        y_offset_differs or
        cluster_differs or
        extra_debugging)
    blk: {
        var allocating = std.Io.Writer.Allocating.init(self.alloc);
        defer allocating.deinit();
        const writer = &allocating.writer;
        const codepoints = self.codepoints.items;
        var last_cluster: ?u32 = null;
        for (codepoints, 0..) |cp, i| {
            if (@as(i32, @intCast(cp.cluster)) >= @as(i32, @intCast(cell_offset.cluster)) - 1 and
                cp.cluster <= cluster + 1)
            {
                if (last_cluster) |last| {
                    if (cp.cluster != last) {
                        try writer.writeAll(" ");
                    }
                }
                if (i == index) {
                    // NOTE(review): writes an empty string — possibly a
                    // marker character for the current codepoint that was
                    // lost in transit; confirm against upstream.
                    try writer.writeAll("");
                }
                // Using Python syntax for easier debugging
                if (cp.codepoint > 0xFFFF) {
                    try writer.print("\\U{x:0>8}", .{cp.codepoint});
                } else {
                    try writer.print("\\u{x:0>4}", .{cp.codepoint});
                }
                last_cluster = cp.cluster;
            }
        }
        // NOTE(review): also an empty write — likely a lost separator
        // between the escapes and the rendered text; confirm.
        try writer.writeAll("");
        for (codepoints) |cp| {
            if (@as(i32, @intCast(cp.cluster)) >= @as(i32, @intCast(cell_offset.cluster)) - 1 and
                cp.cluster <= cluster + 1)
            {
                try writer.print("{u}", .{@as(u21, @intCast(cp.codepoint))});
            }
        }
        break :blk try allocating.toOwnedSlice();
    } else null;
    defer if (formatted_cps) |cps| self.alloc.free(cps);

    if (extra_debugging) {
        log.warn("extra debugging of positions index={d} cell_offset.cluster={d} cluster={d} run_offset.cluster={d} diff={d} pos=({d},{d}) run_offset=({d},{d}) cell_offset.x={d} is_prev_prepend={} cps = {s}", .{
            index,
            cell_offset.cluster,
            cluster,
            run_offset.cluster,
            @as(isize, @intCast(cluster)) - @as(isize, @intCast(cell_offset.cluster)),
            x_offset,
            y_offset,
            run_offset.x,
            run_offset.y,
            cell_offset.x,
            is_previous_codepoint_prepend,
            formatted_cps.?,
        });
    }
    if (positions_differ) {
        log.warn("position differs from advance: cluster={d} pos=({d},{d}) adv=({d},{d}) diff=({d},{d}) cps = {s}", .{
            cluster,
            x_offset,
            y_offset,
            advance_x_offset,
            advance_y_offset,
            x_offset_diff,
            y_offset_diff,
            formatted_cps.?,
        });
    }
    if (y_offset_differs) {
        log.warn("run_offset.y differs from zero: cluster={d} pos=({d},{d}) run_offset=({d},{d}) cell_offset.x={d} cps = {s}", .{
            cluster,
            x_offset,
            y_offset,
            run_offset.x,
            run_offset.y,
            cell_offset.x,
            formatted_cps.?,
        });
    }
    if (cluster_differs) {
        log.warn("cell_offset.cluster differs from cluster (potential ligature detected) cell_offset.cluster={d} cluster={d} run_offset.cluster={d} diff={d} pos=({d},{d}) run_offset=({d},{d}) cell_offset.x={d} is_prev_prepend={} cps = {s}", .{
            cell_offset.cluster,
            cluster,
            run_offset.cluster,
            @as(isize, @intCast(cluster)) - @as(isize, @intCast(cell_offset.cluster)),
            x_offset,
            y_offset,
            run_offset.x,
            run_offset.y,
            cell_offset.x,
            is_previous_codepoint_prepend,
            formatted_cps.?,
        });
    }
}
};
test "run iterator" {
@@ -737,7 +974,7 @@ test "shape with empty cells in between" {
}
}
test "shape Chinese characters" {
test "shape Combining characters" {
const testing = std.testing;
const alloc = testing.allocator;
@@ -786,6 +1023,443 @@ test "shape Chinese characters" {
try testing.expectEqual(@as(usize, 1), count);
}
// This test exists because the string it uses causes HarfBuzz to output a
// non-monotonic run with our cluster level set to `characters`, which we
// need to handle by tracking the max cluster for the run.
test "shape Devanagari string" {
    const testing = std.testing;
    const gpa = testing.allocator;

    // Devanagari support is required. Arial Unicode MS ships with macOS;
    // when it can't be discovered, skip instead of failing.
    var td = testShaperWithDiscoveredFont(
        gpa,
        "Arial Unicode MS",
    ) catch return error.SkipZigTest;
    defer td.deinit();

    // Build a terminal containing the test string.
    var term = try terminal.Terminal.init(gpa, .{ .cols = 30, .rows = 3 });
    defer term.deinit(gpa);

    // Disable grapheme clustering
    term.modes.set(.grapheme_cluster, false);

    var stream = term.vtStream();
    defer stream.deinit();
    try stream.nextSlice("अपार्टमेंट");

    var render: terminal.RenderState = .empty;
    defer render.deinit(gpa);
    try render.update(gpa, &term);

    // Shape the single run produced by the first row.
    var shaper = &td.shaper;
    var iter = shaper.runIterator(.{
        .grid = td.grid,
        .cells = render.row_data.get(0).cells.slice(),
    });
    const run = try iter.next(gpa);
    try testing.expect(run != null);

    const cells = try shaper.shape(run.?);
    try testing.expectEqual(@as(usize, 8), cells.len);

    // Expected per-glyph cell X positions.
    const expected_x = [_]u16{ 0, 1, 2, 4, 4, 5, 5, 6 };
    for (expected_x, 0..) |x, i| {
        try testing.expectEqual(x, cells[i].x);
    }

    try testing.expect(try iter.next(gpa) == null);
}
// Verifies that a glyph's rendered position (x_offset) can differ from its
// accumulated advance: the Tai Tham vowel sign places the base glyph one
// cell to the right.
test "shape Tai Tham vowels (position differs from advance)" {
    // Note that while this test was necessary for CoreText, the old logic was
    // working for HarfBuzz. Still we keep it to ensure it has the correct
    // behavior.
    const testing = std.testing;
    const alloc = testing.allocator;

    // We need a font that supports Tai Tham for this to work, if we can't find
    // Noto Sans Tai Tham, which is a system font on macOS, we just skip the
    // test.
    var testdata = testShaperWithDiscoveredFont(
        alloc,
        "Noto Sans Tai Tham",
    ) catch return error.SkipZigTest;
    defer testdata.deinit();

    var buf: [32]u8 = undefined;
    var buf_idx: usize = 0;
    buf_idx += try std.unicode.utf8Encode(0x1a2F, buf[buf_idx..]); // ᨯ
    buf_idx += try std.unicode.utf8Encode(0x1a70, buf[buf_idx..]); // ᩰ

    // Make a screen with some data
    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
    defer t.deinit(alloc);

    // Enable grapheme clustering
    t.modes.set(.grapheme_cluster, true);

    var s = t.vtStream();
    defer s.deinit();
    try s.nextSlice(buf[0..buf_idx]);

    var state: terminal.RenderState = .empty;
    defer state.deinit(alloc);
    try state.update(alloc, &t);

    // Get our run iterator
    var shaper = &testdata.shaper;
    var it = shaper.runIterator(.{
        .grid = testdata.grid,
        .cells = state.row_data.get(0).cells.slice(),
    });
    var count: usize = 0;
    while (try it.next(alloc)) |run| {
        count += 1;
        const cells = try shaper.shape(run);
        try testing.expectEqual(@as(usize, 2), cells.len);
        try testing.expectEqual(@as(u16, 0), cells[0].x);
        try testing.expectEqual(@as(u16, 0), cells[1].x);

        // The first glyph renders in the next cell. We expect the x_offset
        // to equal the cell width. However, with FreeType the cell_width is
        // computed from ASCII glyphs, and Noto Sans Tai Tham only has the
        // space character in ASCII (with a 3px advance), so the cell_width
        // metric doesn't match the actual Tai Tham glyph positioning.
        const expected_x_offset: i16 = if (comptime font.options.backend.hasFreetype()) 7 else @intCast(run.grid.metrics.cell_width);
        try testing.expectEqual(expected_x_offset, cells[0].x_offset);
        try testing.expectEqual(@as(i16, 0), cells[1].x_offset);
    }
    try testing.expectEqual(@as(usize, 1), count);
}
test "shape Tibetan characters" {
    const testing = std.testing;
    const gpa = testing.allocator;

    // Reproducing the old broken behavior needs a font with multiple
    // glyphs for this codepoint, and Noto Serif Tibetan is one of them.
    // It's not a default Mac font, so skip the test when it's absent.
    var td = testShaperWithDiscoveredFont(
        gpa,
        "Noto Serif Tibetan",
    ) catch return error.SkipZigTest;
    defer td.deinit();

    // Encode the single test codepoint.
    var buf: [32]u8 = undefined;
    var buf_idx: usize = 0;
    buf_idx += try std.unicode.utf8Encode(0x0f00, buf[buf_idx..]); // ༀ

    // Terminal with grapheme clustering enabled.
    var term = try terminal.Terminal.init(gpa, .{ .cols = 30, .rows = 3 });
    defer term.deinit(gpa);
    term.modes.set(.grapheme_cluster, true);

    var stream = term.vtStream();
    defer stream.deinit();
    try stream.nextSlice(buf[0..buf_idx]);

    var render: terminal.RenderState = .empty;
    defer render.deinit(gpa);
    try render.update(gpa, &term);

    // Shape every run on the first row; exactly one is expected.
    var shaper = &td.shaper;
    var iter = shaper.runIterator(.{
        .grid = td.grid,
        .cells = render.row_data.get(0).cells.slice(),
    });
    var run_count: usize = 0;
    while (try iter.next(gpa)) |run| {
        run_count += 1;
        const cells = try shaper.shape(run);
        try testing.expectEqual(@as(usize, 2), cells.len);
        try testing.expectEqual(@as(u16, 0), cells[0].x);
        try testing.expectEqual(@as(u16, 0), cells[1].x);

        // The second glyph renders at the correct location
        try testing.expect(cells[1].x_offset < 2);
    }
    try testing.expectEqual(@as(usize, 1), run_count);
}
// Verifies vertical positioning: the U vowel of the second grapheme cluster
// combines below a ligature formed with the first cluster, so its cell must
// carry a negative y_offset (run_offset.y accumulates across the ligature).
test "shape Tai Tham letters (run_offset.y differs from zero)" {
    const testing = std.testing;
    const alloc = testing.allocator;

    // We need a font that supports Tai Tham for this to work, if we can't find
    // Noto Sans Tai Tham, which is a system font on macOS, we just skip the
    // test.
    var testdata = testShaperWithDiscoveredFont(
        alloc,
        "Noto Sans Tai Tham",
    ) catch return error.SkipZigTest;
    defer testdata.deinit();

    var buf: [32]u8 = undefined;
    var buf_idx: usize = 0;
    // First grapheme cluster:
    buf_idx += try std.unicode.utf8Encode(0x1a49, buf[buf_idx..]); // HA
    buf_idx += try std.unicode.utf8Encode(0x1a60, buf[buf_idx..]); // SAKOT
    // Second grapheme cluster, combining with the first in a ligature:
    buf_idx += try std.unicode.utf8Encode(0x1a3f, buf[buf_idx..]); // YA
    buf_idx += try std.unicode.utf8Encode(0x1a69, buf[buf_idx..]); // U

    // Make a screen with some data
    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
    defer t.deinit(alloc);

    // Enable grapheme clustering
    t.modes.set(.grapheme_cluster, true);

    var s = t.vtStream();
    defer s.deinit();
    try s.nextSlice(buf[0..buf_idx]);

    var state: terminal.RenderState = .empty;
    defer state.deinit(alloc);
    try state.update(alloc, &t);

    // Get our run iterator
    var shaper = &testdata.shaper;
    var it = shaper.runIterator(.{
        .grid = testdata.grid,
        .cells = state.row_data.get(0).cells.slice(),
    });
    var count: usize = 0;
    while (try it.next(alloc)) |run| {
        count += 1;
        const cells = try shaper.shape(run);
        try testing.expectEqual(@as(usize, 3), cells.len);
        try testing.expectEqual(@as(u16, 0), cells[0].x);
        try testing.expectEqual(@as(u16, 0), cells[1].x);
        try testing.expectEqual(@as(u16, 0), cells[2].x); // U from second grapheme

        // The U glyph renders at a y below zero
        try testing.expectEqual(@as(i16, -3), cells[2].y_offset);
    }
    try testing.expectEqual(@as(usize, 1), count);
}
// Verifies ligature-relative positioning: the SUKU vowel sign must be placed
// relative to the ligature formed across grapheme clusters (x_offset beyond
// three cell widths), not reset to the grid.
test "shape Javanese ligatures" {
    const testing = std.testing;
    const alloc = testing.allocator;

    // We need a font that supports Javanese for this to work, if we can't find
    // Noto Sans Javanese Regular, which is a system font on macOS, we just
    // skip the test.
    var testdata = testShaperWithDiscoveredFont(
        alloc,
        "Noto Sans Javanese",
    ) catch return error.SkipZigTest;
    defer testdata.deinit();

    var buf: [32]u8 = undefined;
    var buf_idx: usize = 0;
    // First grapheme cluster:
    buf_idx += try std.unicode.utf8Encode(0xa9a4, buf[buf_idx..]); // NA
    buf_idx += try std.unicode.utf8Encode(0xa9c0, buf[buf_idx..]); // PANGKON
    // Second grapheme cluster, combining with the first in a ligature:
    buf_idx += try std.unicode.utf8Encode(0xa9b2, buf[buf_idx..]); // HA
    buf_idx += try std.unicode.utf8Encode(0xa9b8, buf[buf_idx..]); // Vowel sign SUKU

    // Make a screen with some data
    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
    defer t.deinit(alloc);

    // Enable grapheme clustering
    t.modes.set(.grapheme_cluster, true);

    var s = t.vtStream();
    defer s.deinit();
    try s.nextSlice(buf[0..buf_idx]);

    var state: terminal.RenderState = .empty;
    defer state.deinit(alloc);
    try state.update(alloc, &t);

    // Get our run iterator
    var shaper = &testdata.shaper;
    var it = shaper.runIterator(.{
        .grid = testdata.grid,
        .cells = state.row_data.get(0).cells.slice(),
    });
    var count: usize = 0;
    while (try it.next(alloc)) |run| {
        count += 1;
        const cells = try shaper.shape(run);
        const cell_width = run.grid.metrics.cell_width;
        try testing.expectEqual(@as(usize, 3), cells.len);
        try testing.expectEqual(@as(u16, 0), cells[0].x);
        try testing.expectEqual(@as(u16, 0), cells[1].x);
        try testing.expectEqual(@as(u16, 0), cells[2].x);

        // The vowel sign SUKU renders with correct x_offset
        try testing.expect(cells[2].x_offset > 3 * cell_width);
    }
    try testing.expectEqual(@as(usize, 1), count);
}
// Verifies out-of-order cluster handling: the Vowel Sign U glyph is emitted
// by HarfBuzz before the TAA it logically follows, so the cells stay in the
// first cell rather than being re-aligned to the grid.
test "shape Chakma vowel sign with ligature (vowel sign renders first)" {
    const testing = std.testing;
    const alloc = testing.allocator;

    // We need a font that supports Chakma for this to work, if we can't find
    // Noto Sans Chakma Regular, which is a system font on macOS, we just skip
    // the test.
    var testdata = testShaperWithDiscoveredFont(
        alloc,
        "Noto Sans Chakma",
    ) catch return error.SkipZigTest;
    defer testdata.deinit();

    var buf: [32]u8 = undefined;
    var buf_idx: usize = 0;
    // First grapheme cluster:
    buf_idx += try std.unicode.utf8Encode(0x1111d, buf[buf_idx..]); // BAA
    // Second grapheme cluster:
    buf_idx += try std.unicode.utf8Encode(0x11116, buf[buf_idx..]); // TAA
    buf_idx += try std.unicode.utf8Encode(0x11133, buf[buf_idx..]); // Virama
    // Third grapheme cluster, combining with the second in a ligature:
    buf_idx += try std.unicode.utf8Encode(0x11120, buf[buf_idx..]); // YYAA
    buf_idx += try std.unicode.utf8Encode(0x1112c, buf[buf_idx..]); // Vowel Sign U

    // Make a screen with some data
    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
    defer t.deinit(alloc);

    // Enable grapheme clustering
    t.modes.set(.grapheme_cluster, true);

    var s = t.vtStream();
    defer s.deinit();
    try s.nextSlice(buf[0..buf_idx]);

    var state: terminal.RenderState = .empty;
    defer state.deinit(alloc);
    try state.update(alloc, &t);

    // Get our run iterator
    var shaper = &testdata.shaper;
    var it = shaper.runIterator(.{
        .grid = testdata.grid,
        .cells = state.row_data.get(0).cells.slice(),
    });
    var count: usize = 0;
    while (try it.next(alloc)) |run| {
        count += 1;
        const cells = try shaper.shape(run);
        try testing.expectEqual(@as(usize, 4), cells.len);
        try testing.expectEqual(@as(u16, 0), cells[0].x);

        // See the giant "We need to reset the `cell_offset`" comment, but here
        // we should technically have the rest of these be `x` of 1, but that
        // would require going back in the stream to adjust past cells, and
        // we don't take on that complexity.
        try testing.expectEqual(@as(u16, 0), cells[1].x);
        try testing.expectEqual(@as(u16, 0), cells[2].x);
        try testing.expectEqual(@as(u16, 0), cells[3].x);

        // The vowel sign U renders before the TAA:
        try testing.expect(cells[1].x_offset < cells[2].x_offset);
    }
    try testing.expectEqual(@as(usize, 1), count);
}
// Verifies out-of-order vowel rendering within a multi-cluster string: the
// Vowel Sign E renders before the SSA it logically follows, and the run
// splits into two cells (x=0 and x=1).
test "shape Bengali ligatures with out of order vowels" {
    // Whereas this test in CoreText had everything shaping into one giant
    // ligature, HarfBuzz splits it into a few clusters. It still looks okay
    // (see #10332).
    const testing = std.testing;
    const alloc = testing.allocator;

    // We need a font that supports Bengali for this to work, if we can't find
    // Arial Unicode MS, which is a system font on macOS, we just skip the
    // test.
    var testdata = testShaperWithDiscoveredFont(
        alloc,
        "Arial Unicode MS",
    ) catch return error.SkipZigTest;
    defer testdata.deinit();

    var buf: [32]u8 = undefined;
    var buf_idx: usize = 0;
    // First grapheme cluster:
    buf_idx += try std.unicode.utf8Encode(0x09b0, buf[buf_idx..]); // RA
    buf_idx += try std.unicode.utf8Encode(0x09be, buf[buf_idx..]); // Vowel sign AA
    // Second grapheme cluster:
    buf_idx += try std.unicode.utf8Encode(0x09b7, buf[buf_idx..]); // SSA
    buf_idx += try std.unicode.utf8Encode(0x09cd, buf[buf_idx..]); // Virama
    // Third grapheme cluster:
    buf_idx += try std.unicode.utf8Encode(0x099f, buf[buf_idx..]); // TTA
    buf_idx += try std.unicode.utf8Encode(0x09cd, buf[buf_idx..]); // Virama
    // Fourth grapheme cluster:
    buf_idx += try std.unicode.utf8Encode(0x09b0, buf[buf_idx..]); // RA
    buf_idx += try std.unicode.utf8Encode(0x09c7, buf[buf_idx..]); // Vowel sign E

    // Make a screen with some data
    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
    defer t.deinit(alloc);

    // Enable grapheme clustering
    t.modes.set(.grapheme_cluster, true);

    var s = t.vtStream();
    defer s.deinit();
    try s.nextSlice(buf[0..buf_idx]);

    var state: terminal.RenderState = .empty;
    defer state.deinit(alloc);
    try state.update(alloc, &t);

    // Get our run iterator
    var shaper = &testdata.shaper;
    var it = shaper.runIterator(.{
        .grid = testdata.grid,
        .cells = state.row_data.get(0).cells.slice(),
    });
    var count: usize = 0;
    while (try it.next(alloc)) |run| {
        count += 1;
        const cells = try shaper.shape(run);
        try testing.expectEqual(@as(usize, 8), cells.len);
        try testing.expectEqual(@as(u16, 0), cells[0].x);
        try testing.expectEqual(@as(u16, 0), cells[1].x);

        // Whereas CoreText puts everything all into the first cell (see the
        // corresponding test), HarfBuzz splits into two clusters.
        try testing.expectEqual(@as(u16, 1), cells[2].x);
        try testing.expectEqual(@as(u16, 1), cells[3].x);
        try testing.expectEqual(@as(u16, 1), cells[4].x);
        try testing.expectEqual(@as(u16, 1), cells[5].x);
        try testing.expectEqual(@as(u16, 1), cells[6].x);
        try testing.expectEqual(@as(u16, 1), cells[7].x);

        // The vowel sign E renders before the SSA:
        try testing.expect(cells[2].x_offset < cells[3].x_offset);
    }
    try testing.expectEqual(@as(usize, 1), count);
}
test "shape box glyphs" {
const testing = std.testing;
const alloc = testing.allocator;
@@ -1432,3 +2106,58 @@ fn testShaperWithFont(alloc: Allocator, font_req: TestFont) !TestShaper {
.lib = lib,
};
}
/// Builds a TestShaper backed by a system font discovered by family name.
///
/// Errors: returns error.SkipZigTest when font discovery is unavailable or
/// when the discovered face's name doesn't contain the requested family
/// (discovery may substitute a fallback font); error.FontNotFound when
/// discovery yields no face at all. Callers treat any error as a skip.
/// Caller owns the returned TestShaper and must call deinit.
fn testShaperWithDiscoveredFont(alloc: Allocator, font_req: [:0]const u8) !TestShaper {
    if (font.Discover == void) return error.SkipZigTest;

    var lib = try Library.init(alloc);
    errdefer lib.deinit();

    var c = Collection.init();
    c.load_options = .{ .library = lib };

    // Discover and add our font to the collection.
    {
        var disco = font.Discover.init();
        defer disco.deinit();
        var disco_it = try disco.discover(alloc, .{
            .family = font_req,
            .size = 12,
            .monospace = false,
        });
        defer disco_it.deinit();
        var face: font.DeferredFace = (try disco_it.next()) orelse return error.FontNotFound;
        // Runs on any error exit below, including the SkipZigTest return.
        errdefer face.deinit();

        // Check which font was discovered - skip if it doesn't match the request
        var name_buf: [256]u8 = undefined;
        const face_name = face.name(&name_buf) catch "(unknown)";
        if (std.mem.indexOf(u8, face_name, font_req) == null) {
            return error.SkipZigTest;
        }

        // NOTE(review): on success `face` is not deinit'd here — presumably
        // the loaded face's ownership moves into the collection via load/add;
        // confirm DeferredFace needs no cleanup after a successful load.
        _ = try c.add(
            alloc,
            try face.load(lib, .{ .size = .{ .points = 12 } }),
            .{
                .style = .regular,
                .fallback = false,
                .size_adjustment = .none,
            },
        );
    }

    // Heap-allocate the grid since TestShaper holds a pointer to it.
    const grid_ptr = try alloc.create(SharedGrid);
    errdefer alloc.destroy(grid_ptr);
    grid_ptr.* = try .init(alloc, .{ .collection = c });
    errdefer grid_ptr.*.deinit(alloc);

    var shaper = try Shaper.init(alloc, .{});
    errdefer shaper.deinit();

    return TestShaper{
        .alloc = alloc,
        .shaper = shaper,
        .grid = grid_ptr,
        .lib = lib,
    };
}