set cluster level to match CoreText logic

2026-04-19 14:00:29 +00:00 · 2026-01-16 09:25:50 -05:00
parent e48cf5a6e3
commit 80bf50be1d
2 changed files with 487 additions and 323 deletions
--- a/pkg/harfbuzz/buffer.zig
+++ b/pkg/harfbuzz/buffer.zig
@@ -238,6 +238,12 @@ pub const Buffer = struct {
    pub fn guessSegmentProperties(self: Buffer) void {
        c.hb_buffer_guess_segment_properties(self.handle);
    }
+
+    /// Sets the cluster level of a buffer. The `ClusterLevel` dictates one
+    /// aspect of how HarfBuzz will treat non-base characters during shaping.
+    pub fn setClusterLevel(self: Buffer, level: ClusterLevel) void {
+        c.hb_buffer_set_cluster_level(self.handle, @intFromEnum(level));
+    }
 };

 /// The type of hb_buffer_t contents.
@@ -252,6 +258,40 @@ pub const ContentType = enum(u2) {
    glyphs = c.HB_BUFFER_CONTENT_TYPE_GLYPHS,
 };

+/// Data type for holding HarfBuzz's clustering behavior options. The cluster
+/// level dictates one aspect of how HarfBuzz will treat non-base characters
+/// during shaping.
+pub const ClusterLevel = enum(u2) {
+    /// In `monotone_graphemes`, non-base characters are merged into the
+    /// cluster of the base character that precedes them. There is also cluster
+    /// merging every time the clusters will otherwise become non-monotone.
+    /// This is the default cluster level.
+    monotone_graphemes = c.HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES,
+
+    /// In `monotone_characters`, non-base characters are initially assigned
+    /// their own cluster values, which are not merged into preceding base
+    /// clusters. This allows HarfBuzz to perform additional operations like
+    /// reorder sequences of adjacent marks. The output is still monotone, but
+    /// the cluster values are more granular.
+    monotone_characters = c.HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS,
+
+    /// In `characters`, non-base characters are assigned their own cluster
+    /// values, which are not merged into preceding base clusters. Moreover,
+    /// the cluster values are not merged into monotone order. This is the most
+    /// granular cluster level, and it is useful for clients that need to know
+    /// the exact cluster values of each character, but is harder to use for
+    /// clients, since clusters might appear in any order.
+    characters = c.HB_BUFFER_CLUSTER_LEVEL_CHARACTERS,
+
+    /// In `graphemes`, non-base characters are merged into the cluster of the
+    /// base character that precedes them. This is similar to the Unicode
+    /// Grapheme Cluster algorithm, but it is not exactly the same. The output
+    /// is not forced to be monotone. This is useful for clients that want to
+    /// use HarfBuzz as a cheap implementation of the Unicode Grapheme Cluster
+    /// algorithm.
+    graphemes = c.HB_BUFFER_CLUSTER_LEVEL_GRAPHEMES,
+};
+
 /// The hb_glyph_info_t is the structure that holds information about the
 /// glyphs and their relation to input text.
 pub const GlyphInfo = extern struct {
--- a/src/font/shaper/harfbuzz.zig
+++ b/src/font/shaper/harfbuzz.zig
@@ -266,6 +266,12 @@ pub const Shaper = struct {
            // Reset the buffer for our current run
            self.shaper.hb_buf.reset();
            self.shaper.hb_buf.setContentType(.unicode);
+
+            // We set the cluster level to `characters` to give us the most
+            // granularity, matching the CoreText shaper, and allowing us
+            // to use our same ligature detection heuristics.
+            self.shaper.hb_buf.setClusterLevel(.characters);
+
            self.shaper.codepoints.clearRetainingCapacity();

            // We don't support RTL text because RTL in terminals is messy.
@@ -325,12 +331,13 @@ pub const Shaper = struct {
            break :blk false;
        } else false;

-        const formatted_cps = if (positions_differ or
+        const formatted_cps: ?[]u8 = if (positions_differ or
            y_offset_differs or
            cluster_differs or
            extra_debugging)
        blk: {
            var allocating = std.Io.Writer.Allocating.init(self.alloc);
+            defer allocating.deinit();
            const writer = &allocating.writer;
            const codepoints = self.codepoints.items;
            var last_cluster: ?u32 = null;
@@ -364,7 +371,8 @@ pub const Shaper = struct {
                }
            }
            break :blk try allocating.toOwnedSlice();
-        } else "";
+        } else null;
+        defer if (formatted_cps) |cps| self.alloc.free(cps);

        if (extra_debugging) {
            log.warn("extra debugging of positions index={d} cell_offset.cluster={d} cluster={d} run_offset.cluster={d} diff={d} pos=({d},{d}) run_offset=({d},{d}) cell_offset.x={d} is_prev_prepend={} cps = {s}", .{
@@ -379,7 +387,7 @@ pub const Shaper = struct {
                run_offset.y,
                cell_offset.x,
                is_previous_codepoint_prepend,
-                formatted_cps,
+                formatted_cps.?,
            });
        }

@@ -392,19 +400,19 @@ pub const Shaper = struct {
                advance_y_offset,
                x_offset_diff,
                y_offset_diff,
-                formatted_cps,
+                formatted_cps.?,
            });
        }

        if (y_offset_differs) {
-            log.warn("y_offset differs from zero: cluster={d} pos=({d},{d}) run_offset=({d},{d}) cell_offset.x={d} cps = {s}", .{
+            log.warn("run_offset.y differs from zero: cluster={d} pos=({d},{d}) run_offset=({d},{d}) cell_offset.x={d} cps = {s}", .{
                cluster,
                x_offset,
                y_offset,
                run_offset.x,
                run_offset.y,
                cell_offset.x,
-                formatted_cps,
+                formatted_cps.?,
            });
        }

@@ -420,7 +428,7 @@ pub const Shaper = struct {
                run_offset.y,
                cell_offset.x,
                is_previous_codepoint_prepend,
-                formatted_cps,
+                formatted_cps.?,
            });
        }
    }
@@ -966,7 +974,7 @@ test "shape with empty cells in between" {
    }
 }

-test "shape Chinese characters" {
+test "shape Combining characters" {
    const testing = std.testing;
    const alloc = testing.allocator;

@@ -1015,6 +1023,437 @@ test "shape Chinese characters" {
    try testing.expectEqual(@as(usize, 1), count);
 }

+// This test exists because the string it uses causes HarfBuzz to output a
+// non-monotonic run with our cluster level set to `characters`, which we need
+// to handle by tracking the max cluster for the run.
+test "shape Devanagari string" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    // We need a font that supports devanagari for this to work, if we can't
+    // find Arial Unicode MS, which is a system font on macOS, we just skip
+    // the test.
+    var testdata = testShaperWithDiscoveredFont(
+        alloc,
+        "Arial Unicode MS",
+    ) catch return error.SkipZigTest;
+    defer testdata.deinit();
+
+    // Make a screen with some data
+    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
+    defer t.deinit(alloc);
+
+    // Disable grapheme clustering
+    t.modes.set(.grapheme_cluster, false);
+
+    var s = t.vtStream();
+    defer s.deinit();
+    try s.nextSlice("अपार्टमेंट");
+
+    var state: terminal.RenderState = .empty;
+    defer state.deinit(alloc);
+    try state.update(alloc, &t);
+
+    // Get our run iterator
+    var shaper = &testdata.shaper;
+    var it = shaper.runIterator(.{
+        .grid = testdata.grid,
+        .cells = state.row_data.get(0).cells.slice(),
+    });
+
+    const run = try it.next(alloc);
+    try testing.expect(run != null);
+    const cells = try shaper.shape(run.?);
+
+    try testing.expectEqual(@as(usize, 8), cells.len);
+    try testing.expectEqual(@as(u16, 0), cells[0].x);
+    try testing.expectEqual(@as(u16, 1), cells[1].x);
+    try testing.expectEqual(@as(u16, 2), cells[2].x);
+    try testing.expectEqual(@as(u16, 3), cells[3].x);
+    try testing.expectEqual(@as(u16, 4), cells[4].x);
+    try testing.expectEqual(@as(u16, 5), cells[5].x);
+    try testing.expectEqual(@as(u16, 5), cells[6].x);
+    try testing.expectEqual(@as(u16, 6), cells[7].x);
+
+    try testing.expect(try it.next(alloc) == null);
+}
+
+test "shape Tai Tham vowels (position differs from advance)" {
+    // Note that while this test was necessary for CoreText, the old logic was
+    // working for HarfBuzz. Still we keep it to ensure it has the correct
+    // behavior.
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    // We need a font that supports Tai Tham for this to work, if we can't find
+    // Noto Sans Tai Tham, which is a system font on macOS, we just skip the
+    // test.
+    var testdata = testShaperWithDiscoveredFont(
+        alloc,
+        "Noto Sans Tai Tham",
+    ) catch return error.SkipZigTest;
+    defer testdata.deinit();
+
+    var buf: [32]u8 = undefined;
+    var buf_idx: usize = 0;
+    buf_idx += try std.unicode.utf8Encode(0x1a2F, buf[buf_idx..]); // ᨯ
+    buf_idx += try std.unicode.utf8Encode(0x1a70, buf[buf_idx..]); //  ᩰ
+
+    // Make a screen with some data
+    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
+    defer t.deinit(alloc);
+
+    // Enable grapheme clustering
+    t.modes.set(.grapheme_cluster, true);
+
+    var s = t.vtStream();
+    defer s.deinit();
+    try s.nextSlice(buf[0..buf_idx]);
+
+    var state: terminal.RenderState = .empty;
+    defer state.deinit(alloc);
+    try state.update(alloc, &t);
+
+    // Get our run iterator
+    var shaper = &testdata.shaper;
+    var it = shaper.runIterator(.{
+        .grid = testdata.grid,
+        .cells = state.row_data.get(0).cells.slice(),
+    });
+    var count: usize = 0;
+    while (try it.next(alloc)) |run| {
+        count += 1;
+
+        const cells = try shaper.shape(run);
+        const cell_width = run.grid.metrics.cell_width;
+        try testing.expectEqual(@as(usize, 2), cells.len);
+        try testing.expectEqual(@as(u16, 0), cells[0].x);
+        try testing.expectEqual(@as(u16, 0), cells[1].x);
+
+        // The first glyph renders in the next cell
+        try testing.expectEqual(@as(i16, @intCast(cell_width)), cells[0].x_offset);
+        try testing.expectEqual(@as(i16, 0), cells[1].x_offset);
+    }
+    try testing.expectEqual(@as(usize, 1), count);
+}
+
+test "shape Tibetan characters" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    // We need a font that has multiple glyphs for this codepoint to reproduce
+    // the old broken behavior, and Noto Serif Tibetan is one of them. It's not
+    // a default Mac font, and if we can't find it we just skip the test.
+    var testdata = testShaperWithDiscoveredFont(
+        alloc,
+        "Noto Serif Tibetan",
+    ) catch return error.SkipZigTest;
+    defer testdata.deinit();
+
+    var buf: [32]u8 = undefined;
+    var buf_idx: usize = 0;
+    buf_idx += try std.unicode.utf8Encode(0x0f00, buf[buf_idx..]); // ༀ
+
+    // Make a screen with some data
+    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
+    defer t.deinit(alloc);
+
+    // Enable grapheme clustering
+    t.modes.set(.grapheme_cluster, true);
+
+    var s = t.vtStream();
+    defer s.deinit();
+    try s.nextSlice(buf[0..buf_idx]);
+
+    var state: terminal.RenderState = .empty;
+    defer state.deinit(alloc);
+    try state.update(alloc, &t);
+
+    // Get our run iterator
+    var shaper = &testdata.shaper;
+    var it = shaper.runIterator(.{
+        .grid = testdata.grid,
+        .cells = state.row_data.get(0).cells.slice(),
+    });
+    var count: usize = 0;
+    while (try it.next(alloc)) |run| {
+        count += 1;
+
+        const cells = try shaper.shape(run);
+        try testing.expectEqual(@as(usize, 2), cells.len);
+        try testing.expectEqual(@as(u16, 0), cells[0].x);
+        try testing.expectEqual(@as(u16, 0), cells[1].x);
+
+        // The second glyph renders at the correct location
+        try testing.expect(cells[1].x_offset < 2);
+    }
+    try testing.expectEqual(@as(usize, 1), count);
+}
+
+test "shape Tai Tham letters (run_offset.y differs from zero)" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    // We need a font that supports Tai Tham for this to work, if we can't find
+    // Noto Sans Tai Tham, which is a system font on macOS, we just skip the
+    // test.
+    var testdata = testShaperWithDiscoveredFont(
+        alloc,
+        "Noto Sans Tai Tham",
+    ) catch return error.SkipZigTest;
+    defer testdata.deinit();
+
+    var buf: [32]u8 = undefined;
+    var buf_idx: usize = 0;
+
+    // First grapheme cluster:
+    buf_idx += try std.unicode.utf8Encode(0x1a48, buf[buf_idx..]); // MA
+    buf_idx += try std.unicode.utf8Encode(0x1a60, buf[buf_idx..]); // SAKOT
+    buf_idx += try std.unicode.utf8Encode(0x1a3f, buf[buf_idx..]); // LOW LA
+    buf_idx += try std.unicode.utf8Encode(0x1a75, buf[buf_idx..]); // Tone-1
+    // Second grapheme cluster:
+    buf_idx += try std.unicode.utf8Encode(0x1a41, buf[buf_idx..]); // HIGH PA
+
+    // Make a screen with some data
+    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
+    defer t.deinit(alloc);
+
+    // Enable grapheme clustering
+    t.modes.set(.grapheme_cluster, true);
+
+    var s = t.vtStream();
+    defer s.deinit();
+    try s.nextSlice(buf[0..buf_idx]);
+
+    var state: terminal.RenderState = .empty;
+    defer state.deinit(alloc);
+    try state.update(alloc, &t);
+
+    // Get our run iterator
+    var shaper = &testdata.shaper;
+    var it = shaper.runIterator(.{
+        .grid = testdata.grid,
+        .cells = state.row_data.get(0).cells.slice(),
+    });
+    var count: usize = 0;
+    while (try it.next(alloc)) |run| {
+        count += 1;
+
+        const cells = try shaper.shape(run);
+        try testing.expectEqual(@as(usize, 3), cells.len);
+        try testing.expectEqual(@as(u16, 0), cells[0].x);
+        try testing.expectEqual(@as(u16, 0), cells[1].x);
+        try testing.expectEqual(@as(u16, 0), cells[2].x); // U from second grapheme
+
+        // The U glyph renders at a y below zero
+        try testing.expectEqual(@as(i16, -3), cells[2].y_offset);
+    }
+    try testing.expectEqual(@as(usize, 1), count);
+}
+
+test "shape Javanese ligatures" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    // We need a font that supports Javanese for this to work, if we can't find
+    // Noto Sans Javanese Regular, which is a system font on macOS, we just
+    // skip the test.
+    var testdata = testShaperWithDiscoveredFont(
+        alloc,
+        "Noto Sans Javanese",
+    ) catch return error.SkipZigTest;
+    defer testdata.deinit();
+
+    var buf: [32]u8 = undefined;
+    var buf_idx: usize = 0;
+
+    // First grapheme cluster:
+    buf_idx += try std.unicode.utf8Encode(0xa9a4, buf[buf_idx..]); // NA
+    buf_idx += try std.unicode.utf8Encode(0xa9c0, buf[buf_idx..]); // PANGKON
+    // Second grapheme cluster, combining with the first in a ligature:
+    buf_idx += try std.unicode.utf8Encode(0xa9b2, buf[buf_idx..]); // HA
+    buf_idx += try std.unicode.utf8Encode(0xa9b8, buf[buf_idx..]); // Vowel sign SUKU
+
+    // Make a screen with some data
+    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
+    defer t.deinit(alloc);
+
+    // Enable grapheme clustering
+    t.modes.set(.grapheme_cluster, true);
+
+    var s = t.vtStream();
+    defer s.deinit();
+    try s.nextSlice(buf[0..buf_idx]);
+
+    var state: terminal.RenderState = .empty;
+    defer state.deinit(alloc);
+    try state.update(alloc, &t);
+
+    // Get our run iterator
+    var shaper = &testdata.shaper;
+    var it = shaper.runIterator(.{
+        .grid = testdata.grid,
+        .cells = state.row_data.get(0).cells.slice(),
+    });
+    var count: usize = 0;
+    while (try it.next(alloc)) |run| {
+        count += 1;
+
+        const cells = try shaper.shape(run);
+        const cell_width = run.grid.metrics.cell_width;
+        try testing.expectEqual(@as(usize, 3), cells.len);
+        try testing.expectEqual(@as(u16, 0), cells[0].x);
+        try testing.expectEqual(@as(u16, 0), cells[1].x);
+        try testing.expectEqual(@as(u16, 0), cells[2].x);
+
+        // The vowel sign SUKU renders with correct x_offset
+        try testing.expect(cells[2].x_offset > 3 * cell_width);
+    }
+    try testing.expectEqual(@as(usize, 1), count);
+}
+
+test "shape Chakma vowel sign with ligature (vowel sign renders first)" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    // We need a font that supports Chakma for this to work, if we can't find
+    // Noto Sans Chakma Regular, which is a system font on macOS, we just skip
+    // the test.
+    var testdata = testShaperWithDiscoveredFont(
+        alloc,
+        "Noto Sans Chakma",
+    ) catch return error.SkipZigTest;
+    defer testdata.deinit();
+
+    var buf: [32]u8 = undefined;
+    var buf_idx: usize = 0;
+
+    // First grapheme cluster:
+    buf_idx += try std.unicode.utf8Encode(0x1111d, buf[buf_idx..]); // BAA
+    // Second grapheme cluster:
+    buf_idx += try std.unicode.utf8Encode(0x11116, buf[buf_idx..]); // TAA
+    buf_idx += try std.unicode.utf8Encode(0x11133, buf[buf_idx..]); // Virama
+    // Third grapheme cluster, combining with the second in a ligature:
+    buf_idx += try std.unicode.utf8Encode(0x11120, buf[buf_idx..]); // YYAA
+    buf_idx += try std.unicode.utf8Encode(0x1112c, buf[buf_idx..]); // Vowel Sign U
+
+    // Make a screen with some data
+    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
+    defer t.deinit(alloc);
+
+    // Enable grapheme clustering
+    t.modes.set(.grapheme_cluster, true);
+
+    var s = t.vtStream();
+    defer s.deinit();
+    try s.nextSlice(buf[0..buf_idx]);
+
+    var state: terminal.RenderState = .empty;
+    defer state.deinit(alloc);
+    try state.update(alloc, &t);
+
+    // Get our run iterator
+    var shaper = &testdata.shaper;
+    var it = shaper.runIterator(.{
+        .grid = testdata.grid,
+        .cells = state.row_data.get(0).cells.slice(),
+    });
+    var count: usize = 0;
+    while (try it.next(alloc)) |run| {
+        count += 1;
+
+        const cells = try shaper.shape(run);
+        try testing.expectEqual(@as(usize, 4), cells.len);
+        try testing.expectEqual(@as(u16, 0), cells[0].x);
+        // See the giant "We need to reset the `cell_offset`" comment, but here
+        // we should technically have the rest of these be `x` of 1, but that
+        // would require going back in the stream to adjust past cells, and
+        // we don't take on that complexity.
+        try testing.expectEqual(@as(u16, 0), cells[1].x);
+        try testing.expectEqual(@as(u16, 0), cells[2].x);
+        try testing.expectEqual(@as(u16, 0), cells[3].x);
+
+        // The vowel sign U renders before the TAA:
+        try testing.expect(cells[1].x_offset < cells[2].x_offset);
+    }
+    try testing.expectEqual(@as(usize, 1), count);
+}
+
+test "shape Bengali ligatures with out of order vowels" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    // We need a font that supports Bengali for this to work, if we can't find
+    // Arial Unicode MS, which is a system font on macOS, we just skip the
+    // test.
+    var testdata = testShaperWithDiscoveredFont(
+        alloc,
+        "Arial Unicode MS",
+    ) catch return error.SkipZigTest;
+    defer testdata.deinit();
+
+    var buf: [32]u8 = undefined;
+    var buf_idx: usize = 0;
+
+    // First grapheme cluster:
+    buf_idx += try std.unicode.utf8Encode(0x09b0, buf[buf_idx..]); // RA
+    buf_idx += try std.unicode.utf8Encode(0x09be, buf[buf_idx..]); // Vowel sign AA
+    // Second grapheme cluster:
+    buf_idx += try std.unicode.utf8Encode(0x09b7, buf[buf_idx..]); // SSA
+    buf_idx += try std.unicode.utf8Encode(0x09cd, buf[buf_idx..]); // Virama
+    // Third grapheme cluster, combining with the second in a ligature:
+    buf_idx += try std.unicode.utf8Encode(0x099f, buf[buf_idx..]); // TTA
+    buf_idx += try std.unicode.utf8Encode(0x09cd, buf[buf_idx..]); // Virama
+    // Fourth grapheme cluster, combining with the previous two in a ligature:
+    buf_idx += try std.unicode.utf8Encode(0x09b0, buf[buf_idx..]); // RA
+    buf_idx += try std.unicode.utf8Encode(0x09c7, buf[buf_idx..]); // Vowel sign E
+
+    // Make a screen with some data
+    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
+    defer t.deinit(alloc);
+
+    // Enable grapheme clustering
+    t.modes.set(.grapheme_cluster, true);
+
+    var s = t.vtStream();
+    defer s.deinit();
+    try s.nextSlice(buf[0..buf_idx]);
+
+    var state: terminal.RenderState = .empty;
+    defer state.deinit(alloc);
+    try state.update(alloc, &t);
+
+    // Get our run iterator
+    var shaper = &testdata.shaper;
+    var it = shaper.runIterator(.{
+        .grid = testdata.grid,
+        .cells = state.row_data.get(0).cells.slice(),
+    });
+    var count: usize = 0;
+    while (try it.next(alloc)) |run| {
+        count += 1;
+
+        const cells = try shaper.shape(run);
+        try testing.expectEqual(@as(usize, 8), cells.len);
+        try testing.expectEqual(@as(u16, 0), cells[0].x);
+        try testing.expectEqual(@as(u16, 0), cells[1].x);
+        // See the giant "We need to reset the `cell_offset`" comment, but here
+        // we should technically have the rest of these be `x` of 1, but that
+        // would require going back in the stream to adjust past cells, and
+        // we don't take on that complexity.
+        try testing.expectEqual(@as(u16, 0), cells[2].x);
+        try testing.expectEqual(@as(u16, 0), cells[3].x);
+        try testing.expectEqual(@as(u16, 0), cells[4].x);
+        try testing.expectEqual(@as(u16, 0), cells[5].x);
+        try testing.expectEqual(@as(u16, 0), cells[6].x);
+        try testing.expectEqual(@as(u16, 0), cells[7].x);
+
+        // The vowel sign E renders before the SSA:
+        try testing.expect(cells[2].x_offset < cells[3].x_offset);
+    }
+    try testing.expectEqual(@as(usize, 1), count);
+}
+
 test "shape box glyphs" {
    const testing = std.testing;
    const alloc = testing.allocator;
@@ -1707,318 +2146,3 @@ fn testShaperWithDiscoveredFont(alloc: Allocator, font_req: [:0]const u8) !TestS
        .lib = lib,
    };
 }
-
-test "shape Tai Tham vowels (y_offset differs from zero)" {
-    const testing = std.testing;
-    const alloc = testing.allocator;
-
-    // We need a font that supports Tai Tham for this to work, if we can't find
-    // Noto Sans Tai Tham, we just skip the test.
-    var testdata = testShaperWithDiscoveredFont(
-        alloc,
-        "Noto Sans Tai Tham",
-    ) catch return error.SkipZigTest;
-    defer testdata.deinit();
-
-    var buf: [32]u8 = undefined;
-    var buf_idx: usize = 0;
-    buf_idx += try std.unicode.utf8Encode(0x1a2F, buf[buf_idx..]); // ᨯ
-    buf_idx += try std.unicode.utf8Encode(0x1a70, buf[buf_idx..]); //  ᩰ
-
-    // Make a screen with some data
-    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
-    defer t.deinit(alloc);
-
-    // Enable grapheme clustering
-    t.modes.set(.grapheme_cluster, true);
-
-    var s = t.vtStream();
-    defer s.deinit();
-    try s.nextSlice(buf[0..buf_idx]);
-
-    var state: terminal.RenderState = .empty;
-    defer state.deinit(alloc);
-    try state.update(alloc, &t);
-
-    // Get our run iterator
-    var shaper = &testdata.shaper;
-    var it = shaper.runIterator(.{
-        .grid = testdata.grid,
-        .cells = state.row_data.get(0).cells.slice(),
-    });
-    var count: usize = 0;
-    while (try it.next(alloc)) |run| {
-        count += 1;
-
-        const cells = try shaper.shape(run);
-        const cell_width = run.grid.metrics.cell_width;
-        try testing.expectEqual(@as(usize, 2), cells.len);
-        try testing.expectEqual(@as(u16, 0), cells[0].x);
-        try testing.expectEqual(@as(u16, 0), cells[1].x);
-
-        // The first glyph renders in the next cell
-        try testing.expectEqual(@as(i16, @intCast(cell_width)), cells[0].x_offset);
-        try testing.expectEqual(@as(i16, 0), cells[1].x_offset);
-    }
-    try testing.expectEqual(@as(usize, 1), count);
-}
-
-test "shape Tai Tham letters (y_offset differs from zero)" {
-    const testing = std.testing;
-    const alloc = testing.allocator;
-
-    // We need a font that supports Tai Tham for this to work, if we can't find
-    // Noto Sans Tai Tham, we just skip the test.
-    var testdata = testShaperWithDiscoveredFont(
-        alloc,
-        "Noto Sans Tai Tham",
-    ) catch return error.SkipZigTest;
-    defer testdata.deinit();
-
-    var buf: [32]u8 = undefined;
-    var buf_idx: usize = 0;
-
-    // First grapheme cluster:
-    buf_idx += try std.unicode.utf8Encode(0x1a48, buf[buf_idx..]); // MA
-    buf_idx += try std.unicode.utf8Encode(0x1a60, buf[buf_idx..]); // SAKOT
-    buf_idx += try std.unicode.utf8Encode(0x1a3f, buf[buf_idx..]); // LOW LA
-    buf_idx += try std.unicode.utf8Encode(0x1a75, buf[buf_idx..]); // Tone-1
-    // Second grapheme cluster:
-    buf_idx += try std.unicode.utf8Encode(0x1a41, buf[buf_idx..]); // HIGH PA
-
-    // Make a screen with some data
-    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
-    defer t.deinit(alloc);
-
-    // Enable grapheme clustering
-    t.modes.set(.grapheme_cluster, true);
-
-    var s = t.vtStream();
-    defer s.deinit();
-    try s.nextSlice(buf[0..buf_idx]);
-
-    var state: terminal.RenderState = .empty;
-    defer state.deinit(alloc);
-    try state.update(alloc, &t);
-
-    // Get our run iterator
-    var shaper = &testdata.shaper;
-    var it = shaper.runIterator(.{
-        .grid = testdata.grid,
-        .cells = state.row_data.get(0).cells.slice(),
-    });
-    var count: usize = 0;
-    while (try it.next(alloc)) |run| {
-        count += 1;
-
-        const cells = try shaper.shape(run);
-        try testing.expectEqual(@as(usize, 3), cells.len);
-        try testing.expectEqual(@as(u16, 0), cells[0].x);
-        try testing.expectEqual(@as(u16, 0), cells[1].x);
-        try testing.expectEqual(@as(u16, 0), cells[2].x); // U from second grapheme
-
-        // The U glyph renders at a y below zero
-        try testing.expectEqual(@as(i16, -3), cells[2].y_offset);
-    }
-    try testing.expectEqual(@as(usize, 1), count);
-}
-
-test "shape Javanese ligatures" {
-    const testing = std.testing;
-    const alloc = testing.allocator;
-
-    // We need a font that supports Javanese for this to work, if we can't find
-    // Noto Sans Javanese Regular, we just skip the test.
-    var testdata = testShaperWithDiscoveredFont(
-        alloc,
-        "Noto Sans Javanese",
-    ) catch return error.SkipZigTest;
-    defer testdata.deinit();
-
-    var buf: [32]u8 = undefined;
-    var buf_idx: usize = 0;
-
-    // First grapheme cluster:
-    buf_idx += try std.unicode.utf8Encode(0xa9a4, buf[buf_idx..]); // NA
-    buf_idx += try std.unicode.utf8Encode(0xa9c0, buf[buf_idx..]); // PANGKON
-    // Second grapheme cluster, combining with the first in a ligature:
-    buf_idx += try std.unicode.utf8Encode(0xa9b2, buf[buf_idx..]); // HA
-    buf_idx += try std.unicode.utf8Encode(0xa9b8, buf[buf_idx..]); // Vowel sign SUKU
-
-    // Make a screen with some data
-    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
-    defer t.deinit(alloc);
-
-    // Enable grapheme clustering
-    t.modes.set(.grapheme_cluster, true);
-
-    var s = t.vtStream();
-    defer s.deinit();
-    try s.nextSlice(buf[0..buf_idx]);
-
-    var state: terminal.RenderState = .empty;
-    defer state.deinit(alloc);
-    try state.update(alloc, &t);
-
-    // Get our run iterator
-    var shaper = &testdata.shaper;
-    var it = shaper.runIterator(.{
-        .grid = testdata.grid,
-        .cells = state.row_data.get(0).cells.slice(),
-    });
-    var count: usize = 0;
-    while (try it.next(alloc)) |run| {
-        count += 1;
-
-        const cells = try shaper.shape(run);
-        const cell_width = run.grid.metrics.cell_width;
-        try testing.expectEqual(@as(usize, 3), cells.len);
-        try testing.expectEqual(@as(u16, 0), cells[0].x);
-        try testing.expectEqual(@as(u16, 0), cells[1].x);
-        try testing.expectEqual(@as(u16, 0), cells[2].x);
-
-        // The vowel sign SUKU renders with correct x_offset
-        try testing.expect(cells[2].x_offset > 3 * cell_width);
-    }
-    try testing.expectEqual(@as(usize, 1), count);
-}
-
-test "shape Chakma vowel sign with ligature (vowel sign renders first)" {
-    const testing = std.testing;
-    const alloc = testing.allocator;
-
-    // We need a font that supports Chakma for this to work, if we can't find
-    // Noto Sans Chakma Regular, we just skip the test.
-    var testdata = testShaperWithDiscoveredFont(
-        alloc,
-        "Noto Sans Chakma",
-    ) catch return error.SkipZigTest;
-    defer testdata.deinit();
-
-    var buf: [32]u8 = undefined;
-    var buf_idx: usize = 0;
-
-    // First grapheme cluster:
-    buf_idx += try std.unicode.utf8Encode(0x1111d, buf[buf_idx..]); // BAA
-    // Second grapheme cluster:
-    buf_idx += try std.unicode.utf8Encode(0x11116, buf[buf_idx..]); // TAA
-    buf_idx += try std.unicode.utf8Encode(0x11133, buf[buf_idx..]); // Virama
-    // Third grapheme cluster, combining with the second in a ligature:
-    buf_idx += try std.unicode.utf8Encode(0x11120, buf[buf_idx..]); // YYAA
-    buf_idx += try std.unicode.utf8Encode(0x1112c, buf[buf_idx..]); // Vowel Sign U
-
-    // Make a screen with some data
-    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
-    defer t.deinit(alloc);
-
-    // Enable grapheme clustering
-    t.modes.set(.grapheme_cluster, true);
-
-    var s = t.vtStream();
-    defer s.deinit();
-    try s.nextSlice(buf[0..buf_idx]);
-
-    var state: terminal.RenderState = .empty;
-    defer state.deinit(alloc);
-    try state.update(alloc, &t);
-
-    // Get our run iterator
-    var shaper = &testdata.shaper;
-    var it = shaper.runIterator(.{
-        .grid = testdata.grid,
-        .cells = state.row_data.get(0).cells.slice(),
-    });
-    var count: usize = 0;
-    while (try it.next(alloc)) |run| {
-        count += 1;
-
-        const cells = try shaper.shape(run);
-        try testing.expectEqual(@as(usize, 4), cells.len);
-        try testing.expectEqual(@as(u16, 0), cells[0].x);
-        // See the giant "We need to reset the `cell_offset`" comment, but here
-        // we should technically have the rest of these be `x` of 1, but that
-        // would require going back in the stream to adjust past cells, and
-        // we don't take on that complexity.
-        try testing.expectEqual(@as(u16, 0), cells[1].x);
-        try testing.expectEqual(@as(u16, 0), cells[2].x);
-        try testing.expectEqual(@as(u16, 0), cells[3].x);
-
-        // The vowel sign U renders before the TAA:
-        try testing.expect(cells[1].x_offset < cells[2].x_offset);
-    }
-    try testing.expectEqual(@as(usize, 1), count);
-}
-
-test "shape Bengali ligatures with out of order vowels" {
-    const testing = std.testing;
-    const alloc = testing.allocator;
-
-    // We need a font that supports Bengali for this to work, if we can't find
-    // Arial Unicode MS, we just skip the test.
-    var testdata = testShaperWithDiscoveredFont(
-        alloc,
-        "Arial Unicode MS",
-    ) catch return error.SkipZigTest;
-    defer testdata.deinit();
-
-    var buf: [32]u8 = undefined;
-    var buf_idx: usize = 0;
-
-    // First grapheme cluster:
-    buf_idx += try std.unicode.utf8Encode(0x09b0, buf[buf_idx..]); // RA
-    buf_idx += try std.unicode.utf8Encode(0x09be, buf[buf_idx..]); // Vowel sign AA
-    // Second grapheme cluster:
-    buf_idx += try std.unicode.utf8Encode(0x09b7, buf[buf_idx..]); // SSA
-    buf_idx += try std.unicode.utf8Encode(0x09cd, buf[buf_idx..]); // Virama
-    // Third grapheme cluster, combining with the second in a ligature:
-    buf_idx += try std.unicode.utf8Encode(0x099f, buf[buf_idx..]); // TTA
-    buf_idx += try std.unicode.utf8Encode(0x09cd, buf[buf_idx..]); // Virama
-    // Fourth grapheme cluster, combining with the previous two in a ligature:
-    buf_idx += try std.unicode.utf8Encode(0x09b0, buf[buf_idx..]); // RA
-    buf_idx += try std.unicode.utf8Encode(0x09c7, buf[buf_idx..]); // Vowel sign E
-
-    // Make a screen with some data
-    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
-    defer t.deinit(alloc);
-
-    // Enable grapheme clustering
-    t.modes.set(.grapheme_cluster, true);
-
-    var s = t.vtStream();
-    defer s.deinit();
-    try s.nextSlice(buf[0..buf_idx]);
-
-    var state: terminal.RenderState = .empty;
-    defer state.deinit(alloc);
-    try state.update(alloc, &t);
-
-    // Get our run iterator
-    var shaper = &testdata.shaper;
-    var it = shaper.runIterator(.{
-        .grid = testdata.grid,
-        .cells = state.row_data.get(0).cells.slice(),
-    });
-    var count: usize = 0;
-    while (try it.next(alloc)) |run| {
-        count += 1;
-
-        const cells = try shaper.shape(run);
-        try testing.expectEqual(@as(usize, 8), cells.len);
-        try testing.expectEqual(@as(u16, 0), cells[0].x);
-        try testing.expectEqual(@as(u16, 0), cells[1].x);
-        // See the giant "We need to reset the `cell_offset`" comment, but here
-        // we should technically have the rest of these be `x` of 1, but that
-        // would require going back in the stream to adjust past cells, and
-        // we don't take on that complexity.
-        try testing.expectEqual(@as(u16, 0), cells[2].x);
-        try testing.expectEqual(@as(u16, 0), cells[3].x);
-        try testing.expectEqual(@as(u16, 0), cells[4].x);
-        try testing.expectEqual(@as(u16, 0), cells[5].x);
-        try testing.expectEqual(@as(u16, 0), cells[6].x);
-        try testing.expectEqual(@as(u16, 0), cells[7].x);
-
-        // The vowel sign E renders before the SSA:
-        try testing.expect(cells[2].x_offset < cells[3].x_offset);
-    }
-    try testing.expectEqual(@as(usize, 1), count);
-}