Merge remote-tracking branch 'upstream/main' into harfbuzz-positions

2026-07-27 11:06:31 +00:00 · 2026-01-16 09:05:04 -05:00
parent 2757d320c9 26e243a919
commit e48cf5a6e3
8 changed files with 703 additions and 70 deletions
--- a/src/font/shaper/coretext.zig
+++ b/src/font/shaper/coretext.zig
@@ -6,6 +6,7 @@ const macos = @import("macos");
 const font = @import("../main.zig");
 const os = @import("../../os/main.zig");
 const terminal = @import("../../terminal/main.zig");
+const unicode = @import("../../unicode/main.zig");
 const Feature = font.shape.Feature;
 const FeatureList = font.shape.FeatureList;
 const default_features = font.shape.default_features;
@@ -103,7 +104,7 @@ pub const Shaper = struct {
        }
    };

-    const CellOffset = struct {
+    const Offset = struct {
        cluster: u32 = 0,
        x: f64 = 0,
    };
@@ -382,11 +383,12 @@ pub const Shaper = struct {
        const line = typesetter.createLine(.{ .location = 0, .length = 0 });
        self.cf_release_pool.appendAssumeCapacity(line);

-        // This keeps track of the current x offset (sum of advance.width)
-        var run_offset_x: f64 = 0.0;
+        // This keeps track of the current x offset (sum of advance.width) and
+        // the furthest cluster we've seen so far (max).
+        var run_offset: Offset = .{};

        // This keeps track of the cell starting x and cluster.
-        var cell_offset: CellOffset = .{};
+        var cell_offset: Offset = .{};

        // For debugging positions, turn this on:
        //var run_offset_y: f64 = 0.0;
@@ -410,8 +412,8 @@ pub const Shaper = struct {
        // other so we can iterate over them and just append to our
        // cell buffer.
        const runs = line.getGlyphRuns();
-        for (0..runs.getCount()) |i| {
-            const ctrun = runs.getValueAtIndex(macos.text.Run, i);
+        for (0..runs.getCount()) |run_i| {
+            const ctrun = runs.getValueAtIndex(macos.text.Run, run_i);

            const status = ctrun.getStatus();
            if (status.non_monotonic or status.right_to_left) non_ltr = true;
@@ -434,30 +436,75 @@ pub const Shaper = struct {
                // Our cluster is also our cell X position. If the cluster changes
                // then we need to reset our current cell offsets.
                const cluster = state.codepoints.items[index].cluster;
-                if (cell_offset.cluster != cluster) pad: {
-                    // We previously asserted this but for rtl text this is
-                    // not true. So we check for this and break out. In the
-                    // future we probably need to reverse pad for rtl but
-                    // I don't have a solid test case for this yet so let's
-                    // wait for that.
-                    if (cell_offset.cluster > cluster) break :pad;
+                if (cell_offset.cluster != cluster) {
+                    // We previously asserted that the new cluster is greater
+                    // than cell_offset.cluster, but this isn't always true.
+                    // See e.g. the "shape Chakma vowel sign with ligature
+                    // (vowel sign renders first)" test.

-                    cell_offset = .{
-                        .cluster = cluster,
-                        .x = run_offset_x,
+                    const is_after_glyph_from_current_or_next_clusters =
+                        cluster <= run_offset.cluster;
+
+                    const is_first_codepoint_in_cluster = blk: {
+                        var i = index;
+                        while (i > 0) {
+                            i -= 1;
+                            const codepoint = state.codepoints.items[i];
+
+                            // Skip surrogate pair padding
+                            if (codepoint.codepoint == 0) continue;
+                            break :blk codepoint.cluster != cluster;
+                        } else break :blk true;
                    };

-                    // For debugging positions, turn this on:
-                    //cell_offset_y = run_offset_y;
+                    // We need to reset the `cell_offset` at the start of a new
+                    // cluster, but we only do that if the codepoint
+                    // `is_first_codepoint_in_cluster` and the cluster is not
+                    // `is_after_glyph_from_current_or_next_clusters`. This is
+                    // a heuristic to detect ligatures and avoid mispositioning
+                    // the glyphs that mark them. The idea is that
+                    // if the first codepoint in a cluster doesn't appear in
+                    // the stream, it's very likely that it combined with
+                    // codepoints from a previous cluster into a ligature.
+                    // Then, the subsequent codepoints are very likely marking
+                    // glyphs that are placed relative to that ligature, so if
+                    // we were to reset the `cell_offset` to align it with the
+                    // grid, the positions would be off. The
+                    // `!is_after_glyph_from_current_or_next_clusters` check is
+                    // needed in case these marking glyphs come from a later
+                    // cluster but are rendered first (see the Chakma and
+                    // Bengali tests). In that case, when we get to a
+                    // codepoint that `is_first_codepoint_in_cluster` but
+                    // whose cluster
+                    // `is_after_glyph_from_current_or_next_clusters`, we don't
+                    // want to reset to the grid and cause the positions to be
+                    // off. (Note that we could go back and align the cells to
+                    // the grid starting from the one from the cluster that
+                    // rendered out of order, but that is more complicated so
+                    // we don't do that for now. Also, it's TBD if there are
+                    // exceptions to this heuristic for detecting ligatures,
+                    // but using the logging below seems to show it works
+                    // well.)
+                    if (is_first_codepoint_in_cluster and
+                        !is_after_glyph_from_current_or_next_clusters)
+                    {
+                        cell_offset = .{
+                            .cluster = cluster,
+                            .x = run_offset.x,
+                        };
+
+                        // For debugging positions, turn this on:
+                        //cell_offset_y = run_offset_y;
+                    }
                }

                // For debugging positions, turn this on:
-                //try self.debugPositions(alloc, run_offset_x, run_offset_y, cell_offset, cell_offset_y, position, index);
+                //try self.debugPositions(alloc, run_offset, run_offset_y, cell_offset, cell_offset_y, position, index);

                const x_offset = position.x - cell_offset.x;

                self.cell_buf.appendAssumeCapacity(.{
-                    .x = @intCast(cluster),
+                    .x = @intCast(cell_offset.cluster),
                    .x_offset = @intFromFloat(@round(x_offset)),
                    .y_offset = @intFromFloat(@round(position.y)),
                    .glyph_index = glyph,
@@ -465,7 +512,8 @@ pub const Shaper = struct {

                // Add our advances to keep track of our run offsets.
                // Advances apply to the NEXT cell.
-                run_offset_x += advance.width;
+                run_offset.x += advance.width;
+                run_offset.cluster = @max(run_offset.cluster, cluster);

                // For debugging positions, turn this on:
                //run_offset_y += advance.height;
@@ -641,31 +689,56 @@ pub const Shaper = struct {
    fn debugPositions(
        self: *Shaper,
        alloc: Allocator,
-        run_offset_x: f64,
+        run_offset: Offset,
        run_offset_y: f64,
-        cell_offset: CellOffset,
+        cell_offset: Offset,
        cell_offset_y: f64,
        position: macos.graphics.Point,
        index: usize,
    ) !void {
        const state = &self.run_state;
        const x_offset = position.x - cell_offset.x;
-        const advance_x_offset = run_offset_x - cell_offset.x;
+        const advance_x_offset = run_offset.x - cell_offset.x;
        const advance_y_offset = run_offset_y - cell_offset_y;
        const x_offset_diff = x_offset - advance_x_offset;
        const y_offset_diff = position.y - advance_y_offset;
        const positions_differ = @abs(x_offset_diff) > 0.0001 or @abs(y_offset_diff) > 0.0001;
        const old_offset_y = position.y - cell_offset_y;
        const position_y_differs = @abs(cell_offset_y) > 0.0001;
+        const cluster = state.codepoints.items[index].cluster;
+        const cluster_differs = cluster != cell_offset.cluster;

-        if (positions_differ or position_y_differs) {
+        // To debug every loop, flip this to true:
+        const extra_debugging = false;
+
+        const is_previous_codepoint_prepend = if (cluster_differs or
+            extra_debugging)
+        blk: {
+            var i = index;
+            while (i > 0) {
+                i -= 1;
+                const codepoint = state.codepoints.items[i];
+
+                // Skip surrogate pair padding
+                if (codepoint.codepoint == 0) continue;
+
+                break :blk unicode.table.get(@intCast(codepoint.codepoint)).grapheme_boundary_class == .prepend;
+            }
+            break :blk false;
+        } else false;
+
+        const formatted_cps = if (positions_differ or
+            position_y_differs or
+            cluster_differs or
+            extra_debugging)
+        blk: {
            var allocating = std.Io.Writer.Allocating.init(alloc);
            const writer = &allocating.writer;
            const codepoints = state.codepoints.items;
-            const current_cp = state.codepoints.items[index].codepoint;
            var last_cluster: ?u32 = null;
-            for (codepoints) |cp| {
-                if ((cp.cluster == cell_offset.cluster or cp.cluster == cell_offset.cluster - 1 or cp.cluster == cell_offset.cluster + 1) and
+            for (codepoints, 0..) |cp, i| {
+                if ((@as(i32, @intCast(cp.cluster)) >= @as(i32, @intCast(cell_offset.cluster)) - 1 and
+                    cp.cluster <= cluster + 1) and
                    cp.codepoint != 0 // Skip surrogate pair padding
                ) {
                    if (last_cluster) |last| {
@@ -673,49 +746,90 @@ pub const Shaper = struct {
                            try writer.writeAll(" ");
                        }
                    }
-                    if (cp.cluster == cell_offset.cluster and cp.codepoint == current_cp) {
+                    if (i == index) {
                        try writer.writeAll("▸");
                    }
-                    try writer.print("\\u{{{x}}}", .{cp.codepoint});
+                    // Use Python escape syntax (\uXXXX / \UXXXXXXXX) for easier debugging
+                    if (cp.codepoint > 0xFFFF) {
+                        try writer.print("\\U{x:0>8}", .{cp.codepoint});
+                    } else {
+                        try writer.print("\\u{x:0>4}", .{cp.codepoint});
+                    }
                    last_cluster = cp.cluster;
                }
            }
            try writer.writeAll(" → ");
            for (codepoints) |cp| {
-                if ((cp.cluster == cell_offset.cluster or cp.cluster == cell_offset.cluster - 1 or cp.cluster == cell_offset.cluster + 1) and
+                if ((@as(i32, @intCast(cp.cluster)) >= @as(i32, @intCast(cell_offset.cluster)) - 1 and
+                    cp.cluster <= cluster + 1) and
                    cp.codepoint != 0 // Skip surrogate pair padding
                ) {
                    try writer.print("{u}", .{@as(u21, @intCast(cp.codepoint))});
                }
            }
-            const formatted_cps = try allocating.toOwnedSlice();
+            break :blk try allocating.toOwnedSlice();
+        } else "";

-            if (positions_differ) {
-                log.warn("position differs from advance: cluster={d} pos=({d:.2},{d:.2}) adv=({d:.2},{d:.2}) diff=({d:.2},{d:.2}) cps = {s}", .{
-                    cell_offset.cluster,
-                    x_offset,
-                    position.y,
-                    advance_x_offset,
-                    advance_y_offset,
-                    x_offset_diff,
-                    y_offset_diff,
-                    formatted_cps,
-                });
-            }
+        if (extra_debugging) {
+            log.warn("extra debugging of positions index={d} cell_offset.cluster={d} cluster={d} run_offset.cluster={d} diff={d} pos=({d:.2},{d:.2}) run_offset=({d:.2},{d:.2}) cell_offset=({d:.2},{d:.2}) is_prev_prepend={} cps = {s}", .{
+                index,
+                cell_offset.cluster,
+                cluster,
+                run_offset.cluster,
+                @as(isize, @intCast(cluster)) - @as(isize, @intCast(cell_offset.cluster)),
+                x_offset,
+                position.y,
+                run_offset.x,
+                run_offset_y,
+                cell_offset.x,
+                cell_offset_y,
+                is_previous_codepoint_prepend,
+                formatted_cps,
+            });
+        }

-            if (position_y_differs) {
-                log.warn("position.y differs from old offset.y: cluster={d} pos=({d:.2},{d:.2}) run_offset=({d:.2},{d:.2}) cell_offset=({d:.2},{d:.2}) old offset.y={d:.2} cps = {s}", .{
-                    cell_offset.cluster,
-                    x_offset,
-                    position.y,
-                    run_offset_x,
-                    run_offset_y,
-                    cell_offset.x,
-                    cell_offset_y,
-                    old_offset_y,
-                    formatted_cps,
-                });
-            }
+        if (positions_differ) {
+            log.warn("position differs from advance: cluster={d} pos=({d:.2},{d:.2}) adv=({d:.2},{d:.2}) diff=({d:.2},{d:.2}) cps = {s}", .{
+                cluster,
+                x_offset,
+                position.y,
+                advance_x_offset,
+                advance_y_offset,
+                x_offset_diff,
+                y_offset_diff,
+                formatted_cps,
+            });
+        }
+
+        if (position_y_differs) {
+            log.warn("position.y differs from old offset.y: cluster={d} pos=({d:.2},{d:.2}) run_offset=({d:.2},{d:.2}) cell_offset=({d:.2},{d:.2}) old offset.y={d:.2} cps = {s}", .{
+                cluster,
+                x_offset,
+                position.y,
+                run_offset.x,
+                run_offset_y,
+                cell_offset.x,
+                cell_offset_y,
+                old_offset_y,
+                formatted_cps,
+            });
+        }
+
+        if (cluster_differs) {
+            log.warn("cell_offset.cluster differs from cluster (potential ligature detected) cell_offset.cluster={d} cluster={d} run_offset.cluster={d} diff={d} pos=({d:.2},{d:.2}) run_offset=({d:.2},{d:.2}) cell_offset=({d:.2},{d:.2}) is_prev_prepend={} cps = {s}", .{
+                cell_offset.cluster,
+                cluster,
+                run_offset.cluster,
+                @as(isize, @intCast(cluster)) - @as(isize, @intCast(cell_offset.cluster)),
+                x_offset,
+                position.y,
+                run_offset.x,
+                run_offset_y,
+                cell_offset.x,
+                cell_offset_y,
+                is_previous_codepoint_prepend,
+                formatted_cps,
+            });
        }
    }
 };
@@ -1463,11 +1577,13 @@ test "shape Devanagari string" {
    try testing.expect(run != null);
    const cells = try shaper.shape(run.?);

+    // To understand the `x`/`cluster` assertions here, run with the "For
+    // debugging positions" code turned on and `extra_debugging` set to true.
    try testing.expectEqual(@as(usize, 8), cells.len);
    try testing.expectEqual(@as(u16, 0), cells[0].x);
    try testing.expectEqual(@as(u16, 1), cells[1].x);
    try testing.expectEqual(@as(u16, 2), cells[2].x);
-    try testing.expectEqual(@as(u16, 3), cells[3].x);
+    try testing.expectEqual(@as(u16, 4), cells[3].x);
    try testing.expectEqual(@as(u16, 4), cells[4].x);
    try testing.expectEqual(@as(u16, 5), cells[5].x);
    try testing.expectEqual(@as(u16, 5), cells[6].x);
@@ -1584,7 +1700,7 @@ test "shape Tai Tham letters (position.y differs from advance)" {
        try testing.expectEqual(@as(usize, 3), cells.len);
        try testing.expectEqual(@as(u16, 0), cells[0].x);
        try testing.expectEqual(@as(u16, 0), cells[1].x);
-        try testing.expectEqual(@as(u16, 1), cells[2].x); // U from second grapheme
+        try testing.expectEqual(@as(u16, 0), cells[2].x); // U from second grapheme

        // The U glyph renders at a y below zero
        try testing.expectEqual(@as(i16, -3), cells[2].y_offset);
@@ -1592,6 +1708,209 @@ test "shape Tai Tham letters (position.y differs from advance)" {
    try testing.expectEqual(@as(usize, 1), count);
 }

+test "shape Javanese ligatures" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    // We need a font that supports Javanese for this to work. If we can't find
+    // Noto Sans Javanese Regular, which is a system font on macOS, we just
+    // skip the test.
+    var testdata = testShaperWithDiscoveredFont(
+        alloc,
+        "Noto Sans Javanese",
+    ) catch return error.SkipZigTest;
+    defer testdata.deinit();
+
+    var buf: [32]u8 = undefined;
+    var buf_idx: usize = 0;
+
+    // First grapheme cluster:
+    buf_idx += try std.unicode.utf8Encode(0xa9a4, buf[buf_idx..]); // NA
+    buf_idx += try std.unicode.utf8Encode(0xa9c0, buf[buf_idx..]); // PANGKON
+    // Second grapheme cluster, combining with the first in a ligature:
+    buf_idx += try std.unicode.utf8Encode(0xa9b2, buf[buf_idx..]); // HA
+    buf_idx += try std.unicode.utf8Encode(0xa9b8, buf[buf_idx..]); // Vowel sign SUKU
+
+    // Make a screen with some data
+    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
+    defer t.deinit(alloc);
+
+    // Enable grapheme clustering
+    t.modes.set(.grapheme_cluster, true);
+
+    var s = t.vtStream();
+    defer s.deinit();
+    try s.nextSlice(buf[0..buf_idx]);
+
+    var state: terminal.RenderState = .empty;
+    defer state.deinit(alloc);
+    try state.update(alloc, &t);
+
+    // Get our run iterator
+    var shaper = &testdata.shaper;
+    var it = shaper.runIterator(.{
+        .grid = testdata.grid,
+        .cells = state.row_data.get(0).cells.slice(),
+    });
+    var count: usize = 0;
+    while (try it.next(alloc)) |run| {
+        count += 1;
+
+        const cells = try shaper.shape(run);
+        const cell_width = run.grid.metrics.cell_width;
+        try testing.expectEqual(@as(usize, 3), cells.len);
+        try testing.expectEqual(@as(u16, 0), cells[0].x);
+        try testing.expectEqual(@as(u16, 0), cells[1].x);
+        try testing.expectEqual(@as(u16, 0), cells[2].x);
+
+        // The vowel sign SUKU renders with correct x_offset
+        try testing.expect(cells[2].x_offset > 3 * cell_width);
+    }
+    try testing.expectEqual(@as(usize, 1), count);
+}
+
+test "shape Chakma vowel sign with ligature (vowel sign renders first)" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    // We need a font that supports Chakma for this to work. If we can't find
+    // Noto Sans Chakma Regular, which is a system font on macOS, we just skip
+    // the test.
+    var testdata = testShaperWithDiscoveredFont(
+        alloc,
+        "Noto Sans Chakma",
+    ) catch return error.SkipZigTest;
+    defer testdata.deinit();
+
+    var buf: [32]u8 = undefined;
+    var buf_idx: usize = 0;
+
+    // First grapheme cluster:
+    buf_idx += try std.unicode.utf8Encode(0x1111d, buf[buf_idx..]); // BAA
+    // Second grapheme cluster:
+    buf_idx += try std.unicode.utf8Encode(0x11116, buf[buf_idx..]); // TAA
+    buf_idx += try std.unicode.utf8Encode(0x11133, buf[buf_idx..]); // Virama
+    // Third grapheme cluster, combining with the second in a ligature:
+    buf_idx += try std.unicode.utf8Encode(0x11120, buf[buf_idx..]); // YYAA
+    buf_idx += try std.unicode.utf8Encode(0x1112c, buf[buf_idx..]); // Vowel Sign U
+
+    // Make a screen with some data
+    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
+    defer t.deinit(alloc);
+
+    // Enable grapheme clustering
+    t.modes.set(.grapheme_cluster, true);
+
+    var s = t.vtStream();
+    defer s.deinit();
+    try s.nextSlice(buf[0..buf_idx]);
+
+    var state: terminal.RenderState = .empty;
+    defer state.deinit(alloc);
+    try state.update(alloc, &t);
+
+    // Get our run iterator
+    var shaper = &testdata.shaper;
+    var it = shaper.runIterator(.{
+        .grid = testdata.grid,
+        .cells = state.row_data.get(0).cells.slice(),
+    });
+    var count: usize = 0;
+    while (try it.next(alloc)) |run| {
+        count += 1;
+
+        const cells = try shaper.shape(run);
+        try testing.expectEqual(@as(usize, 4), cells.len);
+        try testing.expectEqual(@as(u16, 0), cells[0].x);
+        // See the giant "We need to reset the `cell_offset`" comment. Here
+        // we should technically have the rest of these be `x` of 1, but that
+        // would require going back in the stream to adjust past cells, and
+        // we don't take on that complexity.
+        try testing.expectEqual(@as(u16, 0), cells[1].x);
+        try testing.expectEqual(@as(u16, 0), cells[2].x);
+        try testing.expectEqual(@as(u16, 0), cells[3].x);
+
+        // The vowel sign U renders before the TAA:
+        try testing.expect(cells[1].x_offset < cells[2].x_offset);
+    }
+    try testing.expectEqual(@as(usize, 1), count);
+}
+
+test "shape Bengali ligatures with out of order vowels" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    // We need a font that supports Bengali for this to work. If we can't find
+    // Arial Unicode MS, which is a system font on macOS, we just skip the
+    // test.
+    var testdata = testShaperWithDiscoveredFont(
+        alloc,
+        "Arial Unicode MS",
+    ) catch return error.SkipZigTest;
+    defer testdata.deinit();
+
+    var buf: [32]u8 = undefined;
+    var buf_idx: usize = 0;
+
+    // First grapheme cluster:
+    buf_idx += try std.unicode.utf8Encode(0x09b0, buf[buf_idx..]); // RA
+    buf_idx += try std.unicode.utf8Encode(0x09be, buf[buf_idx..]); // Vowel sign AA
+    // Second grapheme cluster:
+    buf_idx += try std.unicode.utf8Encode(0x09b7, buf[buf_idx..]); // SSA
+    buf_idx += try std.unicode.utf8Encode(0x09cd, buf[buf_idx..]); // Virama
+    // Third grapheme cluster, combining with the second in a ligature:
+    buf_idx += try std.unicode.utf8Encode(0x099f, buf[buf_idx..]); // TTA
+    buf_idx += try std.unicode.utf8Encode(0x09cd, buf[buf_idx..]); // Virama
+    // Fourth grapheme cluster, combining with the previous two in a ligature:
+    buf_idx += try std.unicode.utf8Encode(0x09b0, buf[buf_idx..]); // RA
+    buf_idx += try std.unicode.utf8Encode(0x09c7, buf[buf_idx..]); // Vowel sign E
+
+    // Make a screen with some data
+    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
+    defer t.deinit(alloc);
+
+    // Enable grapheme clustering
+    t.modes.set(.grapheme_cluster, true);
+
+    var s = t.vtStream();
+    defer s.deinit();
+    try s.nextSlice(buf[0..buf_idx]);
+
+    var state: terminal.RenderState = .empty;
+    defer state.deinit(alloc);
+    try state.update(alloc, &t);
+
+    // Get our run iterator
+    var shaper = &testdata.shaper;
+    var it = shaper.runIterator(.{
+        .grid = testdata.grid,
+        .cells = state.row_data.get(0).cells.slice(),
+    });
+    var count: usize = 0;
+    while (try it.next(alloc)) |run| {
+        count += 1;
+
+        const cells = try shaper.shape(run);
+        try testing.expectEqual(@as(usize, 8), cells.len);
+        try testing.expectEqual(@as(u16, 0), cells[0].x);
+        try testing.expectEqual(@as(u16, 0), cells[1].x);
+        // See the giant "We need to reset the `cell_offset`" comment. Here
+        // we should technically have the rest of these be `x` of 1, but that
+        // would require going back in the stream to adjust past cells, and
+        // we don't take on that complexity.
+        try testing.expectEqual(@as(u16, 0), cells[2].x);
+        try testing.expectEqual(@as(u16, 0), cells[3].x);
+        try testing.expectEqual(@as(u16, 0), cells[4].x);
+        try testing.expectEqual(@as(u16, 0), cells[5].x);
+        try testing.expectEqual(@as(u16, 0), cells[6].x);
+        try testing.expectEqual(@as(u16, 0), cells[7].x);
+
+        // The vowel sign E renders before the SSA:
+        try testing.expect(cells[2].x_offset < cells[3].x_offset);
+    }
+    try testing.expectEqual(@as(usize, 1), count);
+}
+
 test "shape box glyphs" {
    const testing = std.testing;
    const alloc = testing.allocator;
@@ -2329,7 +2648,7 @@ fn testShaperWithDiscoveredFont(alloc: Allocator, font_req: [:0]const u8) !TestS
            .monospace = false,
        });
        defer disco_it.deinit();
-        var face: font.DeferredFace = (try disco_it.next()).?;
+        var face: font.DeferredFace = (try disco_it.next()) orelse return error.FontNotFound;
        errdefer face.deinit();
        _ = try c.add(
            alloc,
--- a/src/inspector/termio.zig
+++ b/src/inspector/termio.zig
@@ -286,18 +286,19 @@ pub const VTEvent = struct {
            ),

            else => switch (Value) {
-                u8, u16 => try md.put(
-                    key,
-                    try std.fmt.allocPrintSentinel(alloc, "{}", .{value}, 0),
-                ),
-
                []const u8,
                [:0]const u8,
                => try md.put(key, try alloc.dupeZ(u8, value)),

-                else => |T| {
-                    @compileLog(T);
-                    @compileError("unsupported type, see log");
+                else => |T| switch (@typeInfo(T)) {
+                    .int => try md.put(
+                        key,
+                        try std.fmt.allocPrintSentinel(alloc, "{}", .{value}, 0),
+                    ),
+                    else => {
+                        @compileLog(T);
+                        @compileError("unsupported type, see log");
+                    },
                },
            },
        }
--- a/src/terminal/osc.zig
+++ b/src/terminal/osc.zig
@@ -14,6 +14,8 @@ const Allocator = mem.Allocator;
 const LibEnum = @import("../lib/enum.zig").Enum;
 const kitty_color = @import("kitty/color.zig");
 const parsers = @import("osc/parsers.zig");
+const encoding = @import("osc/encoding.zig");
+
 pub const color = parsers.color;

 const log = std.log.scoped(.osc);
@@ -191,6 +193,9 @@ pub const Command = union(Key) {
    /// ConEmu GUI macro (OSC 9;6)
    conemu_guimacro: [:0]const u8,

+    /// Kitty text sizing protocol (OSC 66)
+    kitty_text_sizing: parsers.kitty_text_sizing.OSC,
+
    pub const Key = LibEnum(
        if (build_options.c_abi) .c else .zig,
        // NOTE: Order matters, see LibEnum documentation.
@@ -216,6 +221,7 @@ pub const Command = union(Key) {
            "conemu_progress_report",
            "conemu_wait_input",
            "conemu_guimacro",
+            "kitty_text_sizing",
        },
    );

@@ -342,6 +348,7 @@ pub const Parser = struct {
        @"2",
        @"4",
        @"5",
+        @"6",
        @"7",
        @"8",
        @"9",
@@ -358,6 +365,7 @@ pub const Parser = struct {
        @"21",
        @"22",
        @"52",
+        @"66",
        @"77",
        @"104",
        @"110",
@@ -431,6 +439,7 @@ pub const Parser = struct {
            .prompt_start,
            .report_pwd,
            .show_desktop_notification,
+            .kitty_text_sizing,
            => {},
        }

@@ -510,6 +519,7 @@ pub const Parser = struct {
                '2' => self.state = .@"2",
                '4' => self.state = .@"4",
                '5' => self.state = .@"5",
+                '6' => self.state = .@"6",
                '7' => self.state = .@"7",
                '8' => self.state = .@"8",
                '9' => self.state = .@"9",
@@ -600,7 +610,14 @@ pub const Parser = struct {
                else => self.state = .invalid,
            },

-            .@"52" => switch (c) {
+            .@"6" => switch (c) {
+                '6' => self.state = .@"66",
+                else => self.state = .invalid,
+            },
+
+            .@"52",
+            .@"66",
+            => switch (c) {
                ';' => self.writeToAllocating(),
                else => self.state = .invalid,
            },
@@ -685,6 +702,10 @@ pub const Parser = struct {

            .@"52" => parsers.clipboard_operation.parse(self, terminator_ch),

+            .@"6" => null,
+
+            .@"66" => parsers.kitty_text_sizing.parse(self, terminator_ch),
+
            .@"77" => null,

            .@"133" => parsers.semantic_prompt.parse(self, terminator_ch),
@@ -696,4 +717,5 @@ pub const Parser = struct {

 test {
    _ = parsers;
+    _ = encoding;
 }
--- a/src/terminal/osc/encoding.zig
+++ b/src/terminal/osc/encoding.zig
@@ -0,0 +1,38 @@
+//! Specialized encodings used in some OSC protocols.
+const std = @import("std");
+
+/// Kitty defines "Escape code safe UTF-8" as valid UTF-8 with the
+/// additional requirement of not containing any C0 escape codes
+/// (0x00-0x1f), DEL (0x7f) and C1 escape codes (0x80-0x9f).
+///
+/// Used by OSC 66 (text sizing) and OSC 99 (Kitty notifications).
+///
+/// See: https://sw.kovidgoyal.net/kitty/desktop-notifications/#safe-utf8
+pub fn isSafeUtf8(s: []const u8) bool {
+    const utf8 = std.unicode.Utf8View.init(s) catch {
+        @branchHint(.cold);
+        return false;
+    };
+
+    var it = utf8.iterator();
+    while (it.nextCodepoint()) |cp| switch (cp) {
+        0x00...0x1f, 0x7f, 0x80...0x9f => {
+            @branchHint(.cold);
+            return false;
+        },
+        else => {},
+    };
+
+    return true;
+}
+
+test isSafeUtf8 {
+    const testing = std.testing;
+
+    try testing.expect(isSafeUtf8("Hello world!"));
+    try testing.expect(isSafeUtf8("安全的ユニコード☀️"));
+    try testing.expect(!isSafeUtf8("No linebreaks\nallowed"));
+    try testing.expect(!isSafeUtf8("\x07no bells"));
+    try testing.expect(!isSafeUtf8("\x1b]9;no OSCs\x1b\\\x1b[m"));
+    try testing.expect(!isSafeUtf8("\x9f8-bit escapes are clever, but no"));
+}
--- a/src/terminal/osc/parsers.zig
+++ b/src/terminal/osc/parsers.zig
@@ -6,6 +6,7 @@ pub const clipboard_operation = @import("parsers/clipboard_operation.zig");
 pub const color = @import("parsers/color.zig");
 pub const hyperlink = @import("parsers/hyperlink.zig");
 pub const kitty_color = @import("parsers/kitty_color.zig");
+pub const kitty_text_sizing = @import("parsers/kitty_text_sizing.zig");
 pub const mouse_shape = @import("parsers/mouse_shape.zig");
 pub const osc9 = @import("parsers/osc9.zig");
 pub const report_pwd = @import("parsers/report_pwd.zig");
@@ -19,6 +20,7 @@ test {
    _ = color;
    _ = hyperlink;
    _ = kitty_color;
+    _ = kitty_text_sizing;
    _ = mouse_shape;
    _ = osc9;
    _ = report_pwd;
--- a/src/terminal/osc/parsers/kitty_text_sizing.zig
+++ b/src/terminal/osc/parsers/kitty_text_sizing.zig
@@ -0,0 +1,250 @@
+//! Kitty's text sizing protocol (OSC 66)
+//! Specification: https://sw.kovidgoyal.net/kitty/text-sizing-protocol/
+
+const std = @import("std");
+const build_options = @import("terminal_options");
+
+const assert = @import("../../../quirks.zig").inlineAssert;
+
+const Parser = @import("../../osc.zig").Parser;
+const Command = @import("../../osc.zig").Command;
+const encoding = @import("../encoding.zig");
+const lib = @import("../../../lib/main.zig");
+const lib_target: lib.Target = if (build_options.c_abi) .c else .zig;
+
+const log = std.log.scoped(.kitty_text_sizing);
+
+pub const max_payload_length = 4096; // Largest text payload, in bytes, that `parse` accepts.
+
+pub const VAlign = lib.Enum(lib_target, &.{
+    "top", // Vertical alignment for the `v` key; `.top` is the `OSC` default.
+    "bottom", // `v=1` per the "multiple parameters" test.
+    "center", // NOTE(review): presumably `v=2`, given the order — confirm against `lib.Enum`.
+});
+
+pub const HAlign = lib.Enum(lib_target, &.{
+    "left", // Horizontal alignment for the `h` key; `.left` is the `OSC` default.
+    "right", // NOTE(review): presumably `h=1`, given the order — confirm against `lib.Enum`.
+    "center", // `h=2` per the "multiple parameters" test.
+});
+
+/// Parsed OSC 66 command: the metadata fields plus the text they apply to.
+pub const OSC = struct {
+    scale: u3 = 1, // `s`: 1 - 7
+    width: u3 = 0, // `w`: 0 - 7 (0 means default)
+    numerator: u4 = 0, // `n`: 0 - 15
+    denominator: u4 = 0, // `d`: 0 - 15
+    valign: VAlign = .top, // `v`
+    halign: HAlign = .left, // `h`
+    text: [:0]const u8, // NUL-terminated payload
+
+    /// We don't currently support encoding this to C in any way.
+    pub const C = void;
+
+    pub fn cval(_: OSC) C {
+        return {};
+    }
+
+    /// Applies one `key=value` metadata pair; on error `self` is unchanged.
+    /// The key is matched first, so an unknown key always yields `error.UnknownKey`.
+    fn update(self: *OSC, key: u8, value: []const u8) !void {
+        switch (key) {
+            's' => self.scale = switch (try std.fmt.parseInt(u3, value, 10)) {
+                0 => return error.InvalidValue,
+                else => |scale| scale,
+            },
+            'w' => self.width = try std.fmt.parseInt(u3, value, 10),
+            'n' => self.numerator = try std.fmt.parseInt(u4, value, 10),
+            'd' => self.denominator = try std.fmt.parseInt(u4, value, 10),
+            'v' => self.valign = std.enums.fromInt(VAlign, try std.fmt.parseInt(u4, value, 10)) orelse return error.InvalidValue,
+            'h' => self.halign = std.enums.fromInt(HAlign, try std.fmt.parseInt(u4, value, 10)) orelse return error.InvalidValue,
+            else => return error.UnknownKey,
+        }
+    }
+};
+
+/// Parses a buffered OSC 66 sequence of the form `metadata;text`, where
+/// `metadata` is a `:`-separated list of single-character `key=value` pairs.
+///
+/// On success, stores a `kitty_text_sizing` command in `parser.command` and
+/// returns a pointer to it. Returns null and marks the parser `.invalid`
+/// when there is no writer, the NUL terminator can't be written, the `;`
+/// separator is missing, the text exceeds `max_payload_length`, or the text
+/// is not escape code safe UTF-8.
+///
+/// Malformed or unknown metadata pairs are logged and skipped; they do not
+/// invalidate the command.
+pub fn parse(parser: *Parser, _: ?u8) ?*Command {
+    assert(parser.state == .@"66");
+
+    const writer = parser.writer orelse return reject(parser);
+
+    // Append a NUL so that `text` can be sliced as a NUL-terminated string.
+    writer.writeByte(0) catch return reject(parser);
+    const buffered = writer.buffered();
+
+    const separator = std.mem.indexOfScalar(u8, buffered, ';') orelse {
+        log.warn("missing semicolon before payload", .{});
+        return reject(parser);
+    };
+    const metadata = buffered[0..separator];
+    const text = buffered[separator + 1 .. buffered.len - 1 :0];
+
+    // The text must fit the size limit and be escape code safe UTF-8.
+    if (text.len > max_payload_length) {
+        log.warn("payload is too long", .{});
+        return reject(parser);
+    }
+    if (!encoding.isSafeUtf8(text)) {
+        log.warn("payload is not escape code safe UTF-8", .{});
+        return reject(parser);
+    }
+
+    parser.command = .{
+        .kitty_text_sizing = .{ .text = text },
+    };
+    const osc = &parser.command.kitty_text_sizing;
+
+    // An empty metadata section carries no pairs at all.
+    if (metadata.len == 0) return &parser.command;
+
+    var pairs = std.mem.splitScalar(u8, metadata, ':');
+    while (pairs.next()) |pair| {
+        var parts = std.mem.splitScalar(u8, pair, '=');
+        const key = parts.next() orelse {
+            log.warn("missing key", .{});
+            continue;
+        };
+        if (key.len != 1) {
+            log.warn("key must be a single character", .{});
+            continue;
+        }
+        const value = parts.next() orelse {
+            log.warn("missing value", .{});
+            continue;
+        };
+
+        // A rejected pair is only logged; the remaining pairs still apply.
+        osc.update(key[0], value) catch |err| switch (err) {
+            error.UnknownKey => log.warn("unknown key: '{c}'", .{key[0]}),
+            else => log.warn("invalid value for key '{c}': {}", .{ key[0], err }),
+        };
+    }
+
+    return &parser.command;
+}
+
+/// Marks `parser` as invalid and produces the null result `parse` returns.
+fn reject(parser: *Parser) ?*Command {
+    parser.state = .invalid;
+    return null;
+}
+
+test "OSC 66: empty parameters" {
+    // Nothing sits between the two `;`, so the scale must come back as its
+    // default of 1 and the text as the bytes after the second `;`.
+    var parser: Parser = .init(null);
+    for ("66;;bobr") |byte| parser.next(byte);
+
+    const command = parser.end('\x1b').?.*;
+    try std.testing.expect(command == .kitty_text_sizing);
+
+    const osc = command.kitty_text_sizing;
+    try std.testing.expectEqual(1, osc.scale);
+    try std.testing.expectEqualStrings("bobr", osc.text);
+}
+
+test "OSC 66: single parameter" {
+    // A lone `s=2` pair sets the scale; the text is whatever follows the
+    // second `;`.
+    var parser: Parser = .init(null);
+    for ("66;s=2;kurwa") |byte| parser.next(byte);
+
+    const command = parser.end('\x1b').?.*;
+    try std.testing.expect(command == .kitty_text_sizing);
+
+    const osc = command.kitty_text_sizing;
+    try std.testing.expectEqual(2, osc.scale);
+    try std.testing.expectEqualStrings("kurwa", osc.text);
+}
+
+test "OSC 66: multiple parameters" {
+    // All six metadata keys in one sequence, separated by `:`; each one
+    // must land in its own field.
+    var parser: Parser = .init(null);
+    for ("66;s=2:w=7:n=13:d=15:v=1:h=2;long") |byte| parser.next(byte);
+
+    const command = parser.end('\x1b').?.*;
+    try std.testing.expect(command == .kitty_text_sizing);
+
+    const osc = command.kitty_text_sizing;
+    try std.testing.expectEqual(2, osc.scale);
+    try std.testing.expectEqual(7, osc.width);
+    try std.testing.expectEqual(13, osc.numerator);
+    try std.testing.expectEqual(15, osc.denominator);
+    try std.testing.expectEqual(.bottom, osc.valign);
+    try std.testing.expectEqual(.center, osc.halign);
+    try std.testing.expectEqualStrings("long", osc.text);
+}
+
+test "OSC 66: scale is zero" {
+    const testing = std.testing;
+
+    // `s=0` is refused, yet the command is still produced with the scale
+    // left at its default of 1.
+    var parser: Parser = .init(null);
+    for ("66;s=0;nope") |byte| parser.next(byte);
+
+    const command = parser.end('\x1b').?.*;
+    try testing.expect(command == .kitty_text_sizing);
+    try testing.expectEqual(1, command.kitty_text_sizing.scale);
+}
+
+test "OSC 66: invalid parameters" {
+    // `w=8`, `v=3` and `n=16` are each out of range; every one of them is
+    // dropped and its field keeps the default value.
+    var parser: Parser = .init(null);
+    for ("66;w=8:v=3:n=16;") |byte| parser.next(byte);
+
+    const command = parser.end('\x1b').?.*;
+    try std.testing.expect(command == .kitty_text_sizing);
+    const osc = command.kitty_text_sizing;
+    try std.testing.expectEqual(0, osc.width);
+    try std.testing.expect(osc.valign == .top);
+    try std.testing.expectEqual(0, osc.numerator);
+}
+
+test "OSC 66: UTF-8" {
+    // Multi-byte text goes in one byte at a time and must come back
+    // exactly as it was sent.
+    const text = "👻魑魅魍魉ゴースッティ";
+
+    var parser: Parser = .init(null);
+    for ("66;;" ++ text) |byte| parser.next(byte);
+
+    const command = parser.end('\x1b').?.*;
+    try std.testing.expect(command == .kitty_text_sizing);
+    try std.testing.expectEqualStrings(text, command.kitty_text_sizing.text);
+}
+
+test "OSC 66: unsafe UTF-8" {
+    const testing = std.testing;
+
+    // A linefeed is a C0 control code, so no command may come out of this.
+    var parser: Parser = .init(null);
+    for ("66;;\n") |byte| parser.next(byte);
+
+    const result = parser.end('\x1b');
+    try testing.expect(result == null);
+}
+
+test "OSC 66: overlong UTF-8" {
+    const testing = std.testing;
+
+    // 4 * 1025 = 4100 payload bytes, just past the 4096-byte `max_payload_length`.
+    var parser: Parser = .init(null);
+    for ("66;;" ++ "bobr" ** 1025) |byte| parser.next(byte);
+
+    const result = parser.end('\x1b');
+    try testing.expect(result == null);
+}
--- a/src/terminal/stream.zig
+++ b/src/terminal/stream.zig
@@ -2107,6 +2107,7 @@ pub fn Stream(comptime Handler: type) type {
                .conemu_change_tab_title,
                .conemu_wait_input,
                .conemu_guimacro,
+                .kitty_text_sizing,
                => {
                    log.debug("unimplemented OSC callback: {}", .{cmd});
                },