diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 45127e032..a9adcfbc2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1083,7 +1083,7 @@ jobs: uses: namespacelabs/nscloud-setup@d1c625762f7c926a54bd39252efff0705fd11c64 # v0.0.10 - name: Configure Namespace powered Buildx - uses: namespacelabs/nscloud-setup-buildx-action@91c2e6537780e3b092cb8476406be99a8f91bd5e # v0.0.20 + uses: namespacelabs/nscloud-setup-buildx-action@a7e525416136ee2842da3c800e7067b72a27200e # v0.0.21 - name: Download Source Tarball Artifacts uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 diff --git a/src/font/shaper/coretext.zig b/src/font/shaper/coretext.zig index c8822a373..cc05022c4 100644 --- a/src/font/shaper/coretext.zig +++ b/src/font/shaper/coretext.zig @@ -6,6 +6,7 @@ const macos = @import("macos"); const font = @import("../main.zig"); const os = @import("../../os/main.zig"); const terminal = @import("../../terminal/main.zig"); +const unicode = @import("../../unicode/main.zig"); const Feature = font.shape.Feature; const FeatureList = font.shape.FeatureList; const default_features = font.shape.default_features; @@ -103,7 +104,7 @@ pub const Shaper = struct { } }; - const CellOffset = struct { + const Offset = struct { cluster: u32 = 0, x: f64 = 0, }; @@ -382,11 +383,12 @@ pub const Shaper = struct { const line = typesetter.createLine(.{ .location = 0, .length = 0 }); self.cf_release_pool.appendAssumeCapacity(line); - // This keeps track of the current x offset (sum of advance.width) - var run_offset_x: f64 = 0.0; + // This keeps track of the current x offset (sum of advance.width) and + // the furthest cluster we've seen so far (max). + var run_offset: Offset = .{}; // This keeps track of the cell starting x and cluster. 
- var cell_offset: CellOffset = .{}; + var cell_offset: Offset = .{}; // For debugging positions, turn this on: //var run_offset_y: f64 = 0.0; @@ -410,8 +412,8 @@ pub const Shaper = struct { // other so we can iterate over them and just append to our // cell buffer. const runs = line.getGlyphRuns(); - for (0..runs.getCount()) |i| { - const ctrun = runs.getValueAtIndex(macos.text.Run, i); + for (0..runs.getCount()) |run_i| { + const ctrun = runs.getValueAtIndex(macos.text.Run, run_i); const status = ctrun.getStatus(); if (status.non_monotonic or status.right_to_left) non_ltr = true; @@ -434,30 +436,75 @@ pub const Shaper = struct { // Our cluster is also our cell X position. If the cluster changes // then we need to reset our current cell offsets. const cluster = state.codepoints.items[index].cluster; - if (cell_offset.cluster != cluster) pad: { - // We previously asserted this but for rtl text this is - // not true. So we check for this and break out. In the - // future we probably need to reverse pad for rtl but - // I don't have a solid test case for this yet so let's - // wait for that. - if (cell_offset.cluster > cluster) break :pad; + if (cell_offset.cluster != cluster) { + // We previously asserted that the new cluster is greater + // than cell_offset.cluster, but this isn't always true. + // See e.g. the "shape Chakma vowel sign with ligature + // (vowel sign renders first)" test. 
- cell_offset = .{ - .cluster = cluster, - .x = run_offset_x, + const is_after_glyph_from_current_or_next_clusters = + cluster <= run_offset.cluster; + + const is_first_codepoint_in_cluster = blk: { + var i = index; + while (i > 0) { + i -= 1; + const codepoint = state.codepoints.items[i]; + + // Skip surrogate pair padding + if (codepoint.codepoint == 0) continue; + break :blk codepoint.cluster != cluster; + } else break :blk true; }; - // For debugging positions, turn this on: - //cell_offset_y = run_offset_y; + // We need to reset the `cell_offset` at the start of a new + // cluster, but we do that conditionally if the codepoint + // `is_first_codepoint_in_cluster` and the cluster is not + // `is_after_glyph_from_current_or_next_clusters`, which is + // a heuristic to detect ligatures and avoid positioning + // glyphs that mark ligatures incorrectly. The idea is that + // if the first codepoint in a cluster doesn't appear in + // the stream, it's very likely that it combined with + // codepoints from a previous cluster into a ligature. + // Then, the subsequent codepoints are very likely marking + // glyphs that are placed relative to that ligature, so if + // we were to reset the `cell_offset` to align it with the + // grid, the positions would be off. The + // `!is_after_glyph_from_current_or_next_clusters` check is + // needed in case these marking glyphs come from a later + // cluster but are rendered first (see the Chakma and + // Bengali tests). In that case when we get to the + // codepoint that `is_first_codepoint_in_cluster`, but in a + // cluster that + // `is_after_glyph_from_current_or_next_clusters`, we don't + // want to reset to the grid and cause the positions to be + // off. (Note that we could go back and align the cells to + // the grid starting from the one from the cluster that + // rendered out of order, but that is more complicated so + // we don't do that for now. 
Also, it's TBD if there are + // exceptions to this heuristic for detecting ligatures, + // but using the logging below seems to show it works + // well.) + if (is_first_codepoint_in_cluster and + !is_after_glyph_from_current_or_next_clusters) + { + cell_offset = .{ + .cluster = cluster, + .x = run_offset.x, + }; + + // For debugging positions, turn this on: + //cell_offset_y = run_offset_y; + } } // For debugging positions, turn this on: - //try self.debugPositions(alloc, run_offset_x, run_offset_y, cell_offset, cell_offset_y, position, index); + //try self.debugPositions(alloc, run_offset, run_offset_y, cell_offset, cell_offset_y, position, index); const x_offset = position.x - cell_offset.x; self.cell_buf.appendAssumeCapacity(.{ - .x = @intCast(cluster), + .x = @intCast(cell_offset.cluster), .x_offset = @intFromFloat(@round(x_offset)), .y_offset = @intFromFloat(@round(position.y)), .glyph_index = glyph, @@ -465,7 +512,8 @@ pub const Shaper = struct { // Add our advances to keep track of our run offsets. // Advances apply to the NEXT cell. 
- run_offset_x += advance.width; + run_offset.x += advance.width; + run_offset.cluster = @max(run_offset.cluster, cluster); // For debugging positions, turn this on: //run_offset_y += advance.height; @@ -641,31 +689,56 @@ pub const Shaper = struct { fn debugPositions( self: *Shaper, alloc: Allocator, - run_offset_x: f64, + run_offset: Offset, run_offset_y: f64, - cell_offset: CellOffset, + cell_offset: Offset, cell_offset_y: f64, position: macos.graphics.Point, index: usize, ) !void { const state = &self.run_state; const x_offset = position.x - cell_offset.x; - const advance_x_offset = run_offset_x - cell_offset.x; + const advance_x_offset = run_offset.x - cell_offset.x; const advance_y_offset = run_offset_y - cell_offset_y; const x_offset_diff = x_offset - advance_x_offset; const y_offset_diff = position.y - advance_y_offset; const positions_differ = @abs(x_offset_diff) > 0.0001 or @abs(y_offset_diff) > 0.0001; const old_offset_y = position.y - cell_offset_y; const position_y_differs = @abs(cell_offset_y) > 0.0001; + const cluster = state.codepoints.items[index].cluster; + const cluster_differs = cluster != cell_offset.cluster; - if (positions_differ or position_y_differs) { + // To debug every loop, flip this to true: + const extra_debugging = false; + + const is_previous_codepoint_prepend = if (cluster_differs or + extra_debugging) + blk: { + var i = index; + while (i > 0) { + i -= 1; + const codepoint = state.codepoints.items[i]; + + // Skip surrogate pair padding + if (codepoint.codepoint == 0) continue; + + break :blk unicode.table.get(@intCast(codepoint.codepoint)).grapheme_boundary_class == .prepend; + } + break :blk false; + } else false; + + const formatted_cps = if (positions_differ or + position_y_differs or + cluster_differs or + extra_debugging) + blk: { var allocating = std.Io.Writer.Allocating.init(alloc); const writer = &allocating.writer; const codepoints = state.codepoints.items; - const current_cp = state.codepoints.items[index].codepoint; var 
last_cluster: ?u32 = null; - for (codepoints) |cp| { - if ((cp.cluster == cell_offset.cluster or cp.cluster == cell_offset.cluster - 1 or cp.cluster == cell_offset.cluster + 1) and + for (codepoints, 0..) |cp, i| { + if ((@as(i32, @intCast(cp.cluster)) >= @as(i32, @intCast(cell_offset.cluster)) - 1 and + cp.cluster <= cluster + 1) and cp.codepoint != 0 // Skip surrogate pair padding ) { if (last_cluster) |last| { @@ -673,49 +746,90 @@ pub const Shaper = struct { try writer.writeAll(" "); } } - if (cp.cluster == cell_offset.cluster and cp.codepoint == current_cp) { + if (i == index) { try writer.writeAll("▸"); } - try writer.print("\\u{{{x}}}", .{cp.codepoint}); + // Using Python syntax for easier debugging + if (cp.codepoint > 0xFFFF) { + try writer.print("\\U{x:0>8}", .{cp.codepoint}); + } else { + try writer.print("\\u{x:0>4}", .{cp.codepoint}); + } last_cluster = cp.cluster; } } try writer.writeAll(" → "); for (codepoints) |cp| { - if ((cp.cluster == cell_offset.cluster or cp.cluster == cell_offset.cluster - 1 or cp.cluster == cell_offset.cluster + 1) and + if ((@as(i32, @intCast(cp.cluster)) >= @as(i32, @intCast(cell_offset.cluster)) - 1 and + cp.cluster <= cluster + 1) and cp.codepoint != 0 // Skip surrogate pair padding ) { try writer.print("{u}", .{@as(u21, @intCast(cp.codepoint))}); } } - const formatted_cps = try allocating.toOwnedSlice(); + break :blk try allocating.toOwnedSlice(); + } else ""; - if (positions_differ) { - log.warn("position differs from advance: cluster={d} pos=({d:.2},{d:.2}) adv=({d:.2},{d:.2}) diff=({d:.2},{d:.2}) cps = {s}", .{ - cell_offset.cluster, - x_offset, - position.y, - advance_x_offset, - advance_y_offset, - x_offset_diff, - y_offset_diff, - formatted_cps, - }); - } + if (extra_debugging) { + log.warn("extra debugging of positions index={d} cell_offset.cluster={d} cluster={d} run_offset.cluster={d} diff={d} pos=({d:.2},{d:.2}) run_offset=({d:.2},{d:.2}) cell_offset=({d:.2},{d:.2}) is_prev_prepend={} cps = {s}", .{ + index, + 
cell_offset.cluster, + cluster, + run_offset.cluster, + @as(isize, @intCast(cluster)) - @as(isize, @intCast(cell_offset.cluster)), + x_offset, + position.y, + run_offset.x, + run_offset_y, + cell_offset.x, + cell_offset_y, + is_previous_codepoint_prepend, + formatted_cps, + }); + } - if (position_y_differs) { - log.warn("position.y differs from old offset.y: cluster={d} pos=({d:.2},{d:.2}) run_offset=({d:.2},{d:.2}) cell_offset=({d:.2},{d:.2}) old offset.y={d:.2} cps = {s}", .{ - cell_offset.cluster, - x_offset, - position.y, - run_offset_x, - run_offset_y, - cell_offset.x, - cell_offset_y, - old_offset_y, - formatted_cps, - }); - } + if (positions_differ) { + log.warn("position differs from advance: cluster={d} pos=({d:.2},{d:.2}) adv=({d:.2},{d:.2}) diff=({d:.2},{d:.2}) cps = {s}", .{ + cluster, + x_offset, + position.y, + advance_x_offset, + advance_y_offset, + x_offset_diff, + y_offset_diff, + formatted_cps, + }); + } + + if (position_y_differs) { + log.warn("position.y differs from old offset.y: cluster={d} pos=({d:.2},{d:.2}) run_offset=({d:.2},{d:.2}) cell_offset=({d:.2},{d:.2}) old offset.y={d:.2} cps = {s}", .{ + cluster, + x_offset, + position.y, + run_offset.x, + run_offset_y, + cell_offset.x, + cell_offset_y, + old_offset_y, + formatted_cps, + }); + } + + if (cluster_differs) { + log.warn("cell_offset.cluster differs from cluster (potential ligature detected) cell_offset.cluster={d} cluster={d} run_offset.cluster={d} diff={d} pos=({d:.2},{d:.2}) run_offset=({d:.2},{d:.2}) cell_offset=({d:.2},{d:.2}) is_prev_prepend={} cps = {s}", .{ + cell_offset.cluster, + cluster, + run_offset.cluster, + @as(isize, @intCast(cluster)) - @as(isize, @intCast(cell_offset.cluster)), + x_offset, + position.y, + run_offset.x, + run_offset_y, + cell_offset.x, + cell_offset_y, + is_previous_codepoint_prepend, + formatted_cps, + }); } } }; @@ -1463,11 +1577,13 @@ test "shape Devanagari string" { try testing.expect(run != null); const cells = try shaper.shape(run.?); + // To 
understand the `x`/`cluster` assertions here, run with the "For + // debugging positions" code turned on and `extra_debugging` set to true. try testing.expectEqual(@as(usize, 8), cells.len); try testing.expectEqual(@as(u16, 0), cells[0].x); try testing.expectEqual(@as(u16, 1), cells[1].x); try testing.expectEqual(@as(u16, 2), cells[2].x); - try testing.expectEqual(@as(u16, 3), cells[3].x); + try testing.expectEqual(@as(u16, 4), cells[3].x); try testing.expectEqual(@as(u16, 4), cells[4].x); try testing.expectEqual(@as(u16, 5), cells[5].x); try testing.expectEqual(@as(u16, 5), cells[6].x); @@ -1584,7 +1700,7 @@ test "shape Tai Tham letters (position.y differs from advance)" { try testing.expectEqual(@as(usize, 3), cells.len); try testing.expectEqual(@as(u16, 0), cells[0].x); try testing.expectEqual(@as(u16, 0), cells[1].x); - try testing.expectEqual(@as(u16, 1), cells[2].x); // U from second grapheme + try testing.expectEqual(@as(u16, 0), cells[2].x); // U from second grapheme // The U glyph renders at a y below zero try testing.expectEqual(@as(i16, -3), cells[2].y_offset); @@ -1592,6 +1708,209 @@ test "shape Tai Tham letters (position.y differs from advance)" { try testing.expectEqual(@as(usize, 1), count); } +test "shape Javanese ligatures" { + const testing = std.testing; + const alloc = testing.allocator; + + // We need a font that supports Javanese for this to work, if we can't find + // Noto Sans Javanese Regular, which is a system font on macOS, we just + // skip the test. 
+ var testdata = testShaperWithDiscoveredFont( + alloc, + "Noto Sans Javanese", + ) catch return error.SkipZigTest; + defer testdata.deinit(); + + var buf: [32]u8 = undefined; + var buf_idx: usize = 0; + + // First grapheme cluster: + buf_idx += try std.unicode.utf8Encode(0xa9a4, buf[buf_idx..]); // NA + buf_idx += try std.unicode.utf8Encode(0xa9c0, buf[buf_idx..]); // PANGKON + // Second grapheme cluster, combining with the first in a ligature: + buf_idx += try std.unicode.utf8Encode(0xa9b2, buf[buf_idx..]); // HA + buf_idx += try std.unicode.utf8Encode(0xa9b8, buf[buf_idx..]); // Vowel sign SUKU + + // Make a screen with some data + var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 }); + defer t.deinit(alloc); + + // Enable grapheme clustering + t.modes.set(.grapheme_cluster, true); + + var s = t.vtStream(); + defer s.deinit(); + try s.nextSlice(buf[0..buf_idx]); + + var state: terminal.RenderState = .empty; + defer state.deinit(alloc); + try state.update(alloc, &t); + + // Get our run iterator + var shaper = &testdata.shaper; + var it = shaper.runIterator(.{ + .grid = testdata.grid, + .cells = state.row_data.get(0).cells.slice(), + }); + var count: usize = 0; + while (try it.next(alloc)) |run| { + count += 1; + + const cells = try shaper.shape(run); + const cell_width = run.grid.metrics.cell_width; + try testing.expectEqual(@as(usize, 3), cells.len); + try testing.expectEqual(@as(u16, 0), cells[0].x); + try testing.expectEqual(@as(u16, 0), cells[1].x); + try testing.expectEqual(@as(u16, 0), cells[2].x); + + // The vowel sign SUKU renders with correct x_offset + try testing.expect(cells[2].x_offset > 3 * cell_width); + } + try testing.expectEqual(@as(usize, 1), count); +} + +test "shape Chakma vowel sign with ligature (vowel sign renders first)" { + const testing = std.testing; + const alloc = testing.allocator; + + // We need a font that supports Chakma for this to work, if we can't find + // Noto Sans Chakma Regular, which is a system font on 
macOS, we just skip + // the test. + var testdata = testShaperWithDiscoveredFont( + alloc, + "Noto Sans Chakma", + ) catch return error.SkipZigTest; + defer testdata.deinit(); + + var buf: [32]u8 = undefined; + var buf_idx: usize = 0; + + // First grapheme cluster: + buf_idx += try std.unicode.utf8Encode(0x1111d, buf[buf_idx..]); // BAA + // Second grapheme cluster: + buf_idx += try std.unicode.utf8Encode(0x11116, buf[buf_idx..]); // TAA + buf_idx += try std.unicode.utf8Encode(0x11133, buf[buf_idx..]); // Virama + // Third grapheme cluster, combining with the second in a ligature: + buf_idx += try std.unicode.utf8Encode(0x11120, buf[buf_idx..]); // YYAA + buf_idx += try std.unicode.utf8Encode(0x1112c, buf[buf_idx..]); // Vowel Sign U + + // Make a screen with some data + var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 }); + defer t.deinit(alloc); + + // Enable grapheme clustering + t.modes.set(.grapheme_cluster, true); + + var s = t.vtStream(); + defer s.deinit(); + try s.nextSlice(buf[0..buf_idx]); + + var state: terminal.RenderState = .empty; + defer state.deinit(alloc); + try state.update(alloc, &t); + + // Get our run iterator + var shaper = &testdata.shaper; + var it = shaper.runIterator(.{ + .grid = testdata.grid, + .cells = state.row_data.get(0).cells.slice(), + }); + var count: usize = 0; + while (try it.next(alloc)) |run| { + count += 1; + + const cells = try shaper.shape(run); + try testing.expectEqual(@as(usize, 4), cells.len); + try testing.expectEqual(@as(u16, 0), cells[0].x); + // See the giant "We need to reset the `cell_offset`" comment, but here + // we should technically have the rest of these be `x` of 1, but that + // would require going back in the stream to adjust past cells, and + // we don't take on that complexity. 
+ try testing.expectEqual(@as(u16, 0), cells[1].x); + try testing.expectEqual(@as(u16, 0), cells[2].x); + try testing.expectEqual(@as(u16, 0), cells[3].x); + + // The vowel sign U renders before the TAA: + try testing.expect(cells[1].x_offset < cells[2].x_offset); + } + try testing.expectEqual(@as(usize, 1), count); +} + +test "shape Bengali ligatures with out of order vowels" { + const testing = std.testing; + const alloc = testing.allocator; + + // We need a font that supports Bengali for this to work, if we can't find + // Arial Unicode MS, which is a system font on macOS, we just skip the + // test. + var testdata = testShaperWithDiscoveredFont( + alloc, + "Arial Unicode MS", + ) catch return error.SkipZigTest; + defer testdata.deinit(); + + var buf: [32]u8 = undefined; + var buf_idx: usize = 0; + + // First grapheme cluster: + buf_idx += try std.unicode.utf8Encode(0x09b0, buf[buf_idx..]); // RA + buf_idx += try std.unicode.utf8Encode(0x09be, buf[buf_idx..]); // Vowel sign AA + // Second grapheme cluster: + buf_idx += try std.unicode.utf8Encode(0x09b7, buf[buf_idx..]); // SSA + buf_idx += try std.unicode.utf8Encode(0x09cd, buf[buf_idx..]); // Virama + // Third grapheme cluster, combining with the second in a ligature: + buf_idx += try std.unicode.utf8Encode(0x099f, buf[buf_idx..]); // TTA + buf_idx += try std.unicode.utf8Encode(0x09cd, buf[buf_idx..]); // Virama + // Fourth grapheme cluster, combining with the previous two in a ligature: + buf_idx += try std.unicode.utf8Encode(0x09b0, buf[buf_idx..]); // RA + buf_idx += try std.unicode.utf8Encode(0x09c7, buf[buf_idx..]); // Vowel sign E + + // Make a screen with some data + var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 }); + defer t.deinit(alloc); + + // Enable grapheme clustering + t.modes.set(.grapheme_cluster, true); + + var s = t.vtStream(); + defer s.deinit(); + try s.nextSlice(buf[0..buf_idx]); + + var state: terminal.RenderState = .empty; + defer state.deinit(alloc); + try 
state.update(alloc, &t); + + // Get our run iterator + var shaper = &testdata.shaper; + var it = shaper.runIterator(.{ + .grid = testdata.grid, + .cells = state.row_data.get(0).cells.slice(), + }); + var count: usize = 0; + while (try it.next(alloc)) |run| { + count += 1; + + const cells = try shaper.shape(run); + try testing.expectEqual(@as(usize, 8), cells.len); + try testing.expectEqual(@as(u16, 0), cells[0].x); + try testing.expectEqual(@as(u16, 0), cells[1].x); + // See the giant "We need to reset the `cell_offset`" comment, but here + // we should technically have the rest of these be `x` of 1, but that + // would require going back in the stream to adjust past cells, and + // we don't take on that complexity. + try testing.expectEqual(@as(u16, 0), cells[2].x); + try testing.expectEqual(@as(u16, 0), cells[3].x); + try testing.expectEqual(@as(u16, 0), cells[4].x); + try testing.expectEqual(@as(u16, 0), cells[5].x); + try testing.expectEqual(@as(u16, 0), cells[6].x); + try testing.expectEqual(@as(u16, 0), cells[7].x); + + // The vowel sign E renders before the SSA: + try testing.expect(cells[2].x_offset < cells[3].x_offset); + } + try testing.expectEqual(@as(usize, 1), count); +} + test "shape box glyphs" { const testing = std.testing; const alloc = testing.allocator; @@ -2329,7 +2648,7 @@ fn testShaperWithDiscoveredFont(alloc: Allocator, font_req: [:0]const u8) !TestS .monospace = false, }); defer disco_it.deinit(); - var face: font.DeferredFace = (try disco_it.next()).?; + var face: font.DeferredFace = (try disco_it.next()) orelse return error.FontNotFound; errdefer face.deinit(); _ = try c.add( alloc, diff --git a/src/inspector/termio.zig b/src/inspector/termio.zig index 9f55e6019..934bb6e2d 100644 --- a/src/inspector/termio.zig +++ b/src/inspector/termio.zig @@ -286,18 +286,19 @@ pub const VTEvent = struct { ), else => switch (Value) { - u8, u16 => try md.put( - key, - try std.fmt.allocPrintSentinel(alloc, "{}", .{value}, 0), - ), - []const u8, [:0]const 
u8, => try md.put(key, try alloc.dupeZ(u8, value)), - else => |T| { - @compileLog(T); - @compileError("unsupported type, see log"); + else => |T| switch (@typeInfo(T)) { + .int => try md.put( + key, + try std.fmt.allocPrintSentinel(alloc, "{}", .{value}, 0), + ), + else => { + @compileLog(T); + @compileError("unsupported type, see log"); + }, }, }, } diff --git a/src/terminal/osc.zig b/src/terminal/osc.zig index 1f4489961..14d501eaa 100644 --- a/src/terminal/osc.zig +++ b/src/terminal/osc.zig @@ -14,6 +14,8 @@ const Allocator = mem.Allocator; const LibEnum = @import("../lib/enum.zig").Enum; const kitty_color = @import("kitty/color.zig"); const parsers = @import("osc/parsers.zig"); +const encoding = @import("osc/encoding.zig"); + pub const color = parsers.color; const log = std.log.scoped(.osc); @@ -191,6 +193,9 @@ pub const Command = union(Key) { /// ConEmu GUI macro (OSC 9;6) conemu_guimacro: [:0]const u8, + /// Kitty text sizing protocol (OSC 66) + kitty_text_sizing: parsers.kitty_text_sizing.OSC, + pub const Key = LibEnum( if (build_options.c_abi) .c else .zig, // NOTE: Order matters, see LibEnum documentation. 
@@ -216,6 +221,7 @@ pub const Command = union(Key) { "conemu_progress_report", "conemu_wait_input", "conemu_guimacro", + "kitty_text_sizing", }, ); @@ -342,6 +348,7 @@ pub const Parser = struct { @"2", @"4", @"5", + @"6", @"7", @"8", @"9", @@ -358,6 +365,7 @@ pub const Parser = struct { @"21", @"22", @"52", + @"66", @"77", @"104", @"110", @@ -431,6 +439,7 @@ pub const Parser = struct { .prompt_start, .report_pwd, .show_desktop_notification, + .kitty_text_sizing, => {}, } @@ -510,6 +519,7 @@ pub const Parser = struct { '2' => self.state = .@"2", '4' => self.state = .@"4", '5' => self.state = .@"5", + '6' => self.state = .@"6", '7' => self.state = .@"7", '8' => self.state = .@"8", '9' => self.state = .@"9", @@ -600,7 +610,14 @@ pub const Parser = struct { else => self.state = .invalid, }, - .@"52" => switch (c) { + .@"6" => switch (c) { + '6' => self.state = .@"66", + else => self.state = .invalid, + }, + + .@"52", + .@"66", + => switch (c) { ';' => self.writeToAllocating(), else => self.state = .invalid, }, @@ -685,6 +702,10 @@ pub const Parser = struct { .@"52" => parsers.clipboard_operation.parse(self, terminator_ch), + .@"6" => null, + + .@"66" => parsers.kitty_text_sizing.parse(self, terminator_ch), + .@"77" => null, .@"133" => parsers.semantic_prompt.parse(self, terminator_ch), @@ -696,4 +717,5 @@ pub const Parser = struct { test { _ = parsers; + _ = encoding; } diff --git a/src/terminal/osc/encoding.zig b/src/terminal/osc/encoding.zig new file mode 100644 index 000000000..7491d10c2 --- /dev/null +++ b/src/terminal/osc/encoding.zig @@ -0,0 +1,38 @@ +//! Specialized encodings used in some OSC protocols. +const std = @import("std"); + +/// Kitty defines "Escape code safe UTF-8" as valid UTF-8 with the +/// additional requirement of not containing any C0 escape codes +/// (0x00-0x1f), DEL (0x7f) and C1 escape codes (0x80-0x9f). +/// +/// Used by OSC 66 (text sizing) and OSC 99 (Kitty notifications). 
+/// +/// See: https://sw.kovidgoyal.net/kitty/desktop-notifications/#safe-utf8 +pub fn isSafeUtf8(s: []const u8) bool { + const utf8 = std.unicode.Utf8View.init(s) catch { + @branchHint(.cold); + return false; + }; + + var it = utf8.iterator(); + while (it.nextCodepoint()) |cp| switch (cp) { + 0x00...0x1f, 0x7f, 0x80...0x9f => { + @branchHint(.cold); + return false; + }, + else => {}, + }; + + return true; +} + +test isSafeUtf8 { + const testing = std.testing; + + try testing.expect(isSafeUtf8("Hello world!")); + try testing.expect(isSafeUtf8("安全的ユニコード☀️")); + try testing.expect(!isSafeUtf8("No linebreaks\nallowed")); + try testing.expect(!isSafeUtf8("\x07no bells")); + try testing.expect(!isSafeUtf8("\x1b]9;no OSCs\x1b\\\x1b[m")); + try testing.expect(!isSafeUtf8("\x9f8-bit escapes are clever, but no")); +} diff --git a/src/terminal/osc/parsers.zig b/src/terminal/osc/parsers.zig index 152276af2..9c1c39b2c 100644 --- a/src/terminal/osc/parsers.zig +++ b/src/terminal/osc/parsers.zig @@ -6,6 +6,7 @@ pub const clipboard_operation = @import("parsers/clipboard_operation.zig"); pub const color = @import("parsers/color.zig"); pub const hyperlink = @import("parsers/hyperlink.zig"); pub const kitty_color = @import("parsers/kitty_color.zig"); +pub const kitty_text_sizing = @import("parsers/kitty_text_sizing.zig"); pub const mouse_shape = @import("parsers/mouse_shape.zig"); pub const osc9 = @import("parsers/osc9.zig"); pub const report_pwd = @import("parsers/report_pwd.zig"); @@ -19,6 +20,7 @@ test { _ = color; _ = hyperlink; _ = kitty_color; + _ = kitty_text_sizing; _ = mouse_shape; _ = osc9; _ = report_pwd; diff --git a/src/terminal/osc/parsers/kitty_text_sizing.zig b/src/terminal/osc/parsers/kitty_text_sizing.zig new file mode 100644 index 000000000..2c2d1b8fd --- /dev/null +++ b/src/terminal/osc/parsers/kitty_text_sizing.zig @@ -0,0 +1,250 @@ +//! Kitty's text sizing protocol (OSC 66) +//! 
Specification: https://sw.kovidgoyal.net/kitty/text-sizing-protocol/ + +const std = @import("std"); +const build_options = @import("terminal_options"); + +const assert = @import("../../../quirks.zig").inlineAssert; + +const Parser = @import("../../osc.zig").Parser; +const Command = @import("../../osc.zig").Command; +const encoding = @import("../encoding.zig"); +const lib = @import("../../../lib/main.zig"); +const lib_target: lib.Target = if (build_options.c_abi) .c else .zig; + +const log = std.log.scoped(.kitty_text_sizing); + +pub const max_payload_length = 4096; + +pub const VAlign = lib.Enum(lib_target, &.{ + "top", + "bottom", + "center", +}); + +pub const HAlign = lib.Enum(lib_target, &.{ + "left", + "right", + "center", +}); + +pub const OSC = struct { + scale: u3 = 1, // 1 - 7 + width: u3 = 0, // 0 - 7 (0 means default) + numerator: u4 = 0, + denominator: u4 = 0, + valign: VAlign = .top, + halign: HAlign = .left, + text: [:0]const u8, + + /// We don't currently support encoding this to C in any way. 
+ pub const C = void; + + pub fn cval(_: OSC) C { + return {}; + } + + fn update(self: *OSC, key: u8, value: []const u8) !void { + // All values are numeric, so we can do a small hack here + const v = try std.fmt.parseInt(u4, value, 10); + + switch (key) { + 's' => { + if (v == 0) return error.InvalidValue; + self.scale = std.math.cast(u3, v) orelse return error.Overflow; + }, + 'w' => self.width = std.math.cast(u3, v) orelse return error.Overflow, + 'n' => self.numerator = v, + 'd' => self.denominator = v, + 'v' => self.valign = std.enums.fromInt(VAlign, v) orelse return error.InvalidValue, + 'h' => self.halign = std.enums.fromInt(HAlign, v) orelse return error.InvalidValue, + else => return error.UnknownKey, + } + } +}; + +pub fn parse(parser: *Parser, _: ?u8) ?*Command { + assert(parser.state == .@"66"); + + const writer = parser.writer orelse { + parser.state = .invalid; + return null; + }; + + // Write a NUL byte to ensure that `text` is NUL-terminated + writer.writeByte(0) catch { + parser.state = .invalid; + return null; + }; + const data = writer.buffered(); + + const payload_start = std.mem.indexOfScalar(u8, data, ';') orelse { + log.warn("missing semicolon before payload", .{}); + parser.state = .invalid; + return null; + }; + const payload = data[payload_start + 1 .. data.len - 1 :0]; + + // Payload has to be an escape code safe UTF-8 string, + // and be under the size limit.
+ if (payload.len > max_payload_length) { + log.warn("payload is too long", .{}); + parser.state = .invalid; + return null; + } + if (!encoding.isSafeUtf8(payload)) { + log.warn("payload is not escape code safe UTF-8", .{}); + parser.state = .invalid; + return null; + } + + parser.command = .{ + .kitty_text_sizing = .{ .text = payload }, + }; + const cmd = &parser.command.kitty_text_sizing; + + // Parse any arguments if given + if (payload_start > 0) { + var kv_it = std.mem.splitScalar( + u8, + data[0..payload_start], + ':', + ); + + while (kv_it.next()) |kv| { + var it = std.mem.splitScalar(u8, kv, '='); + const k = it.next() orelse { + log.warn("missing key", .{}); + continue; + }; + if (k.len != 1) { + log.warn("key must be a single character", .{}); + continue; + } + + const value = it.next() orelse { + log.warn("missing value", .{}); + continue; + }; + + cmd.update(k[0], value) catch |err| { + switch (err) { + error.UnknownKey => log.warn("unknown key: '{c}'", .{k[0]}), + else => log.warn("invalid value for key '{c}': {}", .{ k[0], err }), + } + continue; + }; + } + } + + return &parser.command; +} + +test "OSC 66: empty parameters" { + const testing = std.testing; + + var p: Parser = .init(null); + + const input = "66;;bobr"; + for (input) |ch| p.next(ch); + + const cmd = p.end('\x1b').?.*; + try testing.expect(cmd == .kitty_text_sizing); + try testing.expectEqual(1, cmd.kitty_text_sizing.scale); + try testing.expectEqualStrings("bobr", cmd.kitty_text_sizing.text); +} + +test "OSC 66: single parameter" { + const testing = std.testing; + + var p: Parser = .init(null); + + const input = "66;s=2;kurwa"; + for (input) |ch| p.next(ch); + + const cmd = p.end('\x1b').?.*; + try testing.expect(cmd == .kitty_text_sizing); + try testing.expectEqual(2, cmd.kitty_text_sizing.scale); + try testing.expectEqualStrings("kurwa", cmd.kitty_text_sizing.text); +} + +test "OSC 66: multiple parameters" { + const testing = std.testing; + + var p: Parser = .init(null); + + const 
input = "66;s=2:w=7:n=13:d=15:v=1:h=2;long"; + for (input) |ch| p.next(ch); + + const cmd = p.end('\x1b').?.*; + try testing.expect(cmd == .kitty_text_sizing); + try testing.expectEqual(2, cmd.kitty_text_sizing.scale); + try testing.expectEqual(7, cmd.kitty_text_sizing.width); + try testing.expectEqual(13, cmd.kitty_text_sizing.numerator); + try testing.expectEqual(15, cmd.kitty_text_sizing.denominator); + try testing.expectEqual(.bottom, cmd.kitty_text_sizing.valign); + try testing.expectEqual(.center, cmd.kitty_text_sizing.halign); + try testing.expectEqualStrings("long", cmd.kitty_text_sizing.text); +} + +test "OSC 66: scale is zero" { + const testing = std.testing; + + var p: Parser = .init(null); + + const input = "66;s=0;nope"; + for (input) |ch| p.next(ch); + const cmd = p.end('\x1b').?.*; + + try testing.expect(cmd == .kitty_text_sizing); + try testing.expectEqual(1, cmd.kitty_text_sizing.scale); +} + +test "OSC 66: invalid parameters" { + const testing = std.testing; + + var p: Parser = .init(null); + + for ("66;w=8:v=3:n=16;") |ch| p.next(ch); + const cmd = p.end('\x1b').?.*; + + try testing.expect(cmd == .kitty_text_sizing); + try testing.expectEqual(0, cmd.kitty_text_sizing.width); + try testing.expect(cmd.kitty_text_sizing.valign == .top); + try testing.expectEqual(0, cmd.kitty_text_sizing.numerator); +} + +test "OSC 66: UTF-8" { + const testing = std.testing; + + var p: Parser = .init(null); + + const input = "66;;👻魑魅魍魉ゴースッティ"; + for (input) |ch| p.next(ch); + + const cmd = p.end('\x1b').?.*; + try testing.expect(cmd == .kitty_text_sizing); + try testing.expectEqualStrings("👻魑魅魍魉ゴースッティ", cmd.kitty_text_sizing.text); +} + +test "OSC 66: unsafe UTF-8" { + const testing = std.testing; + + var p: Parser = .init(null); + + const input = "66;;\n"; + for (input) |ch| p.next(ch); + + try testing.expect(p.end('\x1b') == null); +} + +test "OSC 66: overlong UTF-8" { + const testing = std.testing; + + var p: Parser = .init(null); + + const input = "66;;" ++ 
"bobr" ** 1025; + for (input) |ch| p.next(ch); + + try testing.expect(p.end('\x1b') == null); +} diff --git a/src/terminal/stream.zig b/src/terminal/stream.zig index eef249327..74a01e8a6 100644 --- a/src/terminal/stream.zig +++ b/src/terminal/stream.zig @@ -2107,6 +2107,7 @@ pub fn Stream(comptime Handler: type) type { .conemu_change_tab_title, .conemu_wait_input, .conemu_guimacro, + .kitty_text_sizing, => { log.debug("unimplemented OSC callback: {}", .{cmd}); },