unicode: emoji modifier requires emoji modifier base preceding to not break

Fixes #2941 This fixes the rendering of the text below. For those that can't see it, it is the following in UTF-32: `0x22 0x1F3FF 0x22`. ``` "🏿" ``` `0x1F3FF` is the Fitzpatrick modifier for dark skin tone. It has the Unicode property `Emoji_Modifier`. Emoji modifiers are defined in UTS #51 and are only valid based on ED-13: ``` emoji_modifier_sequence := emoji_modifier_base emoji_modifier emoji_modifier_base := \p{Emoji_Modifier_Base} emoji_modifier := \p{Emoji_Modifier} ``` Additional quote from UTS #51: > To have an effect on an emoji, an emoji modifier must immediately follow > that base emoji character. Emoji presentation selectors are neither needed > nor recommended for emoji characters when they are followed by emoji > modifiers, and should not be used in newly generated emoji modifier > sequences; the emoji modifier automatically implies the emoji presentation > style. Our precomputed grapheme break table was mistakingly not following this rule. This commit fixes that by adding a check for that every `Emoji_Modifier` character must be preceded by an `Emoji_Modifier_Base`. This only has a cost during compilation (table generation). The runtime cost is identical; the table size didn't increase since we had leftover bits we could use.
2025-10-06 01:46:33 +00:00 · 2024-12-12 12:22:12 -08:00
parent 9f75d93a55
commit fd1201323e
3 changed files with 98 additions and 5 deletions
--- a/src/terminal/Terminal.zig
+++ b/src/terminal/Terminal.zig
@@ -342,7 +342,7 @@ pub fn print(self: *Terminal, c: u21) !void {
            if (c == 0xFE0F or c == 0xFE0E) {
                // This only applies to emoji
                const prev_props = unicode.getProperties(prev.cell.content.codepoint);
-                const emoji = prev_props.grapheme_boundary_class == .extended_pictographic;
+                const emoji = prev_props.grapheme_boundary_class.isExtendedPictographic();
                if (!emoji) return;

                switch (c) {
@@ -3193,6 +3193,51 @@ test "Terminal: print multicodepoint grapheme, mode 2027" {
    }
 }

+test "Terminal: Fitzpatrick skin tone next to non-base" {
+    var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
+    defer t.deinit(testing.allocator);
+
+    // Enable grapheme clustering
+    t.modes.set(.grapheme_cluster, true);
+
+    // This is: "🏿" (which may not render correctly in your editor!)
+    try t.print(0x22); // "
+    try t.print(0x1F3FF); // Dark skin tone
+    try t.print(0x22); // "
+
+    // We should have 4 cells taken up. Importantly, the skin tone
+    // should not join with the quotes.
+    try testing.expectEqual(@as(usize, 0), t.screen.cursor.y);
+    try testing.expectEqual(@as(usize, 4), t.screen.cursor.x);
+
+    // Row should be dirty
+    try testing.expect(t.isDirty(.{ .screen = .{ .x = 0, .y = 0 } }));
+
+    // Assert various properties about our screen to verify
+    // we have all expected cells.
+    {
+        const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
+        const cell = list_cell.cell;
+        try testing.expectEqual(@as(u21, 0x22), cell.content.codepoint);
+        try testing.expect(!cell.hasGrapheme());
+        try testing.expectEqual(Cell.Wide.narrow, cell.wide);
+    }
+    {
+        const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 1, .y = 0 } }).?;
+        const cell = list_cell.cell;
+        try testing.expectEqual(@as(u21, 0x1F3FF), cell.content.codepoint);
+        try testing.expect(!cell.hasGrapheme());
+        try testing.expectEqual(Cell.Wide.wide, cell.wide);
+    }
+    {
+        const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 3, .y = 0 } }).?;
+        const cell = list_cell.cell;
+        try testing.expectEqual(@as(u21, 0x22), cell.content.codepoint);
+        try testing.expect(!cell.hasGrapheme());
+        try testing.expectEqual(Cell.Wide.narrow, cell.wide);
+    }
+}
+
 test "Terminal: multicodepoint grapheme marks dirty on every codepoint" {
    var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
    defer t.deinit(testing.allocator);