unicode: change cell to wide when grapheme width changes

This commit is contained in:
Jacob Sandlund
2025-11-24 04:47:23 -05:00
parent b9ab2b8c4f
commit 6afdf3400e
5 changed files with 371 additions and 53 deletions

View File

@@ -94,6 +94,7 @@ pub const tables = [_]config.Table{
},
.fields = &.{
width.field("width"),
wcwidth.field("wcwidth_zero_in_grapheme"),
grapheme_break_no_control.field("grapheme_break_no_control"),
is_symbol.field("is_symbol"),
d.field("is_emoji_vs_text"),

View File

@@ -322,10 +322,13 @@ pub fn print(self: *Terminal, c: u21) !void {
// break or not. If we are NOT, then we are still combining the
// same grapheme. Otherwise, we can stay in this cell.
const Prev = struct { cell: *Cell, left: size.CellCountInt };
const prev: Prev = prev: {
var prev: Prev = prev: {
const left: size.CellCountInt = left: {
// If we have wraparound, then we always use the prev col
if (self.modes.get(.wraparound)) break :left 1;
// If we have wraparound, then we use the prev col unless
// there's a pending wrap, in which case we use the current.
if (self.modes.get(.wraparound)) {
break :left @intFromBool(!self.screens.active.cursor.pending_wrap);
}
// If we do not have wraparound, the logic is trickier. If
// we're not on the last column, then we just use the previous
@@ -371,6 +374,8 @@ pub fn print(self: *Terminal, c: u21) !void {
// If we can NOT break, this means that "c" is part of a grapheme
// with the previous char.
if (!grapheme_break) {
var desired_wide: enum { no_change, wide, narrow } = .no_change;
// If this is an emoji variation selector then we need to modify
// the cell width accordingly. VS16 makes the character wide and
// VS15 makes it narrow.
@@ -392,71 +397,121 @@ pub fn print(self: *Terminal, c: u21) !void {
}
switch (c) {
0xFE0F => wide: {
if (prev.cell.wide == .wide) break :wide;
0xFE0F => desired_wide = .wide,
0xFE0E => desired_wide = .narrow,
else => unreachable,
}
} else if (!unicode.table.get(c).width_zero_in_grapheme) {
desired_wide = .wide;
}
// Move our cursor back to the previous. We'll move
// the cursor within this block to the proper location.
self.screens.active.cursorLeft(prev.left);
switch (desired_wide) {
.wide => wide: {
if (prev.cell.wide == .wide) break :wide;
// If we don't have space for the wide char, we need
// to insert spacers and wrap. Then we just print the wide
// char as normal.
if (self.screens.active.cursor.x == right_limit - 1) {
if (!self.modes.get(.wraparound)) return;
// Move our cursor back to the previous. We'll move
// the cursor within this block to the proper location.
self.screens.active.cursorLeft(prev.left);
// If we don't have space for the wide char, we need to
// insert spacers and wrap. We need special handling if the
// previous cell has grapheme data.
if (self.screens.active.cursor.x == right_limit - 1) {
if (!self.modes.get(.wraparound)) return;
const prev_cp = prev.cell.content.codepoint;
if (prev.cell.hasGrapheme()) {
// This is like printCell but without clearing the
// grapheme data from the cell, so we can move it
// later.
prev.cell.wide = if (right_limit == self.cols) .spacer_head else .narrow;
prev.cell.content.codepoint = 0;
try self.printWrap();
self.printCell(prev_cp, .wide);
const new_pin = self.screens.active.cursor.page_pin.*;
const new_rac = new_pin.rowAndCell();
transfer_graphemes: {
var old_pin = self.screens.active.cursor.page_pin.up(1) orelse break :transfer_graphemes;
old_pin.x = right_limit - 1;
const old_rac = old_pin.rowAndCell();
if (new_pin.node == old_pin.node) {
new_pin.node.data.moveGrapheme(prev.cell, new_rac.cell);
prev.cell.content_tag = .codepoint;
new_rac.cell.content_tag = .codepoint_grapheme;
new_rac.row.grapheme = true;
} else {
const cps = old_pin.node.data.lookupGrapheme(old_rac.cell).?;
for (cps) |cp| {
try self.screens.active.appendGrapheme(new_rac.cell, cp);
}
old_pin.node.data.clearGrapheme(old_rac.cell);
}
old_pin.node.data.updateRowGraphemeFlag(old_rac.row);
}
prev.cell = new_rac.cell;
} else {
self.printCell(
0,
if (right_limit == self.cols) .spacer_head else .narrow,
);
try self.printWrap();
self.printCell(prev_cp, .wide);
prev.cell = self.screens.active.cursor.page_cell;
}
} else {
prev.cell.wide = .wide;
}
self.printCell(prev.cell.content.codepoint, .wide);
// Write our spacer
self.screens.active.cursorRight(1);
self.printCell(0, .spacer_tail);
// Write our spacer
// Move the cursor again so we're beyond our spacer
if (self.screens.active.cursor.x == right_limit - 1) {
self.screens.active.cursor.pending_wrap = true;
} else {
self.screens.active.cursorRight(1);
self.printCell(0, .spacer_tail);
}
},
// Move the cursor again so we're beyond our spacer
if (self.screens.active.cursor.x == right_limit - 1) {
self.screens.active.cursor.pending_wrap = true;
} else {
self.screens.active.cursorRight(1);
}
},
.narrow => narrow: {
// Prev cell is no longer wide
if (prev.cell.wide != .wide) break :narrow;
prev.cell.wide = .narrow;
0xFE0E => narrow: {
// Prev cell is no longer wide
if (prev.cell.wide != .wide) break :narrow;
prev.cell.wide = .narrow;
// Remove the wide spacer tail
const cell = self.screens.active.cursorCellLeft(prev.left - 1);
cell.wide = .narrow;
// Remove the wide spacer tail
const cell = self.screens.active.cursorCellLeft(prev.left - 1);
cell.wide = .narrow;
// Back track the cursor so that we don't end up with
// an extra space after the character. Since xterm is
// not VS aware, it cannot be used as a reference for
// this behavior; but it does follow the principle of
// least surprise, and also matches the behavior that
// can be observed in Kitty, which is one of the only
// other VS aware terminals.
if (self.screens.active.cursor.x == right_limit - 1) {
// If we're already at the right edge, we stay
// here and set the pending wrap to false since
// when we pend a wrap, we only move our cursor once
// even for wide chars (tests verify).
self.screens.active.cursor.pending_wrap = false;
} else {
// Otherwise, move back.
self.screens.active.cursorLeft(1);
}
// Back track the cursor so that we don't end up with
// an extra space after the character. Since xterm is
// not VS aware, it cannot be used as a reference for
// this behavior; but it does follow the principle of
// least surprise, and also matches the behavior that
// can be observed in Kitty, which is one of the only
// other VS aware terminals.
if (self.screens.active.cursor.x == right_limit - 1) {
// If we're already at the right edge, we stay
// here and set the pending wrap to false since
// when we pend a wrap, we only move our cursor once
// even for wide chars (tests verify).
self.screens.active.cursor.pending_wrap = false;
} else {
// Otherwise, move back.
self.screens.active.cursorLeft(1);
}
break :narrow;
},
break :narrow;
},
else => unreachable,
}
else => {},
}
log.debug("c={X} grapheme attach to left={} primary_cp={X}", .{
@@ -3645,6 +3700,92 @@ test "Terminal: VS15 to make narrow character with pending wrap" {
}
}
test "Terminal: VS16 to make wide character on next line" {
var t = try init(testing.allocator, .{ .rows = 5, .cols = 3 });
defer t.deinit(testing.allocator);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
t.cursorRight(2);
try t.print('#');
try testing.expectEqual(@as(usize, 2), t.screens.active.cursor.x);
try testing.expect(t.screens.active.cursor.pending_wrap);
try testing.expect(t.isDirty(.{ .screen = .{ .x = 2, .y = 0 } }));
t.clearDirty();
try t.print(0xFE0F); // VS16 to make wide
try testing.expect(t.isDirty(.{ .screen = .{ .x = 2, .y = 0 } }));
t.clearDirty();
try testing.expectEqual(@as(usize, 1), t.screens.active.cursor.y);
try testing.expectEqual(@as(usize, 2), t.screens.active.cursor.x);
try testing.expect(!t.screens.active.cursor.pending_wrap);
{
// Previous cell turns into spacer_head
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 2, .y = 0 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0), cell.content.codepoint);
try testing.expect(!cell.hasGrapheme());
try testing.expectEqual(Cell.Wide.spacer_head, cell.wide);
}
{
// '#' cell is wide
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 0, .y = 1 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, '#'), cell.content.codepoint);
try testing.expect(cell.hasGrapheme());
try testing.expectEqualSlices(u21, &.{0xFE0F}, list_cell.node.data.lookupGrapheme(cell).?);
try testing.expectEqual(Cell.Wide.wide, cell.wide);
}
{
// spacer_tail
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 1, .y = 1 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0), cell.content.codepoint);
try testing.expect(!cell.hasGrapheme());
try testing.expectEqual(Cell.Wide.spacer_tail, cell.wide);
}
}
test "Terminal: VS16 to make wide character with pending wrap" {
var t = try init(testing.allocator, .{ .rows = 5, .cols = 3 });
defer t.deinit(testing.allocator);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
t.cursorRight(1);
try t.print('#');
try testing.expectEqual(@as(usize, 2), t.screens.active.cursor.x);
try testing.expect(!t.screens.active.cursor.pending_wrap);
try t.print(0xFE0F); // VS16 to make wide
try testing.expectEqual(@as(usize, 2), t.screens.active.cursor.x);
try testing.expectEqual(@as(usize, 0), t.screens.active.cursor.y);
try testing.expect(t.screens.active.cursor.pending_wrap);
{
// '#' cell is wide
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 1, .y = 0 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, '#'), cell.content.codepoint);
try testing.expect(cell.hasGrapheme());
try testing.expectEqualSlices(u21, &.{0xFE0F}, list_cell.node.data.lookupGrapheme(cell).?);
try testing.expectEqual(Cell.Wide.wide, cell.wide);
}
{
// spacer_tail
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 2, .y = 0 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0), cell.content.codepoint);
try testing.expect(!cell.hasGrapheme());
try testing.expectEqual(Cell.Wide.spacer_tail, cell.wide);
}
}
test "Terminal: VS16 to make wide character with mode 2027" {
var t = try init(testing.allocator, .{ .rows = 5, .cols = 5 });
defer t.deinit(testing.allocator);
@@ -3783,6 +3924,173 @@ test "Terminal: print invalid VS16 with second char" {
}
}
test "Terminal: print grapheme ò (o with nonspacing mark) should be narrow" {
var t = try init(testing.allocator, .{ .rows = 5, .cols = 5 });
defer t.deinit(testing.allocator);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
try t.print('o');
try t.print(0x0300); // combining grave accent
// We should have 1 cell taken up.
try testing.expectEqual(@as(usize, 0), t.screens.active.cursor.y);
try testing.expectEqual(@as(usize, 1), t.screens.active.cursor.x);
// Assert various properties about our screen to verify
// we have all expected cells.
{
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 'o'), cell.content.codepoint);
try testing.expect(cell.hasGrapheme());
try testing.expectEqualSlices(u21, &.{0x0300}, list_cell.node.data.lookupGrapheme(cell).?);
try testing.expectEqual(Cell.Wide.narrow, cell.wide);
}
}
test "Terminal: print Devanagari grapheme should be wide" {
var t = try init(testing.allocator, .{ .rows = 5, .cols = 5 });
defer t.deinit(testing.allocator);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
// क्‍ष
try t.print(0x0915);
try t.print(0x094D);
try t.print(0x200D);
try t.print(0x0937);
// We should have 2 cells taken up. It is one character but "wide".
try testing.expectEqual(@as(usize, 0), t.screens.active.cursor.y);
try testing.expectEqual(@as(usize, 2), t.screens.active.cursor.x);
// Assert various properties about our screen to verify
// we have all expected cells.
{
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0x0915), cell.content.codepoint);
try testing.expect(cell.hasGrapheme());
try testing.expectEqualSlices(u21, &.{ 0x094D, 0x200D, 0x0937 }, list_cell.node.data.lookupGrapheme(cell).?);
try testing.expectEqual(Cell.Wide.wide, cell.wide);
}
{
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 1, .y = 0 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(Cell.Wide.spacer_tail, cell.wide);
}
}
test "Terminal: print Devanagari grapheme should be wide on next line" {
var t = try init(testing.allocator, .{ .rows = 5, .cols = 3 });
defer t.deinit(testing.allocator);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
t.cursorRight(2);
// क्‍ष
try t.print(0x0915);
try t.print(0x094D);
try t.print(0x200D);
try testing.expectEqual(@as(usize, 2), t.screens.active.cursor.x);
try testing.expect(t.screens.active.cursor.pending_wrap);
// This one increases the width to wide
try t.print(0x0937);
// We should have 2 cells taken up. It is one character but "wide".
try testing.expectEqual(@as(usize, 1), t.screens.active.cursor.y);
try testing.expectEqual(@as(usize, 2), t.screens.active.cursor.x);
try testing.expect(!t.screens.active.cursor.pending_wrap);
{
// Previous cell turns into spacer_head
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 2, .y = 0 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0), cell.content.codepoint);
try testing.expect(!cell.hasGrapheme());
try testing.expectEqual(Cell.Wide.spacer_head, cell.wide);
}
{
// Devanagari grapheme is wide
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 0, .y = 1 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0x0915), cell.content.codepoint);
try testing.expect(cell.hasGrapheme());
try testing.expectEqualSlices(u21, &.{ 0x094D, 0x200D, 0x0937 }, list_cell.node.data.lookupGrapheme(cell).?);
try testing.expectEqual(Cell.Wide.wide, cell.wide);
}
{
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 1, .y = 1 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(Cell.Wide.spacer_tail, cell.wide);
}
}
test "Terminal: print Devanagari grapheme should be wide on next page" {
const rows = pagepkg.std_capacity.rows;
const cols = pagepkg.std_capacity.cols;
var t = try init(testing.allocator, .{ .rows = rows, .cols = cols });
defer t.deinit(testing.allocator);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
t.cursorDown(rows - 1);
for (rows..t.screens.active.pages.pages.first.?.data.capacity.rows) |_| {
try t.index();
}
t.cursorRight(cols - 1);
try testing.expectEqual(cols - 1, t.screens.active.cursor.x);
try testing.expectEqual(rows - 1, t.screens.active.cursor.y);
// क्‍ष
try t.print(0x0915);
try t.print(0x094D);
try t.print(0x200D);
try testing.expectEqual(cols - 1, t.screens.active.cursor.x);
try testing.expect(t.screens.active.cursor.pending_wrap);
// This one increases the width to wide
try t.print(0x0937);
// We should have 2 cells taken up. It is one character but "wide".
try testing.expectEqual(rows - 1, t.screens.active.cursor.y);
try testing.expectEqual(@as(usize, 2), t.screens.active.cursor.x);
try testing.expect(!t.screens.active.cursor.pending_wrap);
{
// Previous cell turns into spacer_head
const list_cell = t.screens.active.pages.getCell(.{ .active = .{ .x = cols - 1, .y = rows - 2 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0), cell.content.codepoint);
try testing.expect(!cell.hasGrapheme());
try testing.expectEqual(Cell.Wide.spacer_head, cell.wide);
}
{
// Devanagari grapheme is wide
const list_cell = t.screens.active.pages.getCell(.{ .active = .{ .x = 0, .y = rows - 1 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0x0915), cell.content.codepoint);
try testing.expect(cell.hasGrapheme());
try testing.expectEqualSlices(u21, &.{ 0x094D, 0x200D, 0x0937 }, list_cell.node.data.lookupGrapheme(cell).?);
try testing.expectEqual(Cell.Wide.wide, cell.wide);
}
{
const list_cell = t.screens.active.pages.getCell(.{ .active = .{ .x = 1, .y = rows - 1 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(Cell.Wide.spacer_tail, cell.wide);
}
}
test "Terminal: overwrite grapheme should clear grapheme data" {
var t = try init(testing.allocator, .{ .rows = 5, .cols = 5 });
defer t.deinit(testing.allocator);

View File

@@ -1448,7 +1448,7 @@ pub const Page = struct {
/// WARNING: This will NOT change the content_tag on the cells because
/// there are scenarios where we want to move graphemes without changing
/// the content tag. Callers beware but assertIntegrity should catch this.
inline fn moveGrapheme(self: *Page, src: *Cell, dst: *Cell) void {
pub inline fn moveGrapheme(self: *Page, src: *Cell, dst: *Cell) void {
if (build_options.slow_runtime_safety) {
assert(src.hasGrapheme());
assert(!dst.hasGrapheme());

View File

@@ -13,6 +13,10 @@ pub const Properties = packed struct {
/// becomes a 2-em dash).
width: u2 = 0,
/// Whether the code point does not contribute to the width of a grapheme
/// cluster (not used for single code point cells).
width_zero_in_grapheme: bool = false,
/// Grapheme break property.
grapheme_break: uucode.x.types.GraphemeBreakNoControl = .other,
@@ -23,6 +27,7 @@ pub const Properties = packed struct {
// Needed for lut.Generator
pub fn eql(a: Properties, b: Properties) bool {
return a.width == b.width and
a.width_zero_in_grapheme == b.width_zero_in_grapheme and
a.grapheme_break == b.grapheme_break and
a.emoji_vs_text == b.emoji_vs_text and
a.emoji_vs_emoji == b.emoji_vs_emoji;
@@ -36,12 +41,14 @@ pub const Properties = packed struct {
try writer.print(
\\.{{
\\ .width= {},
\\ .width_zero_in_grapheme= {},
\\ .grapheme_break= .{s},
\\ .emoji_vs_text= {},
\\ .emoji_vs_emoji= {},
\\}}
, .{
self.width,
self.width_zero_in_grapheme,
@tagName(self.grapheme_break),
self.emoji_vs_text,
self.emoji_vs_emoji,

View File

@@ -8,6 +8,7 @@ const Properties = @import("props.zig").Properties;
pub fn get(cp: u21) Properties {
if (cp > uucode.config.max_code_point) return .{
.width = 1,
.width_zero_in_grapheme = true,
.grapheme_break = .other,
.emoji_vs_text = false,
.emoji_vs_emoji = false,
@@ -15,6 +16,7 @@ pub fn get(cp: u21) Properties {
return .{
.width = uucode.get(.width, cp),
.width_zero_in_grapheme = uucode.get(.wcwidth_zero_in_grapheme, cp),
.grapheme_break = uucode.get(.grapheme_break_no_control, cp),
.emoji_vs_text = uucode.get(.is_emoji_vs_text, cp),
.emoji_vs_emoji = uucode.get(.is_emoji_vs_emoji, cp),