refactor: use u21 for Unicode codepoints and Zig 0.15 ArrayList

- Change all codepoint types from u32 to u21 to align with Zig stdlib
- Update ArrayList usage to the Zig 0.15 unmanaged pattern (`.empty` initializer, allocator passed to each call)
- Remove unnecessary @intCast when encoding UTF-8
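- Fix formatEntry to use a stack-allocated 4096-byte buffer instead of a heap-allocated ArrayList (encoded output that would exceed the buffer is dropped)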
This commit is contained in:
mauroporras
2025-10-26 11:02:31 -05:00
committed by Mitchell Hashimoto
parent 6f662d70bc
commit 9b7c20f500
3 changed files with 53 additions and 75 deletions

View File

@@ -316,7 +316,7 @@ const DerivedConfig = struct {
macos_option_as_alt: ?input.OptionAsAlt,
selection_clear_on_copy: bool,
selection_clear_on_typing: bool,
selection_word_chars: []const u32,
selection_word_chars: []const u21,
vt_kam_allowed: bool,
wait_after_command: bool,
window_padding_top: u32,

View File

@@ -5788,13 +5788,13 @@ pub const RepeatableString = struct {
/// SelectionWordChars stores the parsed codepoints for word boundary
/// characters used during text selection. The string is parsed once
/// during configuration and stored as u32 codepoints for efficient
/// during configuration and stored as u21 codepoints for efficient
/// lookup during selection operations.
pub const SelectionWordChars = struct {
const Self = @This();
/// Default boundary characters: ` \t'"│`|:;,()[]{}<>$`
const default_codepoints = [_]u32{
const default_codepoints = [_]u21{
0, // null
' ', // space
'\t', // tab
@@ -5818,58 +5818,60 @@ pub const SelectionWordChars = struct {
};
/// The parsed codepoints. Always includes null (U+0000) at index 0.
codepoints: []const u32 = &default_codepoints,
codepoints: []const u21 = &default_codepoints,
pub fn parseCLI(self: *Self, alloc: Allocator, input: ?[]const u8) !void {
const value = input orelse return error.ValueRequired;
// Parse UTF-8 string into codepoints
var list = std.ArrayList(u32).init(alloc);
defer list.deinit();
var list: std.ArrayList(u21) = .empty;
defer list.deinit(alloc);
// Always include null as first boundary
try list.append(0);
try list.append(alloc, 0);
// Parse the UTF-8 string
const utf8_view = std.unicode.Utf8View.init(value) catch {
// Invalid UTF-8, just use null boundary
self.codepoints = try list.toOwnedSlice();
self.codepoints = try list.toOwnedSlice(alloc);
return;
};
var utf8_it = utf8_view.iterator();
while (utf8_it.nextCodepoint()) |codepoint| {
try list.append(codepoint);
try list.append(alloc, codepoint);
}
self.codepoints = try list.toOwnedSlice();
self.codepoints = try list.toOwnedSlice(alloc);
}
/// Deep copy of the struct. Required by Config.
pub fn clone(self: *const Self, alloc: Allocator) Allocator.Error!Self {
const copy = try alloc.dupe(u32, self.codepoints);
const copy = try alloc.dupe(u21, self.codepoints);
return .{ .codepoints = copy };
}
/// Compare if two values are equal. Required by Config.
pub fn equal(self: Self, other: Self) bool {
return std.mem.eql(u32, self.codepoints, other.codepoints);
return std.mem.eql(u21, self.codepoints, other.codepoints);
}
/// Used by Formatter
pub fn formatEntry(self: Self, formatter: formatterpkg.EntryFormatter) !void {
// Convert codepoints back to UTF-8 string for display
var buf = std.ArrayList(u8).init(formatter.alloc);
defer buf.deinit();
var buf: [4096]u8 = undefined;
var pos: usize = 0;
// Skip the null character at index 0
for (self.codepoints[1..]) |codepoint| {
var utf8_buf: [4]u8 = undefined;
const len = std.unicode.utf8Encode(@intCast(codepoint), &utf8_buf) catch continue;
try buf.appendSlice(utf8_buf[0..len]);
const len = std.unicode.utf8Encode(codepoint, &utf8_buf) catch continue;
if (pos + len > buf.len) break;
@memcpy(buf[pos..][0..len], utf8_buf[0..len]);
pos += len;
}
try formatter.formatEntry([]const u8, buf.items);
try formatter.formatEntry([]const u8, buf[0..pos]);
}
test "parseCLI" {
@@ -5883,11 +5885,11 @@ pub const SelectionWordChars = struct {
// Should have null + 4 characters
try testing.expectEqual(@as(usize, 5), chars.codepoints.len);
try testing.expectEqual(@as(u32, 0), chars.codepoints[0]);
try testing.expectEqual(@as(u32, ' '), chars.codepoints[1]);
try testing.expectEqual(@as(u32, '\t'), chars.codepoints[2]);
try testing.expectEqual(@as(u32, ';'), chars.codepoints[3]);
try testing.expectEqual(@as(u32, ','), chars.codepoints[4]);
try testing.expectEqual(@as(u21, 0), chars.codepoints[0]);
try testing.expectEqual(@as(u21, ' '), chars.codepoints[1]);
try testing.expectEqual(@as(u21, '\t'), chars.codepoints[2]);
try testing.expectEqual(@as(u21, ';'), chars.codepoints[3]);
try testing.expectEqual(@as(u21, ','), chars.codepoints[4]);
}
};

View File

@@ -2617,7 +2617,7 @@ pub fn selectAll(self: *Screen) ?Selection {
/// end_pt (inclusive). Because it selects "nearest" to start point, start
/// point can be before or after end point.
///
/// The boundary_codepoints parameter should be a slice of u32 codepoints that
/// The boundary_codepoints parameter should be a slice of u21 codepoints that
/// mark word boundaries, passed through to selectWord.
///
/// TODO: test this
@@ -2625,7 +2625,7 @@ pub fn selectWordBetween(
self: *Screen,
start: Pin,
end: Pin,
boundary_codepoints: []const u32,
boundary_codepoints: []const u21,
) ?Selection {
const dir: PageList.Direction = if (start.before(end)) .right_down else .left_up;
var it = start.cellIterator(dir, end);
@@ -2650,12 +2650,12 @@ pub fn selectWordBetween(
/// This will return null if a selection is impossible. The only scenario
/// this happens is if the point pt is outside of the written screen space.
///
/// The boundary_codepoints parameter should be a slice of u32 codepoints that
/// The boundary_codepoints parameter should be a slice of u21 codepoints that
/// mark word boundaries. This is expected to be pre-parsed from the config.
pub fn selectWord(
self: *Screen,
pin: Pin,
boundary_codepoints: []const u32,
boundary_codepoints: []const u21,
) ?Selection {
_ = self;
@@ -2666,9 +2666,9 @@ pub fn selectWord(
// Determine if we are a boundary or not to determine what our boundary is.
const expect_boundary = std.mem.indexOfAny(
u32,
u21,
boundary_codepoints,
&[_]u32{start_cell.content.codepoint},
&[_]u21{start_cell.content.codepoint},
) != null;
// Go forwards to find our end boundary
@@ -2684,9 +2684,9 @@ pub fn selectWord(
// If we do not match our expected set, we hit a boundary
const this_boundary = std.mem.indexOfAny(
u32,
u21,
boundary_codepoints,
&[_]u32{cell.content.codepoint},
&[_]u21{cell.content.codepoint},
) != null;
if (this_boundary != expect_boundary) break :end prev;
@@ -2721,9 +2721,9 @@ pub fn selectWord(
// If we do not match our expected set, we hit a boundary
const this_boundary = std.mem.indexOfAny(
u32,
u21,
boundary_codepoints,
&[_]u32{cell.content.codepoint},
&[_]u21{cell.content.codepoint},
) != null;
if (this_boundary != expect_boundary) break :start prev;
@@ -7687,9 +7687,9 @@ test "Screen: selectWord" {
try s.testWriteString("ABC DEF\n 123\n456");
// Default boundary codepoints for word selection
const boundary_codepoints = &[_]u32{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
const boundary_codepoints = &[_]u21{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
};
// Outside of active area
@@ -7808,9 +7808,9 @@ test "Screen: selectWord across soft-wrap" {
try s.testWriteString(" 1234012\n 123");
// Default boundary codepoints for word selection
const boundary_codepoints = &[_]u32{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
const boundary_codepoints = &[_]u21{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
};
{
@@ -7880,9 +7880,9 @@ test "Screen: selectWord whitespace across soft-wrap" {
try s.testWriteString("1 1\n 123");
// Default boundary codepoints for word selection
const boundary_codepoints = &[_]u32{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
const boundary_codepoints = &[_]u21{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
};
// Going forward
@@ -7942,9 +7942,9 @@ test "Screen: selectWord with character boundary" {
const alloc = testing.allocator;
// Default boundary codepoints for word selection
const boundary_codepoints = &[_]u32{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
const boundary_codepoints = &[_]u21{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
};
const cases = [_][]const u8{
@@ -8051,12 +8051,6 @@ test "Screen: selectOutput" {
var s = try init(alloc, .{ .cols = 10, .rows = 15, .max_scrollback = 0 });
defer s.deinit();
// Default boundary codepoints for word selection
const boundary_codepoints = &[_]u32{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
};
// zig fmt: off
{
// line number:
@@ -8082,7 +8076,7 @@ test "Screen: selectOutput" {
var sel = s.selectOutput(s.pages.pin(.{ .active = .{
.x = 1,
.y = 1,
} }).?, boundary_codepoints).?;
} }).?).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .active = .{
.x = 0,
@@ -8098,7 +8092,7 @@ test "Screen: selectOutput" {
var sel = s.selectOutput(s.pages.pin(.{ .active = .{
.x = 3,
.y = 7,
} }).?, boundary_codepoints).?;
} }).?).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .active = .{
.x = 0,
@@ -8114,7 +8108,7 @@ test "Screen: selectOutput" {
var sel = s.selectOutput(s.pages.pin(.{ .active = .{
.x = 2,
.y = 10,
} }).?, boundary_codepoints).?;
} }).?).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .active = .{
.x = 0,
@@ -8147,12 +8141,6 @@ test "Screen: selectPrompt basics" {
var s = try init(alloc, .{ .cols = 10, .rows = 15, .max_scrollback = 0 });
defer s.deinit();
// Default boundary codepoints for word selection
const boundary_codepoints = &[_]u32{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
};
// zig fmt: off
{
// line number:
@@ -8191,7 +8179,7 @@ test "Screen: selectPrompt basics" {
var sel = s.selectPrompt(s.pages.pin(.{ .active = .{
.x = 1,
.y = 6,
} }).?, boundary_codepoints).?;
} }).?).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 0,
@@ -8208,7 +8196,7 @@ test "Screen: selectPrompt basics" {
var sel = s.selectPrompt(s.pages.pin(.{ .active = .{
.x = 1,
.y = 3,
} }).?, boundary_codepoints).?;
} }).?).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 0,
@@ -8228,12 +8216,6 @@ test "Screen: selectPrompt prompt at start" {
var s = try init(alloc, .{ .cols = 10, .rows = 15, .max_scrollback = 0 });
defer s.deinit();
// Default boundary codepoints for word selection
const boundary_codepoints = &[_]u32{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
};
// zig fmt: off
{
// line number:
@@ -8258,7 +8240,7 @@ test "Screen: selectPrompt prompt at start" {
var sel = s.selectPrompt(s.pages.pin(.{ .active = .{
.x = 1,
.y = 1,
} }).?, boundary_codepoints).?;
} }).?).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 0,
@@ -8278,12 +8260,6 @@ test "Screen: selectPrompt prompt at end" {
var s = try init(alloc, .{ .cols = 10, .rows = 15, .max_scrollback = 0 });
defer s.deinit();
// Default boundary codepoints for word selection
const boundary_codepoints = &[_]u32{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
};
// zig fmt: off
{
// line number:
@@ -8308,7 +8284,7 @@ test "Screen: selectPrompt prompt at end" {
var sel = s.selectPrompt(s.pages.pin(.{ .active = .{
.x = 1,
.y = 2,
} }).?, boundary_codepoints).?;
} }).?).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 0,