config: selection-word-chars parses escape sequences

Fixes #10548

Escape sequences in `selection-word-chars` are now parsed correctly,
so characters such as `\t` can be included in the set of word
characters.
This commit is contained in:
Mitchell Hashimoto
2026-02-21 14:37:58 -08:00
parent 548930a742
commit cdfa73b403
2 changed files with 133 additions and 10 deletions

View File

@@ -39,6 +39,7 @@ pub const Path = @import("path.zig").Path;
pub const RepeatablePath = @import("path.zig").RepeatablePath;
const ClipboardCodepointMap = @import("ClipboardCodepointMap.zig");
const KeyRemapSet = @import("../input/key_mods.zig").RemapSet;
const string = @import("string.zig");
// We do this instead of importing all of terminal/main.zig to
// limit the dependency graph. This is important because some things
@@ -5965,22 +5966,15 @@ pub const SelectionWordChars = struct {
pub fn parseCLI(self: *Self, alloc: Allocator, input: ?[]const u8) !void {
const value = input orelse return error.ValueRequired;
// Parse UTF-8 string into codepoints
// Parse string with Zig escape sequence support into codepoints
var list: std.ArrayList(u21) = .empty;
defer list.deinit(alloc);
// Always include null as first boundary
try list.append(alloc, 0);
// Parse the UTF-8 string
const utf8_view = std.unicode.Utf8View.init(value) catch {
// Invalid UTF-8, just use null boundary
self.codepoints = try list.toOwnedSlice(alloc);
return;
};
var utf8_it = utf8_view.iterator();
while (utf8_it.nextCodepoint()) |codepoint| {
var it = string.codepointIterator(value);
while (it.next() catch return error.InvalidValue) |codepoint| {
try list.append(alloc, codepoint);
}
@@ -6033,6 +6027,56 @@ pub const SelectionWordChars = struct {
try testing.expectEqual(@as(u21, ';'), chars.codepoints[3]);
try testing.expectEqual(@as(u21, ','), chars.codepoints[4]);
}
test "parseCLI escape sequences" {
    const testing = std.testing;
    var arena = ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // The two-character sequence `\t` in the input must come out as a
    // single tab codepoint; the leading 0 is the entry parseCLI always
    // puts first.
    const expected = [_]u21{ 0, ' ', '\t', ';', ',' };

    var chars: Self = .{};
    try chars.parseCLI(alloc, " \\t;,");

    try testing.expectEqual(expected.len, chars.codepoints.len);
    for (expected, 0..) |want, idx| {
        try testing.expectEqual(want, chars.codepoints[idx]);
    }
}
test "parseCLI backslash escape" {
    const testing = std.testing;
    var arena = ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // An escaped backslash (`\\` in the input) collapses to one backslash
    // codepoint; the leading 0 is the entry parseCLI always puts first.
    const expected = [_]u21{ 0, '\\', ';' };

    var chars: Self = .{};
    try chars.parseCLI(alloc, "\\\\;");

    try testing.expectEqual(expected.len, chars.codepoints.len);
    for (expected, 0..) |want, idx| {
        try testing.expectEqual(want, chars.codepoints[idx]);
    }
}
test "parseCLI unicode escape" {
    const testing = std.testing;
    var arena = ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // `\u{2502}` in the input must decode to U+2502 (│); the leading 0 is
    // the entry parseCLI always puts first.
    const expected = [_]u21{ 0, '│', ';' };

    var chars: Self = .{};
    try chars.parseCLI(alloc, "\\u{2502};");

    try testing.expectEqual(expected.len, chars.codepoints.len);
    for (expected, 0..) |want, idx| {
        try testing.expectEqual(want, chars.codepoints[idx]);
    }
}
};
/// FontVariation is a repeatable configuration value that sets a single

View File

@@ -36,6 +36,40 @@ pub fn parse(out: []u8, bytes: []const u8) ![]u8 {
return out[0..dst_i];
}
/// Returns a `CodepointIterator` positioned at the start of `bytes`.
///
/// The iterator yields one codepoint per call to `next`, decoding
/// backslash escape sequences along the way. It only borrows `bytes` and
/// performs no allocation, so `bytes` must stay valid while iterating.
pub fn codepointIterator(bytes: []const u8) CodepointIterator {
    const it: CodepointIterator = .{
        .i = 0,
        .bytes = bytes,
    };
    return it;
}
/// Allocation-free iterator over the codepoints of a string that may
/// contain backslash escape sequences.
///
/// Escape sequences are decoded by
/// `std.zig.string_literal.parseEscapeSequence`; every other byte is
/// treated as the start of a UTF-8 encoded codepoint.
pub const CodepointIterator = struct {
    /// The raw, still-escaped input. Borrowed, never copied.
    bytes: []const u8,

    /// Byte offset into `bytes` of the next unread byte.
    i: usize,

    /// Returns the next codepoint, or `null` once all input is consumed.
    ///
    /// Returns `error.InvalidString` when the input contains an escape
    /// sequence the standard library parser rejects, an invalid UTF-8
    /// lead byte, a UTF-8 sequence that is cut off by the end of the
    /// input, or a UTF-8 sequence that fails to decode. Callers should
    /// stop iterating after an error; the cursor position is not
    /// meaningful at that point.
    pub fn next(self: *CodepointIterator) error{InvalidString}!?u21 {
        if (self.i >= self.bytes.len) return null;

        switch (self.bytes[self.i]) {
            // An escape sequence. The parser advances `self.i` itself.
            '\\' => return switch (std.zig.string_literal.parseEscapeSequence(
                self.bytes,
                &self.i,
            )) {
                .failure => error.InvalidString,
                .success => |cp| cp,
            },

            // Not an escape, parse as UTF-8
            else => |start| {
                const cp_len = std.unicode.utf8ByteSequenceLength(start) catch
                    return error.InvalidString;

                // The lead byte may announce more bytes than are left
                // (truncated input). Slicing without this check would go
                // out of bounds.
                const remaining = self.bytes[self.i..];
                if (cp_len > remaining.len) return error.InvalidString;

                defer self.i += cp_len;
                return std.unicode.utf8Decode(remaining[0..cp_len]) catch
                    return error.InvalidString;
            },
        }
    }
};
test "parse: empty" {
const testing = std.testing;
@@ -65,3 +99,48 @@ test "parse: escapes" {
try testing.expectEqualStrings("hello\u{1F601}world", result);
}
}
test "codepointIterator: empty" {
    const testing = std.testing;

    // With no input, the very first call already reports exhaustion.
    var iter = codepointIterator("");
    const first = try iter.next();
    try testing.expectEqual(null, first);
}
test "codepointIterator: ascii no escapes" {
    const testing = std.testing;

    // Plain ASCII: one codepoint per byte, then null.
    var iter = codepointIterator("abc");
    for ([_]u21{ 'a', 'b', 'c' }) |want| {
        try testing.expectEqual(want, (try iter.next()).?);
    }
    try testing.expectEqual(null, try iter.next());
}
test "codepointIterator: multibyte utf8" {
    const testing = std.testing;

    // │ is U+2502, which takes three bytes in UTF-8 but must be yielded
    // as a single codepoint.
    var iter = codepointIterator("a│b");
    for ([_]u21{ 'a', '│', 'b' }) |want| {
        try testing.expectEqual(want, (try iter.next()).?);
    }
    try testing.expectEqual(null, try iter.next());
}
test "codepointIterator: escape sequences" {
    const testing = std.testing;

    // Input mixes literal characters with the escapes \t, \n and \\.
    var iter = codepointIterator("a\\tb\\n\\\\");
    for ([_]u21{ 'a', '\t', 'b', '\n', '\\' }) |want| {
        try testing.expectEqual(want, (try iter.next()).?);
    }
    try testing.expectEqual(null, try iter.next());
}
test "codepointIterator: unicode escape" {
    const testing = std.testing;

    // A \u{...} escape followed by a literal character.
    var iter = codepointIterator("\\u{2502}x");
    for ([_]u21{ '│', 'x' }) |want| {
        try testing.expectEqual(want, (try iter.next()).?);
    }
    try testing.expectEqual(null, try iter.next());
}
test "codepointIterator: emoji unicode escape" {
    const testing = std.testing;

    // A \u{...} escape for a codepoint above U+FFFF.
    var iter = codepointIterator("\\u{1F601}");
    const emoji = (try iter.next()).?;
    try testing.expectEqual(@as(u21, 0x1F601), emoji);
    try testing.expectEqual(null, try iter.next());
}