From cdfa73b403d2c7c26201311c9a7706da4ef11129 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sat, 21 Feb 2026 14:37:58 -0800 Subject: [PATCH] config: selection-word-chars parses escape sequences Fixes #10548 Escaped characters in selection-word-chars are now correctly parsed, allowing for characters like `\t` to be included in the set of word characters. --- src/config/Config.zig | 64 +++++++++++++++++++++++++++++------ src/config/string.zig | 79 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 10 deletions(-) diff --git a/src/config/Config.zig b/src/config/Config.zig index 4e1ed1f4b..c1888dbe8 100644 --- a/src/config/Config.zig +++ b/src/config/Config.zig @@ -39,6 +39,7 @@ pub const Path = @import("path.zig").Path; pub const RepeatablePath = @import("path.zig").RepeatablePath; const ClipboardCodepointMap = @import("ClipboardCodepointMap.zig"); const KeyRemapSet = @import("../input/key_mods.zig").RemapSet; +const string = @import("string.zig"); // We do this instead of importing all of terminal/main.zig to // limit the dependency graph. This is important because some things @@ -5965,22 +5966,15 @@ pub const SelectionWordChars = struct { pub fn parseCLI(self: *Self, alloc: Allocator, input: ?[]const u8) !void { const value = input orelse return error.ValueRequired; - // Parse UTF-8 string into codepoints + // Parse string with Zig escape sequence support into codepoints var list: std.ArrayList(u21) = .empty; defer list.deinit(alloc); // Always include null as first boundary try list.append(alloc, 0); - // Parse the UTF-8 string - const utf8_view = std.unicode.Utf8View.init(value) catch { - // Invalid UTF-8, just use null boundary - self.codepoints = try list.toOwnedSlice(alloc); - return; - }; - - var utf8_it = utf8_view.iterator(); - while (utf8_it.nextCodepoint()) |codepoint| { + var it = string.codepointIterator(value); + while (it.next() catch return error.InvalidValue) |codepoint| { try list.append(alloc, codepoint); } @@ -6033,6 +6027,56 @@ pub const SelectionWordChars = struct { try testing.expectEqual(@as(u21, ';'), chars.codepoints[3]); try testing.expectEqual(@as(u21, ','), chars.codepoints[4]); } + + test "parseCLI escape sequences" { + const testing = std.testing; + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + // \t escape should be parsed as tab + var chars: Self = .{}; + try chars.parseCLI(alloc, " \\t;,"); + + try testing.expectEqual(@as(usize, 5), chars.codepoints.len); + try testing.expectEqual(@as(u21, 0), chars.codepoints[0]); + try testing.expectEqual(@as(u21, ' '), chars.codepoints[1]); + try testing.expectEqual(@as(u21, '\t'), chars.codepoints[2]); + try testing.expectEqual(@as(u21, ';'), chars.codepoints[3]); + try testing.expectEqual(@as(u21, ','), chars.codepoints[4]); + } + + test "parseCLI backslash escape" { + const testing = std.testing; + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + // \\ should be parsed as a single backslash + var chars: Self = .{}; + try chars.parseCLI(alloc, "\\\\;"); + + try testing.expectEqual(@as(usize, 3), chars.codepoints.len); + try testing.expectEqual(@as(u21, 0), chars.codepoints[0]); + try testing.expectEqual(@as(u21, '\\'), chars.codepoints[1]); + try testing.expectEqual(@as(u21, ';'), chars.codepoints[2]); + } + + test "parseCLI unicode escape" { + const testing = std.testing; + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + // \u{2502} should be parsed as │ + var chars: Self = .{}; + try chars.parseCLI(alloc, "\\u{2502};"); + + try testing.expectEqual(@as(usize, 3), chars.codepoints.len); + try testing.expectEqual(@as(u21, 0), chars.codepoints[0]); + try testing.expectEqual(@as(u21, '│'), chars.codepoints[1]); + try testing.expectEqual(@as(u21, ';'), chars.codepoints[2]); + } }; /// FontVariation is a repeatable configuration value that sets a single diff --git a/src/config/string.zig b/src/config/string.zig index 71826f005..450799373 100644 --- a/src/config/string.zig +++ b/src/config/string.zig @@ -36,6 +36,40 @@ pub fn parse(out: []u8, bytes: []const u8) ![]u8 { return out[0..dst_i]; } +/// Creates an iterator that requires no allocation to extract codepoints +/// from the string literal, parsing escape sequences as it goes. +pub fn codepointIterator(bytes: []const u8) CodepointIterator { + return .{ .bytes = bytes, .i = 0 }; +} + +pub const CodepointIterator = struct { + bytes: []const u8, + i: usize, + + pub fn next(self: *CodepointIterator) error{InvalidString}!?u21 { + if (self.i >= self.bytes.len) return null; + switch (self.bytes[self.i]) { + // An escape sequence + '\\' => return switch (std.zig.string_literal.parseEscapeSequence( + self.bytes, + &self.i, + )) { + .failure => error.InvalidString, + .success => |cp| cp, + }, + + // Not an escape, parse as UTF-8 + else => |start| { + const cp_len = std.unicode.utf8ByteSequenceLength(start) catch + return error.InvalidString; + defer self.i += cp_len; + return std.unicode.utf8Decode(self.bytes[self.i..][0..cp_len]) catch + return error.InvalidString; + }, + } + } +}; + test "parse: empty" { const testing = std.testing; @@ -65,3 +99,48 @@ test "parse: escapes" { try testing.expectEqualStrings("hello\u{1F601}world", result); } } + +test "codepointIterator: empty" { + var it = codepointIterator(""); + try std.testing.expectEqual(null, try it.next()); +} + +test "codepointIterator: ascii no escapes" { + var it = codepointIterator("abc"); + try std.testing.expectEqual(@as(u21, 'a'), (try it.next()).?); + try std.testing.expectEqual(@as(u21, 'b'), (try it.next()).?); + try std.testing.expectEqual(@as(u21, 'c'), (try it.next()).?); + try std.testing.expectEqual(null, try it.next()); +} + +test "codepointIterator: multibyte utf8" { + // │ is U+2502 (3 bytes in UTF-8) + var it = codepointIterator("a│b"); + try std.testing.expectEqual(@as(u21, 'a'), (try it.next()).?); + try std.testing.expectEqual(@as(u21, '│'), (try it.next()).?); + try std.testing.expectEqual(@as(u21, 'b'), (try it.next()).?); + try std.testing.expectEqual(null, try it.next()); +} + +test "codepointIterator: escape sequences" { + var it = codepointIterator("a\\tb\\n\\\\"); + try std.testing.expectEqual(@as(u21, 'a'), (try it.next()).?); + try std.testing.expectEqual(@as(u21, '\t'), (try it.next()).?); + try std.testing.expectEqual(@as(u21, 'b'), (try it.next()).?); + try std.testing.expectEqual(@as(u21, '\n'), (try it.next()).?); + try std.testing.expectEqual(@as(u21, '\\'), (try it.next()).?); + try std.testing.expectEqual(null, try it.next()); +} + +test "codepointIterator: unicode escape" { + var it = codepointIterator("\\u{2502}x"); + try std.testing.expectEqual(@as(u21, '│'), (try it.next()).?); + try std.testing.expectEqual(@as(u21, 'x'), (try it.next()).?); + try std.testing.expectEqual(null, try it.next()); +} + +test "codepointIterator: emoji unicode escape" { + var it = codepointIterator("\\u{1F601}"); + try std.testing.expectEqual(@as(u21, 0x1F601), (try it.next()).?); + try std.testing.expectEqual(null, try it.next()); +}