From cdfa73b403d2c7c26201311c9a7706da4ef11129 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <m@mitchellh.com>
Date: Sat, 21 Feb 2026 14:37:58 -0800
Subject: [PATCH] config: selection-word-chars parses escape sequences

Fixes #10548

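Escape sequences in selection-word-chars are now parsed using Zig string
literal escape rules, allowing for characters like `\t` to be included in
the set of word characters. As a consequence, a literal backslash must now
be written as `\\`, and a value containing an invalid escape sequence or
invalid UTF-8 is rejected with an error instead of silently falling back
to only the null boundary.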
---
 src/config/Config.zig | 64 +++++++++++++++++++++++++++++------
 src/config/string.zig | 79 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+), 10 deletions(-)

diff --git a/src/config/Config.zig b/src/config/Config.zig
index 4e1ed1f4b..c1888dbe8 100644
--- a/src/config/Config.zig
+++ b/src/config/Config.zig
@@ -39,6 +39,7 @@ pub const Path = @import("path.zig").Path;
 pub const RepeatablePath = @import("path.zig").RepeatablePath;
 const ClipboardCodepointMap = @import("ClipboardCodepointMap.zig");
 const KeyRemapSet = @import("../input/key_mods.zig").RemapSet;
+const string = @import("string.zig");
 
 // We do this instead of importing all of terminal/main.zig to
 // limit the dependency graph. This is important because some things
@@ -5965,22 +5966,15 @@ pub const SelectionWordChars = struct {
     pub fn parseCLI(self: *Self, alloc: Allocator, input: ?[]const u8) !void {
         const value = input orelse return error.ValueRequired;
 
-        // Parse UTF-8 string into codepoints
+        // Parse string with Zig escape sequence support into codepoints
         var list: std.ArrayList(u21) = .empty;
         defer list.deinit(alloc);
 
         // Always include null as first boundary
         try list.append(alloc, 0);
 
-        // Parse the UTF-8 string
-        const utf8_view = std.unicode.Utf8View.init(value) catch {
-            // Invalid UTF-8, just use null boundary
-            self.codepoints = try list.toOwnedSlice(alloc);
-            return;
-        };
-
-        var utf8_it = utf8_view.iterator();
-        while (utf8_it.nextCodepoint()) |codepoint| {
+        var it = string.codepointIterator(value);
+        while (it.next() catch return error.InvalidValue) |codepoint| {
             try list.append(alloc, codepoint);
         }
 
@@ -6033,6 +6027,56 @@ pub const SelectionWordChars = struct {
         try testing.expectEqual(@as(u21, ';'), chars.codepoints[3]);
         try testing.expectEqual(@as(u21, ','), chars.codepoints[4]);
     }
+
+    test "parseCLI escape sequences" {
+        const testing = std.testing;
+        var arena_state = ArenaAllocator.init(testing.allocator);
+        defer arena_state.deinit();
+        const allocator = arena_state.allocator();
+
+        // The two-byte sequence `\t` in the value must decode to one tab.
+        var word_chars: Self = .{};
+        try word_chars.parseCLI(allocator, " \\t;,");
+
+        // Index 0 is the null boundary that parseCLI always inserts.
+        try testing.expectEqualSlices(
+            u21,
+            &.{ 0, ' ', '\t', ';', ',' },
+            word_chars.codepoints,
+        );
+    }
+
+    test "parseCLI backslash escape" {
+        const testing = std.testing;
+        var arena_state = ArenaAllocator.init(testing.allocator);
+        defer arena_state.deinit();
+        const allocator = arena_state.allocator();
+
+        // An escaped backslash (`\\` in the value) must decode to exactly
+        // one backslash codepoint.
+        var word_chars: Self = .{};
+        try word_chars.parseCLI(allocator, "\\\\;");
+
+        // Null boundary first, then the decoded characters in order.
+        const expected = [_]u21{ 0, '\\', ';' };
+        try testing.expectEqualSlices(u21, &expected, word_chars.codepoints);
+    }
+
+    test "parseCLI unicode escape" {
+        const testing = std.testing;
+        var arena_state = ArenaAllocator.init(testing.allocator);
+        defer arena_state.deinit();
+        const allocator = arena_state.allocator();
+
+        // The escape `\u{2502}` must decode to the single codepoint
+        // U+2502 (│).
+        var word_chars: Self = .{};
+        try word_chars.parseCLI(allocator, "\\u{2502};");
+
+        // Null boundary first, then the decoded characters in order.
+        const expected = [_]u21{ 0, '│', ';' };
+        try testing.expectEqualSlices(u21, &expected, word_chars.codepoints);
+    }
 };
 
 /// FontVariation is a repeatable configuration value that sets a single
diff --git a/src/config/string.zig b/src/config/string.zig
index 71826f005..450799373 100644
--- a/src/config/string.zig
+++ b/src/config/string.zig
@@ -36,6 +36,63 @@ pub fn parse(out: []u8, bytes: []const u8) ![]u8 {
     return out[0..dst_i];
 }
 
+/// Creates an iterator that requires no allocation to extract codepoints
+/// from the string literal, parsing escape sequences as it goes.
+///
+/// The iterator borrows `bytes`; the slice must stay valid for as long as
+/// the iterator is in use.
+pub fn codepointIterator(bytes: []const u8) CodepointIterator {
+    return .{ .bytes = bytes, .i = 0 };
+}
+
+/// Allocation-free iterator over the codepoints of a string literal body.
+///
+/// A backslash starts a Zig escape sequence (decoded by
+/// `std.zig.string_literal.parseEscapeSequence`); any other byte starts a
+/// UTF-8 encoded codepoint.
+pub const CodepointIterator = struct {
+    /// The input being iterated. Borrowed, never modified.
+    bytes: []const u8,
+
+    /// Byte offset into `bytes` of the next codepoint to decode.
+    i: usize,
+
+    /// Returns the next codepoint, or null once the input is exhausted.
+    ///
+    /// Returns `error.InvalidString` for an invalid escape sequence or for
+    /// invalid or truncated UTF-8. Callers should stop iterating after an
+    /// error: the position is not guaranteed to have moved past the bad bytes.
+    pub fn next(self: *CodepointIterator) error{InvalidString}!?u21 {
+        if (self.i >= self.bytes.len) return null;
+        switch (self.bytes[self.i]) {
+            // An escape sequence
+            '\\' => return switch (std.zig.string_literal.parseEscapeSequence(
+                self.bytes,
+                &self.i,
+            )) {
+                .failure => error.InvalidString,
+                .success => |cp| cp,
+            },
+
+            // Not an escape, parse as UTF-8
+            else => |start| {
+                const cp_len = std.unicode.utf8ByteSequenceLength(start) catch
+                    return error.InvalidString;
+
+                // The lead byte can promise more bytes than remain (e.g. a
+                // value cut off mid-codepoint). Check before slicing so
+                // truncated input is an error, not an out-of-bounds slice.
+                const rest = self.bytes[self.i..];
+                if (rest.len < cp_len) return error.InvalidString;
+
+                defer self.i += cp_len;
+                return std.unicode.utf8Decode(rest[0..cp_len]) catch
+                    return error.InvalidString;
+            },
+        }
+    }
+};
+
 test "parse: empty" {
     const testing = std.testing;
 
@@ -65,3 +99,48 @@ test "parse: escapes" {
         try testing.expectEqualStrings("hello\u{1F601}world", result);
     }
 }
+
+test "codepointIterator: empty" {
+    var iter = codepointIterator("");
+    try std.testing.expect((try iter.next()) == null); // exhausted immediately
+}
+
+test "codepointIterator: ascii no escapes" {
+    // Plain ASCII yields one codepoint per input byte, in order.
+    const input = "abc";
+    var iter = codepointIterator(input);
+    for (input) |byte| try std.testing.expectEqual(@as(?u21, byte), try iter.next());
+    try std.testing.expectEqual(@as(?u21, null), try iter.next());
+}
+
+test "codepointIterator: multibyte utf8" {
+    // The three-byte UTF-8 encoding of │ (U+2502) must come back as a
+    // single codepoint between its ASCII neighbours.
+    var iter = codepointIterator("a│b");
+    const expected = [_]u21{ 'a', '│', 'b' };
+    for (expected) |cp| try std.testing.expectEqual(@as(?u21, cp), try iter.next());
+    try std.testing.expectEqual(@as(?u21, null), try iter.next());
+}
+
+test "codepointIterator: escape sequences" {
+    // Each backslash escape collapses to one codepoint; plain characters
+    // in between are passed through untouched.
+    var iter = codepointIterator("a\\tb\\n\\\\");
+    const expected = [_]u21{ 'a', '\t', 'b', '\n', '\\' };
+
+    for (expected) |cp| try std.testing.expectEqual(@as(?u21, cp), try iter.next());
+    try std.testing.expectEqual(@as(?u21, null), try iter.next());
+}
+
+test "codepointIterator: unicode escape" {
+    // `\u{2502}` decodes to │; the following `x` is read normally.
+    var iter = codepointIterator("\\u{2502}x");
+    for ([_]u21{ '│', 'x' }) |cp| try std.testing.expectEqual(@as(?u21, cp), try iter.next());
+    try std.testing.expectEqual(@as(?u21, null), try iter.next());
+}
+
+test "codepointIterator: emoji unicode escape" {
+    var iter = codepointIterator("\\u{1F601}"); // U+1F601 is outside the BMP
+    try std.testing.expectEqual(@as(?u21, 0x1F601), try iter.next());
+    try std.testing.expectEqual(@as(?u21, null), try iter.next());
+}