config: selection-word-chars parses escape sequences

Fixes #10548. Escaped characters in selection-word-chars are now correctly parsed, allowing characters like `\t` to be included in the set of word characters.
2026-05-25 06:18:37 +00:00 · 2026-02-21 14:37:58 -08:00
parent 548930a742
commit cdfa73b403
2 changed files with 133 additions and 10 deletions
--- a/src/config/string.zig
+++ b/src/config/string.zig
@@ -36,6 +36,40 @@ pub fn parse(out: []u8, bytes: []const u8) ![]u8 {
    return out[0..dst_i];
 }

+/// Builds a non-allocating iterator over the codepoints of a string
+/// literal; escape sequences are decoded lazily as iteration proceeds.
+pub fn codepointIterator(bytes: []const u8) CodepointIterator {
+    return CodepointIterator{ .bytes = bytes, .i = 0 };
+}
+
+/// Iterator over `bytes`; `i` is the byte offset of the next unread item.
+pub const CodepointIterator = struct {
+    bytes: []const u8,
+    i: usize,
+
+    /// Returns the next codepoint, or null once the input is exhausted.
+    /// Bad escapes and malformed or truncated UTF-8 yield error.InvalidString.
+    pub fn next(self: *CodepointIterator) error{InvalidString}!?u21 {
+        if (self.i >= self.bytes.len) return null;
+        switch (self.bytes[self.i]) {
+            // An escape sequence; the parser receives the index by pointer.
+            '\\' => return switch (std.zig.string_literal.parseEscapeSequence(self.bytes, &self.i)) {
+                .failure => error.InvalidString,
+                .success => |cp| cp,
+            },
+            // Not an escape, parse as UTF-8
+            else => |start| {
+                const cp_len = std.unicode.utf8ByteSequenceLength(start) catch return error.InvalidString;
+                defer self.i += cp_len;
+                // Reject a sequence cut off by the end of input before slicing it.
+                if (self.bytes.len - self.i < cp_len) return error.InvalidString;
+                return std.unicode.utf8Decode(self.bytes[self.i..][0..cp_len]) catch
+                    return error.InvalidString;
+            },
+        }
+    }
+};
+
 test "parse: empty" {
    const testing = std.testing;

@@ -65,3 +99,48 @@ test "parse: escapes" {
        try testing.expectEqualStrings("hello\u{1F601}world", result);
    }
 }
+
+test "codepointIterator: empty" {
+    var iter = codepointIterator("");
+    try std.testing.expect((try iter.next()) == null);
+}
+
+test "codepointIterator: ascii no escapes" {
+    // Plain ASCII: each byte is yielded as its own codepoint.
+    const expected = [_]u21{ 'a', 'b', 'c' };
+    var iter = codepointIterator("abc");
+    for (expected) |want| try std.testing.expectEqual(want, (try iter.next()).?);
+    try std.testing.expect((try iter.next()) == null);
+}
+
+test "codepointIterator: multibyte utf8" {
+    // │ is U+2502, which takes three bytes in UTF-8; it is expected back
+    // as one codepoint between the two single-byte neighbours.
+    const expected = [_]u21{ 'a', '│', 'b' };
+    var iter = codepointIterator("a│b");
+    for (expected) |want| try std.testing.expectEqual(want, (try iter.next()).?);
+    try std.testing.expect((try iter.next()) == null);
+}
+
+test "codepointIterator: escape sequences" {
+    // The input holds literal backslash sequences that decode to tab, newline and backslash.
+    const expected = [_]u21{ 'a', '\t', 'b', '\n', '\\' };
+    var iter = codepointIterator("a\\tb\\n\\\\");
+    for (expected) |want| {
+        try std.testing.expectEqual(want, (try iter.next()).?);
+    }
+    try std.testing.expect((try iter.next()) == null);
+}
+
+test "codepointIterator: unicode escape" {
+    const expected = [_]u21{ '│', 'x' };
+    var iter = codepointIterator("\\u{2502}x");
+    for (expected) |want| try std.testing.expectEqual(want, (try iter.next()).?);
+    try std.testing.expect((try iter.next()) == null);
+}
+
+test "codepointIterator: emoji unicode escape" {
+    var iter = codepointIterator("\\u{1F601}");
+    try std.testing.expectEqual(@as(u21, '\u{1F601}'), (try iter.next()).?);
+    try std.testing.expect((try iter.next()) == null);
+}