config: selection-word-chars parses escape sequences

Fixes #10548

Escaped characters in selection-word-chars are now correctly parsed,
allowing for characters like `\t` to be included in the set of word
characters.
This commit is contained in:
Mitchell Hashimoto
2026-02-21 14:37:58 -08:00
parent 548930a742
commit cdfa73b403
2 changed files with 133 additions and 10 deletions

View File

@@ -36,6 +36,40 @@ pub fn parse(out: []u8, bytes: []const u8) ![]u8 {
return out[0..dst_i];
}
/// Builds a `CodepointIterator` positioned at the start of `bytes`.
///
/// The iterator walks the string literal one codepoint at a time and
/// decodes escape sequences as it encounters them, so no allocation is
/// needed to extract the codepoints.
pub fn codepointIterator(bytes: []const u8) CodepointIterator {
    const start: CodepointIterator = .{
        .i = 0,
        .bytes = bytes,
    };
    return start;
}
/// Iterator over the codepoints of a string literal body, decoding escape
/// sequences as it goes. It only holds a slice and a cursor into it.
pub const CodepointIterator = struct {
    /// The bytes being iterated over.
    bytes: []const u8,
    /// Byte offset into `bytes` of the next unread codepoint or escape.
    i: usize,

    /// Returns the next codepoint, or null once every byte has been consumed.
    ///
    /// A backslash starts an escape sequence, which is decoded by
    /// `std.zig.string_literal.parseEscapeSequence`. Any other byte is
    /// treated as the lead byte of a UTF-8 sequence.
    ///
    /// Returns `error.InvalidString` for a malformed escape, an invalid
    /// UTF-8 lead byte, a UTF-8 sequence cut short by the end of the input,
    /// or a sequence that fails to decode.
    pub fn next(self: *CodepointIterator) error{InvalidString}!?u21 {
        if (self.i >= self.bytes.len) return null;
        switch (self.bytes[self.i]) {
            // An escape sequence. The cursor is handed over by pointer so
            // the escape parser can move it.
            '\\' => return switch (std.zig.string_literal.parseEscapeSequence(
                self.bytes,
                &self.i,
            )) {
                .failure => error.InvalidString,
                .success => |cp| cp,
            },

            // Not an escape, parse as UTF-8
            else => |start| {
                const cp_len = std.unicode.utf8ByteSequenceLength(start) catch
                    return error.InvalidString;

                // The lead byte may announce more bytes than are left in the
                // input. Slicing `cp_len` bytes unconditionally would index
                // out of bounds, so report the truncation as an invalid
                // string instead.
                const rest = self.bytes[self.i..];
                if (rest.len < cp_len) {
                    // Consume the remainder so a following call returns null
                    // rather than hitting the same truncated sequence again.
                    self.i = self.bytes.len;
                    return error.InvalidString;
                }

                // Advance past the sequence whether or not decoding succeeds.
                defer self.i += cp_len;
                return std.unicode.utf8Decode(rest[0..cp_len]) catch
                    return error.InvalidString;
            },
        }
    }
};
test "parse: empty" {
const testing = std.testing;
@@ -65,3 +99,48 @@ test "parse: escapes" {
try testing.expectEqualStrings("hello\u{1F601}world", result);
}
}
test "codepointIterator: empty" {
    const testing = std.testing;

    // With no input the very first call reports exhaustion.
    var iter = codepointIterator("");
    const first = try iter.next();
    try testing.expectEqual(null, first);
}
test "codepointIterator: ascii no escapes" {
    const testing = std.testing;

    // Plain ASCII comes back one byte per codepoint, in order.
    var iter = codepointIterator("abc");
    for ("abc") |byte| {
        const got = (try iter.next()).?;
        try testing.expectEqual(@as(u21, byte), got);
    }

    // Nothing is left after the last character.
    try testing.expectEqual(null, try iter.next());
}
test "codepointIterator: multibyte utf8" {
    const testing = std.testing;

    // │ is U+2502 (3 bytes in UTF-8), sandwiched between ASCII letters.
    const expected = [_]u21{ 'a', '│', 'b' };

    var iter = codepointIterator("a│b");
    for (expected) |want| {
        const got = (try iter.next()).?;
        try testing.expectEqual(want, got);
    }
    try testing.expectEqual(null, try iter.next());
}
test "codepointIterator: escape sequences" {
    const testing = std.testing;

    // The input holds the unparsed text a\tb\n\\ and each backslash
    // escape should collapse into a single codepoint.
    const expected = [_]u21{ 'a', '\t', 'b', '\n', '\\' };

    var iter = codepointIterator("a\\tb\\n\\\\");
    for (expected) |want| {
        const got = (try iter.next()).?;
        try testing.expectEqual(want, got);
    }
    try testing.expectEqual(null, try iter.next());
}
test "codepointIterator: unicode escape" {
    const testing = std.testing;

    // A \u{...} escape followed by a plain character.
    const expected = [_]u21{ '│', 'x' };

    var iter = codepointIterator("\\u{2502}x");
    for (expected) |want| {
        const got = (try iter.next()).?;
        try testing.expectEqual(want, got);
    }
    try testing.expectEqual(null, try iter.next());
}
test "codepointIterator: emoji unicode escape" {
    const testing = std.testing;

    // A \u{...} escape for a codepoint outside the Basic Multilingual Plane.
    var iter = codepointIterator("\\u{1F601}");
    const emoji = (try iter.next()).?;
    try testing.expectEqual(@as(u21, 0x1F601), emoji);

    const after = try iter.next();
    try testing.expectEqual(null, after);
}