//! Iterator to split a string into fields by commas, taking into account
//! quotes and escapes.
//!
//! Supports the same escapes as in Zig literal strings.
//!
//! Quotes must begin and end with a double quote (`"`). It is an error to not
//! end a quote that was begun. To include a double quote inside a quote (or to
//! not have a double quote start a quoted section) escape it with a backslash.
//!
//! Single quotes (`'`) are not special, they do not begin a quoted block.
//!
//! Zig multiline string literals are NOT supported.
//!
//! Quotes and escapes are not stripped or decoded, that must be handled as a
//! separate step!
const CommaSplitter = @This();

/// Errors that `next` can report while scanning a field.
pub const Error = error{
    /// A `"` opened a quoted section that was never closed.
    UnclosedQuote,
    /// The input ended in the middle of an escape sequence.
    UnfinishedEscape,
    /// An escape sequence was malformed or encoded an out-of-range value.
    IllegalEscape,
};

/// the string that we are splitting
str: []const u8,
/// how much of the string has been consumed so far
index: usize,

/// initialize a splitter with the given string
pub fn init(str: []const u8) CommaSplitter {
    return .{
        .str = str,
        .index = 0,
    };
}

/// Return the next field, or null if the input has been fully consumed.
///
/// The returned slice points into the original string and excludes the
/// terminating comma. Quotes and escape sequences are validated but left
/// untouched in the returned slice.
///
/// Because exhaustion is checked before scanning, a comma that is the very
/// last byte of the input does not produce a trailing empty field. Empty
/// fields elsewhere (e.g. the middle of `a,,b`) are returned as empty slices.
///
/// Errors:
///   - `error.UnclosedQuote` if the input ends inside a quoted section.
///   - `error.UnfinishedEscape` if the input ends inside an escape sequence.
///   - `error.IllegalEscape` if an escape is malformed, or a `\u{...}`
///     escape has no digits or a value above 0x10ffff.
///
/// When an error is returned `index` is left where scanning stopped; it is
/// not rewound to the start of the field.
pub fn next(self: *CommaSplitter) Error!?[]const u8 {
    if (self.index >= self.str.len) return null;

    // where the current field starts
    const start = self.index;

    // state of state machine
    const State = enum {
        normal,
        quoted,
        escape,
        hexescape,
        unicodeescape,
    };

    // keep track of the state to return to when done processing an escape
    // sequence.
    var last: State = .normal;

    // used to count number of digits seen in a hex escape
    var hexescape_digits: usize = 0;

    // sub-state of parsing unicode escapes
    var unicodeescape_state: enum {
        start,
        digits,
    } = .start;

    // number of digits in a unicode escape seen so far
    var unicodeescape_digits: usize = 0;

    // accumulator for value of unicode escape
    var unicodeescape_value: usize = 0;

    loop: switch (State.normal) {
        .normal => {
            // end of input outside of quotes/escapes: the final field
            if (self.index >= self.str.len) return self.str[start..];
            switch (self.str[self.index]) {
                ',' => {
                    // consume the comma but do not include it in the field
                    self.index += 1;
                    return self.str[start .. self.index - 1];
                },
                '"' => {
                    self.index += 1;
                    continue :loop .quoted;
                },
                '\\' => {
                    self.index += 1;
                    last = .normal;
                    continue :loop .escape;
                },
                else => {
                    self.index += 1;
                    continue :loop .normal;
                },
            }
        },
        .quoted => {
            if (self.index >= self.str.len) return error.UnclosedQuote;
            switch (self.str[self.index]) {
                '"' => {
                    self.index += 1;
                    continue :loop .normal;
                },
                '\\' => {
                    self.index += 1;
                    last = .quoted;
                    continue :loop .escape;
                },
                // commas are ordinary characters while quoted
                else => {
                    self.index += 1;
                    continue :loop .quoted;
                },
            }
        },
        .escape => {
            if (self.index >= self.str.len) return error.UnfinishedEscape;
            switch (self.str[self.index]) {
                'n', 'r', 't', '\\', '\'', '"' => {
                    self.index += 1;
                    continue :loop last;
                },
                'x' => {
                    self.index += 1;
                    hexescape_digits = 0;
                    continue :loop .hexescape;
                },
                'u' => {
                    self.index += 1;
                    unicodeescape_state = .start;
                    unicodeescape_digits = 0;
                    unicodeescape_value = 0;
                    continue :loop .unicodeescape;
                },
                else => return error.IllegalEscape,
            }
        },
        .hexescape => {
            if (self.index >= self.str.len) return error.UnfinishedEscape;
            switch (self.str[self.index]) {
                '0'...'9', 'a'...'f', 'A'...'F' => {
                    self.index += 1;
                    hexescape_digits += 1;
                    // a hex escape is exactly two digits long
                    if (hexescape_digits == 2) continue :loop last;
                    continue :loop .hexescape;
                },
                else => return error.IllegalEscape,
            }
        },
        .unicodeescape => {
            if (self.index >= self.str.len) return error.UnfinishedEscape;
            switch (unicodeescape_state) {
                .start => {
                    switch (self.str[self.index]) {
                        '{' => {
                            self.index += 1;
                            unicodeescape_value = 0;
                            unicodeescape_state = .digits;
                            continue :loop .unicodeescape;
                        },
                        else => return error.IllegalEscape,
                    }
                },
                .digits => {
                    switch (self.str[self.index]) {
                        '}' => {
                            self.index += 1;
                            if (unicodeescape_digits == 0) return error.IllegalEscape;
                            continue :loop last;
                        },
                        '0'...'9' => |d| {
                            self.index += 1;
                            unicodeescape_digits += 1;
                            unicodeescape_value <<= 4;
                            unicodeescape_value += d - '0';
                        },
                        // hex letters are worth 10..15, so offset by 10
                        'a'...'f' => |d| {
                            self.index += 1;
                            unicodeescape_digits += 1;
                            unicodeescape_value <<= 4;
                            unicodeescape_value += d - 'a' + 10;
                        },
                        'A'...'F' => |d| {
                            self.index += 1;
                            unicodeescape_digits += 1;
                            unicodeescape_value <<= 4;
                            unicodeescape_value += d - 'A' + 10;
                        },
                        else => return error.IllegalEscape,
                    }
                    // Checked after every digit, so the accumulator never
                    // exceeds 0x10ffff before the next shift.
                    if (unicodeescape_value > 0x10ffff) return error.IllegalEscape;
                    continue :loop .unicodeescape;
                },
            }
        },
    }
}

/// Return any remaining string data, whether it has a comma or not.
///
/// Returns null if everything has already been consumed. Otherwise the whole
/// unconsumed tail is returned as-is (no quote or escape validation) and the
/// splitter is marked as fully consumed.
pub fn rest(self: *CommaSplitter) ?[]const u8 {
    if (self.index >= self.str.len) return null;
    defer self.index = self.str.len;
    return self.str[self.index..];
}

test "splitter 1" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("a,b,c");
    try testing.expectEqualStrings("a", (try s.next()).?);
    try testing.expectEqualStrings("b", (try s.next()).?);
    try testing.expectEqualStrings("c", (try s.next()).?);
    try testing.expect(null == try s.next());
}

test "splitter 2" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("");
    try testing.expect(null == try s.next());
}

test "splitter 3" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("a");
    try testing.expectEqualStrings("a", (try s.next()).?);
    try testing.expect(null == try s.next());
}

test "splitter 4" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("\\x5a");
    try testing.expectEqualStrings("\\x5a", (try s.next()).?);
    try testing.expect(null == try s.next());
}

test "splitter 5" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("'a',b");
    try testing.expectEqualStrings("'a'", (try s.next()).?);
    try testing.expectEqualStrings("b", (try s.next()).?);
    try testing.expect(null == try s.next());
}

test "splitter 6" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("'a,b',c");
    try testing.expectEqualStrings("'a", (try s.next()).?);
    try testing.expectEqualStrings("b'", (try s.next()).?);
    try testing.expectEqualStrings("c", (try s.next()).?);
    try testing.expect(null == try s.next());
}

test "splitter 7" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("\"a,b\",c");
    try testing.expectEqualStrings("\"a,b\"", (try s.next()).?);
    try testing.expectEqualStrings("c", (try s.next()).?);
    try testing.expect(null == try s.next());
}

test "splitter 8" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init(" a , b ");
    try testing.expectEqualStrings(" a ", (try s.next()).?);
    try testing.expectEqualStrings(" b ", (try s.next()).?);
    try testing.expect(null == try s.next());
}

test "splitter 9" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("\\x");
    try testing.expectError(error.UnfinishedEscape, s.next());
}

test "splitter 10" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("\\x5");
    try testing.expectError(error.UnfinishedEscape, s.next());
}

test "splitter 11" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("\\u");
    try testing.expectError(error.UnfinishedEscape, s.next());
}

test "splitter 12" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("\\u{");
    try testing.expectError(error.UnfinishedEscape, s.next());
}

test "splitter 13" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("\\u{}");
    try testing.expectError(error.IllegalEscape, s.next());
}

test "splitter 14" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("\\u{h1}");
    try testing.expectError(error.IllegalEscape, s.next());
}

test "splitter 15" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("\\u{10ffff}");
    try testing.expectEqualStrings("\\u{10ffff}", (try s.next()).?);
    try testing.expect(null == try s.next());
}

test "splitter 16" {
    const std = @import("std");
    const testing = std.testing;

    var s: CommaSplitter = .init("\\u{110000}");
    try testing.expectError(error.IllegalEscape, s.next());
}

test "splitter unicode escape hex letter digits" {
    const std = @import("std");
    const testing = std.testing;

    // Letter digits must count as 10..15 so out-of-range values are rejected.
    var lower: CommaSplitter = .init("\\u{a00000}");
    try testing.expectError(error.IllegalEscape, lower.next());

    var upper: CommaSplitter = .init("\\u{B00000}");
    try testing.expectError(error.IllegalEscape, upper.next());

    // The largest valid code point is still accepted in either case.
    var ok: CommaSplitter = .init("\\u{10FFFF},x");
    try testing.expectEqualStrings("\\u{10FFFF}", (try ok.next()).?);
    try testing.expectEqualStrings("x", (try ok.next()).?);
    try testing.expect(null == try ok.next());
}
// An unknown escape letter is rejected.
test "splitter 17" {
    const testing = @import("std").testing;

    var splitter = CommaSplitter.init("\\d");
    try testing.expectError(error.IllegalEscape, splitter.next());
}

// Every single-character escape is accepted and returned undecoded.
test "splitter 18" {
    const testing = @import("std").testing;

    const input = "\\n\\r\\t\\\"\\'\\\\";
    var splitter = CommaSplitter.init(input);
    const field = (try splitter.next()).?;
    try testing.expectEqualStrings("\\n\\r\\t\\\"\\'\\\\", field);
    try testing.expect((try splitter.next()) == null);
}

// Single quotes inside a double-quoted section are ordinary characters.
test "splitter 19" {
    const testing = @import("std").testing;

    const input = "\"abc'def'ghi\"";
    var splitter = CommaSplitter.init(input);
    const field = (try splitter.next()).?;
    try testing.expectEqualStrings("\"abc'def'ghi\"", field);
    try testing.expect((try splitter.next()) == null);
}

// A quoted comma does not split the field.
test "splitter 20" {
    const testing = @import("std").testing;

    var splitter = CommaSplitter.init("\",\",abc");
    const first = (try splitter.next()).?;
    try testing.expectEqualStrings("\",\"", first);
    const second = (try splitter.next()).?;
    try testing.expectEqualStrings("abc", second);
    try testing.expect((try splitter.next()) == null);
}

// Whitespace around single-quoted fields is preserved.
test "splitter 21" {
    const testing = @import("std").testing;

    var splitter = CommaSplitter.init("'a','b', 'c'");
    const first = (try splitter.next()).?;
    try testing.expectEqualStrings("'a'", first);
    const second = (try splitter.next()).?;
    try testing.expectEqualStrings("'b'", second);
    const third = (try splitter.next()).?;
    try testing.expectEqualStrings(" 'c'", third);
    try testing.expect((try splitter.next()) == null);
}

// A double quote that is never closed is an error.
test "splitter 22" {
    const testing = @import("std").testing;

    var splitter = CommaSplitter.init("abc\"def");
    try testing.expectError(error.UnclosedQuote, splitter.next());
}

// Realistic input mixing quoted sections, colons and commas.
test "splitter 23" {
    const testing = @import("std").testing;

    var splitter = CommaSplitter.init("title:\"Focus Split: Up\",description:\"Focus the split above, if it exists.\",action:goto_split:up");
    const title = (try splitter.next()).?;
    try testing.expectEqualStrings("title:\"Focus Split: Up\"", title);
    const description = (try splitter.next()).?;
    try testing.expectEqualStrings("description:\"Focus the split above, if it exists.\"", description);
    const action = (try splitter.next()).?;
    try testing.expectEqualStrings("action:goto_split:up", action);
    try testing.expect((try splitter.next()) == null);
}

// `rest` hands back the unsplit tail and exhausts the splitter.
test "splitter 24" {
    const testing = @import("std").testing;

    var splitter = CommaSplitter.init("a,b,c,def");
    const first = (try splitter.next()).?;
    try testing.expectEqualStrings("a", first);
    const second = (try splitter.next()).?;
    try testing.expectEqualStrings("b", second);
    const tail = splitter.rest().?;
    try testing.expectEqualStrings("c,def", tail);
    try testing.expect((try splitter.next()) == null);
}

// A comma inside the braces of a unicode escape is an illegal escape.
test "splitter 25" {
    const testing = @import("std").testing;

    var splitter = CommaSplitter.init("a,\\u{10,df}");
    const first = (try splitter.next()).?;
    try testing.expectEqualStrings("a", first);
    try testing.expectError(error.IllegalEscape, splitter.next());
}