rename Splitter-CommaSplitter

2026-04-19 22:10:29 +00:00 · 2025-09-23 21:53:52 -05:00
parent 5265414a36
commit 5f3fd9742f
2 changed files with 31 additions and 31 deletions
--- a/src/cli/CommaSplitter.zig
+++ b/src/cli/CommaSplitter.zig
@@ -0,0 +1,424 @@
+//! Iterator to split a string into fields by commas, taking into account
+//! quotes and escapes.
+//!
+//! Supports the same escapes as in Zig literal strings.
+//!
+//! Quotes must begin and end with a double quote (`"`). It is an error to not
+//! end a quote that was begun. To include a double quote inside a quote (or to
+//! not have a double quote start a quoted section) escape it with a backslash.
+//!
+//! Single quotes (`'`) are not special; they do not begin a quoted block.
+//!
+//! Zig multiline string literals are NOT supported.
+//!
+//! Quotes and escapes are not stripped or decoded; that must be handled as a
+//! separate step!
+const CommaSplitter = @This();
+
+/// Errors that `next` can return while scanning a field.
+pub const Error = error{
+    /// The input ended while still inside a double-quoted section.
+    UnclosedQuote,
+    /// The input ended in the middle of an escape sequence (right after the
+    /// backslash, or part-way through a `\x` or `\u{...}` escape).
+    UnfinishedEscape,
+    /// An escape sequence was malformed: an unrecognized character after the
+    /// backslash, a non-hex digit inside `\x` or `\u{...}`, a `\u` that is
+    /// not followed by `{`, an empty `\u{}`, or a `\u{...}` value that fails
+    /// the 0x10ffff range check.
+    IllegalEscape,
+};
+
+/// The full input being split. Fields returned by `next` and `rest` are
+/// sub-slices of this string.
+str: []const u8,
+/// Offset into `str` of the first byte that has not been consumed yet. Once
+/// it reaches `str.len`, `next` and `rest` return null.
+index: usize,
+
+/// Create a splitter positioned at the very beginning of `str`.
+///
+/// The slice is stored as-is (it is not copied), and nothing is scanned until
+/// `next` or `rest` is called.
+pub fn init(str: []const u8) CommaSplitter {
+    return CommaSplitter{ .str = str, .index = 0 };
+}
+
+/// Return the next comma-separated field, or null once the whole input has
+/// been consumed.
+///
+/// The returned slice points into the original string and still contains any
+/// quotes and escape sequences exactly as written; the separating comma is
+/// consumed but not included. Commas inside a double-quoted section do not
+/// split. Because exhaustion is checked before scanning starts, an input that
+/// ends with a comma does not produce a trailing empty field.
+///
+/// Errors:
+/// - `error.UnclosedQuote`: the input ended inside a double-quoted section.
+/// - `error.UnfinishedEscape`: the input ended in the middle of an escape.
+/// - `error.IllegalEscape`: unknown escape character, a non-hex digit in a
+///   `\x` or `\u{...}` escape, a `\u` not followed by `{`, an empty `\u{}`,
+///   or a code point above 0x10ffff.
+///
+/// `index` is not rewound when an error is returned.
+pub fn next(self: *CommaSplitter) Error!?[]const u8 {
+    if (self.index >= self.str.len) return null;
+
+    // where the current field starts
+    const start = self.index;
+    // states of the scanner
+    const State = enum {
+        normal,
+        quoted,
+        escape,
+        hexescape,
+        unicodeescape,
+    };
+    // keep track of the state to return to when done processing an escape
+    // sequence (escapes may appear both inside and outside of quotes).
+    var last: State = .normal;
+    // used to count number of digits seen in a hex escape (exactly two are
+    // required)
+    var hexescape_digits: usize = 0;
+    // sub-state of parsing unicode escapes: waiting for `{`, or reading digits
+    var unicodeescape_state: enum {
+        start,
+        digits,
+    } = .start;
+    // number of digits in a unicode escape seen so far
+    var unicodeescape_digits: usize = 0;
+    // accumulator for value of unicode escape
+    var unicodeescape_value: usize = 0;
+
+    loop: switch (State.normal) {
+        .normal => {
+            // running out of input outside of quotes ends the final field
+            if (self.index >= self.str.len) return self.str[start..];
+            switch (self.str[self.index]) {
+                ',' => {
+                    // consume the comma but leave it out of the field
+                    self.index += 1;
+                    return self.str[start .. self.index - 1];
+                },
+                '"' => {
+                    self.index += 1;
+                    continue :loop .quoted;
+                },
+                '\\' => {
+                    self.index += 1;
+                    last = .normal;
+                    continue :loop .escape;
+                },
+                else => {
+                    self.index += 1;
+                    continue :loop .normal;
+                },
+            }
+        },
+        .quoted => {
+            if (self.index >= self.str.len) return error.UnclosedQuote;
+            switch (self.str[self.index]) {
+                '"' => {
+                    self.index += 1;
+                    continue :loop .normal;
+                },
+                '\\' => {
+                    self.index += 1;
+                    last = .quoted;
+                    continue :loop .escape;
+                },
+                // everything else, including commas, is part of the quote
+                else => {
+                    self.index += 1;
+                    continue :loop .quoted;
+                },
+            }
+        },
+        .escape => {
+            if (self.index >= self.str.len) return error.UnfinishedEscape;
+            switch (self.str[self.index]) {
+                'n', 'r', 't', '\\', '\'', '"' => {
+                    self.index += 1;
+                    continue :loop last;
+                },
+                'x' => {
+                    self.index += 1;
+                    hexescape_digits = 0;
+                    continue :loop .hexescape;
+                },
+                'u' => {
+                    self.index += 1;
+                    unicodeescape_state = .start;
+                    unicodeescape_digits = 0;
+                    unicodeescape_value = 0;
+                    continue :loop .unicodeescape;
+                },
+                else => return error.IllegalEscape,
+            }
+        },
+        .hexescape => {
+            if (self.index >= self.str.len) return error.UnfinishedEscape;
+            switch (self.str[self.index]) {
+                '0'...'9', 'a'...'f', 'A'...'F' => {
+                    self.index += 1;
+                    hexescape_digits += 1;
+                    if (hexescape_digits == 2) continue :loop last;
+                    continue :loop .hexescape;
+                },
+                else => return error.IllegalEscape,
+            }
+        },
+        .unicodeescape => {
+            if (self.index >= self.str.len) return error.UnfinishedEscape;
+            switch (unicodeescape_state) {
+                .start => {
+                    switch (self.str[self.index]) {
+                        '{' => {
+                            self.index += 1;
+                            unicodeescape_value = 0;
+                            unicodeescape_state = .digits;
+                            continue :loop .unicodeescape;
+                        },
+                        else => return error.IllegalEscape,
+                    }
+                },
+                .digits => {
+                    switch (self.str[self.index]) {
+                        '}' => {
+                            self.index += 1;
+                            if (unicodeescape_digits == 0) return error.IllegalEscape;
+                            continue :loop last;
+                        },
+                        '0'...'9' => |d| {
+                            self.index += 1;
+                            unicodeescape_digits += 1;
+                            unicodeescape_value <<= 4;
+                            unicodeescape_value += d - '0';
+                        },
+                        'a'...'f' => |d| {
+                            self.index += 1;
+                            unicodeescape_digits += 1;
+                            unicodeescape_value <<= 4;
+                            // 'a' is the hex digit with value 10, not 0
+                            unicodeescape_value += d - 'a' + 10;
+                        },
+                        'A'...'F' => |d| {
+                            self.index += 1;
+                            unicodeescape_digits += 1;
+                            unicodeescape_value <<= 4;
+                            // 'A' is the hex digit with value 10, not 0
+                            unicodeescape_value += d - 'A' + 10;
+                        },
+                        else => return error.IllegalEscape,
+                    }
+                    // Checking the range after every digit also keeps the
+                    // accumulator small enough that the shift above can
+                    // never overflow.
+                    if (unicodeescape_value > 0x10ffff) return error.IllegalEscape;
+                    continue :loop .unicodeescape;
+                },
+            }
+        },
+    }
+}
+
+/// Hand back everything that has not been consumed yet as a single slice,
+/// without looking for commas, quotes or escapes. Afterwards the splitter is
+/// exhausted. Returns null if nothing is left.
+pub fn rest(self: *CommaSplitter) ?[]const u8 {
+    const total = self.str.len;
+    if (self.index >= total) return null;
+
+    const remainder = self.str[self.index..];
+    self.index = total;
+    return remainder;
+}
+
+test "splitter 1" {
+    const testing = @import("std").testing;
+
+    // three plain fields
+    const input = "a,b,c";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings("a", (try splitter.next()).?);
+    try testing.expectEqualStrings("b", (try splitter.next()).?);
+    try testing.expectEqualStrings("c", (try splitter.next()).?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 2" {
+    const testing = @import("std").testing;
+
+    // empty input yields no fields at all
+    const input = "";
+    var splitter = CommaSplitter.init(input);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 3" {
+    const testing = @import("std").testing;
+
+    // a single field without any comma
+    const input = "a";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings("a", (try splitter.next()).?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 4" {
+    const testing = @import("std").testing;
+
+    // a complete hex escape is passed through undecoded
+    const input = "\\x5a";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings(input, (try splitter.next()).?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 5" {
+    const testing = @import("std").testing;
+
+    // single quotes are ordinary characters
+    const input = "'a',b";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings("'a'", (try splitter.next()).?);
+    try testing.expectEqualStrings("b", (try splitter.next()).?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 6" {
+    const testing = @import("std").testing;
+
+    // a comma between single quotes still splits
+    const input = "'a,b',c";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings("'a", (try splitter.next()).?);
+    try testing.expectEqualStrings("b'", (try splitter.next()).?);
+    try testing.expectEqualStrings("c", (try splitter.next()).?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 7" {
+    const testing = @import("std").testing;
+
+    // a comma between double quotes does not split; quotes are kept
+    const input = "\"a,b\",c";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings("\"a,b\"", (try splitter.next()).?);
+    try testing.expectEqualStrings("c", (try splitter.next()).?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 8" {
+    const testing = @import("std").testing;
+
+    // surrounding whitespace is preserved, not trimmed
+    const input = " a , b ";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings(" a ", (try splitter.next()).?);
+    try testing.expectEqualStrings(" b ", (try splitter.next()).?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 9" {
+    const testing = @import("std").testing;
+
+    // hex escape with no digits before the input ends
+    const input = "\\x";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectError(error.UnfinishedEscape, splitter.next());
+}
+
+test "splitter 10" {
+    const testing = @import("std").testing;
+
+    // hex escape with only one of the two digits
+    const input = "\\x5";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectError(error.UnfinishedEscape, splitter.next());
+}
+
+test "splitter 11" {
+    const testing = @import("std").testing;
+
+    // unicode escape that ends before the opening brace
+    const input = "\\u";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectError(error.UnfinishedEscape, splitter.next());
+}
+
+test "splitter 12" {
+    const testing = @import("std").testing;
+
+    // unicode escape that ends right after the opening brace
+    const input = "\\u{";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectError(error.UnfinishedEscape, splitter.next());
+}
+
+test "splitter 13" {
+    const testing = @import("std").testing;
+
+    // unicode escape with no digits between the braces
+    const input = "\\u{}";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectError(error.IllegalEscape, splitter.next());
+}
+
+test "splitter 14" {
+    const testing = @import("std").testing;
+
+    // unicode escape containing a non-hex character
+    const input = "\\u{h1}";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectError(error.IllegalEscape, splitter.next());
+}
+
+test "splitter 15" {
+    const testing = @import("std").testing;
+
+    // 0x10ffff is accepted and passed through undecoded
+    const input = "\\u{10ffff}";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings(input, (try splitter.next()).?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 16" {
+    const testing = @import("std").testing;
+
+    // 0x110000 is one past the accepted maximum
+    const input = "\\u{110000}";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectError(error.IllegalEscape, splitter.next());
+}
+
+test "splitter 17" {
+    const testing = @import("std").testing;
+
+    // `\d` is not a recognized escape
+    const input = "\\d";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectError(error.IllegalEscape, splitter.next());
+}
+
+test "splitter 18" {
+    const testing = @import("std").testing;
+
+    // every single-character escape is accepted and left undecoded
+    const input = "\\n\\r\\t\\\"\\'\\\\";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings(input, (try splitter.next()).?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 19" {
+    const testing = @import("std").testing;
+
+    // single quotes inside a double-quoted section are ordinary characters
+    const input = "\"abc'def'ghi\"";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings(input, (try splitter.next()).?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 20" {
+    const testing = @import("std").testing;
+
+    // a quoted field consisting only of a comma
+    const input = "\",\",abc";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings("\",\"", (try splitter.next()).?);
+    try testing.expectEqualStrings("abc", (try splitter.next()).?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 21" {
+    const testing = @import("std").testing;
+
+    // single-quoted items split normally; the leading space is kept
+    const input = "'a','b', 'c'";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings("'a'", (try splitter.next()).?);
+    try testing.expectEqualStrings("'b'", (try splitter.next()).?);
+    try testing.expectEqualStrings(" 'c'", (try splitter.next()).?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 22" {
+    const testing = @import("std").testing;
+
+    // a double quote opened mid-field and never closed
+    const input = "abc\"def";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectError(error.UnclosedQuote, splitter.next());
+}
+
+test "splitter 23" {
+    const testing = @import("std").testing;
+
+    // key:value style fields where a quoted value contains a comma
+    const input = "title:\"Focus Split: Up\",description:\"Focus the split above, if it exists.\",action:goto_split:up";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings("title:\"Focus Split: Up\"", (try splitter.next()).?);
+    try testing.expectEqualStrings("description:\"Focus the split above, if it exists.\"", (try splitter.next()).?);
+    try testing.expectEqualStrings("action:goto_split:up", (try splitter.next()).?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 24" {
+    const testing = @import("std").testing;
+
+    // `rest` returns the unconsumed tail, commas included, and exhausts
+    // the splitter
+    const input = "a,b,c,def";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings("a", (try splitter.next()).?);
+    try testing.expectEqualStrings("b", (try splitter.next()).?);
+    try testing.expectEqualStrings("c,def", splitter.rest().?);
+    try testing.expect((try splitter.next()) == null);
+}
+
+test "splitter 25" {
+    const testing = @import("std").testing;
+
+    // a comma inside the braces of a unicode escape is an illegal digit
+    const input = "a,\\u{10,df}";
+    var splitter = CommaSplitter.init(input);
+    try testing.expectEqualStrings("a", (try splitter.next()).?);
+    try testing.expectError(error.IllegalEscape, splitter.next());
+}