Files
ghostty/src/cli/CommaSplitter.zig
2025-09-23 21:53:52 -05:00

425 lines
13 KiB
Zig

//! Iterator to split a string into fields by commas, taking into account
//! quotes and escapes.
//!
//! Supports the same escapes as in Zig literal strings.
//!
//! Quotes must begin and end with a double quote (`"`). It is an error to not
//! end a quote that was begun. To include a double quote inside a quote (or to
//! not have a double quote start a quoted section) escape it with a backslash.
//!
//! Single quotes (`'`) are not special, they do not begin a quoted block.
//!
//! Zig multiline string literals are NOT supported.
//!
//! Quotes and escapes are not stripped or decoded; that must be handled as a
//! separate step!
const CommaSplitter = @This();

/// Errors that `next` reports for a malformed field.
pub const Error = error{
    /// A `"` opened a quoted section and the input ended before it was closed.
    UnclosedQuote,
    /// The input ended in the middle of a backslash escape sequence.
    UnfinishedEscape,
    /// A backslash escape is malformed: unknown escape character, non-hex
    /// digit, empty `\u{}`, or a `\u{...}` value above 0x10ffff.
    IllegalEscape,
};

/// The string that we are splitting. The slice is stored as-is (not copied),
/// and every field returned by `next`/`rest` is a sub-slice of it.
str: []const u8,

/// How much of the string has been consumed so far (byte offset into `str`).
index: usize,

/// Initialize a splitter positioned at the start of `str`.
pub fn init(str: []const u8) CommaSplitter {
    return .{
        .str = str,
        .index = 0,
    };
}

/// Return the next field, or null once the whole string has been consumed.
///
/// A field ends at the first comma that is neither inside a double-quoted
/// section nor part of an escape sequence; the comma itself is consumed but
/// not included in the returned slice. Quotes and escapes are validated but
/// returned verbatim. Because exhaustion is checked before scanning, a
/// trailing comma does not produce a final empty field.
///
/// Errors:
///   - `error.UnclosedQuote`: the input ended inside a quoted section.
///   - `error.UnfinishedEscape`: the input ended inside an escape sequence.
///   - `error.IllegalEscape`: an escape sequence is malformed or its
///     `\u{...}` value exceeds 0x10ffff.
///
/// When an error is returned, `index` is left wherever scanning stopped; it
/// is not rewound to the start of the offending field.
pub fn next(self: *CommaSplitter) Error!?[]const u8 {
    if (self.index >= self.str.len) return null;

    // where the current field starts
    const start = self.index;

    // states of the scanner
    const State = enum {
        normal,
        quoted,
        escape,
        hexescape,
        unicodeescape,
    };

    // the state to return to once an escape sequence has been fully consumed
    // (escapes can occur both inside and outside of quotes)
    var last: State = .normal;

    // number of digits seen so far in a `\xNN` escape
    var hexescape_digits: usize = 0;

    // sub-state of parsing a `\u{...}` escape: waiting for `{`, or reading
    // digits up to the closing `}`
    var unicodeescape_state: enum {
        start,
        digits,
    } = .start;

    // number of digits seen so far in a `\u{...}` escape
    var unicodeescape_digits: usize = 0;

    // accumulated value of the `\u{...}` escape, used only for range checking
    var unicodeescape_value: usize = 0;

    loop: switch (State.normal) {
        .normal => {
            // End of input outside of quotes/escapes: the rest is the field.
            if (self.index >= self.str.len) return self.str[start..];

            switch (self.str[self.index]) {
                ',' => {
                    // Consume the comma but leave it out of the field.
                    self.index += 1;
                    return self.str[start .. self.index - 1];
                },
                '"' => {
                    self.index += 1;
                    continue :loop .quoted;
                },
                '\\' => {
                    self.index += 1;
                    last = .normal;
                    continue :loop .escape;
                },
                else => {
                    self.index += 1;
                    continue :loop .normal;
                },
            }
        },

        .quoted => {
            if (self.index >= self.str.len) return error.UnclosedQuote;

            switch (self.str[self.index]) {
                '"' => {
                    self.index += 1;
                    continue :loop .normal;
                },
                '\\' => {
                    self.index += 1;
                    last = .quoted;
                    continue :loop .escape;
                },
                // Everything else, commas included, is plain quoted content.
                else => {
                    self.index += 1;
                    continue :loop .quoted;
                },
            }
        },

        .escape => {
            if (self.index >= self.str.len) return error.UnfinishedEscape;

            switch (self.str[self.index]) {
                'n', 'r', 't', '\\', '\'', '"' => {
                    self.index += 1;
                    continue :loop last;
                },
                'x' => {
                    self.index += 1;
                    hexescape_digits = 0;
                    continue :loop .hexescape;
                },
                'u' => {
                    self.index += 1;
                    unicodeescape_state = .start;
                    unicodeescape_digits = 0;
                    unicodeescape_value = 0;
                    continue :loop .unicodeescape;
                },
                else => return error.IllegalEscape,
            }
        },

        .hexescape => {
            if (self.index >= self.str.len) return error.UnfinishedEscape;

            switch (self.str[self.index]) {
                '0'...'9', 'a'...'f', 'A'...'F' => {
                    self.index += 1;
                    hexescape_digits += 1;
                    // `\x` takes exactly two hex digits.
                    if (hexescape_digits == 2) continue :loop last;
                    continue :loop .hexescape;
                },
                else => return error.IllegalEscape,
            }
        },

        .unicodeescape => {
            if (self.index >= self.str.len) return error.UnfinishedEscape;

            switch (unicodeescape_state) {
                .start => switch (self.str[self.index]) {
                    '{' => {
                        self.index += 1;
                        unicodeescape_state = .digits;
                        continue :loop .unicodeescape;
                    },
                    else => return error.IllegalEscape,
                },

                .digits => {
                    const c = self.str[self.index];

                    // Numeric value of the hex digit. The letters stand for
                    // 10-15, so they need the `+ 10` offset; without it the
                    // accumulated value is too small and the range check
                    // below lets out-of-range escapes through.
                    const digit: usize = switch (c) {
                        '}' => {
                            self.index += 1;
                            if (unicodeescape_digits == 0) return error.IllegalEscape;
                            continue :loop last;
                        },
                        '0'...'9' => c - '0',
                        'a'...'f' => c - 'a' + 10,
                        'A'...'F' => c - 'A' + 10,
                        else => return error.IllegalEscape,
                    };

                    self.index += 1;
                    unicodeescape_digits += 1;

                    // The value is checked after every digit, so it is at
                    // most 0x10ffff before this shift and cannot overflow.
                    unicodeescape_value = (unicodeescape_value << 4) | digit;
                    if (unicodeescape_value > 0x10ffff) return error.IllegalEscape;

                    continue :loop .unicodeescape;
                },
            }
        },
    }
}

/// Return any remaining string data, whether it has a comma or not.
///
/// The remainder is returned verbatim, without any quote or escape
/// validation, and the splitter is marked as fully consumed. Returns null if
/// nothing is left.
pub fn rest(self: *CommaSplitter) ?[]const u8 {
    if (self.index >= self.str.len) return null;

    const remaining = self.str[self.index..];
    self.index = self.str.len;
    return remaining;
}

test "splitter unicode escape hex letters" {
    const testing = @import("std").testing;

    // Letter digits count as 10-15, so these values exceed 0x10ffff.
    var lower: CommaSplitter = .init("\\u{aaaaaa}");
    try testing.expectError(error.IllegalEscape, lower.next());

    var upper: CommaSplitter = .init("\\u{1A0000}");
    try testing.expectError(error.IllegalEscape, upper.next());

    // In-range escapes with mixed-case letters are still accepted.
    var ok: CommaSplitter = .init("\\u{fFfF},x");
    try testing.expectEqualStrings("\\u{fFfF}", (try ok.next()).?);
    try testing.expectEqualStrings("x", (try ok.next()).?);
    try testing.expect(null == try ok.next());
}
test "splitter 1" {
    const testing = @import("std").testing;

    // Plain, unquoted fields come back one at a time, then null.
    var splitter = CommaSplitter.init("a,b,c");
    for ([_][]const u8{ "a", "b", "c" }) |want| {
        try testing.expectEqualStrings(want, (try splitter.next()).?);
    }
    try testing.expect((try splitter.next()) == null);
}
test "splitter 2" {
    const testing = @import("std").testing;

    // An empty input yields no fields at all.
    var splitter = CommaSplitter.init("");
    try testing.expect((try splitter.next()) == null);
}
test "splitter 3" {
    const testing = @import("std").testing;

    // A single field without any comma.
    var splitter = CommaSplitter.init("a");
    const field = (try splitter.next()).?;
    try testing.expectEqualStrings("a", field);
    try testing.expect((try splitter.next()) == null);
}
test "splitter 4" {
    const testing = @import("std").testing;

    // A complete two-digit hex escape is accepted and returned verbatim.
    var splitter = CommaSplitter.init("\\x5a");
    const field = (try splitter.next()).?;
    try testing.expectEqualStrings("\\x5a", field);
    try testing.expect((try splitter.next()) == null);
}
test "splitter 5" {
    const testing = @import("std").testing;

    // Single quotes are ordinary characters and stay in the field.
    var splitter = CommaSplitter.init("'a',b");
    for ([_][]const u8{ "'a'", "b" }) |want| {
        try testing.expectEqualStrings(want, (try splitter.next()).?);
    }
    try testing.expect((try splitter.next()) == null);
}
test "splitter 6" {
    const testing = @import("std").testing;

    // Single quotes do not protect a comma from splitting.
    var splitter = CommaSplitter.init("'a,b',c");
    for ([_][]const u8{ "'a", "b'", "c" }) |want| {
        try testing.expectEqualStrings(want, (try splitter.next()).?);
    }
    try testing.expect((try splitter.next()) == null);
}
test "splitter 7" {
    const testing = @import("std").testing;

    // A comma inside double quotes does not split; the quotes are kept.
    var splitter = CommaSplitter.init("\"a,b\",c");
    for ([_][]const u8{ "\"a,b\"", "c" }) |want| {
        try testing.expectEqualStrings(want, (try splitter.next()).?);
    }
    try testing.expect((try splitter.next()) == null);
}
test "splitter 8" {
    const testing = @import("std").testing;

    // Whitespace around fields is preserved, not trimmed.
    var splitter = CommaSplitter.init(" a , b ");
    for ([_][]const u8{ " a ", " b " }) |want| {
        try testing.expectEqualStrings(want, (try splitter.next()).?);
    }
    try testing.expect((try splitter.next()) == null);
}
test "splitter 9" {
    const testing = @import("std").testing;

    // A hex escape with no digits before end of input is unfinished.
    var splitter = CommaSplitter.init("\\x");
    try testing.expectError(error.UnfinishedEscape, splitter.next());
}
test "splitter 10" {
    const testing = @import("std").testing;

    // A hex escape with only one digit before end of input is unfinished.
    var splitter = CommaSplitter.init("\\x5");
    try testing.expectError(error.UnfinishedEscape, splitter.next());
}
test "splitter 11" {
    const testing = @import("std").testing;

    // A unicode escape that ends before its opening brace is unfinished.
    var splitter = CommaSplitter.init("\\u");
    try testing.expectError(error.UnfinishedEscape, splitter.next());
}
test "splitter 12" {
    const testing = @import("std").testing;

    // A unicode escape that ends right after the opening brace is unfinished.
    var splitter = CommaSplitter.init("\\u{");
    try testing.expectError(error.UnfinishedEscape, splitter.next());
}
test "splitter 13" {
    const testing = @import("std").testing;

    // A unicode escape must contain at least one digit.
    var splitter = CommaSplitter.init("\\u{}");
    try testing.expectError(error.IllegalEscape, splitter.next());
}
test "splitter 14" {
    const testing = @import("std").testing;

    // A non-hex character inside the braces is rejected.
    var splitter = CommaSplitter.init("\\u{h1}");
    try testing.expectError(error.IllegalEscape, splitter.next());
}
test "splitter 15" {
    const testing = @import("std").testing;

    // 0x10ffff is the largest value accepted in a unicode escape.
    var splitter = CommaSplitter.init("\\u{10ffff}");
    const field = (try splitter.next()).?;
    try testing.expectEqualStrings("\\u{10ffff}", field);
    try testing.expect((try splitter.next()) == null);
}
test "splitter 16" {
    const testing = @import("std").testing;

    // One past the maximum (0x110000) is rejected.
    var splitter = CommaSplitter.init("\\u{110000}");
    try testing.expectError(error.IllegalEscape, splitter.next());
}
test "splitter 17" {
    const testing = @import("std").testing;

    // An unknown escape character is rejected.
    var splitter = CommaSplitter.init("\\d");
    try testing.expectError(error.IllegalEscape, splitter.next());
}
test "splitter 18" {
    const testing = @import("std").testing;

    // All of the single-character escapes are accepted and kept verbatim.
    var splitter = CommaSplitter.init("\\n\\r\\t\\\"\\'\\\\");
    const field = (try splitter.next()).?;
    try testing.expectEqualStrings("\\n\\r\\t\\\"\\'\\\\", field);
    try testing.expect((try splitter.next()) == null);
}
test "splitter 19" {
    const testing = @import("std").testing;

    // Single quotes inside a double-quoted section are ordinary content.
    var splitter = CommaSplitter.init("\"abc'def'ghi\"");
    const field = (try splitter.next()).?;
    try testing.expectEqualStrings("\"abc'def'ghi\"", field);
    try testing.expect((try splitter.next()) == null);
}
test "splitter 20" {
    const testing = @import("std").testing;

    // A quoted field may consist of nothing but a comma.
    var splitter = CommaSplitter.init("\",\",abc");
    for ([_][]const u8{ "\",\"", "abc" }) |want| {
        try testing.expectEqualStrings(want, (try splitter.next()).?);
    }
    try testing.expect((try splitter.next()) == null);
}
test "splitter 21" {
    const testing = @import("std").testing;

    // Single-quoted items split on every comma; leading space is kept.
    var splitter = CommaSplitter.init("'a','b', 'c'");
    for ([_][]const u8{ "'a'", "'b'", " 'c'" }) |want| {
        try testing.expectEqualStrings(want, (try splitter.next()).?);
    }
    try testing.expect((try splitter.next()) == null);
}
test "splitter 22" {
    const testing = @import("std").testing;

    // A double quote that is never closed is an error.
    var splitter = CommaSplitter.init("abc\"def");
    try testing.expectError(error.UnclosedQuote, splitter.next());
}
test "splitter 23" {
    const testing = @import("std").testing;

    // Realistic input: key:value pairs where quoted values contain commas.
    var splitter = CommaSplitter.init("title:\"Focus Split: Up\",description:\"Focus the split above, if it exists.\",action:goto_split:up");
    const expected = [_][]const u8{
        "title:\"Focus Split: Up\"",
        "description:\"Focus the split above, if it exists.\"",
        "action:goto_split:up",
    };
    for (expected) |want| {
        try testing.expectEqualStrings(want, (try splitter.next()).?);
    }
    try testing.expect((try splitter.next()) == null);
}
test "splitter 24" {
    const testing = @import("std").testing;

    // `rest` returns everything left, commas included, and exhausts the
    // splitter.
    var splitter = CommaSplitter.init("a,b,c,def");
    for ([_][]const u8{ "a", "b" }) |want| {
        try testing.expectEqualStrings(want, (try splitter.next()).?);
    }
    const remainder = splitter.rest().?;
    try testing.expectEqualStrings("c,def", remainder);
    try testing.expect((try splitter.next()) == null);
}
test "splitter 25" {
    const testing = @import("std").testing;

    // A comma inside an unterminated unicode escape is an illegal escape,
    // not a field separator.
    var splitter = CommaSplitter.init("a,\\u{10,df}");
    const first = (try splitter.next()).?;
    try testing.expectEqualStrings("a", first);
    try testing.expectError(error.IllegalEscape, splitter.next());
}