ghostty/src/config/string.zig

const std = @import("std");

/// Parse a string literal into a byte array. The string can contain
/// any valid Zig string literal escape sequences.
///
/// Bytes that do not begin an escape sequence are copied verbatim. Each
/// escape sequence is decoded with
/// `std.zig.string_literal.parseEscapeSequence` and the resulting
/// codepoint is written to `out` as UTF-8.
///
/// The output buffer never needs to be larger than the input buffer.
/// The buffers may alias.
///
/// Returns the prefix of `out` that was written.
///
/// Errors:
///   - `error.OutOfMemory` if `out` is too small to hold the result.
///   - `error.InvalidString` if an escape sequence is malformed.
///   - Any error from `std.unicode.utf8Encode` if an escape sequence
///     names a codepoint that cannot be encoded as UTF-8.
pub fn parse(out: []u8, bytes: []const u8) ![]u8 {
    var dst_i: usize = 0;
    var src_i: usize = 0;
    while (src_i < bytes.len) {
        if (dst_i >= out.len) return error.OutOfMemory;

        // If this byte is not beginning an escape sequence we copy.
        const b = bytes[src_i];
        if (b != '\\') {
            out[dst_i] = b;
            dst_i += 1;
            src_i += 1;
            continue;
        }

        // Parse the escape sequence. src_i is passed by pointer so the
        // parser can advance it past the bytes it consumed.
        const cp: u21 = switch (std.zig.string_literal.parseEscapeSequence(
            bytes,
            &src_i,
        )) {
            .failure => return error.InvalidString,
            .success => |value| value,
        };

        // Encode into scratch space first. Handing utf8Encode a slice of
        // `out` that is too short for the codepoint trips its internal
        // length assertion rather than returning an error, so we verify
        // the remaining capacity ourselves before touching `out`.
        var encoded: [4]u8 = undefined;
        const len = try std.unicode.utf8Encode(cp, &encoded);
        if (out.len - dst_i < len) return error.OutOfMemory;
        @memcpy(out[dst_i..][0..len], encoded[0..len]);
        dst_i += len;
    }

    return out[0..dst_i];
}

/// Builds a `CodepointIterator` over `bytes`, starting at offset zero.
/// The iterator only stores the slice and an index, so extracting
/// codepoints (including those written as escape sequences) requires
/// no allocation.
pub fn codepointIterator(bytes: []const u8) CodepointIterator {
    return CodepointIterator{
        .i = 0,
        .bytes = bytes,
    };
}

/// Iterates over the codepoints of a string literal, decoding Zig
/// string literal escape sequences as it goes. Holds only a slice and
/// an offset.
pub const CodepointIterator = struct {
    /// The raw string being iterated.
    bytes: []const u8,

    /// Byte offset into `bytes` of the next codepoint or escape sequence.
    i: usize,

    /// Returns the next codepoint, or null once all of `bytes` has been
    /// consumed.
    ///
    /// A backslash begins an escape sequence, which is decoded with
    /// `std.zig.string_literal.parseEscapeSequence`. Any other byte is
    /// treated as the first byte of a UTF-8 sequence.
    ///
    /// Returns `error.InvalidString` if an escape sequence is malformed,
    /// if a byte is not a valid UTF-8 lead byte, if the input ends in
    /// the middle of a multi-byte UTF-8 sequence, or if the UTF-8
    /// sequence fails to decode.
    pub fn next(self: *CodepointIterator) error{InvalidString}!?u21 {
        if (self.i >= self.bytes.len) return null;
        switch (self.bytes[self.i]) {
            // An escape sequence. self.i is passed by pointer so the
            // parser can advance it past the bytes it consumed.
            '\\' => return switch (std.zig.string_literal.parseEscapeSequence(
                self.bytes,
                &self.i,
            )) {
                .failure => error.InvalidString,
                .success => |cp| cp,
            },

            // Not an escape, parse as UTF-8
            else => |start| {
                const cp_len = std.unicode.utf8ByteSequenceLength(start) catch
                    return error.InvalidString;

                // The lead byte may promise more bytes than the input
                // actually has. Slicing blindly would index past the
                // end, so report a truncated sequence as invalid and
                // consume what is left so later calls return null.
                const rest = self.bytes[self.i..];
                if (rest.len < cp_len) {
                    self.i = self.bytes.len;
                    return error.InvalidString;
                }

                // Advance past the sequence whether or not decoding
                // succeeds.
                defer self.i += cp_len;
                return std.unicode.utf8Decode(rest[0..cp_len]) catch
                    return error.InvalidString;
            },
        }
    }
};

test "parse: empty" {
    // An empty input yields an empty output slice.
    var scratch: [128]u8 = undefined;
    const parsed = try parse(scratch[0..], "");
    try std.testing.expectEqualStrings("", parsed);
}

test "parse: no escapes" {
    // Input without backslashes is copied through unchanged.
    const input = "hello world";
    var scratch: [128]u8 = undefined;
    const parsed = try parse(scratch[0..], input);
    try std.testing.expectEqualStrings("hello world", parsed);
}

test "parse: escapes" {
    // Each case pairs a raw literal (with escapes still spelled out)
    // with the bytes it should decode to.
    const Case = struct {
        input: []const u8,
        expected: []const u8,
    };
    const cases = [_]Case{
        .{ .input = "hello\\nworld", .expected = "hello\nworld" },
        .{ .input = "hello\\u{1F601}world", .expected = "hello\u{1F601}world" },
    };

    var scratch: [128]u8 = undefined;
    for (cases) |case| {
        const parsed = try parse(&scratch, case.input);
        try std.testing.expectEqualStrings(case.expected, parsed);
    }
}

test "codepointIterator: empty" {
    // An empty string is exhausted immediately.
    var iter = codepointIterator("");
    const first = try iter.next();
    try std.testing.expectEqual(null, first);
}

test "codepointIterator: ascii no escapes" {
    const expectEqual = std.testing.expectEqual;

    // Plain ASCII yields one codepoint per byte, then null.
    var iter = codepointIterator("abc");
    for ([_]u21{ 'a', 'b', 'c' }) |want| {
        try expectEqual(want, (try iter.next()).?);
    }
    try expectEqual(null, try iter.next());
}

test "codepointIterator: multibyte utf8" {
    const expectEqual = std.testing.expectEqual;

    // │ is U+2502 (3 bytes in UTF-8)
    var iter = codepointIterator("a│b");
    for ([_]u21{ 'a', '│', 'b' }) |want| {
        try expectEqual(want, (try iter.next()).?);
    }
    try expectEqual(null, try iter.next());
}

test "codepointIterator: escape sequences" {
    const expectEqual = std.testing.expectEqual;

    // Escapes are decoded in place among ordinary characters.
    var iter = codepointIterator("a\\tb\\n\\\\");
    for ([_]u21{ 'a', '\t', 'b', '\n', '\\' }) |want| {
        try expectEqual(want, (try iter.next()).?);
    }
    try expectEqual(null, try iter.next());
}

test "codepointIterator: unicode escape" {
    const expectEqual = std.testing.expectEqual;

    // A \u{...} escape decodes to its codepoint; the literal that
    // follows is read normally.
    var iter = codepointIterator("\\u{2502}x");
    for ([_]u21{ '│', 'x' }) |want| {
        try expectEqual(want, (try iter.next()).?);
    }
    try expectEqual(null, try iter.next());
}

test "codepointIterator: emoji unicode escape" {
    // A codepoint outside the Basic Multilingual Plane, written as an escape.
    var iter = codepointIterator("\\u{1F601}");
    const emoji = (try iter.next()).?;
    try std.testing.expectEqual(@as(u21, 0x1F601), emoji);
    try std.testing.expectEqual(null, try iter.next());
}