Files
ghostty/src/simd/vt.zig
Qwerasd 6d5b4a3426 perf: replace std.debug.assert with inlined version
See doc comment in `quirks.zig` for reasoning
2025-11-17 12:13:56 -07:00

197 lines
5.4 KiB
Zig

const std = @import("std");
const options = @import("build_options");
const assert = @import("../quirks.zig").inlineAssert;
const indexOf = @import("index_of.zig").indexOf;
// vt.cpp
/// C-ABI entry point for the SIMD UTF-8 decoder (see the `vt.cpp` note above).
///
/// Takes `count` bytes starting at `input` and a destination buffer `output`
/// of `u32` values. `utf8DecodeUntilControlSeq` in this file interprets the
/// return value as the number of input bytes consumed and the value written
/// through `output_count` as the number of entries decoded into `output`.
///
/// NOTE(review): the required capacity of `output` is not visible from this
/// declaration; the Zig wrapper asserts `output.len >= input.len` before
/// calling. Confirm against the C++ implementation.
extern "c" fn ghostty_simd_decode_utf8_until_control_seq(
input: [*]const u8,
count: usize,
output: [*]u32,
output_count: *usize,
) usize;
/// Outcome of a single decode call in this file.
const DecodeResult = struct {
/// Number of bytes of the input slice that were processed. Decoding of any
/// remaining input should resume at this byte offset.
consumed: usize,
/// Number of `u32` entries written to the front of the output slice.
decoded: usize,
};
/// Decode UTF-8 bytes from `input` into codepoints stored in `output`.
///
/// `output` must have room for at least `input.len` entries; this is
/// asserted. When the `simd` build option is enabled the work is handed to
/// the C implementation, otherwise the scalar fallback in this file is used.
/// The scalar fallback stops before the first ESC (0x1B) byte; its comments
/// state that it mirrors the SIMD behavior.
///
/// Returns how many input bytes were consumed and how many codepoints were
/// written to `output`.
pub fn utf8DecodeUntilControlSeq(
    input: []const u8,
    output: []u32,
) DecodeResult {
    assert(output.len >= input.len);

    if (comptime options.simd) {
        // The C side reports the decoded count through an out-parameter and
        // the consumed byte count through its return value.
        var written: usize = 0;
        const read = ghostty_simd_decode_utf8_until_control_seq(
            input.ptr,
            input.len,
            output.ptr,
            &written,
        );
        return DecodeResult{
            .consumed = read,
            .decoded = written,
        };
    } else {
        return utf8DecodeUntilControlSeqScalar(input, output);
    }
}
/// Scalar (non-SIMD) implementation of `utf8DecodeUntilControlSeq`.
///
/// Decodes codepoints one at a time from the bytes preceding the first ESC
/// (0x1B) in `input`, or from all of `input` if there is no ESC, writing
/// each result to `output`. Decoding stops early when:
///   - the next byte is not a valid UTF-8 leading byte (it is neither
///     consumed nor replaced), or
///   - the next sequence is truncated (treated as partial input to be
///     completed by a later call).
/// A sequence with a valid leading byte that fails to decode yields U+FFFD
/// for the leading byte only; decoding then resumes at the following byte.
fn utf8DecodeUntilControlSeqScalar(
    input: []const u8,
    output: []u32,
) DecodeResult {
    // Only the bytes in front of the first escape are eligible for decoding.
    const end = indexOf(input, 0x1B) orelse input.len;
    const text = input[0..end];

    var consumed: usize = 0;
    var decoded: usize = 0;
    while (consumed < text.len) {
        // Note: stopping on an invalid leading byte matches the SIMD
        // behavior, which is admittedly a bit weird. See the "decode
        // invalid leading byte" test. SIMD is the source of truth and the
        // scalar path copies it.
        const seq_len = std.unicode.utf8ByteSequenceLength(text[consumed]) catch break;

        // If the full sequence is not available yet, we finish. We assume
        // this is a partial input and defer to a future call.
        const available = text.len - consumed;
        if (available < seq_len) break;

        const seq = text[consumed..][0..seq_len];
        if (std.unicode.utf8Decode(seq)) |cp| {
            // Valid sequence: emit it and step over all of its bytes.
            output[decoded] = cp;
            consumed += seq_len;
        } else |_| {
            // Decoding failed: replace just the leading byte with the
            // replacement character and continue with the byte after it.
            // This matches the SIMD behavior and is covered by the
            // "invalid UTF-8" tests.
            output[decoded] = 0xFFFD;
            consumed += 1;
        }
        decoded += 1;
    }

    return DecodeResult{
        .consumed = consumed,
        .decoded = decoded,
    };
}
test "decode no escape" {
    var codepoints: [1024]u32 = undefined;

    // TODO: many more test cases

    // Pure ASCII with no escape: every byte is consumed and decoded.
    const text = "hello" ** 128;
    const expected: DecodeResult = .{
        .consumed = text.len,
        .decoded = text.len,
    };
    const actual = utf8DecodeUntilControlSeq(text, &codepoints);
    try std.testing.expectEqual(expected, actual);
}
test "decode ASCII to escape" {
    var codepoints: [1024]u32 = undefined;

    // TODO: many more test cases

    // Only the ASCII run in front of the ESC byte should be decoded.
    const head = "hello" ** 64;
    const tail = "world" ** 64;
    const text = head ++ "\x1b" ++ tail;
    const expected: DecodeResult = .{
        .consumed = head.len,
        .decoded = head.len,
    };
    const actual = utf8DecodeUntilControlSeq(text, &codepoints);
    try std.testing.expectEqual(expected, actual);
}
test "decode immediate esc sequence" {
    var codepoints: [64]u32 = undefined;

    // Input that starts with ESC yields nothing consumed and nothing decoded.
    const text = "\x1b[?5s";
    const expected: DecodeResult = .{ .consumed = 0, .decoded = 0 };
    const actual = utf8DecodeUntilControlSeq(text, &codepoints);
    try std.testing.expectEqual(expected, actual);
}
test "decode incomplete UTF-8" {
    var codepoints: [64]u32 = undefined;

    // Each input is "hello" followed by a truncated multi-byte sequence.
    // Only the five ASCII bytes should be consumed and decoded.
    const inputs = [_][]const u8{
        "hello\xc2", // 2-byte
        "hello\xe0\x00", // 3-byte
        "hello\xf0\x90", // 4-byte
    };
    const expected: DecodeResult = .{ .consumed = 5, .decoded = 5 };

    for (inputs) |text| {
        const actual = utf8DecodeUntilControlSeq(text, &codepoints);
        try std.testing.expectEqual(expected, actual);
    }
}
test "decode invalid UTF-8" {
    var codepoints: [64]u32 = undefined;

    // Invalid leading 2-byte sequence
    const text = "hello\xc2\x01";
    const expected: DecodeResult = .{ .consumed = 7, .decoded = 7 };
    const actual = utf8DecodeUntilControlSeq(text, &codepoints);
    try std.testing.expectEqual(expected, actual);

    // Replacement will only replace the invalid leading byte; the byte
    // following it is decoded on its own.
    try std.testing.expectEqualSlices(
        u32,
        &.{ 0xFFFD, 0x01 },
        codepoints[5..7],
    );
}
// This is testing our current behavior so that we know we have to handle
// this case in terminal/stream.zig. If we change this behavior, we can
// remove the special handling in terminal/stream.zig.
test "decode invalid leading byte isn't consumed or replaced" {
    var codepoints: [64]u32 = undefined;

    // 0xFF can never start a UTF-8 sequence. Decoding stops in front of it
    // without consuming it or emitting a replacement character.
    const text = "hello\xFF";
    const expected: DecodeResult = .{ .consumed = 5, .decoded = 5 };
    const actual = utf8DecodeUntilControlSeq(text, &codepoints);
    try std.testing.expectEqual(expected, actual);
}