mirror of
https://github.com/ghostty-org/ghostty.git
synced 2026-06-03 18:34:50 +00:00
This updates simdutf to my fork which has a SIMDUTF_NO_LIBCXX option that removes all libc++ and libc++ ABI dependencies. From there, the hand-written simd code we have has been updated to also no longer use any libc++ features. Part of this required removing utfcpp since it depended on libc++ (`<iterator>`). libghostty-vt now only depends on libc.
319 lines
11 KiB
C++
319 lines
11 KiB
C++
// Generates code for every target that this compiler can support.
|
|
#undef HWY_TARGET_INCLUDE
|
|
#define HWY_TARGET_INCLUDE "simd/vt.cpp" // this file
|
|
#include <hwy/foreach_target.h> // must come before highway.h
|
|
#include <hwy/highway.h>
|
|
|
|
#include <simdutf.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include <simd/index_of.h>
|
|
#include <simd/vt.h>
|
|
|
|
HWY_BEFORE_NAMESPACE();
|
|
namespace ghostty {
|
|
namespace HWY_NAMESPACE {
|
|
|
|
namespace hn = hwy::HWY_NAMESPACE;
|
|
|
|
using T = uint8_t;
|
|
|
|
// Compute the length of the maximal subpart of an ill-formed UTF-8
// subsequence starting at p[0], per Unicode Table 3-7 and the
// "U+FFFD Substitution of Maximal Subparts" algorithm.
//
// The maximal subpart is the longest initial subsequence that is either:
//   (a) the start of a well-formed sequence, or
//   (b) a single byte.
// Each maximal subpart maps to exactly one U+FFFD.
//
// Parameters:
//   p   - pointer to the first byte of the subsequence to inspect.
//   len - number of readable bytes at p; no byte at or past p[len] is read.
//
// Returns 0 when len is 0, otherwise a value in [1, min(len, 4)]: the lead
// byte plus every following byte that falls inside the range Table 3-7
// allows at that position. If every continuation byte matches, the full
// sequence length is returned (callers invoke this at a decode error, so
// that case is not expected in practice).
static size_t MaximalSubpart(const unsigned char* p, size_t len) {
  if (len == 0) return 0;

  const unsigned char lead = p[0];

  // Continuation bytes (80-BF), overlong leads (C0-C1), or invalid (F5-FF):
  // each is its own maximal subpart of length 1. ASCII also lands here.
  if (lead < 0xC2 || lead > 0xF4) return 1;

  // Total sequence length implied by the lead byte.
  const size_t seq_len = (lead <= 0xDF) ? 2 : (lead <= 0xEF) ? 3 : 4;

  // In Table 3-7 only the FIRST continuation byte has a range that depends
  // on the lead byte; every later continuation byte is plain 80-BF.
  unsigned char lo = 0x80;
  unsigned char hi = 0xBF;
  switch (lead) {
    case 0xE0: lo = 0xA0; break;  // excludes overlong 3-byte forms
    case 0xED: hi = 0x9F; break;  // excludes UTF-16 surrogates
    case 0xF0: lo = 0x90; break;  // excludes overlong 4-byte forms
    case 0xF4: hi = 0x8F; break;  // excludes code points above U+10FFFF
    default: break;
  }

  // Extend the subpart for as long as bytes are available and in range.
  size_t valid = 1;  // lead byte counts
  while (valid < seq_len && valid < len) {
    const unsigned char cb = p[valid];
    if (cb < lo || cb > hi) break;
    valid++;

    // Subsequent continuation bytes use the generic range.
    lo = 0x80;
    hi = 0xBF;
  }

  return valid;
}
|
|
|
|
// Trim trailing bytes that form a valid-but-incomplete UTF-8 sequence.
|
|
// Only trims sequences whose bytes so far match Table 3-7 ranges (i.e.,
|
|
// truly partial sequences that could be completed by future input).
|
|
// Invalid lead bytes (C0, C1, F5-FF) or mismatched continuations are NOT
|
|
// trimmed — they will be handled as errors by DecodeUTF8.
|
|
static size_t TrimValidPartialUTF8(const uint8_t* input, size_t len) {
|
|
if (len == 0) return 0;
|
|
|
|
// Find the start of a potential trailing partial sequence by scanning
|
|
// backwards from the end. We look for a lead byte (C2-F4) that could
|
|
// start a multi-byte sequence, possibly followed by continuation bytes.
|
|
//
|
|
// We check up to the last 4 bytes (max UTF-8 sequence length).
|
|
size_t check_start = len > 4 ? len - 4 : 0;
|
|
for (size_t pos = len; pos > check_start; pos--) {
|
|
unsigned char b = input[pos - 1];
|
|
|
|
// Skip continuation bytes — they might belong to the partial sequence.
|
|
if ((b & 0xC0) == 0x80) continue;
|
|
|
|
// Found a non-continuation byte. Only valid multi-byte leads (C2-F4)
|
|
// can start a partial sequence worth trimming. Anything else (ASCII,
|
|
// C0, C1, F5-FF) should be consumed by DecodeUTF8.
|
|
if (b < 0xC2 || b > 0xF4) return len;
|
|
|
|
// Determine expected sequence length from the lead byte.
|
|
size_t expected;
|
|
if (b <= 0xDF)
|
|
expected = 2;
|
|
else if (b <= 0xEF)
|
|
expected = 3;
|
|
else
|
|
expected = 4;
|
|
|
|
size_t seq_remaining = len - (pos - 1);
|
|
|
|
// If we have all expected bytes, the sequence is complete (not partial).
|
|
if (seq_remaining >= expected) return len;
|
|
|
|
// Check if the trailing bytes form a valid prefix using MaximalSubpart.
|
|
const unsigned char* seq_start = input + pos - 1;
|
|
size_t subpart = MaximalSubpart(seq_start, seq_remaining);
|
|
|
|
// Only trim if ALL trailing bytes are part of the valid prefix
|
|
// (the sequence is valid-so-far but incomplete).
|
|
if (subpart == seq_remaining) {
|
|
return pos - 1;
|
|
}
|
|
|
|
// The sequence is ill-formed, don't trim — let DecodeUTF8 handle it.
|
|
return len;
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
// Decode the UTF-8 text in input into output. Returns the number of decoded
|
|
// characters. This function assumes output is large enough.
|
|
//
|
|
// This function handles malformed UTF-8 sequences by inserting a
|
|
// replacement character (U+FFFD) following the W3C/Unicode "U+FFFD
|
|
// Substitution of Maximal Subparts" algorithm and continuing to decode.
|
|
// This function will consume the entire input no matter what.
|
|
size_t DecodeUTF8(const uint8_t* HWY_RESTRICT input,
|
|
size_t count,
|
|
char32_t* output) {
|
|
// Its possible for our input to be empty since DecodeUTF8UntilControlSeq
|
|
// doesn't check for this.
|
|
if (count == 0) {
|
|
return 0;
|
|
}
|
|
|
|
// Decode UTF-8 to UTF-32, replacing invalid sequences with U+FFFD.
|
|
const char* in = reinterpret_cast<const char*>(input);
|
|
size_t remaining = count;
|
|
char32_t* out = output;
|
|
while (remaining > 0) {
|
|
auto r = simdutf::convert_utf8_to_utf32_with_errors(in, remaining, out);
|
|
|
|
// If the decode was a full success then we're done!
|
|
if (r.error == simdutf::SUCCESS) {
|
|
out += r.count;
|
|
break;
|
|
}
|
|
|
|
// On error, r.count is the input byte position of the error.
|
|
// The output buffer is already written up to that point, but
|
|
// we need count_utf8 to find how many char32_t that produced.
|
|
out += simdutf::count_utf8(in, r.count);
|
|
|
|
// Compute the maximal subpart at the error position and emit
|
|
// a single U+FFFD for it.
|
|
const unsigned char* err_pos =
|
|
reinterpret_cast<const unsigned char*>(in + r.count);
|
|
size_t err_remaining = remaining - r.count;
|
|
size_t skip = r.count + MaximalSubpart(err_pos, err_remaining);
|
|
|
|
*out++ = 0xFFFD;
|
|
|
|
in += skip;
|
|
remaining -= skip;
|
|
}
|
|
|
|
return static_cast<size_t>(out - output);
|
|
}
|
|
|
|
/// Decode the UTF-8 text in input into output until an escape
|
|
/// character is found. This returns the number of bytes consumed
|
|
/// from input and writes the number of decoded characters into
|
|
/// output_count.
|
|
///
|
|
/// This may return a value less than count even with no escape
|
|
/// character if the input ends with an incomplete UTF-8 sequence.
|
|
/// The caller should check the next byte manually to determine
|
|
/// if it is incomplete.
|
|
template <class D>
|
|
size_t DecodeUTF8UntilControlSeqImpl(D d,
|
|
const T* HWY_RESTRICT input,
|
|
size_t count,
|
|
char32_t* output,
|
|
size_t* output_count) {
|
|
const size_t N = hn::Lanes(d);
|
|
|
|
// Create a vector containing ESC since that denotes a control sequence.
|
|
const hn::Vec<D> esc_vec = Set(d, 0x1B);
|
|
|
|
// Compare N elements at a time.
|
|
size_t i = 0;
|
|
for (; i + N <= count; i += N) {
|
|
// Load the N elements from our input into a vector.
|
|
const hn::Vec<D> input_vec = hn::LoadU(d, input + i);
|
|
|
|
// If we don't have any escapes we keep going. We want to accumulate
|
|
// the largest possible valid UTF-8 sequence before decoding.
|
|
// TODO(mitchellh): benchmark this vs decoding every time
|
|
const size_t esc_idx = IndexOfChunk(d, esc_vec, input_vec);
|
|
if (esc_idx == kNotFound) {
|
|
continue;
|
|
}
|
|
|
|
// We have an ESC char, decode up to this point. We start by assuming
|
|
// a valid UTF-8 sequence and slow-path into error handling if we find
|
|
// an invalid sequence.
|
|
*output_count = DecodeUTF8(input, i + esc_idx, output);
|
|
return i + esc_idx;
|
|
}
|
|
|
|
// If we have leftover input then we decode it one byte at a time (slow!)
|
|
// using pretty much the same logic as above.
|
|
if (i != count) {
|
|
const hn::CappedTag<T, 1> d1;
|
|
using D1 = decltype(d1);
|
|
const hn::Vec<D1> esc1 = Set(d1, hn::GetLane(esc_vec));
|
|
for (; i < count; ++i) {
|
|
const hn::Vec<D1> input_vec = hn::LoadU(d1, input + i);
|
|
const size_t esc_idx = IndexOfChunk(d1, esc1, input_vec);
|
|
if (esc_idx == kNotFound) {
|
|
continue;
|
|
}
|
|
|
|
*output_count = DecodeUTF8(input, i + esc_idx, output);
|
|
return i + esc_idx;
|
|
}
|
|
}
|
|
|
|
// If we reached this point, its possible for our input to have an
|
|
// incomplete sequence because we're consuming the full input. We need
|
|
// to trim any incomplete sequences from the end of the input.
|
|
//
|
|
// We use our own trim instead of simdutf::trim_partial_utf8 because
|
|
// we only want to trim sequences that are valid-so-far (true partial
|
|
// sequences that may be completed by future input). Invalid bytes
|
|
// like C0, C1, F5-FF should NOT be trimmed — they should be passed
|
|
// through to DecodeUTF8 which will replace them with U+FFFD per the
|
|
// maximal subpart algorithm.
|
|
const size_t trimmed_len = TrimValidPartialUTF8(input, i);
|
|
*output_count = DecodeUTF8(input, trimmed_len, output);
|
|
return trimmed_len;
|
|
}
|
|
|
|
size_t DecodeUTF8UntilControlSeq(const uint8_t* HWY_RESTRICT input,
|
|
size_t count,
|
|
char32_t* output,
|
|
size_t* output_count) {
|
|
const hn::ScalableTag<uint8_t> d;
|
|
return DecodeUTF8UntilControlSeqImpl(d, input, count, output, output_count);
|
|
}
|
|
|
|
} // namespace HWY_NAMESPACE
|
|
} // namespace ghostty
|
|
HWY_AFTER_NAMESPACE();
|
|
|
|
// HWY_ONCE is true for only one of the target passes
|
|
#if HWY_ONCE
|
|
|
|
namespace ghostty {
|
|
|
|
HWY_EXPORT(DecodeUTF8UntilControlSeq);
|
|
|
|
size_t DecodeUTF8UntilControlSeq(const uint8_t* HWY_RESTRICT input,
|
|
size_t count,
|
|
char32_t* output,
|
|
size_t* output_count) {
|
|
return HWY_DYNAMIC_DISPATCH(DecodeUTF8UntilControlSeq)(input, count, output,
|
|
output_count);
|
|
}
|
|
|
|
} // namespace ghostty
|
|
|
|
extern "C" {
|
|
|
|
size_t ghostty_simd_decode_utf8_until_control_seq(const uint8_t* HWY_RESTRICT
|
|
input,
|
|
size_t count,
|
|
char32_t* output,
|
|
size_t* output_count) {
|
|
return ghostty::DecodeUTF8UntilControlSeq(input, count, output, output_count);
|
|
}
|
|
|
|
} // extern "C"
|
|
|
|
#endif // HWY_ONCE
|