terminal: use highway-based indexOf to support all targets

This commit is contained in:
Mitchell Hashimoto
2024-02-03 16:49:54 -08:00
parent 36b0db2a72
commit c751619b7e
5 changed files with 129 additions and 14 deletions

104
src/simd/index_of.cpp Normal file
View File

@@ -0,0 +1,104 @@
// Generates code for every target that this compiler can support.
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "simd/index_of.cpp" // this file
#include <hwy/foreach_target.h> // must come before highway.h
#include <hwy/highway.h>
HWY_BEFORE_NAMESPACE();
namespace ghostty {
namespace HWY_NAMESPACE {
namespace hn = hwy::HWY_NAMESPACE;
// Return the index of the first occurrence of `needle` in `input` or
// `count` if not found.
template <class D, typename T = hn::TFromD<D>>
size_t IndexOfImpl(D d, T needle, const T* HWY_RESTRICT input, size_t count) {
// Note: due to the simplicity of this operation and the general complexity
// of SIMD, I'm going to overly comment this function to help explain the
// implementation for future maintainers.
// The number of lanes in the vector type.
const size_t N = hn::Lanes(d);
// Create a vector with all lanes set to `needle` so we can do a lane-wise
// comparison with the input.
const hn::Vec<D> needle_vec = Set(d, needle);
// Compare N elements at a time.
size_t i = 0;
for (; i + N <= count; i += N) {
// Load the N elements from our input into a vector.
const hn::Vec<D> input_vec = hn::LoadU(d, input + i);
// Compare the input vector with the needle vector. This produces
// a vector where each lane is 0xFF if the corresponding lane in
// `input_vec` is equal to the corresponding lane in `needle_vec`.
const hn::Mask<D> eq_mask = hn::Eq(needle_vec, input_vec);
// Find the index within the vector where the first true value is.
const intptr_t pos = hn::FindFirstTrue(d, eq_mask);
// If we found a match, return the index into the input.
if (pos >= 0) return i + static_cast<size_t>(pos);
}
// Since we compare N elements at a time, we may have some elements left
// if count modulo N != 0. We need to scan the remaining elements. To
// be simple, we search one element at a time.
if (i != count) {
// Create a new vector with only one relevant lane.
const hn::CappedTag<T, 1> d1;
using D1 = decltype(d1);
// Get an equally sized needle vector with only one lane.
const hn::Vec<D1> needle1 = Set(d1, GetLane(needle_vec));
// Go through the remaining elements and do similar logic to
// the previous loop to find any matches.
for (; i < count; ++i) {
const hn::Vec<D1> input_vec = hn::LoadU(d1, input + i);
const hn::Mask<D1> eq_mask = hn::Eq(needle1, input_vec);
if (hn::AllTrue(d1, eq_mask)) return i;
}
}
return count;
}
size_t IndexOf(const uint8_t needle,
const uint8_t* HWY_RESTRICT input,
size_t count) {
const hn::ScalableTag<uint8_t> d;
return IndexOfImpl(d, needle, input, count);
}
} // namespace HWY_NAMESPACE
} // namespace ghostty
HWY_AFTER_NAMESPACE();
// HWY_ONCE is true for only one of the target passes
#if HWY_ONCE
namespace ghostty {
// This macro declares a static array used for dynamic dispatch. It sits
// inside the HWY_ONCE section, so it is emitted in only one of the target
// passes over this file.
HWY_EXPORT(IndexOf);
// Target-independent entry point for the byte search.
//
// Forwards `needle`, `input` and `count` unchanged to the per-target
// `IndexOf` selected by HWY_DYNAMIC_DISPATCH and returns its result: the
// index of the first `needle` in `input`, or `count` if not found.
// NOTE(review): how the target is chosen is decided inside Highway and is
// not visible in this file.
size_t IndexOf(const uint8_t needle,
const uint8_t* HWY_RESTRICT input,
size_t count) {
return HWY_DYNAMIC_DISPATCH(IndexOf)(needle, input, count);
}
} // namespace ghostty
// C-linkage shim over ghostty::IndexOf so the search can be called from
// non-C++ code. Arguments and return value are passed through untouched:
// the result is the index of the first `needle` in `input[0..count)`, or
// `count` when there is no match.
extern "C" size_t ghostty_simd_index_of(const uint8_t needle,
                                        const uint8_t* HWY_RESTRICT input,
                                        size_t count) {
  return ghostty::IndexOf(needle, input, count);
}
#endif // HWY_ONCE

View File

@@ -99,8 +99,22 @@ fn testIndexOf(func: *const IndexOf) !void {
, ' ').?);
}
/// Zig-side binding for the C-exported `ghostty_simd_index_of` routine.
pub const Hwy = struct {
    /// External C symbol. Per its use below, a return value equal to
    /// `count` signals that `needle` was not found.
    extern "c" fn ghostty_simd_index_of(needle: u8, input: [*]const u8, count: usize) usize;

    /// Returns the index of the first `needle` byte in `input`, or null
    /// when the external routine reports no match.
    pub fn indexOf(input: []const u8, needle: u8) ?usize {
        const idx = ghostty_simd_index_of(needle, input.ptr, input.len);

        // The C side uses "index == length" as its not-found sentinel;
        // translate that into an optional for Zig callers.
        if (idx == input.len) return null;
        return idx;
    }
};
test "indexOf" {
    // Run the shared test body against the function returned for every
    // ISA variant that `isa.detect()` reports.
    const detected = isa.detect();
    var variants = detected.iterator();
    while (variants.next()) |variant| {
        try testIndexOf(indexOfFunc(variant));
    }

    // Then run the same test body against the Hwy-backed implementation.
    try testIndexOf(&Hwy.indexOf);
}