mirror of
https://github.com/ghostty-org/ghostty.git
synced 2026-06-18 01:21:25 +00:00
pkg/highway: Darwin builds don't rely on Apple headers
This uses a custom fork of `hwy/targets.cpp` that uses an extern function written in Zig to use Zig's standard CPU detection to avoid a dependency on Apple SDK headers. This is on the path to removing Apple SDK requirements to build libghostty-vt, but will require a lot more work outside of this. The goal is to get this out of our external dependencies first and then we can work on removing the internal side.
This commit is contained in:
@@ -7,7 +7,7 @@ pub fn build(b: *std.Build) !void {
|
||||
const upstream_ = b.lazyDependency("highway", .{});
|
||||
|
||||
const module = b.addModule("highway", .{
|
||||
.root_source_file = b.path("main.zig"),
|
||||
.root_source_file = b.path("src/main.zig"),
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
});
|
||||
@@ -15,6 +15,7 @@ pub fn build(b: *std.Build) !void {
|
||||
const lib = b.addLibrary(.{
|
||||
.name = "highway",
|
||||
.root_module = b.createModule(.{
|
||||
.root_source_file = b.path("src/runtime_darwin.zig"),
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
}),
|
||||
@@ -26,11 +27,6 @@ pub fn build(b: *std.Build) !void {
|
||||
module.addIncludePath(upstream.path(""));
|
||||
}
|
||||
|
||||
if (target.result.os.tag.isDarwin()) {
|
||||
const apple_sdk = @import("apple_sdk");
|
||||
try apple_sdk.addPaths(b, lib);
|
||||
}
|
||||
|
||||
if (target.result.abi.isAndroid()) {
|
||||
const android_ndk = @import("android_ndk");
|
||||
try android_ndk.addPaths(b, lib);
|
||||
@@ -93,12 +89,20 @@ pub fn build(b: *std.Build) !void {
|
||||
});
|
||||
}
|
||||
|
||||
lib.addCSourceFiles(.{ .flags = flags.items, .files = &.{"bridge.cpp"} });
|
||||
lib.addCSourceFiles(.{ .flags = flags.items, .files = &.{"src/cpp/bridge.cpp"} });
|
||||
if (target.result.os.tag.isDarwin()) {
|
||||
lib.addCSourceFiles(.{ .flags = flags.items, .files = &.{"src/cpp/targets_darwin.cpp"} });
|
||||
}
|
||||
|
||||
if (upstream_) |upstream| {
|
||||
lib.addCSourceFiles(.{
|
||||
.root = upstream.path(""),
|
||||
.flags = flags.items,
|
||||
.files = &.{
|
||||
.files = if (target.result.os.tag.isDarwin()) &.{
|
||||
// Darwin uses a local targets_darwin.cpp shim so the package doesn't
|
||||
// need Apple SDK headers for target detection.
|
||||
"hwy/per_target.cc",
|
||||
} else &.{
|
||||
// These provide the runtime target selection used by
|
||||
// HWY_DYNAMIC_DISPATCH. The benchmark, timer, print, and
|
||||
// aligned allocator support files are unused by Ghostty.
|
||||
@@ -119,7 +123,7 @@ pub fn build(b: *std.Build) !void {
|
||||
const test_exe = b.addTest(.{
|
||||
.name = "test",
|
||||
.root_module = b.createModule(.{
|
||||
.root_source_file = b.path("main.zig"),
|
||||
.root_source_file = b.path("src/main.zig"),
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
}),
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
.lazy = true,
|
||||
},
|
||||
|
||||
.apple_sdk = .{ .path = "../apple-sdk" },
|
||||
.android_ndk = .{ .path = "../android-ndk" },
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
extern "c" fn hwy_supported_targets() i64;
|
||||
|
||||
pub const Targets = packed struct(i64) {
|
||||
// x86_64
|
||||
_reserved: u4 = 0,
|
||||
avx3_spr: bool = false,
|
||||
_reserved_5: u1 = 0,
|
||||
avx3_zen4: bool = false,
|
||||
avx3_dl: bool = false,
|
||||
avx3: bool = false,
|
||||
avx2: bool = false,
|
||||
_reserved_10: u1 = 0,
|
||||
sse4: bool = false,
|
||||
ssse3: bool = false,
|
||||
_reserved_13: u1 = 0, // SSE3 reserved
|
||||
sse2: bool = false,
|
||||
_reserved_15_23: u9 = 0,
|
||||
|
||||
// aarch64
|
||||
sve2_128: bool = false,
|
||||
sve_256: bool = false,
|
||||
sve2: bool = false,
|
||||
sve: bool = false,
|
||||
neon: bool = false,
|
||||
neon_without_aes: bool = false,
|
||||
_reserved_30_36: u6 = 0,
|
||||
|
||||
// risc-v
|
||||
rvv: bool = false,
|
||||
_reserved_38_46: u9 = 0,
|
||||
|
||||
// IBM Power
|
||||
ppc10: bool = false,
|
||||
ppc9: bool = false,
|
||||
ppc8: bool = false,
|
||||
z15: bool = false,
|
||||
z14: bool = false,
|
||||
_reserved_52_57: u6 = 0,
|
||||
|
||||
// WebAssembly
|
||||
wasm_emu256: bool = false,
|
||||
wasm: bool = false,
|
||||
_reserved_60_61: u2 = 0,
|
||||
|
||||
// Emulation
|
||||
emu128: bool = false,
|
||||
scalar: bool = false,
|
||||
_reserved_63: u1 = 0,
|
||||
};
|
||||
|
||||
pub fn supported_targets() Targets {
|
||||
return @bitCast(hwy_supported_targets());
|
||||
}
|
||||
|
||||
test {
|
||||
_ = supported_targets();
|
||||
}
|
||||
@@ -2,10 +2,7 @@
|
||||
#include <hwy/base.h>
|
||||
#include <hwy/targets.h>
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace hwy {
|
||||
namespace {
|
||||
@@ -17,16 +14,6 @@ namespace {
|
||||
WarnFunc g_warn_func = nullptr;
|
||||
AbortFunc g_abort_func = nullptr;
|
||||
|
||||
// Mirror the upstream behavior closely enough for Highway's internal callers:
|
||||
// format into a fixed buffer, fall back to a generic error if formatting fails,
|
||||
// and then dispatch to either the registered hook or stderr.
|
||||
void format_message(const char* format, va_list args, char* buffer, size_t size) {
|
||||
const int written = vsnprintf(buffer, size, format, args);
|
||||
if (written < 0) {
|
||||
snprintf(buffer, size, "%s", "failed to format highway message");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
WarnFunc& GetWarnFunc() {
|
||||
@@ -48,34 +35,17 @@ AbortFunc SetAbortFunc(AbortFunc func) {
|
||||
}
|
||||
|
||||
void Warn(const char* file, int line, const char* format, ...) {
|
||||
char message[1024];
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
format_message(format, args, message, sizeof(message));
|
||||
va_end(args);
|
||||
|
||||
if (WarnFunc func = g_warn_func) {
|
||||
func(file, line, message);
|
||||
return;
|
||||
if (WarnFunc func = __atomic_load_n(&g_warn_func, __ATOMIC_SEQ_CST)) {
|
||||
func(file, line, format);
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s:%d: %s\n", file, line, message);
|
||||
}
|
||||
|
||||
HWY_NORETURN void Abort(const char* file, int line, const char* format, ...) {
|
||||
char message[1024];
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
format_message(format, args, message, sizeof(message));
|
||||
va_end(args);
|
||||
|
||||
if (AbortFunc func = g_abort_func) {
|
||||
func(file, line, message);
|
||||
} else {
|
||||
fprintf(stderr, "%s:%d: %s\n", file, line, message);
|
||||
if (AbortFunc func = __atomic_load_n(&g_abort_func, __ATOMIC_SEQ_CST)) {
|
||||
func(file, line, format);
|
||||
}
|
||||
|
||||
abort();
|
||||
__builtin_trap();
|
||||
}
|
||||
|
||||
} // namespace hwy
|
||||
61
pkg/highway/src/cpp/targets_darwin.cpp
Normal file
61
pkg/highway/src/cpp/targets_darwin.cpp
Normal file
@@ -0,0 +1,61 @@
|
||||
#include <hwy/base.h>
|
||||
#include <hwy/detect_targets.h>
|
||||
#include <hwy/highway.h>
|
||||
#include <hwy/targets.h>
|
||||
|
||||
namespace hwy {
|
||||
|
||||
extern "C" int64_t ghostty_hwy_detect_targets();
|
||||
|
||||
static int64_t DetectTargets() {
|
||||
int64_t bits = HWY_SCALAR | HWY_EMU128;
|
||||
|
||||
#if (HWY_ARCH_X86 || HWY_ARCH_ARM) && HWY_HAVE_RUNTIME_DISPATCH
|
||||
bits |= ghostty_hwy_detect_targets();
|
||||
#else
|
||||
bits |= HWY_ENABLED_BASELINE;
|
||||
#endif
|
||||
|
||||
if ((bits & HWY_ENABLED_BASELINE) != HWY_ENABLED_BASELINE) {
|
||||
const uint64_t bits_u = static_cast<uint64_t>(bits);
|
||||
const uint64_t enabled = static_cast<uint64_t>(HWY_ENABLED_BASELINE);
|
||||
HWY_WARN("CPU supports 0x%08x%08x, software requires 0x%08x%08x\n",
|
||||
static_cast<uint32_t>(bits_u >> 32),
|
||||
static_cast<uint32_t>(bits_u & 0xFFFFFFFF),
|
||||
static_cast<uint32_t>(enabled >> 32),
|
||||
static_cast<uint32_t>(enabled & 0xFFFFFFFF));
|
||||
}
|
||||
|
||||
return bits;
|
||||
}
|
||||
|
||||
static int64_t supported_targets_for_test_ = 0;
|
||||
static int64_t supported_mask_ = LimitsMax<int64_t>();
|
||||
|
||||
// Excludes `disabled_targets` from future SupportedTargets() results by
// storing the bitwise complement as the supported mask, then de-initializes
// the chosen target.
HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets) {
  const int64_t still_allowed = static_cast<int64_t>(~disabled_targets);
  supported_mask_ = still_allowed;
  GetChosenTarget().DeInit();
}
|
||||
|
||||
// Installs `targets` as the override consulted by SupportedTargets() (a value
// of 0 means "no override") and de-initializes the chosen target.
HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets) {
  const int64_t override_bits = targets;
  supported_targets_for_test_ = override_bits;
  GetChosenTarget().DeInit();
}
|
||||
|
||||
// Returns the bitfield of targets that may be used.
//
// Uses the test override if one is set; otherwise runs DetectTargets() and
// updates the chosen target with the detected set. The result is filtered by
// the supported mask, and HWY_STATIC_TARGET is returned if nothing remains.
HWY_DLLEXPORT int64_t SupportedTargets() {
  int64_t candidates = supported_targets_for_test_;
  const bool override_active = (candidates != 0);
  if (HWY_LIKELY(!override_active)) {
    candidates = DetectTargets();
    GetChosenTarget().Update(candidates);
  }

  const int64_t allowed = candidates & supported_mask_;
  if (allowed == 0) {
    return HWY_STATIC_TARGET;
  }
  return allowed;
}
|
||||
|
||||
// Returns a reference to the single ChosenTarget instance, held as a
// function-local static.
HWY_DLLEXPORT ChosenTarget& GetChosenTarget() {
  static ChosenTarget instance;
  ChosenTarget& ref = instance;
  return ref;
}
|
||||
|
||||
} // namespace hwy
|
||||
16
pkg/highway/src/main.zig
Normal file
16
pkg/highway/src/main.zig
Normal file
@@ -0,0 +1,16 @@
|
||||
const builtin = @import("builtin");
|
||||
|
||||
extern "c" fn hwy_supported_targets() i64;
|
||||
|
||||
pub const Targets = @import("targets.zig").Targets;
|
||||
|
||||
/// Returns the targets reported by the C function `hwy_supported_targets`,
/// reinterpreting its `i64` result as a `Targets` bitfield.
pub fn supported_targets() Targets {
    const raw: i64 = hwy_supported_targets();
    const decoded: Targets = @bitCast(raw);
    return decoded;
}
|
||||
|
||||
test {
    // Reference the Darwin runtime source when building for an Apple OS.
    if (builtin.os.tag.isDarwin()) _ = @import("runtime_darwin.zig");

    // Call through the C bridge; the result itself is not inspected.
    _ = supported_targets();
}
|
||||
155
pkg/highway/src/runtime_darwin.zig
Normal file
155
pkg/highway/src/runtime_darwin.zig
Normal file
@@ -0,0 +1,155 @@
|
||||
const builtin = @import("builtin");
|
||||
const std = @import("std");
|
||||
const Target = std.Target;
|
||||
const HwyTargets = @import("targets.zig").Targets;
|
||||
|
||||
/// Reports the Highway targets supported by the host CPU, as a raw `i64`
/// bitfield laid out like `HwyTargets`.
///
/// Host CPU features are obtained from Zig's standard library
/// (`std.zig.system.resolveTargetQuery`) rather than from Highway's own
/// detection, so that access to Apple headers can be strictly controlled
/// (and avoided entirely).
///
/// Returns 0 if the host query fails or the compile-time architecture is
/// neither x86 nor aarch64.
pub export fn ghostty_hwy_detect_targets() callconv(.c) i64 {
    const host = std.zig.system.resolveTargetQuery(.{}) catch {
        return 0;
    };

    // The architecture is known at compile time; only the feature set is
    // taken from the resolved host.
    switch (builtin.cpu.arch) {
        .x86, .x86_64 => return detectX86(host.cpu),
        .aarch64, .aarch64_be => return detectAarch64(host.cpu),
        else => return 0,
    }
}
|
||||
|
||||
/// Maps the x86 feature set in `cpu` to Highway target bits.
///
/// Each target is reported only when every CPU feature in its group is
/// present. Returns the `HwyTargets` bitfield reinterpreted as `i64`.
fn detectX86(cpu: Target.Cpu) i64 {
    // x86_64 always reports SSE2; 32-bit x86 must advertise both SSE and SSE2.
    const sse2 = switch (comptime builtin.cpu.arch) {
        .x86_64 => true,
        .x86 => cpu.has(.x86, .sse) and cpu.has(.x86, .sse2),
        else => false,
    };

    const ssse3 = cpu.has(.x86, .sse3) and cpu.has(.x86, .ssse3);

    const sse4 = cpu.has(.x86, .sse4_1) and
        cpu.has(.x86, .sse4_2) and
        cpu.has(.x86, .pclmul) and
        cpu.has(.x86, .aes);

    const avx2 = cpu.has(.x86, .avx) and
        cpu.has(.x86, .avx2) and
        cpu.has(.x86, .lzcnt) and
        cpu.has(.x86, .bmi) and
        cpu.has(.x86, .bmi2) and
        cpu.has(.x86, .fma) and
        cpu.has(.x86, .f16c);

    const avx3_group = cpu.has(.x86, .avx512f) and
        cpu.has(.x86, .avx512vl) and
        cpu.has(.x86, .avx512dq) and
        cpu.has(.x86, .avx512bw) and
        cpu.has(.x86, .avx512cd);

    const avx3_dl_group = cpu.has(.x86, .avx512vnni) and
        cpu.has(.x86, .vpclmulqdq) and
        cpu.has(.x86, .avx512vbmi) and
        cpu.has(.x86, .avx512vbmi2) and
        cpu.has(.x86, .vaes) and
        cpu.has(.x86, .avx512vpopcntdq) and
        cpu.has(.x86, .avx512bitalg) and
        cpu.has(.x86, .gfni);

    const has_bf16 = cpu.has(.x86, .avx512bf16);

    // The vendor check runs only when the AVX3_DL group and BF16 are present.
    // It uses the feature-derived AVX3_DL result, not the AVX10 promotion
    // applied below.
    const avx3_zen4 = avx3_dl_group and has_bf16 and isAMD();

    const avx3_spr_group = cpu.has(.x86, .avx512fp16) and has_bf16;

    // AVX10: everything below is gated on AVX10.1/256. The 512-bit variant of
    // AVX10.1 additionally turns on AVX3, AVX3_DL and AVX3_SPR.
    const avx10_1 = cpu.has(.x86, .avx10_1_256);
    const avx10_1_512 = avx10_1 and cpu.has(.x86, .avx10_1_512);
    const avx10_2 = avx10_1 and cpu.has(.x86, .avx10_2_256);
    const avx10_2_512 = avx10_2 and cpu.has(.x86, .avx10_2_512);

    // Darwin lazily saves AVX512 context on first use, so the XCR0 check
    // is handled by Zig's feature detection (which hardcodes has_avx512_save
    // to true on Darwin, matching LLVM's approach).

    const targets: HwyTargets = .{
        .sse2 = sse2,
        .ssse3 = ssse3,
        .sse4 = sse4,
        .avx2 = avx2,
        .avx3 = avx3_group or avx10_1_512,
        .avx3_dl = avx3_dl_group or avx10_1_512,
        .avx3_zen4 = avx3_zen4,
        .avx3_spr = avx3_spr_group or avx10_1_512,
        .avx10_2 = avx10_2,
        .avx10_2_512 = avx10_2_512,
    };
    return @bitCast(targets);
}
|
||||
|
||||
/// Maps the aarch64 feature set in `cpu` to Highway target bits.
///
/// `neon_without_aes` is always set. `neon` additionally requires AES, and
/// `neon_bf16` requires AES plus fullfp16, dotprod and bf16. Returns the
/// `HwyTargets` bitfield reinterpreted as `i64`.
fn detectAarch64(cpu: Target.Cpu) i64 {
    const has_aes = cpu.has(.aarch64, .aes);
    const has_bf16_group = has_aes and
        cpu.has(.aarch64, .fullfp16) and
        cpu.has(.aarch64, .dotprod) and
        cpu.has(.aarch64, .bf16);

    const targets: HwyTargets = .{
        .neon_without_aes = true,
        .neon = has_aes,
        .neon_bf16 = has_bf16_group,
    };
    return @bitCast(targets);
}
|
||||
|
||||
/// Reports whether the CPUID vendor string is "AuthenticAMD", matching
/// Highway's IsAMD(). Zig doesn't expose the vendor string, so CPUID leaf 0
/// is queried directly with inline assembly.
fn isAMD() bool {
    var reg_a: u32 = undefined;
    var reg_b: u32 = undefined;
    var reg_c: u32 = undefined;
    var reg_d: u32 = undefined;
    asm volatile ("cpuid"
        : [_] "={eax}" (reg_a),
          [_] "={ebx}" (reg_b),
          [_] "={ecx}" (reg_c),
          [_] "={edx}" (reg_d),
        : [_] "{eax}" (0),
    );

    // The 12-byte vendor string is stored in EBX, EDX, ECX (in that order),
    // four little-endian bytes per register: "Auth" "enti" "cAMD".
    const vendor = [3]u32{ reg_b, reg_d, reg_c };
    return std.mem.eql(u8, std.mem.asBytes(&vendor), "AuthenticAMD");
}
|
||||
109
pkg/highway/src/targets.zig
Normal file
109
pkg/highway/src/targets.zig
Normal file
@@ -0,0 +1,109 @@
|
||||
const assert = @import("std").debug.assert;
|
||||
|
||||
/// Bitfield view of a Highway target mask, backed by an `i64`.
///
/// Fields are declared from bit 0 upward. `_reserved_*` fields pad the gaps
/// so each named flag lands on the bit index checked in the comptime block at
/// the bottom of the struct.
pub const Targets = packed struct(i64) {
    // x86 / x86_64 (bits 0-17)
    _reserved_0_2: u3 = 0,
    avx10_2_512: bool = false,
    avx3_spr: bool = false,
    avx10_2: bool = false,
    avx3_zen4: bool = false,
    avx3_dl: bool = false,
    avx3: bool = false,
    avx2: bool = false,
    _reserved_10: u1 = 0,
    sse4: bool = false,
    ssse3: bool = false,
    _reserved_13: u1 = 0,
    sse2: bool = false,
    _reserved_15_17: u3 = 0,

    // aarch64 (bits 18-36)
    sve2_128: bool = false,
    sve_256: bool = false,
    _reserved_20_22: u3 = 0,
    sve2: bool = false,
    sve: bool = false,
    _reserved_25: u1 = 0,
    neon_bf16: bool = false,
    _reserved_27: u1 = 0,
    neon: bool = false,
    neon_without_aes: bool = false,
    _reserved_30_36: u7 = 0,

    // risc-v (bits 37-39)
    rvv: bool = false,
    _reserved_38_39: u2 = 0,

    // LoongArch (bits 40-46)
    lasx: bool = false,
    lsx: bool = false,
    _reserved_42_46: u5 = 0,

    // IBM Power (bits 47-57)
    ppc10: bool = false,
    ppc9: bool = false,
    ppc8: bool = false,
    z15: bool = false,
    z14: bool = false,
    _reserved_52_57: u6 = 0,

    // WebAssembly (bits 58-60)
    wasm_emu256: bool = false,
    wasm: bool = false,
    _reserved_60: u1 = 0,

    // Emulation (bits 61-63)
    emu128: bool = false,
    scalar: bool = false,
    _reserved_63: u1 = 0,

    /// Returns the bit offset of the field named `field_name`.
    fn bitPos(comptime field_name: []const u8) comptime_int {
        return @bitOffsetOf(Targets, field_name);
    }

    // Verify at comptime that each flag field sits at the bit index expected
    // for its Highway bit constant. A mismatch fails compilation.
    comptime {
        const Expected = struct { name: []const u8, bit: u6 };
        const layout = [_]Expected{
            // x86
            .{ .name = "avx10_2_512", .bit = 3 },
            .{ .name = "avx3_spr", .bit = 4 },
            .{ .name = "avx10_2", .bit = 5 },
            .{ .name = "avx3_zen4", .bit = 6 },
            .{ .name = "avx3_dl", .bit = 7 },
            .{ .name = "avx3", .bit = 8 },
            .{ .name = "avx2", .bit = 9 },
            .{ .name = "sse4", .bit = 11 },
            .{ .name = "ssse3", .bit = 12 },
            .{ .name = "sse2", .bit = 14 },

            // aarch64
            .{ .name = "sve2_128", .bit = 18 },
            .{ .name = "sve_256", .bit = 19 },
            .{ .name = "sve2", .bit = 23 },
            .{ .name = "sve", .bit = 24 },
            .{ .name = "neon_bf16", .bit = 26 },
            .{ .name = "neon", .bit = 28 },
            .{ .name = "neon_without_aes", .bit = 29 },

            // risc-v
            .{ .name = "rvv", .bit = 37 },

            // LoongArch
            .{ .name = "lasx", .bit = 40 },
            .{ .name = "lsx", .bit = 41 },

            // IBM Power
            .{ .name = "ppc10", .bit = 47 },
            .{ .name = "ppc9", .bit = 48 },
            .{ .name = "ppc8", .bit = 49 },
            .{ .name = "z15", .bit = 50 },
            .{ .name = "z14", .bit = 51 },

            // WebAssembly
            .{ .name = "wasm_emu256", .bit = 58 },
            .{ .name = "wasm", .bit = 59 },

            // Emulation
            .{ .name = "emu128", .bit = 61 },
            .{ .name = "scalar", .bit = 62 },
        };

        for (layout) |entry| {
            assert(bitPos(entry.name) == entry.bit);
        }
    }
};
|
||||
Reference in New Issue
Block a user