pkg/highway: Darwin builds don't rely on Apple headers

This uses a custom fork of `hwy/targets.cpp` that calls an extern
function written in Zig, which relies on Zig's standard CPU detection
to avoid a dependency on Apple SDK headers.

This is on the path to removing Apple SDK requirements to build 
libghostty-vt, but will require a lot more work outside of this. The goal 
is to get this out of our external dependencies first and then we can
work on removing the internal side.
This commit is contained in:
Mitchell Hashimoto
2026-04-23 14:23:20 -07:00
parent 2f1a30ddb0
commit c642e3104b
8 changed files with 359 additions and 102 deletions

View File

@@ -7,7 +7,7 @@ pub fn build(b: *std.Build) !void {
const upstream_ = b.lazyDependency("highway", .{});
const module = b.addModule("highway", .{
.root_source_file = b.path("main.zig"),
.root_source_file = b.path("src/main.zig"),
.target = target,
.optimize = optimize,
});
@@ -15,6 +15,7 @@ pub fn build(b: *std.Build) !void {
const lib = b.addLibrary(.{
.name = "highway",
.root_module = b.createModule(.{
.root_source_file = b.path("src/runtime_darwin.zig"),
.target = target,
.optimize = optimize,
}),
@@ -26,11 +27,6 @@ pub fn build(b: *std.Build) !void {
module.addIncludePath(upstream.path(""));
}
if (target.result.os.tag.isDarwin()) {
const apple_sdk = @import("apple_sdk");
try apple_sdk.addPaths(b, lib);
}
if (target.result.abi.isAndroid()) {
const android_ndk = @import("android_ndk");
try android_ndk.addPaths(b, lib);
@@ -93,12 +89,20 @@ pub fn build(b: *std.Build) !void {
});
}
lib.addCSourceFiles(.{ .flags = flags.items, .files = &.{"bridge.cpp"} });
lib.addCSourceFiles(.{ .flags = flags.items, .files = &.{"src/cpp/bridge.cpp"} });
if (target.result.os.tag.isDarwin()) {
lib.addCSourceFiles(.{ .flags = flags.items, .files = &.{"src/cpp/targets_darwin.cpp"} });
}
if (upstream_) |upstream| {
lib.addCSourceFiles(.{
.root = upstream.path(""),
.flags = flags.items,
.files = &.{
.files = if (target.result.os.tag.isDarwin()) &.{
// Darwin uses a local targets_darwin.cpp shim so the package doesn't
// need Apple SDK headers for target detection.
"hwy/per_target.cc",
} else &.{
// These provide the runtime target selection used by
// HWY_DYNAMIC_DISPATCH. The benchmark, timer, print, and
// aligned allocator support files are unused by Ghostty.
@@ -119,7 +123,7 @@ pub fn build(b: *std.Build) !void {
const test_exe = b.addTest(.{
.name = "test",
.root_module = b.createModule(.{
.root_source_file = b.path("main.zig"),
.root_source_file = b.path("src/main.zig"),
.target = target,
.optimize = optimize,
}),

View File

@@ -11,7 +11,6 @@
.lazy = true,
},
.apple_sdk = .{ .path = "../apple-sdk" },
.android_ndk = .{ .path = "../android-ndk" },
},
}

View File

@@ -1,57 +0,0 @@
extern "c" fn hwy_supported_targets() i64;
pub const Targets = packed struct(i64) {
// x86_64
_reserved: u4 = 0,
avx3_spr: bool = false,
_reserved_5: u1 = 0,
avx3_zen4: bool = false,
avx3_dl: bool = false,
avx3: bool = false,
avx2: bool = false,
_reserved_10: u1 = 0,
sse4: bool = false,
ssse3: bool = false,
_reserved_13: u1 = 0, // SSE3 reserved
sse2: bool = false,
_reserved_15_23: u9 = 0,
// aarch64
sve2_128: bool = false,
sve_256: bool = false,
sve2: bool = false,
sve: bool = false,
neon: bool = false,
neon_without_aes: bool = false,
_reserved_30_36: u6 = 0,
// risc-v
rvv: bool = false,
_reserved_38_46: u9 = 0,
// IBM Power
ppc10: bool = false,
ppc9: bool = false,
ppc8: bool = false,
z15: bool = false,
z14: bool = false,
_reserved_52_57: u6 = 0,
// WebAssembly
wasm_emu256: bool = false,
wasm: bool = false,
_reserved_60_61: u2 = 0,
// Emulation
emu128: bool = false,
scalar: bool = false,
_reserved_63: u1 = 0,
};
pub fn supported_targets() Targets {
return @bitCast(hwy_supported_targets());
}
test {
_ = supported_targets();
}

View File

@@ -2,10 +2,7 @@
#include <hwy/base.h>
#include <hwy/targets.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
namespace hwy {
namespace {
@@ -17,16 +14,6 @@ namespace {
WarnFunc g_warn_func = nullptr;
AbortFunc g_abort_func = nullptr;
// Mirror the upstream behavior closely enough for Highway's internal callers:
// format into a fixed buffer, fall back to a generic error if formatting fails,
// and then dispatch to either the registered hook or stderr.
void format_message(const char* format, va_list args, char* buffer, size_t size) {
const int written = vsnprintf(buffer, size, format, args);
if (written < 0) {
snprintf(buffer, size, "%s", "failed to format highway message");
}
}
} // namespace
WarnFunc& GetWarnFunc() {
@@ -48,34 +35,17 @@ AbortFunc SetAbortFunc(AbortFunc func) {
}
void Warn(const char* file, int line, const char* format, ...) {
char message[1024];
va_list args;
va_start(args, format);
format_message(format, args, message, sizeof(message));
va_end(args);
if (WarnFunc func = g_warn_func) {
func(file, line, message);
return;
if (WarnFunc func = __atomic_load_n(&g_warn_func, __ATOMIC_SEQ_CST)) {
func(file, line, format);
}
fprintf(stderr, "%s:%d: %s\n", file, line, message);
}
HWY_NORETURN void Abort(const char* file, int line, const char* format, ...) {
char message[1024];
va_list args;
va_start(args, format);
format_message(format, args, message, sizeof(message));
va_end(args);
if (AbortFunc func = g_abort_func) {
func(file, line, message);
} else {
fprintf(stderr, "%s:%d: %s\n", file, line, message);
if (AbortFunc func = __atomic_load_n(&g_abort_func, __ATOMIC_SEQ_CST)) {
func(file, line, format);
}
abort();
__builtin_trap();
}
} // namespace hwy

View File

@@ -0,0 +1,61 @@
#include <hwy/base.h>
#include <hwy/detect_targets.h>
#include <hwy/highway.h>
#include <hwy/targets.h>
namespace hwy {
// Provided by the Zig side of this package (exported with C linkage). Called
// below to obtain detected target bits on x86/Arm builds with runtime dispatch.
extern "C" int64_t ghostty_hwy_detect_targets();

// Returns the bitmask of Highway targets treated as available on this CPU.
//
// HWY_SCALAR and HWY_EMU128 are always part of the result. On x86/Arm builds
// with runtime dispatch enabled, the remaining bits come from
// ghostty_hwy_detect_targets(); otherwise HWY_ENABLED_BASELINE is used as-is.
// A warning is emitted through HWY_WARN when the result does not cover every
// bit in HWY_ENABLED_BASELINE.
static int64_t DetectTargets() {
#if (HWY_ARCH_X86 || HWY_ARCH_ARM) && HWY_HAVE_RUNTIME_DISPATCH
  const int64_t detected = ghostty_hwy_detect_targets();
#else
  const int64_t detected = HWY_ENABLED_BASELINE;
#endif
  const int64_t bits = HWY_SCALAR | HWY_EMU128 | detected;

  const bool baseline_covered =
      (bits & HWY_ENABLED_BASELINE) == HWY_ENABLED_BASELINE;
  if (!baseline_covered) {
    // Print both 64-bit masks as two 32-bit halves each.
    const uint64_t have = static_cast<uint64_t>(bits);
    const uint64_t need = static_cast<uint64_t>(HWY_ENABLED_BASELINE);
    HWY_WARN("CPU supports 0x%08x%08x, software requires 0x%08x%08x\n",
             static_cast<uint32_t>(have >> 32),
             static_cast<uint32_t>(have & 0xFFFFFFFF),
             static_cast<uint32_t>(need >> 32),
             static_cast<uint32_t>(need & 0xFFFFFFFF));
  }
  return bits;
}
// Override for the detected targets. While this is 0 (its initial value),
// SupportedTargets() runs DetectTargets(); any other value is used instead.
static int64_t supported_targets_for_test_ = 0;
// Mask ANDed into the value returned by SupportedTargets(). Starts as
// LimitsMax<int64_t>() and is overwritten by DisableTargets().
static int64_t supported_mask_ = LimitsMax<int64_t>();
// Excludes the given target bits from future SupportedTargets() results by
// storing their complement as the supported mask, then calls DeInit() on the
// chosen target.
HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets) {
  const int64_t still_allowed = ~disabled_targets;
  supported_mask_ = still_allowed;
  GetChosenTarget().DeInit();
}
// Stores `targets` as the override that SupportedTargets() uses in place of
// DetectTargets(). Because SupportedTargets() treats 0 as "no override",
// passing 0 re-enables detection. Calls DeInit() on the chosen target after
// storing the new value.
HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets) {
supported_targets_for_test_ = targets;
GetChosenTarget().DeInit();
}
// Returns the targets to dispatch to.
//
// Starts from the test override if one is set; otherwise runs DetectTargets()
// and passes the result to the chosen target's Update(). The value is then
// restricted by the supported mask. If nothing remains, HWY_STATIC_TARGET is
// returned instead of 0.
HWY_DLLEXPORT int64_t SupportedTargets() {
  int64_t result = supported_targets_for_test_;
  if (HWY_LIKELY(result == 0)) {
    // No override in effect: detect and publish the result.
    result = DetectTargets();
    GetChosenTarget().Update(result);
  }

  result &= supported_mask_;
  if (result == 0) {
    return HWY_STATIC_TARGET;
  }
  return result;
}
// Returns a reference to the single ChosenTarget instance, which is held in a
// function-local static so every caller sees the same object.
HWY_DLLEXPORT ChosenTarget& GetChosenTarget() {
static ChosenTarget chosen_target;
return chosen_target;
}
} // namespace hwy

16
pkg/highway/src/main.zig Normal file
View File

@@ -0,0 +1,16 @@
const builtin = @import("builtin");
extern "c" fn hwy_supported_targets() i64;
pub const Targets = @import("targets.zig").Targets;
/// Returns the targets reported by the C function `hwy_supported_targets`,
/// with its raw `i64` result reinterpreted as a `Targets` bit set.
pub fn supported_targets() Targets {
    const raw: i64 = hwy_supported_targets();
    return @bitCast(raw);
}
test {
    // On Darwin, also reference the Zig runtime detection module from the
    // test build.
    if (builtin.os.tag.isDarwin()) _ = @import("runtime_darwin.zig");

    // Smoke test: the call must link and return; the value is not inspected.
    _ = supported_targets();
}

View File

@@ -0,0 +1,155 @@
const builtin = @import("builtin");
const std = @import("std");
const Target = std.Target;
const HwyTargets = @import("targets.zig").Targets;
/// C-ABI entry point that reports Highway target bits for the host CPU.
///
/// The host CPU is queried through Zig's standard library rather than
/// Highway's own detection, so that access to Apple headers stays under our
/// control (and can be avoided entirely). The mapping to Highway targets is
/// mostly identical to the Highway implementation.
///
/// Returns 0 when the native target cannot be resolved or when the build
/// architecture is neither x86 nor aarch64.
pub export fn ghostty_hwy_detect_targets() callconv(.c) i64 {
    const resolved = std.zig.system.resolveTargetQuery(.{}) catch return 0;

    switch (builtin.cpu.arch) {
        .x86, .x86_64 => return detectX86(resolved.cpu),
        .aarch64, .aarch64_be => return detectAarch64(resolved.cpu),
        else => return 0,
    }
}
/// Map the detected x86 CPU features onto Highway's x86 target bits.
///
/// Tiers are cumulative: a target bit is only set when its own features AND
/// every feature of the tier below it are present (SSE2 -> SSSE3 -> SSE4 ->
/// AVX2 -> AVX3 -> AVX3_DL -> AVX3_SPR). This mirrors Highway's cumulative
/// feature-group masks, so a higher target is never reported on a CPU that
/// lacks something a lower target requires.
///
/// Returns the `HwyTargets` bit set reinterpreted as an `i64`.
fn detectX86(cpu: Target.Cpu) i64 {
    // x86_64 builds set SSE2 unconditionally; 32-bit x86 has to check for it.
    const has_sse2 = if (comptime builtin.cpu.arch == .x86_64)
        true
    else
        cpu.has(.x86, .sse) and cpu.has(.x86, .sse2);

    const has_ssse3 = has_sse2 and
        cpu.has(.x86, .sse3) and
        cpu.has(.x86, .ssse3);

    const has_sse4 = has_ssse3 and
        cpu.has(.x86, .sse4_1) and
        cpu.has(.x86, .sse4_2) and
        cpu.has(.x86, .pclmul) and
        cpu.has(.x86, .aes);

    const has_avx2 = has_sse4 and
        cpu.has(.x86, .avx) and
        cpu.has(.x86, .avx2) and
        cpu.has(.x86, .lzcnt) and
        cpu.has(.x86, .bmi) and
        cpu.has(.x86, .bmi2) and
        cpu.has(.x86, .fma) and
        cpu.has(.x86, .f16c);

    const has_avx3 = has_avx2 and
        cpu.has(.x86, .avx512f) and
        cpu.has(.x86, .avx512vl) and
        cpu.has(.x86, .avx512dq) and
        cpu.has(.x86, .avx512bw) and
        cpu.has(.x86, .avx512cd);

    const has_avx3_dl = has_avx3 and
        cpu.has(.x86, .avx512vnni) and
        cpu.has(.x86, .vpclmulqdq) and
        cpu.has(.x86, .avx512vbmi) and
        cpu.has(.x86, .avx512vbmi2) and
        cpu.has(.x86, .vaes) and
        cpu.has(.x86, .avx512vpopcntdq) and
        cpu.has(.x86, .avx512bitalg) and
        cpu.has(.x86, .gfni);

    const has_bf16 = cpu.has(.x86, .avx512bf16);

    const has_avx3_spr = has_avx3_dl and
        has_bf16 and
        cpu.has(.x86, .avx512fp16);

    var t: HwyTargets = .{
        .sse2 = has_sse2,
        .ssse3 = has_ssse3,
        .sse4 = has_sse4,
        .avx2 = has_avx2,
        .avx3 = has_avx3,
        .avx3_dl = has_avx3_dl,
        // Zen4 is AVX3_DL + BF16 on an AMD part. The vendor check runs last
        // so CPUID is only executed when the feature checks already passed.
        .avx3_zen4 = has_avx3_dl and has_bf16 and isAMD(),
        .avx3_spr = has_avx3_spr,
    };

    // AVX10 is reported through its own feature flags: the 512-bit AVX10.1
    // variant enables the AVX3 family of targets, and AVX10.2 enables its own
    // targets on top.
    if (cpu.has(.x86, .avx10_1_256)) {
        if (cpu.has(.x86, .avx10_1_512)) {
            t.avx3_spr = true;
            t.avx3_dl = true;
            t.avx3 = true;
        }
        if (cpu.has(.x86, .avx10_2_256)) {
            t.avx10_2 = true;
            if (cpu.has(.x86, .avx10_2_512)) {
                t.avx10_2_512 = true;
            }
        }
    }

    // Darwin lazily saves AVX512 context on first use, so the XCR0 check
    // is handled by Zig's feature detection (which hardcodes has_avx512_save
    // to true on Darwin, matching LLVM's approach).
    return @bitCast(t);
}
/// Map the detected aarch64 CPU features onto Highway's NEON target bits.
///
/// `neon_without_aes` is always set. `neon` additionally requires AES, and
/// `neon_bf16` requires AES plus fullfp16, dotprod and bf16.
/// Returns the `HwyTargets` bit set reinterpreted as an `i64`.
fn detectAarch64(cpu: Target.Cpu) i64 {
    const has_aes = cpu.has(.aarch64, .aes);
    const has_bf16_tier = has_aes and
        cpu.has(.aarch64, .fullfp16) and
        cpu.has(.aarch64, .dotprod) and
        cpu.has(.aarch64, .bf16);

    const detected: HwyTargets = .{
        .neon_without_aes = true,
        .neon = has_aes,
        .neon_bf16 = has_bf16_tier,
    };
    return @bitCast(detected);
}
/// Check CPUID vendor string for "AuthenticAMD", matching Highway's IsAMD().
/// Zig doesn't expose the vendor string, so we must use inline assembly.
///
/// Executes `cpuid` with EAX = 0 and returns true only when EBX, ECX and EDX
/// all hold the expected vendor-string pieces.
fn isAMD() bool {
// All four registers are written by `cpuid`; EAX is captured as an output
// but its value is not inspected below.
var eax: u32 = undefined;
var ebx: u32 = undefined;
var ecx: u32 = undefined;
var edx: u32 = undefined;
asm volatile ("cpuid"
: [_] "={eax}" (eax),
[_] "={ebx}" (ebx),
[_] "={ecx}" (ecx),
[_] "={edx}" (edx),
: [_] "{eax}" (0),
);
// Each constant is four ASCII bytes, lowest byte first:
// EBX = "Auth", ECX = "cAMD", EDX = "enti". Read in EBX, EDX, ECX order
// they spell "AuthenticAMD".
return ebx == 0x68747541 and
ecx == 0x444d4163 and
edx == 0x69746e65;
}

109
pkg/highway/src/targets.zig Normal file
View File

@@ -0,0 +1,109 @@
const assert = @import("std").debug.assert;
/// Bit set of Highway targets, laid out as a 64-bit packed struct so it can be
/// bit-cast to and from the `i64` masks exchanged with Highway.
///
/// Field order defines the bit positions (first field is bit 0). The
/// `_reserved_*` fields are padding that keeps each named flag on its bit;
/// the comptime check at the bottom pins every flag to its Highway bit index.
pub const Targets = packed struct(i64) {
    // x86 / x86_64 (bits 3-14)
    _reserved_0_2: u3 = 0,
    avx10_2_512: bool = false,
    avx3_spr: bool = false,
    avx10_2: bool = false,
    avx3_zen4: bool = false,
    avx3_dl: bool = false,
    avx3: bool = false,
    avx2: bool = false,
    _reserved_10: u1 = 0,
    sse4: bool = false,
    ssse3: bool = false,
    _reserved_13: u1 = 0,
    sse2: bool = false,
    _reserved_15_17: u3 = 0,

    // aarch64 (bits 18-29)
    sve2_128: bool = false,
    sve_256: bool = false,
    _reserved_20_22: u3 = 0,
    sve2: bool = false,
    sve: bool = false,
    _reserved_25: u1 = 0,
    neon_bf16: bool = false,
    _reserved_27: u1 = 0,
    neon: bool = false,
    neon_without_aes: bool = false,
    _reserved_30_36: u7 = 0,

    // RISC-V (bit 37)
    rvv: bool = false,
    _reserved_38_39: u2 = 0,

    // LoongArch (bits 40-41)
    lasx: bool = false,
    lsx: bool = false,
    _reserved_42_46: u5 = 0,

    // IBM Power / Z (bits 47-51)
    ppc10: bool = false,
    ppc9: bool = false,
    ppc8: bool = false,
    z15: bool = false,
    z14: bool = false,
    _reserved_52_57: u6 = 0,

    // WebAssembly (bits 58-59)
    wasm_emu256: bool = false,
    wasm: bool = false,
    _reserved_60: u1 = 0,

    // Emulation (bits 61-62)
    emu128: bool = false,
    scalar: bool = false,
    _reserved_63: u1 = 0,

    // Compile-time guard: every flag field must sit on the bit index of its
    // Highway constant. Editing the layout above without updating this table
    // (or vice versa) fails the build.
    comptime {
        const Expected = struct { field: []const u8, bit: u6 };
        const expected = [_]Expected{
            // x86
            .{ .field = "avx10_2_512", .bit = 3 },
            .{ .field = "avx3_spr", .bit = 4 },
            .{ .field = "avx10_2", .bit = 5 },
            .{ .field = "avx3_zen4", .bit = 6 },
            .{ .field = "avx3_dl", .bit = 7 },
            .{ .field = "avx3", .bit = 8 },
            .{ .field = "avx2", .bit = 9 },
            .{ .field = "sse4", .bit = 11 },
            .{ .field = "ssse3", .bit = 12 },
            .{ .field = "sse2", .bit = 14 },
            // aarch64
            .{ .field = "sve2_128", .bit = 18 },
            .{ .field = "sve_256", .bit = 19 },
            .{ .field = "sve2", .bit = 23 },
            .{ .field = "sve", .bit = 24 },
            .{ .field = "neon_bf16", .bit = 26 },
            .{ .field = "neon", .bit = 28 },
            .{ .field = "neon_without_aes", .bit = 29 },
            // risc-v
            .{ .field = "rvv", .bit = 37 },
            // LoongArch
            .{ .field = "lasx", .bit = 40 },
            .{ .field = "lsx", .bit = 41 },
            // IBM Power
            .{ .field = "ppc10", .bit = 47 },
            .{ .field = "ppc9", .bit = 48 },
            .{ .field = "ppc8", .bit = 49 },
            .{ .field = "z15", .bit = 50 },
            .{ .field = "z14", .bit = 51 },
            // WebAssembly
            .{ .field = "wasm_emu256", .bit = 58 },
            .{ .field = "wasm", .bit = 59 },
            // Emulation
            .{ .field = "emu128", .bit = 61 },
            .{ .field = "scalar", .bit = 62 },
        };
        for (expected) |entry| {
            assert(@bitOffsetOf(Targets, entry.field) == entry.bit);
        }
    }
};