diff --git a/pkg/highway/build.zig b/pkg/highway/build.zig index 6ed721562..a7b409c96 100644 --- a/pkg/highway/build.zig +++ b/pkg/highway/build.zig @@ -7,7 +7,7 @@ pub fn build(b: *std.Build) !void { const upstream_ = b.lazyDependency("highway", .{}); const module = b.addModule("highway", .{ - .root_source_file = b.path("main.zig"), + .root_source_file = b.path("src/main.zig"), .target = target, .optimize = optimize, }); @@ -15,6 +15,7 @@ pub fn build(b: *std.Build) !void { const lib = b.addLibrary(.{ .name = "highway", .root_module = b.createModule(.{ + .root_source_file = b.path("src/runtime_darwin.zig"), .target = target, .optimize = optimize, }), @@ -26,11 +27,6 @@ pub fn build(b: *std.Build) !void { module.addIncludePath(upstream.path("")); } - if (target.result.os.tag.isDarwin()) { - const apple_sdk = @import("apple_sdk"); - try apple_sdk.addPaths(b, lib); - } - if (target.result.abi.isAndroid()) { const android_ndk = @import("android_ndk"); try android_ndk.addPaths(b, lib); @@ -93,12 +89,20 @@ pub fn build(b: *std.Build) !void { }); } - lib.addCSourceFiles(.{ .flags = flags.items, .files = &.{"bridge.cpp"} }); + lib.addCSourceFiles(.{ .flags = flags.items, .files = &.{"src/cpp/bridge.cpp"} }); + if (target.result.os.tag.isDarwin()) { + lib.addCSourceFiles(.{ .flags = flags.items, .files = &.{"src/cpp/targets_darwin.cpp"} }); + } + if (upstream_) |upstream| { lib.addCSourceFiles(.{ .root = upstream.path(""), .flags = flags.items, - .files = &.{ + .files = if (target.result.os.tag.isDarwin()) &.{ + // Darwin uses a local targets_darwin.cpp shim so the package doesn't + // need Apple SDK headers for target detection. + "hwy/per_target.cc", + } else &.{ // These provide the runtime target selection used by // HWY_DYNAMIC_DISPATCH. The benchmark, timer, print, and // aligned allocator support files are unused by Ghostty. @@ -119,7 +123,7 @@ pub fn build(b: *std.Build) !void { const test_exe = b.addTest(.{ .name = "test", .root_module = b.createModule(.{ - .root_source_file = b.path("main.zig"), + .root_source_file = b.path("src/main.zig"), .target = target, .optimize = optimize, }), diff --git a/pkg/highway/build.zig.zon b/pkg/highway/build.zig.zon index 4870d1db5..96b2768ae 100644 --- a/pkg/highway/build.zig.zon +++ b/pkg/highway/build.zig.zon @@ -11,7 +11,6 @@ .lazy = true, }, - .apple_sdk = .{ .path = "../apple-sdk" }, .android_ndk = .{ .path = "../android-ndk" }, }, } diff --git a/pkg/highway/main.zig b/pkg/highway/main.zig deleted file mode 100644 index 95ba6cda8..000000000 --- a/pkg/highway/main.zig +++ /dev/null @@ -1,57 +0,0 @@ -extern "c" fn hwy_supported_targets() i64; - -pub const Targets = packed struct(i64) { - // x86_64 - _reserved: u4 = 0, - avx3_spr: bool = false, - _reserved_5: u1 = 0, - avx3_zen4: bool = false, - avx3_dl: bool = false, - avx3: bool = false, - avx2: bool = false, - _reserved_10: u1 = 0, - sse4: bool = false, - ssse3: bool = false, - _reserved_13: u1 = 0, // SSE3 reserved - sse2: bool = false, - _reserved_15_23: u9 = 0, - - // aarch64 - sve2_128: bool = false, - sve_256: bool = false, - sve2: bool = false, - sve: bool = false, - neon: bool = false, - neon_without_aes: bool = false, - _reserved_30_36: u6 = 0, - - // risc-v - rvv: bool = false, - _reserved_38_46: u9 = 0, - - // IBM Power - ppc10: bool = false, - ppc9: bool = false, - ppc8: bool = false, - z15: bool = false, - z14: bool = false, - _reserved_52_57: u6 = 0, - - // WebAssembly - wasm_emu256: bool = false, - wasm: bool = false, - _reserved_60_61: u2 = 0, - - // Emulation - emu128: bool = false, - scalar: bool = false, - _reserved_63: u1 = 0, -}; - -pub fn supported_targets() Targets { - return @bitCast(hwy_supported_targets()); -} - -test { - _ = supported_targets(); -} diff --git a/pkg/highway/bridge.cpp b/pkg/highway/src/cpp/bridge.cpp similarity index 56% rename from pkg/highway/bridge.cpp rename to pkg/highway/src/cpp/bridge.cpp index 8f607f3e6..1ac0c0752 100644 --- a/pkg/highway/bridge.cpp +++ b/pkg/highway/src/cpp/bridge.cpp @@ -2,10 +2,7 @@ #include #include -#include #include -#include -#include namespace hwy { namespace { @@ -17,16 +14,6 @@ namespace { WarnFunc g_warn_func = nullptr; AbortFunc g_abort_func = nullptr; -// Mirror the upstream behavior closely enough for Highway's internal callers: -// format into a fixed buffer, fall back to a generic error if formatting fails, -// and then dispatch to either the registered hook or stderr. -void format_message(const char* format, va_list args, char* buffer, size_t size) { - const int written = vsnprintf(buffer, size, format, args); - if (written < 0) { - snprintf(buffer, size, "%s", "failed to format highway message"); - } -} - } // namespace WarnFunc& GetWarnFunc() { @@ -48,34 +35,17 @@ AbortFunc SetAbortFunc(AbortFunc func) { } void Warn(const char* file, int line, const char* format, ...) { - char message[1024]; - va_list args; - va_start(args, format); - format_message(format, args, message, sizeof(message)); - va_end(args); - - if (WarnFunc func = g_warn_func) { - func(file, line, message); - return; + if (WarnFunc func = __atomic_load_n(&g_warn_func, __ATOMIC_SEQ_CST)) { + func(file, line, format); } - - fprintf(stderr, "%s:%d: %s\n", file, line, message); } HWY_NORETURN void Abort(const char* file, int line, const char* format, ...) { - char message[1024]; - va_list args; - va_start(args, format); - format_message(format, args, message, sizeof(message)); - va_end(args); - - if (AbortFunc func = g_abort_func) { - func(file, line, message); - } else { - fprintf(stderr, "%s:%d: %s\n", file, line, message); + if (AbortFunc func = __atomic_load_n(&g_abort_func, __ATOMIC_SEQ_CST)) { + func(file, line, format); } - abort(); + __builtin_trap(); } } // namespace hwy diff --git a/pkg/highway/src/cpp/targets_darwin.cpp b/pkg/highway/src/cpp/targets_darwin.cpp new file mode 100644 index 000000000..c5a64b50a --- /dev/null +++ b/pkg/highway/src/cpp/targets_darwin.cpp @@ -0,0 +1,61 @@ +#include +#include +#include +#include + +namespace hwy { + +extern "C" int64_t ghostty_hwy_detect_targets(); + +static int64_t DetectTargets() { + int64_t bits = HWY_SCALAR | HWY_EMU128; + +#if (HWY_ARCH_X86 || HWY_ARCH_ARM) && HWY_HAVE_RUNTIME_DISPATCH + bits |= ghostty_hwy_detect_targets(); +#else + bits |= HWY_ENABLED_BASELINE; +#endif + + if ((bits & HWY_ENABLED_BASELINE) != HWY_ENABLED_BASELINE) { + const uint64_t bits_u = static_cast(bits); + const uint64_t enabled = static_cast(HWY_ENABLED_BASELINE); + HWY_WARN("CPU supports 0x%08x%08x, software requires 0x%08x%08x\n", + static_cast(bits_u >> 32), + static_cast(bits_u & 0xFFFFFFFF), + static_cast(enabled >> 32), + static_cast(enabled & 0xFFFFFFFF)); + } + + return bits; +} + +static int64_t supported_targets_for_test_ = 0; +static int64_t supported_mask_ = LimitsMax(); + +HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets) { + supported_mask_ = static_cast(~disabled_targets); + GetChosenTarget().DeInit(); +} + +HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets) { + supported_targets_for_test_ = targets; + GetChosenTarget().DeInit(); +} + +HWY_DLLEXPORT int64_t SupportedTargets() { + int64_t targets = supported_targets_for_test_; + if (HWY_LIKELY(targets == 0)) { + targets = DetectTargets(); + GetChosenTarget().Update(targets); + } + + targets &= supported_mask_; + return targets == 0 ? HWY_STATIC_TARGET : targets; +} + +HWY_DLLEXPORT ChosenTarget& GetChosenTarget() { + static ChosenTarget chosen_target; + return chosen_target; +} + +} // namespace hwy diff --git a/pkg/highway/src/main.zig b/pkg/highway/src/main.zig new file mode 100644 index 000000000..b31bc4dd5 --- /dev/null +++ b/pkg/highway/src/main.zig @@ -0,0 +1,16 @@ +const builtin = @import("builtin"); + +extern "c" fn hwy_supported_targets() i64; + +pub const Targets = @import("targets.zig").Targets; + +pub fn supported_targets() Targets { + return @bitCast(hwy_supported_targets()); +} + +test { + _ = supported_targets(); + if (builtin.os.tag.isDarwin()) { + _ = @import("runtime_darwin.zig"); + } +} diff --git a/pkg/highway/src/runtime_darwin.zig b/pkg/highway/src/runtime_darwin.zig new file mode 100644 index 000000000..ca86b8cf8 --- /dev/null +++ b/pkg/highway/src/runtime_darwin.zig @@ -0,0 +1,155 @@ +const builtin = @import("builtin"); +const std = @import("std"); +const Target = std.Target; +const HwyTargets = @import("targets.zig").Targets; + +/// Detect Highway targets using Zig's standard library CPU feature detection. +/// +/// The logic is mostly identical to the Highway implementation, but we +/// use Zig's built-in CPU feature detection instead of Highway so that we +/// can strictly control access to Apple headers (and avoid them completely). +pub export fn ghostty_hwy_detect_targets() callconv(.c) i64 { + const native = std.zig.system.resolveTargetQuery(.{}) catch return 0; + const cpu = native.cpu; + + return switch (builtin.cpu.arch) { + .x86_64, .x86 => detectX86(cpu), + .aarch64, .aarch64_be => detectAarch64(cpu), + else => 0, + }; +} + +fn detectX86(cpu: Target.Cpu) i64 { + var t: HwyTargets = .{}; + + if (comptime builtin.cpu.arch == .x86_64) { + t.sse2 = true; + } + + if (comptime builtin.cpu.arch == .x86) { + if (cpu.has(.x86, .sse) and + cpu.has(.x86, .sse2)) + { + t.sse2 = true; + } + } + + if (cpu.has(.x86, .sse3) and + cpu.has(.x86, .ssse3)) + { + t.ssse3 = true; + } + + if (cpu.has(.x86, .sse4_1) and + cpu.has(.x86, .sse4_2) and + cpu.has(.x86, .pclmul) and + cpu.has(.x86, .aes)) + { + t.sse4 = true; + } + + if (cpu.has(.x86, .avx) and + cpu.has(.x86, .avx2) and + cpu.has(.x86, .lzcnt) and + cpu.has(.x86, .bmi) and + cpu.has(.x86, .bmi2) and + cpu.has(.x86, .fma) and + cpu.has(.x86, .f16c)) + { + t.avx2 = true; + } + + if (cpu.has(.x86, .avx512f) and + cpu.has(.x86, .avx512vl) and + cpu.has(.x86, .avx512dq) and + cpu.has(.x86, .avx512bw) and + cpu.has(.x86, .avx512cd)) + { + t.avx3 = true; + } + + if (cpu.has(.x86, .avx512vnni) and + cpu.has(.x86, .vpclmulqdq) and + cpu.has(.x86, .avx512vbmi) and + cpu.has(.x86, .avx512vbmi2) and + cpu.has(.x86, .vaes) and + cpu.has(.x86, .avx512vpopcntdq) and + cpu.has(.x86, .avx512bitalg) and + cpu.has(.x86, .gfni)) + { + t.avx3_dl = true; + } + + if (t.avx3_dl and cpu.has(.x86, .avx512bf16)) { + if (isAMD()) { + t.avx3_zen4 = true; + } + } + + if (cpu.has(.x86, .avx512fp16) and + cpu.has(.x86, .avx512bf16)) + { + t.avx3_spr = true; + } + + if (cpu.has(.x86, .avx10_1_256)) { + if (cpu.has(.x86, .avx10_1_512)) { + t.avx3_spr = true; + t.avx3_dl = true; + t.avx3 = true; + } + + if (cpu.has(.x86, .avx10_2_256)) { + t.avx10_2 = true; + if (cpu.has(.x86, .avx10_2_512)) { + t.avx10_2_512 = true; + } + } + } + + // Darwin lazily saves AVX512 context on first use, so the XCR0 check + // is handled by Zig's feature detection (which hardcodes has_avx512_save + // to true on Darwin, matching LLVM's approach). + + return @bitCast(t); +} + +fn detectAarch64(cpu: Target.Cpu) i64 { + var t: HwyTargets = .{}; + + t.neon_without_aes = true; + + if (cpu.has(.aarch64, .aes)) { + t.neon = true; + + if (cpu.has(.aarch64, .fullfp16) and + cpu.has(.aarch64, .dotprod) and + cpu.has(.aarch64, .bf16)) + { + t.neon_bf16 = true; + } + } + + return @bitCast(t); +} + +/// Check CPUID vendor string for "AuthenticAMD", matching Highway's IsAMD(). +/// Zig doesn't expose the vendor string, so we must use inline assembly. +fn isAMD() bool { + var eax: u32 = undefined; + var ebx: u32 = undefined; + var ecx: u32 = undefined; + var edx: u32 = undefined; + asm volatile ("cpuid" + : [_] "={eax}" (eax), + [_] "={ebx}" (ebx), + [_] "={ecx}" (ecx), + [_] "={edx}" (edx), + : [_] "{eax}" (0), + ); + + // "Auth" "enti" "cAMD" + return ebx == 0x68747541 and + ecx == 0x444d4163 and + edx == 0x69746e65; +} diff --git a/pkg/highway/src/targets.zig b/pkg/highway/src/targets.zig new file mode 100644 index 000000000..5ae77bcad --- /dev/null +++ b/pkg/highway/src/targets.zig @@ -0,0 +1,109 @@ +const assert = @import("std").debug.assert; + +pub const Targets = packed struct(i64) { + // x86_64 + _reserved_0_2: u3 = 0, + avx10_2_512: bool = false, + avx3_spr: bool = false, + avx10_2: bool = false, + avx3_zen4: bool = false, + avx3_dl: bool = false, + avx3: bool = false, + avx2: bool = false, + _reserved_10: u1 = 0, + sse4: bool = false, + ssse3: bool = false, + _reserved_13: u1 = 0, + sse2: bool = false, + _reserved_15_17: u3 = 0, + + // aarch64 + sve2_128: bool = false, + sve_256: bool = false, + _reserved_20_22: u3 = 0, + sve2: bool = false, + sve: bool = false, + _reserved_25: u1 = 0, + neon_bf16: bool = false, + _reserved_27: u1 = 0, + neon: bool = false, + neon_without_aes: bool = false, + _reserved_30_36: u7 = 0, + + // risc-v + rvv: bool = false, + _reserved_38_39: u2 = 0, + + // LoongArch + lasx: bool = false, + lsx: bool = false, + _reserved_42_46: u5 = 0, + + // IBM Power + ppc10: bool = false, + ppc9: bool = false, + ppc8: bool = false, + z15: bool = false, + z14: bool = false, + _reserved_52_57: u6 = 0, + + // WebAssembly + wasm_emu256: bool = false, + wasm: bool = false, + _reserved_60: u1 = 0, + + // Emulation + emu128: bool = false, + scalar: bool = false, + _reserved_63: u1 = 0, + + fn bitPos(comptime field_name: []const u8) comptime_int { + return @bitOffsetOf(Targets, field_name); + } + + // Verify at comptime that each flag field matches its Highway bit constant. + comptime { + // x86 + assert(bitPos("avx10_2_512") == 3); + assert(bitPos("avx3_spr") == 4); + assert(bitPos("avx10_2") == 5); + assert(bitPos("avx3_zen4") == 6); + assert(bitPos("avx3_dl") == 7); + assert(bitPos("avx3") == 8); + assert(bitPos("avx2") == 9); + assert(bitPos("sse4") == 11); + assert(bitPos("ssse3") == 12); + assert(bitPos("sse2") == 14); + + // aarch64 + assert(bitPos("sve2_128") == 18); + assert(bitPos("sve_256") == 19); + assert(bitPos("sve2") == 23); + assert(bitPos("sve") == 24); + assert(bitPos("neon_bf16") == 26); + assert(bitPos("neon") == 28); + assert(bitPos("neon_without_aes") == 29); + + // risc-v + assert(bitPos("rvv") == 37); + + // LoongArch + assert(bitPos("lasx") == 40); + assert(bitPos("lsx") == 41); + + // IBM Power + assert(bitPos("ppc10") == 47); + assert(bitPos("ppc9") == 48); + assert(bitPos("ppc8") == 49); + assert(bitPos("z15") == 50); + assert(bitPos("z14") == 51); + + // WebAssembly + assert(bitPos("wasm_emu256") == 58); + assert(bitPos("wasm") == 59); + + // Emulation + assert(bitPos("emu128") == 61); + assert(bitPos("scalar") == 62); + } +};