diff --git a/pkg/highway/build.zig b/pkg/highway/build.zig
index a7b409c96..e35349730 100644
--- a/pkg/highway/build.zig
+++ b/pkg/highway/build.zig
@@ -15,7 +15,7 @@ pub fn build(b: *std.Build) !void {
     const lib = b.addLibrary(.{
         .name = "highway",
         .root_module = b.createModule(.{
-            .root_source_file = b.path("src/runtime_darwin.zig"),
+            .root_source_file = b.path("src/runtime_detect.zig"),
             .target = target,
             .optimize = optimize,
         }),
@@ -89,25 +89,17 @@ pub fn build(b: *std.Build) !void {
         });
     }
 
-    lib.addCSourceFiles(.{ .flags = flags.items, .files = &.{"src/cpp/bridge.cpp"} });
-    if (target.result.os.tag.isDarwin()) {
-        lib.addCSourceFiles(.{ .flags = flags.items, .files = &.{"src/cpp/targets_darwin.cpp"} });
-    }
+    lib.addCSourceFiles(.{ .flags = flags.items, .files = &.{
+        "src/cpp/bridge.cpp",
+        "src/cpp/targets.cpp",
+    } });
 
     if (upstream_) |upstream| {
         lib.addCSourceFiles(.{
             .root = upstream.path(""),
             .flags = flags.items,
-            .files = if (target.result.os.tag.isDarwin()) &.{
-                // Darwin uses a local targets_darwin.cpp shim so the package doesn't
-                // need Apple SDK headers for target detection.
+            .files = &.{
                 "hwy/per_target.cc",
-            } else &.{
-                // These provide the runtime target selection used by
-                // HWY_DYNAMIC_DISPATCH. The benchmark, timer, print, and
-                // aligned allocator support files are unused by Ghostty.
-                "hwy/per_target.cc",
-                "hwy/targets.cc",
             },
         });
         lib.installHeadersDirectory(
diff --git a/pkg/highway/src/cpp/targets_darwin.cpp b/pkg/highway/src/cpp/targets.cpp
similarity index 100%
rename from pkg/highway/src/cpp/targets_darwin.cpp
rename to pkg/highway/src/cpp/targets.cpp
diff --git a/pkg/highway/src/main.zig b/pkg/highway/src/main.zig
index b31bc4dd5..614fd14af 100644
--- a/pkg/highway/src/main.zig
+++ b/pkg/highway/src/main.zig
@@ -1,5 +1,3 @@
-const builtin = @import("builtin");
-
 extern "c" fn hwy_supported_targets() i64;
 
 pub const Targets = @import("targets.zig").Targets;
@@ -10,7 +8,5 @@ pub fn supported_targets() Targets {
 
 test {
     _ = supported_targets();
-    if (builtin.os.tag.isDarwin()) {
-        _ = @import("runtime_darwin.zig");
-    }
+    _ = @import("runtime_detect.zig");
 }
diff --git a/pkg/highway/src/runtime_darwin.zig b/pkg/highway/src/runtime_detect.zig
similarity index 58%
rename from pkg/highway/src/runtime_darwin.zig
rename to pkg/highway/src/runtime_detect.zig
index ca86b8cf8..25554a44d 100644
--- a/pkg/highway/src/runtime_darwin.zig
+++ b/pkg/highway/src/runtime_detect.zig
@@ -15,6 +15,10 @@ pub export fn ghostty_hwy_detect_targets() callconv(.c) i64 {
     return switch (builtin.cpu.arch) {
         .x86_64, .x86 => detectX86(cpu),
         .aarch64, .aarch64_be => detectAarch64(cpu),
+        .powerpc, .powerpc64, .powerpc64le => detectPpc(cpu),
+        .s390x => detectS390x(cpu),
+        .riscv32, .riscv64 => detectRiscv(cpu),
+        .loongarch32, .loongarch64 => detectLoongArch(cpu),
         else => 0,
     };
 }
@@ -107,9 +111,10 @@ fn detectX86(cpu: Target.Cpu) i64 {
         }
     }
 
-    // Darwin lazily saves AVX512 context on first use, so the XCR0 check
-    // is handled by Zig's feature detection (which hardcodes has_avx512_save
-    // to true on Darwin, matching LLVM's approach).
+    // No explicit XCR0 check is done here; OS support for saving AVX512
+    // state is left to Zig's CPU feature detection. Darwin saves AVX512
+    // context lazily on first use, so Zig hardcodes has_avx512_save to true
+    // there (matching LLVM); on other systems it is derived from XCR0.
 
     return @bitCast(t);
 }
@@ -130,6 +135,127 @@ fn detectAarch64(cpu: Target.Cpu) i64 {
         }
     }
 
+    // SVE targets are gated on the vector length reported at runtime:
+    // `sve` needs at least 256-bit vectors (`sve_256` exactly 256-bit),
+    // `sve2` needs at least 256-bit and `sve2_128` exactly 128-bit.
+    if (cpu.has(.aarch64, .sve)) {
+        const vec_bytes = sveVectorBytes();
+
+        if (vec_bytes >= 32) {
+            t.sve = true;
+            if (vec_bytes == 32) {
+                t.sve_256 = true;
+            }
+        }
+
+        // The SVE2 targets additionally require the SVE2 AES extension.
+        if (cpu.has(.aarch64, .sve2) and cpu.has(.aarch64, .sve2_aes)) {
+            if (vec_bytes >= 32) {
+                t.sve2 = true;
+            } else if (vec_bytes == 16) {
+                t.sve2_128 = true;
+            }
+        }
+    }
+
+    return @bitCast(t);
+}
+
+/// Returns the current SVE vector length in bytes.
+///
+/// On Linux this queries the kernel via prctl(PR_SVE_GET_VL). On other
+/// operating systems, or if prctl returns a negative value, 16 is returned.
+fn sveVectorBytes() usize {
+    if (comptime builtin.os.tag == .linux) {
+        // PR_SVE_GET_VL returns the SVE vector length in the lower 16 bits;
+        // the remaining bits are masked off below.
+        const PR_SVE_GET_VL = 51;
+        const ret = std.os.linux.prctl(PR_SVE_GET_VL, 0, 0, 0, 0);
+        // Reinterpret the raw result as signed so that a negative (error)
+        // return can be told apart from a valid length.
+        const signed: isize = @bitCast(ret);
+        if (signed >= 0) {
+            return ret & 0xFFFF;
+        }
+    }
+    // Non-Linux or prctl failed: assume 128-bit (NEON-width, conservative).
+    return 16;
+}
+
+/// Maps PowerPC CPU features to the Highway targets ppc8, ppc9 and ppc10.
+///
+/// Each level requires the previous one: ppc8 needs AltiVec, VSX, POWER8
+/// vector and crypto support; ppc9 adds POWER9 vector; ppc10 adds POWER10
+/// vector and MMA. The resulting target set is returned bit-cast to i64.
+fn detectPpc(cpu: Target.Cpu) i64 {
+    var t: HwyTargets = .{};
+
+    if (cpu.has(.powerpc, .altivec) and
+        cpu.has(.powerpc, .vsx) and
+        cpu.has(.powerpc, .power8_vector) and
+        cpu.has(.powerpc, .crypto))
+    {
+        t.ppc8 = true;
+
+        if (cpu.has(.powerpc, .power9_vector)) {
+            t.ppc9 = true;
+
+            if (cpu.has(.powerpc, .power10_vector) and
+                cpu.has(.powerpc, .mma))
+            {
+                t.ppc10 = true;
+            }
+        }
+    }
+
+    return @bitCast(t);
+}
+
+/// Maps s390x CPU features to the Highway targets z14 and z15.
+///
+/// z14 requires the vector facility plus vector enhancements 1; z15
+/// additionally requires vector enhancements 2.
+fn detectS390x(cpu: Target.Cpu) i64 {
+    var t: HwyTargets = .{};
+
+    if (cpu.has(.s390x, .vector)) {
+        if (cpu.has(.s390x, .vector_enhancements_1)) {
+            t.z14 = true;
+
+            if (cpu.has(.s390x, .vector_enhancements_2)) {
+                t.z15 = true;
+            }
+        }
+    }
+
+    return @bitCast(t);
+}
+
+/// Maps RISC-V CPU features to Highway targets: rvv is set when the `v`
+/// feature is present.
+fn detectRiscv(cpu: Target.Cpu) i64 {
+    var t: HwyTargets = .{};
+
+    if (cpu.has(.riscv, .v)) {
+        t.rvv = true;
+    }
+
+    return @bitCast(t);
+}
+
+/// Maps LoongArch CPU features to Highway targets: lsx is set when LSX is
+/// present, and lasx when LASX is present as well.
+fn detectLoongArch(cpu: Target.Cpu) i64 {
+    var t: HwyTargets = .{};
+
+    if (cpu.has(.loongarch, .lsx)) {
+        t.lsx = true;
+
+        if (cpu.has(.loongarch, .lasx)) {
+            t.lasx = true;
+        }
+    }
+
     return @bitCast(t);
 }
 