mirror of
https://github.com/ghostty-org/ghostty.git
synced 2026-05-26 23:08:25 +00:00
The previous runtime_detect.zig called std.zig.system.resolveTargetQuery which pulled in the entire Zig target/CPU model table infrastructure for every architecture (~4,000 symbols, ~175 KB of data tables, ~130 KB of code). This bloated the binary by ~500 KB and shifted code layout enough to cause a measurable icache/branch-predictor regression in unrelated hot paths like the terminal parser (~20% more cycles for identical instruction counts). Replace with minimal, direct CPU feature detection per architecture: CPUID + XGETBV inline assembly on x86, sysctlbyname on Darwin AArch64, and getauxval/prctl via std.os.linux (direct syscalls, no libc) on Linux for AArch64, PPC, S390x, RISC-V, and LoongArch. Split into per-architecture files under src/detect/ for maintainability.
66 lines
1.8 KiB
Zig
66 lines
1.8 KiB
Zig
const HwyTargets = @import("../targets.zig").Targets;
|
|
const linux = @import("linux.zig");
|
|
|
|
/// Detects the SIMD targets available on an AArch64 Linux host.
///
/// Capability words are read from the auxiliary vector (AT_HWCAP and
/// AT_HWCAP2) through `linux.getauxval`. Matching fields of `t` are set to
/// `true`; this function never clears a field. The return value is `t.*`
/// reinterpreted as an `i64` via `@bitCast`.
pub fn detect(t: *HwyTargets) i64 {
    // Auxiliary vector keys.
    const at_hwcap: usize = 16;
    const at_hwcap2: usize = 26;

    // AT_HWCAP masks (bit positions from Linux UAPI asm/hwcap.h).
    const cap_aes: usize = 1 << 3;
    const cap_fphp: usize = 1 << 9; // FEAT_FP16
    const cap_asimddp: usize = 1 << 20; // DotProd
    const cap_sve: usize = 1 << 22;

    // AT_HWCAP2 masks.
    const cap2_sve2: usize = 1 << 1;
    const cap2_sveaes: usize = 1 << 2;
    const cap2_bf16: usize = 1 << 14;

    const caps = linux.getauxval(at_hwcap);
    const caps2 = linux.getauxval(at_hwcap2);

    // NEON is only reported when AES is available.
    if (caps & cap_aes != 0) {
        t.neon = true;

        // The BF16 flavour additionally needs FP16, DotProd and BF16.
        const f16_dot = cap_fphp | cap_asimddp;
        if (caps & f16_dot == f16_dot and caps2 & cap2_bf16 != 0) {
            t.neon_bf16 = true;
        }
    }

    if (caps & cap_sve != 0) {
        // Only query the vector length once the kernel reports SVE.
        const vl = sveVectorBytes();
        const is_wide = vl >= 32;

        if (is_wide) t.sve = true;
        if (vl == 32) t.sve_256 = true;

        // SVE2 targets require both SVE2 and the SVE AES extension.
        const sve2_aes = cap2_sve2 | cap2_sveaes;
        if (caps2 & sve2_aes == sve2_aes) {
            if (is_wide) {
                t.sve2 = true;
            } else if (vl == 16) {
                t.sve2_128 = true;
            }
        }
    }

    return @bitCast(t.*);
}
|
|
|
|
/// Returns the SVE vector length in bytes as reported by
/// `prctl(PR_SVE_GET_VL)`.
///
/// If the call fails (the result is negative when viewed as a signed
/// value), 16 is returned: 128-bit, i.e. NEON width, as a conservative
/// assumption.
fn sveVectorBytes() usize {
    const pr_sve_get_vl: i32 = 51;

    const raw = linux.prctl(pr_sve_get_vl, 0, 0, 0, 0);
    const status: isize = @bitCast(raw);

    // prctl failed: fall back to the conservative 128-bit width.
    if (status < 0) return 16;

    // The vector length lives in the lower 16 bits of the result.
    return raw & 0xFFFF;
}
|