Files
ghostty/pkg/highway/src/runtime_detect.zig
2026-04-23 15:28:59 -07:00

258 lines
6.3 KiB
Zig

const builtin = @import("builtin");
const std = @import("std");
const Target = std.Target;
const HwyTargets = @import("targets.zig").Targets;
/// Detect the Highway targets supported by the host CPU.
///
/// This follows Highway's own detection for the most part, but relies on
/// Zig's standard library CPU feature detection instead of Highway's so that
/// access to Apple headers can be strictly controlled (and avoided
/// completely).
///
/// Returns the value produced by the detector for the architecture this code
/// was compiled for. Returns 0 when the native target cannot be resolved or
/// when the architecture has no detector.
pub export fn ghostty_hwy_detect_targets() callconv(.c) i64 {
    // A failed native-target query is reported as "no targets" rather than
    // as an error, since this function is exported with the C ABI.
    const host = std.zig.system.resolveTargetQuery(.{}) catch return 0;
    const features = host.cpu;

    // The architecture is known at compile time, so exactly one detector is
    // selected per build.
    const bits: i64 = switch (builtin.cpu.arch) {
        .x86, .x86_64 => detectX86(features),
        .aarch64, .aarch64_be => detectAarch64(features),
        .powerpc, .powerpc64, .powerpc64le => detectPpc(features),
        .s390x => detectS390x(features),
        .riscv32, .riscv64 => detectRiscv(features),
        .loongarch32, .loongarch64 => detectLoongArch(features),
        else => 0,
    };
    return bits;
}
/// Map the x86 features present in `cpu` to Highway targets.
///
/// Targets are cumulative, following Highway's feature groups: a target is
/// reported only when all of its own features are present AND the
/// next-lower target was reported too. This prevents, for example, AVX2
/// from being reported on a machine (or VM) that hides AES/PCLMUL.
///
/// `avx3_zen4` additionally requires `isAMD()` to return true. AVX10
/// features are handled separately at the end and may turn on the AVX3
/// family directly.
///
/// Returns the `HwyTargets` value bit-cast to `i64`.
fn detectX86(cpu: Target.Cpu) i64 {
    var t: HwyTargets = .{};

    // SSE2 is always reported on x86_64; 32-bit x86 has to advertise both
    // SSE and SSE2.
    const has_sse2 = switch (builtin.cpu.arch) {
        .x86_64 => true,
        .x86 => cpu.has(.x86, .sse) and cpu.has(.x86, .sse2),
        else => false,
    };
    if (has_sse2) t.sse2 = true;

    const has_ssse3 = has_sse2 and
        cpu.has(.x86, .sse3) and
        cpu.has(.x86, .ssse3);
    if (has_ssse3) t.ssse3 = true;

    const has_sse4 = has_ssse3 and
        cpu.has(.x86, .sse4_1) and
        cpu.has(.x86, .sse4_2) and
        cpu.has(.x86, .pclmul) and
        cpu.has(.x86, .aes);
    if (has_sse4) t.sse4 = true;

    const has_avx2 = has_sse4 and
        cpu.has(.x86, .avx) and
        cpu.has(.x86, .avx2) and
        cpu.has(.x86, .lzcnt) and
        cpu.has(.x86, .bmi) and
        cpu.has(.x86, .bmi2) and
        cpu.has(.x86, .fma) and
        cpu.has(.x86, .f16c);
    if (has_avx2) t.avx2 = true;

    const has_avx3 = has_avx2 and
        cpu.has(.x86, .avx512f) and
        cpu.has(.x86, .avx512vl) and
        cpu.has(.x86, .avx512dq) and
        cpu.has(.x86, .avx512bw) and
        cpu.has(.x86, .avx512cd);
    if (has_avx3) t.avx3 = true;

    const has_avx3_dl = has_avx3 and
        cpu.has(.x86, .avx512vnni) and
        cpu.has(.x86, .vpclmulqdq) and
        cpu.has(.x86, .avx512vbmi) and
        cpu.has(.x86, .avx512vbmi2) and
        cpu.has(.x86, .vaes) and
        cpu.has(.x86, .avx512vpopcntdq) and
        cpu.has(.x86, .avx512bitalg) and
        cpu.has(.x86, .gfni);
    if (has_avx3_dl) t.avx3_dl = true;

    // Zen4 = AVX3_DL + BF16, and only on AMD parts. The vendor query is
    // evaluated last so CPUID is only issued when the features match.
    const has_avx3_bf16 = has_avx3_dl and cpu.has(.x86, .avx512bf16);
    if (has_avx3_bf16 and isAMD()) t.avx3_zen4 = true;

    // SPR = AVX3_DL + BF16 + FP16, independent of vendor.
    if (has_avx3_bf16 and cpu.has(.x86, .avx512fp16)) t.avx3_spr = true;

    // AVX10 reports its capabilities through dedicated feature bits. The
    // 512-bit AVX10.1 level turns on the whole AVX3 family regardless of
    // the individual AVX-512 checks above.
    if (cpu.has(.x86, .avx10_1_256)) {
        if (cpu.has(.x86, .avx10_1_512)) {
            t.avx3_spr = true;
            t.avx3_dl = true;
            t.avx3 = true;
        }
        if (cpu.has(.x86, .avx10_2_256)) {
            t.avx10_2 = true;
            if (cpu.has(.x86, .avx10_2_512)) {
                t.avx10_2_512 = true;
            }
        }
    }

    // No explicit XCR0 / OS register-state check is made here. On Darwin the
    // kernel lazily saves AVX512 context on first use, so none is needed.
    // Elsewhere this relies on the feature set handed to us already
    // reflecting OS-level XSAVE support.
    // NOTE(review): confirm that assumption against the std.zig.system
    // implementation of the targeted Zig version.
    return @bitCast(t);
}
/// Map the AArch64 features present in `cpu` to Highway targets.
///
/// `neon_without_aes` is always reported. `neon` requires AES, and
/// `neon_bf16` additionally requires fullfp16, dotprod and bf16. The SVE
/// targets depend on both the feature bits and the vector length reported by
/// `sveVectorBytes()`, which is only queried when SVE is present.
///
/// Returns the `HwyTargets` value bit-cast to `i64`.
fn detectAarch64(cpu: Target.Cpu) i64 {
    var targets: HwyTargets = .{};
    targets.neon_without_aes = true;

    const has_aes = cpu.has(.aarch64, .aes);
    if (has_aes) targets.neon = true;

    const has_bf16_group = has_aes and
        cpu.has(.aarch64, .fullfp16) and
        cpu.has(.aarch64, .dotprod) and
        cpu.has(.aarch64, .bf16);
    if (has_bf16_group) targets.neon_bf16 = true;

    // Nothing further to decide without SVE.
    if (!cpu.has(.aarch64, .sve)) return @bitCast(targets);

    const vector_len = sveVectorBytes();
    const at_least_256_bit = vector_len >= 32;

    // Plain SVE is only reported for vectors of 32 bytes or more; the
    // fixed-size 256-bit variant is reported when the length is exactly 32.
    if (at_least_256_bit) targets.sve = true;
    if (vector_len == 32) targets.sve_256 = true;

    const has_sve2_group = cpu.has(.aarch64, .sve2) and
        cpu.has(.aarch64, .sve2_aes);
    if (has_sve2_group) {
        if (at_least_256_bit) {
            targets.sve2 = true;
        } else if (vector_len == 16) {
            targets.sve2_128 = true;
        }
    }

    return @bitCast(targets);
}
/// Return the SVE vector length in bytes.
///
/// On Linux this issues `prctl(PR_SVE_GET_VL)` and returns the low 16 bits
/// of a non-negative result. On any other OS, or when the call reports an
/// error (negative result), 16 is returned.
fn sveVectorBytes() usize {
    // Conservative default: 128-bit, i.e. NEON width.
    const fallback_bytes: usize = 16;

    if (comptime builtin.os.tag != .linux) return fallback_bytes;

    // Linux PR_SVE_GET_VL: the vector length sits in the lower 16 bits of
    // the return value.
    const pr_sve_get_vl = 51;
    const raw = std.os.linux.prctl(pr_sve_get_vl, 0, 0, 0, 0);

    // Reinterpret as signed to detect a failed call.
    const as_signed: isize = @bitCast(raw);
    if (as_signed < 0) return fallback_bytes;

    return raw & 0xFFFF;
}
/// Map the PowerPC features present in `cpu` to Highway targets.
///
/// The targets nest: `ppc9` is only considered once `ppc8` is reported, and
/// `ppc10` only once `ppc9` is reported.
///
/// Returns the `HwyTargets` value bit-cast to `i64`.
fn detectPpc(cpu: Target.Cpu) i64 {
    var targets: HwyTargets = .{};

    const has_power8 = cpu.has(.powerpc, .altivec) and
        cpu.has(.powerpc, .vsx) and
        cpu.has(.powerpc, .power8_vector) and
        cpu.has(.powerpc, .crypto);
    const has_power9 = has_power8 and
        cpu.has(.powerpc, .power9_vector);
    const has_power10 = has_power9 and
        cpu.has(.powerpc, .power10_vector) and
        cpu.has(.powerpc, .mma);

    if (has_power8) targets.ppc8 = true;
    if (has_power9) targets.ppc9 = true;
    if (has_power10) targets.ppc10 = true;

    return @bitCast(targets);
}
/// Map the s390x features present in `cpu` to Highway targets.
///
/// `z14` requires the vector facility plus vector enhancements 1; `z15`
/// additionally requires vector enhancements 2.
///
/// Returns the `HwyTargets` value bit-cast to `i64`.
fn detectS390x(cpu: Target.Cpu) i64 {
    var targets: HwyTargets = .{};

    const has_z14 = cpu.has(.s390x, .vector) and
        cpu.has(.s390x, .vector_enhancements_1);
    if (!has_z14) return @bitCast(targets);

    targets.z14 = true;
    if (cpu.has(.s390x, .vector_enhancements_2)) {
        targets.z15 = true;
    }
    return @bitCast(targets);
}
/// Map the RISC-V features present in `cpu` to Highway targets.
///
/// Reports `rvv` when the `v` feature is present.
///
/// Returns the `HwyTargets` value bit-cast to `i64`.
fn detectRiscv(cpu: Target.Cpu) i64 {
    var targets: HwyTargets = .{};
    const has_vector_ext = cpu.has(.riscv, .v);
    if (has_vector_ext) targets.rvv = true;
    return @bitCast(targets);
}
/// Map the LoongArch features present in `cpu` to Highway targets.
///
/// `lsx` is reported when the LSX feature is present; `lasx` is only
/// considered once `lsx` is reported.
///
/// Returns the `HwyTargets` value bit-cast to `i64`.
fn detectLoongArch(cpu: Target.Cpu) i64 {
    var targets: HwyTargets = .{};

    const has_lsx = cpu.has(.loongarch, .lsx);
    const has_lasx = has_lsx and cpu.has(.loongarch, .lasx);

    if (has_lsx) targets.lsx = true;
    if (has_lasx) targets.lasx = true;

    return @bitCast(targets);
}
/// Report whether the CPUID vendor string is "AuthenticAMD", matching
/// Highway's IsAMD().
///
/// Zig doesn't expose the vendor string, so CPUID is issued via inline
/// assembly with EAX = 0.
fn isAMD() bool {
    var reg_a: u32 = undefined;
    var reg_b: u32 = undefined;
    var reg_c: u32 = undefined;
    var reg_d: u32 = undefined;
    asm volatile ("cpuid"
        : [_] "={eax}" (reg_a),
          [_] "={ebx}" (reg_b),
          [_] "={ecx}" (reg_c),
          [_] "={edx}" (reg_d),
        : [_] "{eax}" (0),
    );

    // The 12-character vendor string is spread over EBX, EDX, ECX (in that
    // order), four little-endian bytes per register:
    // "Auth" (EBX) "enti" (EDX) "cAMD" (ECX).
    var vendor: [12]u8 = undefined;
    std.mem.writeInt(u32, vendor[0..4], reg_b, .little);
    std.mem.writeInt(u32, vendor[4..8], reg_d, .little);
    std.mem.writeInt(u32, vendor[8..12], reg_c, .little);

    return std.mem.eql(u8, &vendor, "AuthenticAMD");
}