core/math/rand: Disable LLVM vectorization of xoshiro on Intel

When `-microarch:native` is specified on modern processors, LLVM tries
really hard to vectorize this, even though doing so is a terrible idea.
Disabling the vectorization is a 2.5x to 3x performance gain.
This commit is contained in:
Yawning Angel
2026-03-18 05:41:50 +09:00
parent 62d78d61fa
commit a171746a15

View File

@@ -3,6 +3,18 @@ package rand
import "base:intrinsics"
import "base:runtime"
when ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
// Target-feature string passed to `enable_target_feature` on the
// xoshiro256** procedures in this file.
//
// LLVM thinks that using SIMD for read_u64 is a good idea, but
// vectorizing it causes a ~3x performance regression.  As far as
// I can tell, this behavior is limited to Intel, so the override
// is only applied when building for amd64 or i386.
//
// NOTE(review): the leading `-` on each feature is presumably what
// turns SSE/AVX/AVX2 off for the annotated procedures; confirm
// against the compiler's handling of `enable_target_feature`.
@(private = "file")
TARGET_FEATURES :: "-sse,-avx,-avx2"
} else {
// Every other architecture gets an empty feature string.
@(private = "file")
TARGET_FEATURES :: ""
}
/*
The state for a xoshiro256** pseudorandom generator.
*/
@@ -10,8 +22,9 @@ Xoshiro256_Random_State :: struct {
s: [4]u64,
}
@(enable_target_feature = TARGET_FEATURES)
xoshiro256_random_generator_proc :: proc(data: rawptr, mode: runtime.Random_Generator_Mode, p: []byte) {
@(require_results)
@(require_results, enable_target_feature = TARGET_FEATURES)
read_u64 :: proc "contextless" (r: ^Xoshiro256_Random_State) -> u64 {
// xoshiro256** output function and state transition
@@ -27,7 +40,7 @@ xoshiro256_random_generator_proc :: proc(data: rawptr, mode: runtime.Random_Gene
return result
rotate_left64 :: proc "contextless" (x: u64, k: int) -> u64 {
rotate_left64 :: #force_inline proc "contextless" (x: u64, k: int) -> u64 {
n :: 64
s := uint(k) & (n-1)
return x << s | x >> (n-s)