mirror of
https://github.com/odin-lang/Odin.git
synced 2026-04-19 13:00:28 +00:00
core/math/rand: Disable LLVM vectorization of xoshiro on Intel
When `-microarch:native` is specified on modern processors, LLVM tries really hard to vectorize this code even though doing so is a terrible idea. Preventing the vectorization yields a 2.5x to 3x performance gain.
This commit is contained in:
@@ -3,6 +3,18 @@ package rand
|
||||
import "base:intrinsics"
|
||||
import "base:runtime"
|
||||
|
||||
when ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
|
||||
// LLVM thinks that using SIMD for read_u64 is good,
|
||||
// when it causes a ~3x performance regression. As
|
||||
// far as I can tell, this behavior is limited to
|
||||
// Intel.
|
||||
@(private = "file")
|
||||
TARGET_FEATURES :: "-sse,-avx,-avx2"
|
||||
} else {
|
||||
@(private = "file")
|
||||
TARGET_FEATURES :: ""
|
||||
}
|
||||
|
||||
/*
|
||||
The state for a xoshiro256** pseudorandom generator.
|
||||
*/
|
||||
@@ -10,8 +22,9 @@ Xoshiro256_Random_State :: struct {
|
||||
s: [4]u64,
|
||||
}
|
||||
|
||||
@(enable_target_feature = TARGET_FEATURES)
|
||||
xoshiro256_random_generator_proc :: proc(data: rawptr, mode: runtime.Random_Generator_Mode, p: []byte) {
|
||||
@(require_results)
|
||||
@(require_results, enable_target_feature = TARGET_FEATURES)
|
||||
read_u64 :: proc "contextless" (r: ^Xoshiro256_Random_State) -> u64 {
|
||||
// xoshiro256** output function and state transition
|
||||
|
||||
@@ -27,7 +40,7 @@ xoshiro256_random_generator_proc :: proc(data: rawptr, mode: runtime.Random_Gene
|
||||
|
||||
return result
|
||||
|
||||
rotate_left64 :: proc "contextless" (x: u64, k: int) -> u64 {
|
||||
rotate_left64 :: #force_inline proc "contextless" (x: u64, k: int) -> u64 {
|
||||
n :: 64
|
||||
s := uint(k) & (n-1)
|
||||
return x << s | x >> (n-s)
|
||||
|
||||
Reference in New Issue
Block a user