mirror of
https://github.com/odin-lang/Odin.git
synced 2026-04-23 06:45:19 +00:00
Simplify and make simd_util cross-platform
This new algorithm uses a Scalar->Vector->Scalar iteration loop, which requires no masking of incomplete data chunks. Also, the width was reduced from 64 bytes to 32, as I found this to be about as fast as the previous 64-byte x86 version.
This commit is contained in:
@@ -309,14 +309,8 @@ index_byte :: proc(s: []byte, c: byte) -> int {
|
||||
// NOTE(Feoramund): On my Alder Lake CPU, I have only witnessed a
|
||||
// significant speedup when compiling in either Size or Speed mode.
|
||||
// The SIMD version is usually 2-3x slower without optimizations on.
|
||||
when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") {
|
||||
// SIMD's benefits are noticeable only past a certain threshold of data.
|
||||
// For small data, use the plain old algorithm.
|
||||
if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE {
|
||||
return simd_util.index_byte(s, c)
|
||||
} else {
|
||||
return _index_byte(s, c)
|
||||
}
|
||||
when ODIN_OPTIMIZATION_MODE > .Minimal {
|
||||
return #force_inline simd_util.index_byte(s, c)
|
||||
} else {
|
||||
return _index_byte(s, c)
|
||||
}
|
||||
@@ -333,12 +327,8 @@ last_index_byte :: proc(s: []byte, c: byte) -> int {
|
||||
return -1
|
||||
}
|
||||
|
||||
when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") {
|
||||
if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE {
|
||||
return simd_util.last_index_byte(s, c)
|
||||
} else {
|
||||
return _last_index_byte(s, c)
|
||||
}
|
||||
when ODIN_OPTIMIZATION_MODE > .Minimal {
|
||||
return #force_inline simd_util.last_index_byte(s, c)
|
||||
} else {
|
||||
return _last_index_byte(s, c)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user