mirror of
https://github.com/odin-lang/Odin.git
synced 2026-04-21 13:55:19 +00:00
core/simd/x86: Fix some intrinsics
- _mm_slli_si128 produced totally incorrect output
- _mm_storeu_si128 referred to an LLVM intrinsic that is missing
This commit is contained in:
@@ -144,19 +144,26 @@ _mm_subs_epu16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
|
||||
_mm_slli_si128_impl :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> __m128i {
|
||||
shift :: IMM8 & 0xff
|
||||
|
||||
// This needs to emit behavior identical to PSLLDQ which is as follows:
|
||||
//
|
||||
// TEMP := COUNT
|
||||
// IF (TEMP > 15) THEN TEMP := 16; FI
|
||||
// DEST := DEST << (TEMP * 8)
|
||||
// DEST[MAXVL-1:128] (Unmodified)
|
||||
|
||||
return transmute(__m128i)simd.shuffle(
|
||||
transmute(i8x16)a,
|
||||
i8x16(0),
|
||||
0 when shift > 15 else (16 - shift + 0),
|
||||
1 when shift > 15 else (16 - shift + 1),
|
||||
2 when shift > 15 else (16 - shift + 2),
|
||||
3 when shift > 15 else (16 - shift + 3),
|
||||
4 when shift > 15 else (16 - shift + 4),
|
||||
5 when shift > 15 else (16 - shift + 5),
|
||||
6 when shift > 15 else (16 - shift + 6),
|
||||
7 when shift > 15 else (16 - shift + 7),
|
||||
8 when shift > 15 else (16 - shift + 8),
|
||||
9 when shift > 15 else (16 - shift + 9),
|
||||
transmute(i8x16)a,
|
||||
0 when shift > 15 else (16 - shift + 0),
|
||||
1 when shift > 15 else (16 - shift + 1),
|
||||
2 when shift > 15 else (16 - shift + 2),
|
||||
3 when shift > 15 else (16 - shift + 3),
|
||||
4 when shift > 15 else (16 - shift + 4),
|
||||
5 when shift > 15 else (16 - shift + 5),
|
||||
6 when shift > 15 else (16 - shift + 6),
|
||||
7 when shift > 15 else (16 - shift + 7),
|
||||
8 when shift > 15 else (16 - shift + 8),
|
||||
9 when shift > 15 else (16 - shift + 9),
|
||||
10 when shift > 15 else (16 - shift + 10),
|
||||
11 when shift > 15 else (16 - shift + 11),
|
||||
12 when shift > 15 else (16 - shift + 12),
|
||||
@@ -435,7 +442,7 @@ _mm_store_si128 :: #force_inline proc "c" (mem_addr: ^__m128i, a: __m128i) {
|
||||
}
|
||||
@(enable_target_feature="sse2")
|
||||
_mm_storeu_si128 :: #force_inline proc "c" (mem_addr: ^__m128i, a: __m128i) {
|
||||
storeudq(mem_addr, a)
|
||||
intrinsics.unaligned_store(mem_addr, a)
|
||||
}
|
||||
@(enable_target_feature="sse2")
|
||||
_mm_storel_epi64 :: #force_inline proc "c" (mem_addr: ^__m128i, a: __m128i) {
|
||||
@@ -1178,8 +1185,6 @@ foreign _ {
|
||||
cvttsd2si :: proc(a: __m128d) -> i32 ---
|
||||
@(link_name="llvm.x86.sse2.cvttps2dq")
|
||||
cvttps2dq :: proc(a: __m128) -> i32x4 ---
|
||||
@(link_name="llvm.x86.sse2.storeu.dq")
|
||||
storeudq :: proc(mem_addr: rawptr, a: __m128i) ---
|
||||
@(link_name="llvm.x86.sse2.storeu.pd")
|
||||
storeupd :: proc(mem_addr: rawptr, a: __m128d) ---
|
||||
|
||||
|
||||
Reference in New Issue
Block a user