Add pack and unpack

This commit is contained in:
gingerBill
2022-05-27 17:07:48 +01:00
parent 20fe6d102a
commit 20c5033b38

View File

@@ -364,6 +364,101 @@ _mm_move_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i {
// Reinterprets `a` and `b` as eight 16-bit signed lanes each and hands them to
// `packsswb`; the packed result is reinterpreted back as `__m128i`.
// NOTE(review): `packsswb` is declared elsewhere; presumably the SSE2 signed-saturating
// word->byte pack - confirm against its foreign declaration.
_mm_packs_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i16x8)a
	rhs := transmute(i16x8)b
	packed := packsswb(lhs, rhs)
	return transmute(__m128i)packed
}
// Reinterprets `a` and `b` as four 32-bit signed lanes each and hands them to
// `packssdw`; the packed result is reinterpreted back as `__m128i`.
// NOTE(review): `packssdw` is declared elsewhere; presumably the SSE2 signed-saturating
// dword->word pack - confirm against its foreign declaration.
_mm_packs_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	packed := packssdw(lhs, rhs)
	return transmute(__m128i)packed
}
// Reinterprets `a` and `b` as eight 16-bit signed lanes each and hands them to
// `packuswb`; the packed result is reinterpreted back as `__m128i`.
// NOTE(review): `packuswb` is declared elsewhere; presumably the SSE2 unsigned-saturating
// word->byte pack - confirm against its foreign declaration.
_mm_packus_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i16x8)a
	rhs := transmute(i16x8)b
	packed := packuswb(lhs, rhs)
	return transmute(__m128i)packed
}
// Reads the 16-bit lane at compile-time index `IMM8` from `a` and returns it as an i32.
// The vector is viewed as unsigned 16-bit lanes, so the widening to i32 zero-extends.
_mm_extract_epi16 :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> i32 {
	lanes := transmute(u16x8)a
	picked := simd.extract(lanes, IMM8)
	return i32(picked)
}
// Returns a copy of `a` in which the 16-bit lane at compile-time index `IMM8` has been
// replaced by the low 16 bits of `i`; all other lanes are left untouched.
//
// Inputs:
// - a:    source vector, viewed as eight 16-bit lanes
// - i:    value to insert (truncated to 16 bits)
// - IMM8: constant lane index
//
// Returns:
// - the updated vector, reinterpreted as `__m128i`
_mm_insert_epi16 :: #force_inline proc "c" (a: __m128i, i: i32, $IMM8: u32) -> __m128i {
	// View the vector with i16 lanes so the lane type matches the inserted i16 value,
	// and transmute the resulting vector back to __m128i (the previous `i32(...)`
	// conversion did not match the declared return type).
	lanes := transmute(i16x8)a
	updated := simd.replace(lanes, IMM8, i16(i))
	return transmute(__m128i)updated
}
// Reinterprets `a` as sixteen 8-bit signed lanes and returns whatever `pmovmskb`
// yields for them.
// NOTE(review): `pmovmskb` is declared elsewhere; presumably it gathers the sign bit of
// each byte into the low 16 bits of the result - confirm against its declaration.
_mm_movemask_epi8 :: #force_inline proc "c" (a: __m128i) -> i32 {
	bytes := transmute(i8x16)a
	return pmovmskb(bytes)
}
// Permutes the four 32-bit lanes of `a` according to the compile-time constant `IMM8`.
// Each successive 2-bit field of `IMM8` (bits 0-1, 2-3, 4-5, 6-7) is the source lane
// index for output lanes 0, 1, 2 and 3 respectively.
_mm_shuffle_epi32 :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> __m128i {
	lanes := transmute(i32x4)a
	// The same vector is supplied as both shuffle operands; every index is in 0..3.
	shuffled := simd.shuffle(
		lanes,
		lanes,
		(IMM8 >> 0) & 0x3,
		(IMM8 >> 2) & 0x3,
		(IMM8 >> 4) & 0x3,
		(IMM8 >> 6) & 0x3,
	)
	return transmute(__m128i)shuffled
}
// Permutes the upper four 16-bit lanes (4..7) of `a` according to the compile-time
// constant `IMM8`; the lower four lanes (0..3) are passed through unchanged.
// Each successive 2-bit field of `IMM8` picks a source lane within the upper half.
_mm_shufflehi_epi16 :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> __m128i {
	lanes := transmute(i16x8)a
	shuffled := simd.shuffle(
		lanes,
		lanes,
		// Lower half: identity.
		0, 1, 2, 3,
		// Upper half: 2-bit selectors rebased onto lanes 4..7.
		4 + ((IMM8 >> 0) & 0x3),
		4 + ((IMM8 >> 2) & 0x3),
		4 + ((IMM8 >> 4) & 0x3),
		4 + ((IMM8 >> 6) & 0x3),
	)
	return transmute(__m128i)shuffled
}
// Permutes the lower four 16-bit lanes (0..3) of `a` according to the compile-time
// constant `IMM8`; the upper four lanes (4..7) are passed through unchanged.
// Each successive 2-bit field of `IMM8` picks a source lane within the lower half.
_mm_shufflelo_epi16 :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> __m128i {
	lanes := transmute(i16x8)a
	shuffled := simd.shuffle(
		lanes,
		lanes,
		// Lower half: 2-bit selectors, each in 0..3.
		(IMM8 >> 0) & 0x3,
		(IMM8 >> 2) & 0x3,
		(IMM8 >> 4) & 0x3,
		(IMM8 >> 6) & 0x3,
		// Upper half: identity.
		4, 5, 6, 7,
	)
	return transmute(__m128i)shuffled
}
// Interleaves the upper eight 8-bit lanes of `a` and `b`.
// Shuffle indices 8..15 select lanes of the first operand and 24..31 select lanes 8..15
// of the second, giving the order a8, b8, a9, b9, ..., a15, b15.
_mm_unpackhi_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i8x16)a
	rhs := transmute(i8x16)b
	interleaved := simd.shuffle(
		lhs, rhs,
		8, 24, 9, 25, 10, 26, 11, 27,
		12, 28, 13, 29, 14, 30, 15, 31,
	)
	return transmute(__m128i)interleaved
}
// Interleaves the upper four 16-bit lanes of `a` and `b`.
// Shuffle indices 4..7 select lanes of the first operand and 12..15 select lanes 4..7
// of the second, giving the order a4, b4, a5, b5, a6, b6, a7, b7.
_mm_unpackhi_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i16x8)a
	rhs := transmute(i16x8)b
	interleaved := simd.shuffle(lhs, rhs, 4, 12, 5, 13, 6, 14, 7, 15)
	return transmute(__m128i)interleaved
}
// Interleaves the upper two 32-bit lanes of `a` and `b`.
// Shuffle indices 2..3 select lanes of the first operand and 6..7 select lanes 2..3
// of the second, giving the order a2, b2, a3, b3.
_mm_unpackhi_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	interleaved := simd.shuffle(lhs, rhs, 2, 6, 3, 7)
	return transmute(__m128i)interleaved
}
// Combines the upper 64-bit lane of `a` with the upper 64-bit lane of `b`.
// Shuffle index 1 selects lane 1 of the first operand and index 3 selects lane 1 of
// the second, giving the order a1, b1.
_mm_unpackhi_epi64 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i64x2)a
	rhs := transmute(i64x2)b
	combined := simd.shuffle(lhs, rhs, 1, 3)
	return transmute(__m128i)combined
}
// Interleaves the lower eight 8-bit lanes of `a` and `b`.
// Shuffle indices 0..7 select lanes of the first operand and 16..23 select lanes 0..7
// of the second, giving the order a0, b0, a1, b1, ..., a7, b7.
_mm_unpacklo_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i8x16)a
	rhs := transmute(i8x16)b
	interleaved := simd.shuffle(
		lhs, rhs,
		0, 16, 1, 17, 2, 18, 3, 19,
		4, 20, 5, 21, 6, 22, 7, 23,
	)
	return transmute(__m128i)interleaved
}
// Interleaves the lower four 16-bit lanes of `a` and `b`.
// Shuffle indices 0..3 select lanes of the first operand and 8..11 select lanes 0..3
// of the second, giving the order a0, b0, a1, b1, a2, b2, a3, b3.
_mm_unpacklo_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i16x8)a
	rhs := transmute(i16x8)b
	interleaved := simd.shuffle(lhs, rhs, 0, 8, 1, 9, 2, 10, 3, 11)
	return transmute(__m128i)interleaved
}
// Interleaves the lower two 32-bit lanes of `a` and `b`.
// Shuffle indices 0..1 select lanes of the first operand and 4..5 select lanes 0..1
// of the second, giving the order a0, b0, a1, b1.
_mm_unpacklo_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	interleaved := simd.shuffle(lhs, rhs, 0, 4, 1, 5)
	return transmute(__m128i)interleaved
}
// Combines the lower 64-bit lane of `a` with the lower 64-bit lane of `b`.
// Shuffle index 0 selects lane 0 of the first operand and index 2 selects lane 0 of
// the second, giving the order a0, b0.
_mm_unpacklo_epi64 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i64x2)a
	rhs := transmute(i64x2)b
	combined := simd.shuffle(lhs, rhs, 0, 2)
	return transmute(__m128i)combined
}
_mm_castpd_ps :: #force_inline proc "c" (a: __m128d) -> __m128 {
return transmute(__m128)a