From 20c5033b38e108b1e127cc6b8be61cce043cba9e Mon Sep 17 00:00:00 2001
From: gingerBill
Date: Fri, 27 May 2022 17:07:48 +0100
Subject: [PATCH] Add pack and unpack

---
 core/simd/x86/sse2.odin | 121 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)

diff --git a/core/simd/x86/sse2.odin b/core/simd/x86/sse2.odin
index bba842820..f52981639 100644
--- a/core/simd/x86/sse2.odin
+++ b/core/simd/x86/sse2.odin
@@ -364,6 +364,127 @@ _mm_move_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i {
 
 
 
+// Passes `a` and `b`, reinterpreted as i16x8, to `packsswb` (declared outside this hunk)
+// and reinterprets its result as __m128i. Presumably the signed-saturating i16 -> i8 pack.
+_mm_packs_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return transmute(__m128i)packsswb(transmute(i16x8)a, transmute(i16x8)b)
+}
+// Passes `a` and `b`, reinterpreted as i32x4, to `packssdw` (declared outside this hunk)
+// and reinterprets its result as __m128i. Presumably the signed-saturating i32 -> i16 pack.
+_mm_packs_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return transmute(__m128i)packssdw(transmute(i32x4)a, transmute(i32x4)b)
+}
+// Passes `a` and `b`, reinterpreted as i16x8, to `packuswb` (declared outside this hunk)
+// and reinterprets its result as __m128i. Presumably the unsigned-saturating i16 -> u8 pack.
+_mm_packus_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return transmute(__m128i)packuswb(transmute(i16x8)a, transmute(i16x8)b)
+}
+// Returns 16-bit lane IMM8 of `a` converted to i32. The lane is read as u16,
+// so the conversion does not sign-extend it.
+_mm_extract_epi16 :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> i32 {
+	return i32(simd.extract(transmute(u16x8)a, IMM8))
+}
+// Returns `a` with 16-bit lane IMM8 replaced by `i` truncated to i16.
+// The updated vector is transmuted back to __m128i to match the declared return type.
+_mm_insert_epi16 :: #force_inline proc "c" (a: __m128i, i: i32, $IMM8: u32) -> __m128i {
+	return transmute(__m128i)simd.replace(transmute(i16x8)a, IMM8, i16(i))
+}
+// Passes `a`, reinterpreted as i8x16, to `pmovmskb` (declared outside this hunk) and returns
+// its result. Presumably a bit mask built from the top bit of each byte lane.
+_mm_movemask_epi8 :: #force_inline proc "c" (a: __m128i) -> i32 {
+	return pmovmskb(transmute(i8x16)a)
+}
+// Rearranges the four 32-bit lanes of `a`. Result lane k is the source lane selected by
+// bits [2k+1:2k] of IMM8 (four 2-bit fields, lowest field first).
+_mm_shuffle_epi32 :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> __m128i {
+	v := transmute(i32x4)a
+	return transmute(__m128i)simd.shuffle(
+		v,
+		v,
+		IMM8 & 0b11,
+		(IMM8 >> 2) & 0b11,
+		(IMM8 >> 4) & 0b11,
+		(IMM8 >> 6) & 0b11,
+	)
+}
+// Rearranges the upper four 16-bit lanes of `a` and copies lanes 0..3 through unchanged.
+// Result lane 4+k is source lane 4 + (bits [2k+1:2k] of IMM8).
+_mm_shufflehi_epi16 :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> __m128i {
+	v := transmute(i16x8)a
+	return transmute(__m128i)simd.shuffle(
+		v,
+		v,
+		0,
+		1,
+		2,
+		3,
+		(IMM8 & 0b11) + 4,
+		((IMM8 >> 2) & 0b11) + 4,
+		((IMM8 >> 4) & 0b11) + 4,
+		((IMM8 >> 6) & 0b11) + 4,
+	)
+}
+// Rearranges the lower four 16-bit lanes of `a` and copies lanes 4..7 through unchanged.
+// Result lane k (k < 4) is the source lane selected by bits [2k+1:2k] of IMM8.
+_mm_shufflelo_epi16 :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> __m128i {
+	v := transmute(i16x8)a
+	return transmute(__m128i)simd.shuffle(
+		v,
+		v,
+		IMM8 & 0b11,
+		(IMM8 >> 2) & 0b11,
+		(IMM8 >> 4) & 0b11,
+		(IMM8 >> 6) & 0b11,
+		4,
+		5,
+		6,
+		7,
+	)
+}
+// Interleaves the upper eight 8-bit lanes of `a` and `b`: a8, b8, a9, b9, ..., a15, b15.
+_mm_unpackhi_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return transmute(__m128i)simd.shuffle(
+		transmute(i8x16)a,
+		transmute(i8x16)b,
+		8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
+	)
+}
+// Interleaves the upper four 16-bit lanes of `a` and `b`: a4, b4, a5, b5, a6, b6, a7, b7.
+_mm_unpackhi_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return transmute(__m128i)simd.shuffle(transmute(i16x8)a, transmute(i16x8)b, 4, 12, 5, 13, 6, 14, 7, 15)
+}
+// Interleaves the upper two 32-bit lanes of `a` and `b`: a2, b2, a3, b3.
+_mm_unpackhi_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return transmute(__m128i)simd.shuffle(transmute(i32x4)a, transmute(i32x4)b, 2, 6, 3, 7)
+}
+// Pairs the upper 64-bit lane of `a` with the upper 64-bit lane of `b`: a1, b1.
+_mm_unpackhi_epi64 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return transmute(__m128i)simd.shuffle(transmute(i64x2)a, transmute(i64x2)b, 1, 3)
+}
+// Interleaves the lower eight 8-bit lanes of `a` and `b`: a0, b0, a1, b1, ..., a7, b7.
+_mm_unpacklo_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return transmute(__m128i)simd.shuffle(
+		transmute(i8x16)a,
+		transmute(i8x16)b,
+		0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23,
+	)
+}
+// Interleaves the lower four 16-bit lanes of `a` and `b`: a0, b0, a1, b1, a2, b2, a3, b3.
+_mm_unpacklo_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return transmute(__m128i)simd.shuffle(transmute(i16x8)a, transmute(i16x8)b, 0, 8, 1, 9, 2, 10, 3, 11)
+}
+// Interleaves the lower two 32-bit lanes of `a` and `b`: a0, b0, a1, b1.
+_mm_unpacklo_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return transmute(__m128i)simd.shuffle(transmute(i32x4)a, transmute(i32x4)b, 0, 4, 1, 5)
+}
+// Pairs the lower 64-bit lane of `a` with the lower 64-bit lane of `b`: a0, b0.
+_mm_unpacklo_epi64 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
+	return transmute(__m128i)simd.shuffle(transmute(i64x2)a, transmute(i64x2)b, 0, 2)
+}
+
+
+
+
 
 _mm_castpd_ps :: #force_inline proc "c" (a: __m128d) -> __m128 {
 	return transmute(__m128)a