mirror of
https://github.com/Ed94/Odin.git
synced 2026-06-25 07:04:58 -07:00
Add load and stores and sets
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
//+build i386, amd64
|
||||
package simd_x86
|
||||
|
||||
import "core:intrinsics"
|
||||
import "core:simd"
|
||||
|
||||
_mm_pause :: #force_inline proc "c" () {
|
||||
@@ -287,6 +288,80 @@ _mm_cvtsi128_si32 :: #force_inline proc "c" (a: __m128i) -> i32 {
|
||||
|
||||
|
||||
|
||||
// Builds a __m128i from two 64-bit integers.
// Arguments are listed high-to-low: e0 becomes element 0 and e1 element 1
// of the underlying i64x2 vector.
_mm_set_epi64x :: #force_inline proc "c" (e1, e0: i64) -> __m128i {
	lanes := i64x2{e0, e1}
	return transmute(__m128i)lanes
}
|
||||
// Builds a __m128i from four 32-bit integers.
// Arguments are listed high-to-low: e0 becomes element 0, e3 element 3
// of the underlying i32x4 vector.
_mm_set_epi32 :: #force_inline proc "c" (e3, e2, e1, e0: i32) -> __m128i {
	lanes := i32x4{e0, e1, e2, e3}
	return transmute(__m128i)lanes
}
|
||||
// Builds a __m128i from eight 16-bit integers.
// Arguments are listed high-to-low: e0 becomes element 0, e7 element 7
// of the underlying i16x8 vector.
_mm_set_epi16 :: #force_inline proc "c" (e7, e6, e5, e4, e3, e2, e1, e0: i16) -> __m128i {
	lanes := i16x8{
		e0, e1, e2, e3,
		e4, e5, e6, e7,
	}
	return transmute(__m128i)lanes
}
|
||||
// Builds a __m128i from sixteen 8-bit integers.
// Arguments are listed high-to-low: e0 becomes element 0, e15 element 15
// of the underlying i8x16 vector.
_mm_set_epi8 :: #force_inline proc "c" (e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0: i8) -> __m128i {
	lanes := i8x16{
		e0,  e1,  e2,  e3,
		e4,  e5,  e6,  e7,
		e8,  e9,  e10, e11,
		e12, e13, e14, e15,
	}
	return transmute(__m128i)lanes
}
|
||||
// Broadcasts the 64-bit integer `a` into both elements of a __m128i.
_mm_set1_epi64x :: #force_inline proc "c" (a: i64) -> __m128i {
	splat := i64x2{a, a}
	return transmute(__m128i)splat
}
|
||||
// Broadcasts the 32-bit integer `a` into all four elements of a __m128i.
_mm_set1_epi32 :: #force_inline proc "c" (a: i32) -> __m128i {
	splat := i32x4{a, a, a, a}
	return transmute(__m128i)splat
}
|
||||
// Broadcasts the 16-bit integer `a` into all eight elements of a __m128i.
_mm_set1_epi16 :: #force_inline proc "c" (a: i16) -> __m128i {
	splat := i16x8{
		a, a, a, a,
		a, a, a, a,
	}
	return transmute(__m128i)splat
}
|
||||
// Broadcasts the 8-bit integer `a` into all sixteen elements of a __m128i.
_mm_set1_epi8 :: #force_inline proc "c" (a: i8) -> __m128i {
	splat := i8x16{
		a, a, a, a,
		a, a, a, a,
		a, a, a, a,
		a, a, a, a,
	}
	return transmute(__m128i)splat
}
|
||||
// Builds a __m128i from four 32-bit integers in "reversed" order relative to
// _mm_set_epi32: the first argument (e3) becomes element 0 and the last
// argument (e0) becomes element 3 of the underlying i32x4 vector.
_mm_setr_epi32 :: #force_inline proc "c" (e3, e2, e1, e0: i32) -> __m128i {
	lanes := i32x4{e3, e2, e1, e0}
	return transmute(__m128i)lanes
}
|
||||
// Builds a __m128i from eight 16-bit integers in "reversed" order relative to
// _mm_set_epi16: the first argument (e7) becomes element 0 and the last
// argument (e0) becomes element 7 of the underlying i16x8 vector.
_mm_setr_epi16 :: #force_inline proc "c" (e7, e6, e5, e4, e3, e2, e1, e0: i16) -> __m128i {
	lanes := i16x8{
		e7, e6, e5, e4,
		e3, e2, e1, e0,
	}
	return transmute(__m128i)lanes
}
|
||||
// Builds a __m128i from sixteen 8-bit integers in "reversed" order relative to
// _mm_set_epi8: the first argument (e15) becomes element 0 and the last
// argument (e0) becomes element 15 of the underlying i8x16 vector.
_mm_setr_epi8 :: #force_inline proc "c" (e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0: i8) -> __m128i {
	lanes := i8x16{
		e15, e14, e13, e12,
		e11, e10, e9,  e8,
		e7,  e6,  e5,  e4,
		e3,  e2,  e1,  e0,
	}
	return transmute(__m128i)lanes
}
|
||||
// Returns a __m128i whose two 64-bit elements are both zero.
_mm_setzero_si128 :: #force_inline proc "c" () -> __m128i {
	zeroes := i64x2{0, 0}
	return transmute(__m128i)zeroes
}
|
||||
|
||||
|
||||
// Reads one i64 from `mem_addr` (via intrinsics.unaligned_load) and returns it
// as element 0 of a __m128i; element 1 is set to zero.
_mm_loadl_epi64 :: #force_inline proc "c" (mem_addr: ^__m128i) -> __m128i {
	// Only the first 8 bytes behind the pointer are read.
	low := intrinsics.unaligned_load((^i64)(mem_addr))
	lanes := i64x2{low, 0}
	return transmute(__m128i)lanes
}
|
||||
// Loads a full __m128i by directly dereferencing `mem_addr`.
// NOTE(review): a plain dereference presumably requires `mem_addr` to be
// suitably aligned for __m128i — confirm; use _mm_loadu_si128 otherwise.
_mm_load_si128 :: #force_inline proc "c" (mem_addr: ^__m128i) -> __m128i {
	value := mem_addr^
	return value
}
|
||||
// Loads a full __m128i from `mem_addr` by copying its bytes into a local,
// rather than dereferencing the pointer directly.
_mm_loadu_si128 :: #force_inline proc "c" (mem_addr: ^__m128i) -> __m128i {
	// Start from an unspecified value; every byte is overwritten by the copy below.
	result := _mm_undefined_si128()
	byte_count := size_of(__m128i)
	intrinsics.mem_copy_non_overlapping(&result, mem_addr, byte_count)
	return result
}
|
||||
// Forwards `a` and `mask`, reinterpreted as i8x16 vectors, together with
// `mem_addr` to `maskmovdqu` (declared elsewhere in this package).
// NOTE(review): presumably a byte-wise conditional store controlled by
// `mask` — confirm against the maskmovdqu declaration.
_mm_maskmoveu_si128 :: #force_inline proc "c" (a, mask: __m128i, mem_addr: rawptr) {
	data_bytes := transmute(i8x16)a
	mask_bytes := transmute(i8x16)mask
	maskmovdqu(data_bytes, mask_bytes, mem_addr)
}
|
||||
// Stores the full __m128i `a` to `mem_addr` with a direct pointer write.
// NOTE(review): a plain write presumably requires `mem_addr` to be suitably
// aligned for __m128i — confirm; use _mm_storeu_si128 otherwise.
_mm_store_si128 :: #force_inline proc "c" (mem_addr: ^__m128i, a: __m128i) {
	dst := mem_addr
	dst^ = a
}
|
||||
// Stores the full __m128i `a` to `mem_addr` by delegating to `storeudq`
// (declared elsewhere in this package).
// NOTE(review): presumably the unaligned counterpart of _mm_store_si128 —
// confirm against the storeudq declaration.
_mm_storeu_si128 :: #force_inline proc "c" (mem_addr: ^__m128i, a: __m128i) {
	dst := mem_addr
	storeudq(dst, a)
}
|
||||
// Writes the first 8 bytes of `a` to `mem_addr`; the remaining bytes at the
// destination are left untouched.
_mm_storel_epi64 :: #force_inline proc "c" (mem_addr: ^__m128i, a: __m128i) {
	// Parameters are not addressable, so take a local copy to copy from.
	src := a
	intrinsics.mem_copy_non_overlapping(mem_addr, &src, 8)
}
|
||||
// Stores the full __m128i `a` to `mem_addr` using
// intrinsics.nontemporal_store.
_mm_stream_si128 :: #force_inline proc "c" (mem_addr: ^__m128i, a: __m128i) {
	dst := mem_addr
	intrinsics.nontemporal_store(dst, a)
}
|
||||
// Stores the 32-bit integer `a` to `mem_addr` using
// intrinsics.nontemporal_store.
_mm_stream_si32 :: #force_inline proc "c" (mem_addr: ^i32, a: i32) {
	dst := mem_addr
	intrinsics.nontemporal_store(dst, a)
}
|
||||
// Returns a __m128i built by shuffling `a` with an all-zero vector, both
// viewed as i64x2, using shuffle indices 0 and 2.
// NOTE(review): index 0 selects element 0 of `a`; index 2 presumably selects
// element 0 of the zero vector (indices past the first operand address the
// second) — confirm against the simd.shuffle documentation. The intended
// result is the low 64 bits of `a` with the high 64 bits cleared.
_mm_move_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	src_lanes  := transmute(i64x2)a
	zero_lanes := transmute(i64x2)_mm_setzero_si128()
	moved := simd.shuffle(src_lanes, zero_lanes, 0, 2)
	return transmute(__m128i)moved
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user