mirror of
https://github.com/Ed94/Odin.git
synced 2026-06-18 11:52:22 -07:00
Add @(require_results) to all appropriate procedures
This commit is contained in:
@@ -3,17 +3,21 @@ package simd_x86
|
||||
|
||||
import "core:intrinsics"
|
||||
|
||||
// Returns the result of intrinsics.count_leading_zeros for the 32-bit input `x`.
@(require_results)
_lzcnt_u32 :: #force_inline proc "c" (x: u32) -> u32 {
	leading_zero_count := intrinsics.count_leading_zeros(x)
	return leading_zero_count
}
|
||||
// Returns intrinsics.count_ones of the 32-bit input `x`, converted to i32.
@(require_results)
_popcnt32 :: #force_inline proc "c" (x: u32) -> i32 {
	bits_set := intrinsics.count_ones(x)
	return i32(bits_set)
}
|
||||
|
||||
when ODIN_ARCH == .amd64 {
|
||||
// Returns the result of intrinsics.count_leading_zeros for the 64-bit input `x`.
@(require_results)
_lzcnt_u64 :: #force_inline proc "c" (x: u64) -> u64 {
	leading_zero_count := intrinsics.count_leading_zeros(x)
	return leading_zero_count
}
|
||||
// Returns intrinsics.count_ones of the 64-bit input `x`, converted to i32.
@(require_results)
_popcnt64 :: #force_inline proc "c" (x: u64) -> i32 {
	bits_set := intrinsics.count_ones(x)
	return i32(bits_set)
}
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
//+build i386, amd64
|
||||
package simd_x86
|
||||
|
||||
// Calls llvm_addcarry_u32(c_in, a, b), stores the second returned value through
// `out`, and returns the first returned value.
@(require_results)
_addcarry_u32 :: #force_inline proc "c" (c_in: u8, a: u32, b: u32, out: ^u32) -> u8 {
	first, second := llvm_addcarry_u32(c_in, a, b)
	out^ = second
	return first
}
|
||||
// Forwards all arguments, including the `out` pointer, to llvm_addcarryx_u32 and
// returns its u8 result.
@(require_results)
_addcarryx_u32 :: #force_inline proc "c" (c_in: u8, a: u32, b: u32, out: ^u32) -> u8 {
	result := llvm_addcarryx_u32(c_in, a, b, out)
	return result
}
|
||||
@(require_results)
|
||||
_subborrow_u32 :: #force_inline proc "c" (c_in: u8, a: u32, b: u32, out: ^u32) -> u8 {
|
||||
x, y := llvm_subborrow_u32(c_in, a, b)
|
||||
out^ = y
|
||||
@@ -16,14 +19,17 @@ _subborrow_u32 :: #force_inline proc "c" (c_in: u8, a: u32, b: u32, out: ^u32) -
|
||||
}
|
||||
|
||||
when ODIN_ARCH == .amd64 {
|
||||
// Calls llvm_addcarry_u64(c_in, a, b), stores the second returned value through
// `out`, and returns the first returned value.
@(require_results)
_addcarry_u64 :: #force_inline proc "c" (c_in: u8, a: u64, b: u64, out: ^u64) -> u8 {
	first, second := llvm_addcarry_u64(c_in, a, b)
	out^ = second
	return first
}
|
||||
// Forwards all arguments, including the `out` pointer, to llvm_addcarryx_u64 and
// returns its u8 result.
@(require_results)
_addcarryx_u64 :: #force_inline proc "c" (c_in: u8, a: u64, b: u64, out: ^u64) -> u8 {
	result := llvm_addcarryx_u64(c_in, a, b, out)
	return result
}
|
||||
@(require_results)
|
||||
_subborrow_u64 :: #force_inline proc "c" (c_in: u8, a: u64, b: u64, out: ^u64) -> u8 {
|
||||
x, y := llvm_subborrow_u64(c_in, a, b)
|
||||
out^ = y
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//+build i386, amd64
|
||||
package simd_x86
|
||||
|
||||
// Passes `a`, `b` and the compile-time immediate IMM8 (converted to u8) to pclmulqdq.
@(require_results, enable_target_feature="pclmulqdq")
_mm_clmulepi64_si128 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u8) -> __m128i {
	return pclmulqdq(
		a,
		b,
		u8(IMM8),
	)
}
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
//+build i386, amd64
|
||||
package simd_x86
|
||||
|
||||
// Returns the u64 value produced by rdtsc().
@(require_results)
_rdtsc :: #force_inline proc "c" () -> u64 {
	counter := rdtsc()
	return counter
}
|
||||
|
||||
// Returns the u64 value produced by rdtscp(aux); `aux` is handed to rdtscp unchanged.
@(require_results)
__rdtscp :: #force_inline proc "c" (aux: ^u32) -> u64 {
	counter := rdtscp(aux)
	return counter
}
|
||||
|
||||
@@ -1,31 +1,31 @@
|
||||
//+build i386, amd64
|
||||
package simd_x86
|
||||
|
||||
// SHA wrappers. Each one reinterprets its __m128i operands as i32x4, calls the
// matching sha* procedure, and reinterprets the result back to __m128i.

@(require_results, enable_target_feature="sha")
_mm_sha1msg1_epu32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	return transmute(__m128i)sha1msg1(lhs, rhs)
}

@(require_results, enable_target_feature="sha")
_mm_sha1msg2_epu32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	return transmute(__m128i)sha1msg2(lhs, rhs)
}

@(require_results, enable_target_feature="sha")
_mm_sha1nexte_epu32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	return transmute(__m128i)sha1nexte(lhs, rhs)
}

// FUNC is a compile-time constant restricted to 0..=3 by the `where` clause; it is
// masked with 0xff and converted to u8 before being passed on.
@(require_results, enable_target_feature="sha")
_mm_sha1rnds4_epu32 :: #force_inline proc "c" (a, b: __m128i, $FUNC: u32) -> __m128i where 0 <= FUNC, FUNC <= 3 {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	return transmute(__m128i)sha1rnds4(lhs, rhs, u8(FUNC & 0xff))
}

@(require_results, enable_target_feature="sha")
_mm_sha256msg1_epu32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	return transmute(__m128i)sha256msg1(lhs, rhs)
}

@(require_results, enable_target_feature="sha")
_mm_sha256msg2_epu32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	return transmute(__m128i)sha256msg2(lhs, rhs)
}

@(require_results, enable_target_feature="sha")
_mm_sha256rnds2_epu32 :: #force_inline proc "c" (a, b, k: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	third := transmute(i32x4)k
	return transmute(__m128i)sha256rnds2(lhs, rhs, third)
}
|
||||
|
||||
+88
-88
@@ -43,299 +43,299 @@ _MM_FLUSH_ZERO_ON :: 0x8000
|
||||
_MM_FLUSH_ZERO_OFF :: 0x0000
|
||||
|
||||
|
||||
// Add / subtract / multiply / divide wrappers. The *_ss variants forward to the
// addss/subss/mulss/divss procedures; the *_ps variants use the generic
// simd.add/sub/mul/div operations on the whole vector.

@(require_results, enable_target_feature="sse")
_mm_add_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := addss(a, b)
	return result
}
@(require_results, enable_target_feature="sse")
_mm_add_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := simd.add(a, b)
	return result
}

@(require_results, enable_target_feature="sse")
_mm_sub_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := subss(a, b)
	return result
}
@(require_results, enable_target_feature="sse")
_mm_sub_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := simd.sub(a, b)
	return result
}

@(require_results, enable_target_feature="sse")
_mm_mul_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := mulss(a, b)
	return result
}
@(require_results, enable_target_feature="sse")
_mm_mul_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := simd.mul(a, b)
	return result
}

@(require_results, enable_target_feature="sse")
_mm_div_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := divss(a, b)
	return result
}
@(require_results, enable_target_feature="sse")
_mm_div_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := simd.div(a, b)
	return result
}
|
||||
|
||||
// Returns sqrtss(a).
// NOTE(review): parameter `b` is accepted but never read by this body. The sibling
// single-operand wrappers (_mm_sqrt_ps, _mm_rcp_ss, _mm_rsqrt_ss) take only `a`;
// confirm whether `b` is a leftover before removing it, since that changes the
// signature seen by callers.
@(require_results, enable_target_feature="sse")
_mm_sqrt_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := sqrtss(a)
	return result
}
|
||||
// Single-operand wrappers: each forwards `a` to the like-named procedure
// (sqrtps, rcpss, rcpps, rsqrtss, rsqrtps) and returns its result.

@(require_results, enable_target_feature="sse")
_mm_sqrt_ps :: #force_inline proc "c" (a: __m128) -> __m128 {
	result := sqrtps(a)
	return result
}

@(require_results, enable_target_feature="sse")
_mm_rcp_ss :: #force_inline proc "c" (a: __m128) -> __m128 {
	result := rcpss(a)
	return result
}
@(require_results, enable_target_feature="sse")
_mm_rcp_ps :: #force_inline proc "c" (a: __m128) -> __m128 {
	result := rcpps(a)
	return result
}

@(require_results, enable_target_feature="sse")
_mm_rsqrt_ss :: #force_inline proc "c" (a: __m128) -> __m128 {
	result := rsqrtss(a)
	return result
}
@(require_results, enable_target_feature="sse")
_mm_rsqrt_ps :: #force_inline proc "c" (a: __m128) -> __m128 {
	result := rsqrtps(a)
	return result
}
|
||||
|
||||
// Min / max wrappers: each forwards (a, b) to minss/minps/maxss/maxps.

@(require_results, enable_target_feature="sse")
_mm_min_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := minss(a, b)
	return result
}
@(require_results, enable_target_feature="sse")
_mm_min_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := minps(a, b)
	return result
}

@(require_results, enable_target_feature="sse")
_mm_max_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := maxss(a, b)
	return result
}
@(require_results, enable_target_feature="sse")
_mm_max_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := maxps(a, b)
	return result
}
|
||||
|
||||
// Bitwise wrappers: both operands are reinterpreted as __m128i, combined with the
// corresponding core:simd operation, and the result is reinterpreted as __m128.

@(require_results, enable_target_feature="sse")
_mm_and_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	lhs := transmute(__m128i)a
	rhs := transmute(__m128i)b
	return transmute(__m128)simd.and(lhs, rhs)
}
@(require_results, enable_target_feature="sse")
_mm_andnot_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	lhs := transmute(__m128i)a
	rhs := transmute(__m128i)b
	return transmute(__m128)simd.and_not(lhs, rhs)
}
@(require_results, enable_target_feature="sse")
_mm_or_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	lhs := transmute(__m128i)a
	rhs := transmute(__m128i)b
	return transmute(__m128)simd.or(lhs, rhs)
}
@(require_results, enable_target_feature="sse")
_mm_xor_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	lhs := transmute(__m128i)a
	rhs := transmute(__m128i)b
	return transmute(__m128)simd.xor(lhs, rhs)
}
|
||||
|
||||
|
||||
// Scalar compare wrappers built on cmpss(x, y, predicate).
// The gt/ge/ngt/nge variants call cmpss with the operands swapped (b, a) and then
// shuffle: index 4 selects lane 0 of the comparison result and indices 1, 2, 3
// select the remaining lanes of `a`.

@(require_results, enable_target_feature="sse")
_mm_cmpeq_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 0
	return cmpss(a, b, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmplt_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 1
	return cmpss(a, b, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmple_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 2
	return cmpss(a, b, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpgt_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 1
	swapped := cmpss(b, a, PREDICATE)
	return simd.shuffle(a, swapped, 4, 1, 2, 3)
}
@(require_results, enable_target_feature="sse")
_mm_cmpge_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 2
	swapped := cmpss(b, a, PREDICATE)
	return simd.shuffle(a, swapped, 4, 1, 2, 3)
}
@(require_results, enable_target_feature="sse")
_mm_cmpneq_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 4
	return cmpss(a, b, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpnlt_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 5
	return cmpss(a, b, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpnle_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 6
	return cmpss(a, b, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpngt_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 5
	swapped := cmpss(b, a, PREDICATE)
	return simd.shuffle(a, swapped, 4, 1, 2, 3)
}
@(require_results, enable_target_feature="sse")
_mm_cmpnge_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 6
	swapped := cmpss(b, a, PREDICATE)
	return simd.shuffle(a, swapped, 4, 1, 2, 3)
}
@(require_results, enable_target_feature="sse")
_mm_cmpord_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 7
	return cmpss(a, b, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpunord_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 3
	return cmpss(a, b, PREDICATE)
}
|
||||
|
||||
|
||||
// Packed compare wrappers built on cmpps(x, y, predicate).
// Note the operand order: gt/ge/ngt/nge/ord/unord pass (b, a); the others pass (a, b).

@(require_results, enable_target_feature="sse")
_mm_cmpeq_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 0
	return cmpps(a, b, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmplt_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 1
	return cmpps(a, b, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmple_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 2
	return cmpps(a, b, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpgt_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 1
	return cmpps(b, a, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpge_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 2
	return cmpps(b, a, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpneq_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 4
	return cmpps(a, b, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpnlt_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 5
	return cmpps(a, b, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpnle_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 6
	return cmpps(a, b, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpngt_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 5
	return cmpps(b, a, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpnge_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 6
	return cmpps(b, a, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpord_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 7
	return cmpps(b, a, PREDICATE)
}
@(require_results, enable_target_feature="sse")
_mm_cmpunord_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	PREDICATE :: 3
	return cmpps(b, a, PREDICATE)
}
|
||||
|
||||
|
||||
// comi* wrappers: each forwards (a, b) to the like-named comi*_ss procedure and
// returns its b32 result.

@(require_results, enable_target_feature="sse")
_mm_comieq_ss :: #force_inline proc "c" (a, b: __m128) -> b32 {
	outcome := comieq_ss(a, b)
	return outcome
}
@(require_results, enable_target_feature="sse")
_mm_comilt_ss :: #force_inline proc "c" (a, b: __m128) -> b32 {
	outcome := comilt_ss(a, b)
	return outcome
}
@(require_results, enable_target_feature="sse")
_mm_comile_ss :: #force_inline proc "c" (a, b: __m128) -> b32 {
	outcome := comile_ss(a, b)
	return outcome
}
@(require_results, enable_target_feature="sse")
_mm_comigt_ss :: #force_inline proc "c" (a, b: __m128) -> b32 {
	outcome := comigt_ss(a, b)
	return outcome
}
@(require_results, enable_target_feature="sse")
_mm_comige_ss :: #force_inline proc "c" (a, b: __m128) -> b32 {
	outcome := comige_ss(a, b)
	return outcome
}
@(require_results, enable_target_feature="sse")
_mm_comineq_ss :: #force_inline proc "c" (a, b: __m128) -> b32 {
	outcome := comineq_ss(a, b)
	return outcome
}
|
||||
|
||||
// ucomi* wrappers: each forwards (a, b) to the like-named ucomi*_ss procedure and
// returns its b32 result.

@(require_results, enable_target_feature="sse")
_mm_ucomieq_ss :: #force_inline proc "c" (a, b: __m128) -> b32 {
	outcome := ucomieq_ss(a, b)
	return outcome
}
@(require_results, enable_target_feature="sse")
_mm_ucomilt_ss :: #force_inline proc "c" (a, b: __m128) -> b32 {
	outcome := ucomilt_ss(a, b)
	return outcome
}
@(require_results, enable_target_feature="sse")
_mm_ucomile_ss :: #force_inline proc "c" (a, b: __m128) -> b32 {
	outcome := ucomile_ss(a, b)
	return outcome
}
@(require_results, enable_target_feature="sse")
_mm_ucomigt_ss :: #force_inline proc "c" (a, b: __m128) -> b32 {
	outcome := ucomigt_ss(a, b)
	return outcome
}
@(require_results, enable_target_feature="sse")
_mm_ucomige_ss :: #force_inline proc "c" (a, b: __m128) -> b32 {
	outcome := ucomige_ss(a, b)
	return outcome
}
@(require_results, enable_target_feature="sse")
_mm_ucomineq_ss :: #force_inline proc "c" (a, b: __m128) -> b32 {
	outcome := ucomineq_ss(a, b)
	return outcome
}
|
||||
|
||||
// Returns the i32 produced by cvtss2si(a).
@(require_results, enable_target_feature="sse")
_mm_cvtss_si32 :: #force_inline proc "c" (a: __m128) -> i32 {
	converted := cvtss2si(a)
	return converted
}
|
||||
// Alias: _mm_cvt_ss2si is another name for _mm_cvtss_si32.
_mm_cvt_ss2si :: _mm_cvtss_si32
|
||||
// Truncating scalar f32 -> i32 conversion of `a`.
//
// This used to be a plain alias of _mm_cvtss_si32, which calls cvtss2si (the form
// without the extra `t`). The 64-bit sibling _mm_cvttss_si64 in this package calls
// the distinct cvttss2si64, so the 32-bit variant is given its own body that calls
// the matching truncating conversion instead of sharing the non-truncating one.
//
// NOTE(review): assumes a `cvttss2si :: proc(a: __m128) -> i32` foreign binding
// exists alongside `cvtss2si` in this package - confirm before merging.
@(require_results, enable_target_feature="sse")
_mm_cvttss_si32 :: #force_inline proc "c" (a: __m128) -> i32 {
	return cvttss2si(a)
}
|
||||
|
||||
// Returns lane 0 of `a`, read with simd.extract.
@(require_results, enable_target_feature="sse")
_mm_cvtss_f32 :: #force_inline proc "c" (a: __m128) -> f32 {
	lane0 := simd.extract(a, 0)
	return lane0
}
|
||||
|
||||
// Returns the vector produced by cvtsi2ss(a, b).
@(require_results, enable_target_feature="sse")
_mm_cvtsi32_ss :: #force_inline proc "c" (a: __m128, b: i32) -> __m128 {
	converted := cvtsi2ss(a, b)
	return converted
}
|
||||
// Alias: _mm_cvt_si2ss is another name for _mm_cvtsi32_ss.
_mm_cvt_si2ss :: _mm_cvtsi32_ss
|
||||
|
||||
|
||||
// Builds a vector with `a` in lane 0 and zero in lanes 1..3.
@(require_results, enable_target_feature="sse")
_mm_set_ss :: #force_inline proc "c" (a: f32) -> __m128 {
	vec := __m128{a, 0, 0, 0}
	return vec
}
// Converts the scalar `a` to __m128 via a type conversion.
@(require_results, enable_target_feature="sse")
_mm_set1_ps :: #force_inline proc "c" (a: f32) -> __m128 {
	vec := __m128(a)
	return vec
}
|
||||
// Alias: _mm_set_ps1 is another name for _mm_set1_ps.
_mm_set_ps1 :: _mm_set1_ps
|
||||
|
||||
// Builds a vector from four scalars in reversed order: lane 0 = d, lane 3 = a.
@(require_results, enable_target_feature="sse")
_mm_set_ps :: #force_inline proc "c" (a, b, c, d: f32) -> __m128 {
	vec := __m128{d, c, b, a}
	return vec
}
// Builds a vector from four scalars in argument order: lane 0 = a, lane 3 = d.
@(require_results, enable_target_feature="sse")
_mm_setr_ps :: #force_inline proc "c" (a, b, c, d: f32) -> __m128 {
	vec := __m128{a, b, c, d}
	return vec
}

// Returns a vector whose four lanes are all zero.
@(require_results, enable_target_feature="sse")
_mm_setzero_ps :: #force_inline proc "c" () -> __m128 {
	zero := __m128{0, 0, 0, 0}
	return zero
}
|
||||
|
||||
@(enable_target_feature="sse")
|
||||
@(require_results, enable_target_feature="sse")
|
||||
_mm_shuffle_ps :: #force_inline proc "c" (a, b: __m128, $MASK: u32) -> __m128 {
|
||||
return simd.shuffle(
|
||||
a, b,
|
||||
@@ -346,58 +346,58 @@ _mm_shuffle_ps :: #force_inline proc "c" (a, b: __m128, $MASK: u32) -> __m128 {
|
||||
}
|
||||
|
||||
|
||||
// Lane-rearranging wrappers over simd.shuffle(a, b, ...). In the index lists,
// 0..3 pick lanes of `a` and 4..7 pick lanes of `b`.

// Result lanes: a[2], b[2], a[3], b[3].
@(require_results, enable_target_feature="sse")
_mm_unpackhi_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	interleaved := simd.shuffle(a, b, 2, 6, 3, 7)
	return interleaved
}
// Result lanes: a[0], b[0], a[1], b[1].
@(require_results, enable_target_feature="sse")
_mm_unpacklo_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	interleaved := simd.shuffle(a, b, 0, 4, 1, 5)
	return interleaved
}

// Result lanes: b[2], b[3], a[2], a[3].
@(require_results, enable_target_feature="sse")
_mm_movehl_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	combined := simd.shuffle(a, b, 6, 7, 2, 3)
	return combined
}
// Result lanes: a[0], a[1], b[0], b[1].
@(require_results, enable_target_feature="sse")
_mm_movelh_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	combined := simd.shuffle(a, b, 0, 1, 4, 5)
	return combined
}
|
||||
|
||||
// Returns the u32 produced by movmskps(a).
@(require_results, enable_target_feature="sse")
_mm_movemask_ps :: #force_inline proc "c" (a: __m128) -> u32 {
	mask := movmskps(a)
	return mask
}
|
||||
|
||||
// Reads one f32 through `p` and places it in lane 0; lanes 1..3 are zero.
@(require_results, enable_target_feature="sse")
_mm_load_ss :: #force_inline proc "c" (p: ^f32) -> __m128 {
	vec := __m128{p^, 0, 0, 0}
	return vec
}
// Reads one f32 through `p` and converts that scalar to __m128.
@(require_results, enable_target_feature="sse")
_mm_load1_ps :: #force_inline proc "c" (p: ^f32) -> __m128 {
	scalar := p^
	return __m128(scalar)
}
|
||||
// Alias: _mm_load_ps1 is another name for _mm_load1_ps.
_mm_load_ps1 :: _mm_load1_ps
|
||||
|
||||
// Reinterprets `p` as a pointer to __m128 and dereferences it directly.
// NOTE(review): a direct vector dereference presumably requires `p` to be suitably
// aligned for __m128 - confirm against callers; _mm_loadu_ps copies bytes instead.
@(require_results, enable_target_feature="sse")
_mm_load_ps :: #force_inline proc "c" (p: [^]f32) -> __m128 {
	vec_ptr := (^__m128)(p)
	return vec_ptr^
}

// Copies size_of(__m128) bytes from `p` into a local vector with
// intrinsics.mem_copy_non_overlapping and returns that vector.
@(require_results, enable_target_feature="sse")
_mm_loadu_ps :: #force_inline proc "c" (p: [^]f32) -> __m128 {
	loaded := _mm_undefined_ps()
	intrinsics.mem_copy_non_overlapping(&loaded, p, size_of(__m128))
	return loaded
}

// Loads with _mm_load_ps and returns the lanes in reverse order.
@(require_results, enable_target_feature="sse")
_mm_loadr_ps :: #force_inline proc "c" (p: [^]f32) -> __m128 {
	forward := _mm_load_ps(p)
	return simd.lanes_reverse(forward)
}
|
||||
|
||||
@(enable_target_feature="sse")
|
||||
@(require_results, enable_target_feature="sse")
|
||||
_mm_loadu_si64 :: #force_inline proc "c" (mem_addr: rawptr) -> __m128i {
|
||||
a := intrinsics.unaligned_load((^i64)(mem_addr))
|
||||
return __m128i{a, 0}
|
||||
@@ -431,7 +431,7 @@ _mm_storer_ps :: #force_inline proc "c" (p: [^]f32, a: __m128) {
|
||||
}
|
||||
|
||||
|
||||
// Result lanes: b[0], a[1], a[2], a[3] (shuffle index 4 selects lane 0 of `b`).
@(require_results, enable_target_feature="sse")
_mm_move_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	merged := simd.shuffle(a, b, 4, 1, 2, 3)
	return merged
}
|
||||
@@ -441,7 +441,7 @@ _mm_sfence :: #force_inline proc "c" () {
|
||||
sfence()
|
||||
}
|
||||
|
||||
@(enable_target_feature="sse")
|
||||
@(require_results, enable_target_feature="sse")
|
||||
_mm_getcsr :: #force_inline proc "c" () -> (result: u32) {
|
||||
stmxcsr(&result)
|
||||
return result
|
||||
@@ -453,19 +453,19 @@ _mm_setcsr :: #force_inline proc "c" (val: u32) {
|
||||
ldmxcsr(&val)
|
||||
}
|
||||
|
||||
// Each getter reads the value returned by _mm_getcsr() and keeps only the bits
// selected by the corresponding _MM_*_MASK constant.

@(require_results, enable_target_feature="sse")
_MM_GET_EXCEPTION_MASK :: #force_inline proc "c" () -> u32 {
	csr := _mm_getcsr()
	return csr & _MM_MASK_MASK
}
@(require_results, enable_target_feature="sse")
_MM_GET_EXCEPTION_STATE :: #force_inline proc "c" () -> u32 {
	csr := _mm_getcsr()
	return csr & _MM_EXCEPT_MASK
}
@(require_results, enable_target_feature="sse")
_MM_GET_FLUSH_ZERO_MODE :: #force_inline proc "c" () -> u32 {
	csr := _mm_getcsr()
	return csr & _MM_FLUSH_ZERO_MASK
}
@(require_results, enable_target_feature="sse")
_MM_GET_ROUNDING_MODE :: #force_inline proc "c" () -> u32 {
	csr := _mm_getcsr()
	return csr & _MM_ROUND_MASK
}
|
||||
@@ -493,7 +493,7 @@ _mm_prefetch :: #force_inline proc "c" (p: rawptr, $STRATEGY: u32) {
|
||||
}
|
||||
|
||||
|
||||
// Despite the name, this returns a defined value: the result of _mm_set1_ps(0).
@(require_results, enable_target_feature="sse")
_mm_undefined_ps :: #force_inline proc "c" () -> __m128 {
	placeholder := _mm_set1_ps(0)
	return placeholder
}
|
||||
@@ -517,15 +517,15 @@ _mm_stream_ps :: #force_inline proc "c" (addr: [^]f32, a: __m128) {
|
||||
}
|
||||
|
||||
when ODIN_ARCH == .amd64 {
|
||||
// 64-bit conversion wrappers; each forwards to the like-named procedure
// (cvtss2si64, cvttss2si64, cvtsi642ss).

@(require_results, enable_target_feature="sse")
_mm_cvtss_si64 :: #force_inline proc "c"(a: __m128) -> i64 {
	converted := cvtss2si64(a)
	return converted
}
@(require_results, enable_target_feature="sse")
_mm_cvttss_si64 :: #force_inline proc "c"(a: __m128) -> i64 {
	converted := cvttss2si64(a)
	return converted
}
@(require_results, enable_target_feature="sse")
_mm_cvtsi64_ss :: #force_inline proc "c"(a: __m128, b: i64) -> __m128 {
	converted := cvtsi642ss(a, b)
	return converted
}
|
||||
|
||||
+203
-203
File diff suppressed because it is too large
Load Diff
+11
-11
@@ -4,47 +4,47 @@ package simd_x86
|
||||
import "core:intrinsics"
|
||||
import "core:simd"
|
||||
|
||||
// SSE3 two-operand wrappers; each forwards (a, b) to the like-named procedure
// (addsubps, addsubpd, haddpd, haddps, hsubpd, hsubps).

@(require_results, enable_target_feature="sse3")
_mm_addsub_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := addsubps(a, b)
	return result
}
@(require_results, enable_target_feature="sse3")
_mm_addsub_pd :: #force_inline proc "c" (a: __m128d, b: __m128d) -> __m128d {
	result := addsubpd(a, b)
	return result
}
@(require_results, enable_target_feature="sse3")
_mm_hadd_pd :: #force_inline proc "c" (a: __m128d, b: __m128d) -> __m128d {
	result := haddpd(a, b)
	return result
}
@(require_results, enable_target_feature="sse3")
_mm_hadd_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := haddps(a, b)
	return result
}
@(require_results, enable_target_feature="sse3")
_mm_hsub_pd :: #force_inline proc "c" (a: __m128d, b: __m128d) -> __m128d {
	result := hsubpd(a, b)
	return result
}
@(require_results, enable_target_feature="sse3")
_mm_hsub_ps :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := hsubps(a, b)
	return result
}
|
||||
// Calls lddqu(mem_addr) and reinterprets the returned value as __m128i.
@(require_results, enable_target_feature="sse3")
_mm_lddqu_si128 :: #force_inline proc "c" (mem_addr: ^__m128i) -> __m128i {
	raw := lddqu(mem_addr)
	return transmute(__m128i)raw
}
|
||||
// Result lanes: a[0], a[0].
@(require_results, enable_target_feature="sse3")
_mm_movedup_pd :: #force_inline proc "c" (a: __m128d) -> __m128d {
	duplicated := simd.shuffle(a, a, 0, 0)
	return duplicated
}
// Delegates to _mm_load1_pd(mem_addr).
@(require_results, enable_target_feature="sse3")
_mm_loaddup_pd :: #force_inline proc "c" (mem_addr: [^]f64) -> __m128d {
	loaded := _mm_load1_pd(mem_addr)
	return loaded
}
// Result lanes: a[1], a[1], a[3], a[3].
@(require_results, enable_target_feature="sse3")
_mm_movehdup_ps :: #force_inline proc "c" (a: __m128) -> __m128 {
	duplicated := simd.shuffle(a, a, 1, 1, 3, 3)
	return duplicated
}
// Result lanes: a[0], a[0], a[2], a[2].
@(require_results, enable_target_feature="sse3")
_mm_moveldup_ps :: #force_inline proc "c" (a: __m128) -> __m128 {
	duplicated := simd.shuffle(a, a, 0, 0, 2, 2)
	return duplicated
}
|
||||
|
||||
+60
-60
@@ -20,271 +20,271 @@ _MM_FROUND_NEARBYINT :: _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION
|
||||
|
||||
|
||||
|
||||
// SSE4.1 blend wrappers. The *_epi* variants reinterpret their __m128i operands
// to the lane type the underlying procedure takes and reinterpret the result back;
// the floating-point variants forward their arguments unchanged.

@(require_results, enable_target_feature="sse4.1")
_mm_blendv_epi8 :: #force_inline proc "c" (a, b, mask: __m128i) -> __m128i {
	lhs := transmute(i8x16)a
	rhs := transmute(i8x16)b
	selector := transmute(i8x16)mask
	return transmute(__m128i)pblendvb(lhs, rhs, selector)
}
@(require_results, enable_target_feature="sse4.1")
_mm_blend_epi16 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u8) -> __m128i {
	lhs := transmute(i16x8)a
	rhs := transmute(i16x8)b
	return transmute(__m128i)pblendw(lhs, rhs, IMM8)
}
@(require_results, enable_target_feature="sse4.1")
_mm_blendv_pd :: #force_inline proc "c" (a, b, mask: __m128d) -> __m128d {
	blended := blendvpd(a, b, mask)
	return blended
}
@(require_results, enable_target_feature="sse4.1")
_mm_blendv_ps :: #force_inline proc "c" (a, b, mask: __m128) -> __m128 {
	blended := blendvps(a, b, mask)
	return blended
}
@(require_results, enable_target_feature="sse4.1")
_mm_blend_pd :: #force_inline proc "c" (a, b: __m128d, $IMM2: u8) -> __m128d {
	blended := blendpd(a, b, IMM2)
	return blended
}
@(require_results, enable_target_feature="sse4.1")
_mm_blend_ps :: #force_inline proc "c" (a, b: __m128, $IMM4: u8) -> __m128 {
	blended := blendps(a, b, IMM4)
	return blended
}
|
||||
// Extracts lane IMM8 of `a` and returns that f32's bits reinterpreted as i32.
@(require_results, enable_target_feature="sse4.1")
_mm_extract_ps :: #force_inline proc "c" (a: __m128, $IMM8: u32) -> i32 {
	lane := simd.extract(a, IMM8)
	return transmute(i32)lane
}
// Views `a` as sixteen u8 lanes, extracts lane IMM8 and converts it to i32.
@(require_results, enable_target_feature="sse4.1")
_mm_extract_epi8 :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> i32 {
	bytes := transmute(u8x16)a
	return i32(simd.extract(bytes, IMM8))
}
// Views `a` as four i32 lanes and returns lane IMM8.
@(require_results, enable_target_feature="sse4.1")
_mm_extract_epi32 :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> i32 {
	words := transmute(i32x4)a
	return simd.extract(words, IMM8)
}
|
||||
// Forwards (a, b, IMM8) to insertps.
@(require_results, enable_target_feature="sse4.1")
_mm_insert_ps :: #force_inline proc "c" (a, b: __m128, $IMM8: u8) -> __m128 {
	inserted := insertps(a, b, IMM8)
	return inserted
}
// Views `a` as sixteen i8 lanes and replaces lane IMM8 with `i` converted to i8.
@(require_results, enable_target_feature="sse4.1")
_mm_insert_epi8 :: #force_inline proc "c" (a: __m128i, i: i32, $IMM8: u32) -> __m128i {
	bytes := transmute(i8x16)a
	return transmute(__m128i)simd.replace(bytes, IMM8, i8(i))
}
// Views `a` as four i32 lanes and replaces lane IMM8 with `i`.
@(require_results, enable_target_feature="sse4.1")
_mm_insert_epi32 :: #force_inline proc "c" (a: __m128i, i: i32, $IMM8: u32) -> __m128i {
	words := transmute(i32x4)a
	return transmute(__m128i)simd.replace(words, IMM8, i)
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
@(require_results, enable_target_feature="sse4.1")
|
||||
_mm_max_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
|
||||
return transmute(__m128i)pmaxsb(transmute(i8x16)a, transmute(i8x16)b)
|
||||
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
@(require_results, enable_target_feature="sse4.1")
|
||||
_mm_max_epu16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
|
||||
return transmute(__m128i)pmaxuw(transmute(u16x8)a, transmute(u16x8)b)
|
||||
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
@(require_results, enable_target_feature="sse4.1")
|
||||
_mm_max_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
|
||||
return transmute(__m128i)pmaxsd(transmute(i32x4)a, transmute(i32x4)b)
|
||||
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
@(require_results, enable_target_feature="sse4.1")
|
||||
_mm_max_epu32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
|
||||
return transmute(__m128i)pmaxud(transmute(u32x4)a, transmute(u32x4)b)
|
||||
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
@(require_results, enable_target_feature="sse4.1")
|
||||
_mm_min_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
|
||||
return transmute(__m128i)pminsb(transmute(i8x16)a, transmute(i8x16)b)
|
||||
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
@(require_results, enable_target_feature="sse4.1")
|
||||
_mm_min_epu16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
|
||||
return transmute(__m128i)pminuw(transmute(u16x8)a, transmute(u16x8)b)
|
||||
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards to `pminsd` with both operands reinterpreted as i32x4; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="sse4.1")
_mm_min_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	return transmute(__m128i)pminsd(lhs, rhs)
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards to `pminud` with both operands reinterpreted as u32x4; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="sse4.1")
_mm_min_epu32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(u32x4)a
	rhs := transmute(u32x4)b
	return transmute(__m128i)pminud(lhs, rhs)
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards to `packusdw` with both operands reinterpreted as i32x4; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="sse4.1")
_mm_packus_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	return transmute(__m128i)packusdw(lhs, rhs)
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Compares `a` and `b` lane by lane as i64x2 via `simd.lanes_eq` and
// returns the comparison result reinterpreted as __m128i.
@(require_results, enable_target_feature="sse4.1")
_mm_cmpeq_epi64 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i64x2)a
	rhs := transmute(i64x2)b
	return transmute(__m128i)simd.lanes_eq(lhs, rhs)
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as i8x16, selects lanes 0..7 and converts them to i16x8.
@(require_results, enable_target_feature="sse4.1")
_mm_cvtepi8_epi16 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	src := transmute(i8x16)a
	low := simd.shuffle(src, src, 0, 1, 2, 3, 4, 5, 6, 7)
	wide := i16x8(low)
	return transmute(__m128i)wide
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as i8x16, selects lanes 0..3 and converts them to i32x4.
@(require_results, enable_target_feature="sse4.1")
_mm_cvtepi8_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	src := transmute(i8x16)a
	low := simd.shuffle(src, src, 0, 1, 2, 3)
	wide := i32x4(low)
	return transmute(__m128i)wide
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as i8x16, selects lanes 0..1 and converts them to i64x2.
@(require_results, enable_target_feature="sse4.1")
_mm_cvtepi8_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	src := transmute(i8x16)a
	low := simd.shuffle(src, src, 0, 1)
	wide := i64x2(low)
	return transmute(__m128i)wide
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as i16x8, selects lanes 0..3 and converts them to i32x4.
@(require_results, enable_target_feature="sse4.1")
_mm_cvtepi16_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	src := transmute(i16x8)a
	low := simd.shuffle(src, src, 0, 1, 2, 3)
	wide := i32x4(low)
	return transmute(__m128i)wide
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as i16x8, selects lanes 0..1 and converts them to i64x2.
@(require_results, enable_target_feature="sse4.1")
_mm_cvtepi16_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	src := transmute(i16x8)a
	low := simd.shuffle(src, src, 0, 1)
	wide := i64x2(low)
	return transmute(__m128i)wide
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as i32x4, selects lanes 0..1 and converts them to i64x2.
@(require_results, enable_target_feature="sse4.1")
_mm_cvtepi32_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	src := transmute(i32x4)a
	low := simd.shuffle(src, src, 0, 1)
	wide := i64x2(low)
	return transmute(__m128i)wide
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as u8x16 (unsigned source lanes), selects lanes 0..7 and
// converts them to i16x8.
@(require_results, enable_target_feature="sse4.1")
_mm_cvtepu8_epi16 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	src := transmute(u8x16)a
	low := simd.shuffle(src, src, 0, 1, 2, 3, 4, 5, 6, 7)
	wide := i16x8(low)
	return transmute(__m128i)wide
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as u8x16 (unsigned source lanes), selects lanes 0..3 and
// converts them to i32x4.
@(require_results, enable_target_feature="sse4.1")
_mm_cvtepu8_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	src := transmute(u8x16)a
	low := simd.shuffle(src, src, 0, 1, 2, 3)
	wide := i32x4(low)
	return transmute(__m128i)wide
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as u8x16 (unsigned source lanes), selects lanes 0..1 and
// converts them to i64x2.
@(require_results, enable_target_feature="sse4.1")
_mm_cvtepu8_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	src := transmute(u8x16)a
	low := simd.shuffle(src, src, 0, 1)
	wide := i64x2(low)
	return transmute(__m128i)wide
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as u16x8 (unsigned source lanes), selects lanes 0..3 and
// converts them to i32x4.
@(require_results, enable_target_feature="sse4.1")
_mm_cvtepu16_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	src := transmute(u16x8)a
	low := simd.shuffle(src, src, 0, 1, 2, 3)
	wide := i32x4(low)
	return transmute(__m128i)wide
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as u16x8 (unsigned source lanes), selects lanes 0..1 and
// converts them to i64x2.
@(require_results, enable_target_feature="sse4.1")
_mm_cvtepu16_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	src := transmute(u16x8)a
	low := simd.shuffle(src, src, 0, 1)
	wide := i64x2(low)
	return transmute(__m128i)wide
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as u32x4 (unsigned source lanes), selects lanes 0..1 and
// converts them to i64x2.
@(require_results, enable_target_feature="sse4.1")
_mm_cvtepu32_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	src := transmute(u32x4)a
	low := simd.shuffle(src, src, 0, 1)
	wide := i64x2(low)
	return transmute(__m128i)wide
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards `a`, `b` and the compile-time constant `IMM8` to `dppd`
// and returns its result unchanged.
@(require_results, enable_target_feature="sse4.1")
_mm_dp_pd :: #force_inline proc "c" (a, b: __m128d, $IMM8: u8) -> __m128d {
	result := dppd(a, b, IMM8)
	return result
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards `a`, `b` and the compile-time constant `IMM8` to `dpps`
// and returns its result unchanged.
@(require_results, enable_target_feature="sse4.1")
_mm_dp_ps :: #force_inline proc "c" (a, b: __m128, $IMM8: u8) -> __m128 {
	result := dpps(a, b, IMM8)
	return result
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Applies `simd.floor` to the whole __m128d vector `a`.
@(require_results, enable_target_feature="sse4.1")
_mm_floor_pd :: #force_inline proc "c" (a: __m128d) -> __m128d {
	floored := simd.floor(a)
	return floored
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Applies `simd.floor` to the whole __m128 vector `a`.
@(require_results, enable_target_feature="sse4.1")
_mm_floor_ps :: #force_inline proc "c" (a: __m128) -> __m128 {
	floored := simd.floor(a)
	return floored
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Calls `roundsd` on `a` and `b` with the rounding mode fixed to
// `_MM_FROUND_FLOOR`.
@(require_results, enable_target_feature="sse4.1")
_mm_floor_sd :: #force_inline proc "c" (a, b: __m128d) -> __m128d {
	result := roundsd(a, b, _MM_FROUND_FLOOR)
	return result
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Calls `roundss` on `a` and `b` with the rounding mode fixed to
// `_MM_FROUND_FLOOR`.
@(require_results, enable_target_feature="sse4.1")
_mm_floor_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := roundss(a, b, _MM_FROUND_FLOOR)
	return result
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Applies `simd.ceil` to the whole __m128d vector `a`.
@(require_results, enable_target_feature="sse4.1")
_mm_ceil_pd :: #force_inline proc "c" (a: __m128d) -> __m128d {
	raised := simd.ceil(a)
	return raised
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Applies `simd.ceil` to the whole __m128 vector `a`.
@(require_results, enable_target_feature="sse4.1")
_mm_ceil_ps :: #force_inline proc "c" (a: __m128) -> __m128 {
	raised := simd.ceil(a)
	return raised
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Calls `roundsd` on `a` and `b` with the rounding mode fixed to
// `_MM_FROUND_CEIL`.
@(require_results, enable_target_feature="sse4.1")
_mm_ceil_sd :: #force_inline proc "c" (a, b: __m128d) -> __m128d {
	result := roundsd(a, b, _MM_FROUND_CEIL)
	return result
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Calls `roundss` on `a` and `b` with the rounding mode fixed to
// `_MM_FROUND_CEIL`.
@(require_results, enable_target_feature="sse4.1")
_mm_ceil_ss :: #force_inline proc "c" (a, b: __m128) -> __m128 {
	result := roundss(a, b, _MM_FROUND_CEIL)
	return result
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards `a` and the compile-time constant `ROUNDING` to `roundpd`.
@(require_results, enable_target_feature="sse4.1")
_mm_round_pd :: #force_inline proc "c" (a: __m128d, $ROUNDING: i32) -> __m128d {
	result := roundpd(a, ROUNDING)
	return result
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards `a` and the compile-time constant `ROUNDING` to `roundps`.
@(require_results, enable_target_feature="sse4.1")
_mm_round_ps :: #force_inline proc "c" (a: __m128, $ROUNDING: i32) -> __m128 {
	result := roundps(a, ROUNDING)
	return result
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards `a`, `b` and the compile-time constant `ROUNDING` to `roundsd`.
@(require_results, enable_target_feature="sse4.1")
_mm_round_sd :: #force_inline proc "c" (a, b: __m128d, $ROUNDING: i32) -> __m128d {
	result := roundsd(a, b, ROUNDING)
	return result
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards `a`, `b` and the compile-time constant `ROUNDING` to `roundss`.
@(require_results, enable_target_feature="sse4.1")
_mm_round_ss :: #force_inline proc "c" (a, b: __m128, $ROUNDING: i32) -> __m128 {
	result := roundss(a, b, ROUNDING)
	return result
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards to `phminposuw` with `a` reinterpreted as u16x8; the result
// is reinterpreted back to __m128i.
@(require_results, enable_target_feature="sse4.1")
_mm_minpos_epu16 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	lanes := transmute(u16x8)a
	return transmute(__m128i)phminposuw(lanes)
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards to `pmuldq` with both operands reinterpreted as i32x4; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="sse4.1")
_mm_mul_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	return transmute(__m128i)pmuldq(lhs, rhs)
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Multiplies `a` and `b` lane by lane as i32x4 via `simd.mul` and
// returns the product vector reinterpreted as __m128i.
@(require_results, enable_target_feature="sse4.1")
_mm_mullo_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	return transmute(__m128i)simd.mul(lhs, rhs)
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards to `mpsadbw` with both operands reinterpreted as u8x16 and
// the compile-time constant `IMM8`; the result is reinterpreted back
// to __m128i.
@(require_results, enable_target_feature="sse4.1")
_mm_mpsadbw_epu8 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u8) -> __m128i {
	lhs := transmute(u8x16)a
	rhs := transmute(u8x16)b
	return transmute(__m128i)mpsadbw(lhs, rhs, IMM8)
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards to `ptestz` with `a` and `mask` reinterpreted as i64x2 and
// returns its i32 result.
@(require_results, enable_target_feature="sse4.1")
_mm_testz_si128 :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 {
	value := transmute(i64x2)a
	bits := transmute(i64x2)mask
	return ptestz(value, bits)
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards to `ptestc` with `a` and `mask` reinterpreted as i64x2 and
// returns its i32 result.
@(require_results, enable_target_feature="sse4.1")
_mm_testc_si128 :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 {
	value := transmute(i64x2)a
	bits := transmute(i64x2)mask
	return ptestc(value, bits)
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Forwards to `ptestnzc` with `a` and `mask` reinterpreted as i64x2 and
// returns its i32 result.
@(require_results, enable_target_feature="sse4.1")
_mm_testnzc_si128 :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 {
	value := transmute(i64x2)a
	bits := transmute(i64x2)mask
	return ptestnzc(value, bits)
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Alias for `_mm_testz_si128`: passes `a` and `mask` straight through.
@(require_results, enable_target_feature="sse4.1")
_mm_test_all_zeros :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 {
	result := _mm_testz_si128(a, mask)
	return result
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Calls `_mm_testc_si128` on `a`, using `_mm_cmpeq_epi32(a, a)` (`a`
// compared with itself) as the mask operand.
@(require_results, enable_target_feature="sse4.1")
_mm_test_all_ones :: #force_inline proc "c" (a: __m128i) -> i32 {
	self_eq := _mm_cmpeq_epi32(a, a)
	return _mm_testc_si128(a, self_eq)
}
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Alias for `_mm_testnzc_si128`: passes `a` and `mask` straight through.
@(require_results, enable_target_feature="sse4.1")
_mm_test_mix_ones_zeros :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 {
	result := _mm_testnzc_si128(a, mask)
	return result
}
|
||||
|
||||
|
||||
when ODIN_ARCH == .amd64 {
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as i64x2 and returns lane `IMM1`.
// `IMM1` is a compile-time constant lane index.
@(require_results, enable_target_feature="sse4.1")
_mm_extract_epi64 :: #force_inline proc "c" (a: __m128i, $IMM1: u32) -> i64 {
	lanes := transmute(i64x2)a
	return simd.extract(lanes, IMM1)
}
|
||||
|
||||
@(enable_target_feature="sse4.1")
|
||||
// Views `a` as i64x2 and returns it with lane `IMM1` replaced by `i`.
// `IMM1` is a compile-time constant lane index.
@(require_results, enable_target_feature="sse4.1")
_mm_insert_epi64 :: #force_inline proc "c" (a: __m128i, i: i64, $IMM1: u32) -> __m128i {
	lanes := transmute(i64x2)a
	return transmute(__m128i)simd.replace(lanes, IMM1, i)
}
|
||||
|
||||
+16
-16
@@ -5,23 +5,23 @@ import "core:intrinsics"
|
||||
import "core:simd"
|
||||
_ :: simd
|
||||
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `pabsb128` with `a` reinterpreted as i8x16; the result is
// reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_abs_epi8 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	lanes := transmute(i8x16)a
	return transmute(__m128i)pabsb128(lanes)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `pabsw128` with `a` reinterpreted as i16x8; the result is
// reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_abs_epi16 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	lanes := transmute(i16x8)a
	return transmute(__m128i)pabsw128(lanes)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `pabsd128` with `a` reinterpreted as i32x4; the result is
// reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_abs_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i {
	lanes := transmute(i32x4)a
	return transmute(__m128i)pabsd128(lanes)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `pshufb128` with both operands reinterpreted as u8x16; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_shuffle_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(u8x16)a
	rhs := transmute(u8x16)b
	return transmute(__m128i)pshufb128(lhs, rhs)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
@(require_results, enable_target_feature="ssse3")
|
||||
_mm_alignr_epi8 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u32) -> __m128i {
|
||||
shift :: IMM8
|
||||
|
||||
@@ -58,47 +58,47 @@ _mm_alignr_epi8 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u32) -> __m128i
|
||||
}
|
||||
|
||||
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `phaddw128` with both operands reinterpreted as i16x8; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_hadd_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i16x8)a
	rhs := transmute(i16x8)b
	return transmute(__m128i)phaddw128(lhs, rhs)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `phaddsw128` with both operands reinterpreted as i16x8; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_hadds_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i16x8)a
	rhs := transmute(i16x8)b
	return transmute(__m128i)phaddsw128(lhs, rhs)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `phaddd128` with both operands reinterpreted as i32x4; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_hadd_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	return transmute(__m128i)phaddd128(lhs, rhs)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `phsubw128` with both operands reinterpreted as i16x8; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_hsub_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i16x8)a
	rhs := transmute(i16x8)b
	return transmute(__m128i)phsubw128(lhs, rhs)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `phsubsw128` with both operands reinterpreted as i16x8; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_hsubs_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i16x8)a
	rhs := transmute(i16x8)b
	return transmute(__m128i)phsubsw128(lhs, rhs)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `phsubd128` with both operands reinterpreted as i32x4; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_hsub_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	return transmute(__m128i)phsubd128(lhs, rhs)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `pmaddubsw128`. Note the asymmetric views: `a` is
// reinterpreted as u8x16 while `b` is reinterpreted as i8x16. The result
// is reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_maddubs_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	unsigned_lanes := transmute(u8x16)a
	signed_lanes := transmute(i8x16)b
	return transmute(__m128i)pmaddubsw128(unsigned_lanes, signed_lanes)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `pmulhrsw128` with both operands reinterpreted as i16x8; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_mulhrs_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i16x8)a
	rhs := transmute(i16x8)b
	return transmute(__m128i)pmulhrsw128(lhs, rhs)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `psignb128` with both operands reinterpreted as i8x16; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_sign_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i8x16)a
	rhs := transmute(i8x16)b
	return transmute(__m128i)psignb128(lhs, rhs)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `psignw128` with both operands reinterpreted as i16x8; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_sign_epi16 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i16x8)a
	rhs := transmute(i16x8)b
	return transmute(__m128i)psignw128(lhs, rhs)
}
|
||||
@(enable_target_feature="ssse3")
|
||||
// Forwards to `psignd128` with both operands reinterpreted as i32x4; the
// result is reinterpreted back to __m128i.
@(require_results, enable_target_feature="ssse3")
_mm_sign_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
	lhs := transmute(i32x4)a
	rhs := transmute(i32x4)b
	return transmute(__m128i)psignd128(lhs, rhs)
}
|
||||
|
||||
Reference in New Issue
Block a user