mirror of
https://github.com/Ed94/Odin.git
synced 2026-06-26 15:34:59 -07:00
Merge pull request #3024 from Yawning/fix/simd-x86
core:simd/x86: Various fixes
This commit is contained in:
@@ -47,6 +47,8 @@ tests/core/test_linalg_glsl_math
|
||||
tests/core/test_noise
|
||||
tests/core/test_varint
|
||||
tests/core/test_xml
|
||||
tests/core/test_core_slice
|
||||
tests/core/test_core_thread
|
||||
tests/vendor/vendor_botan
|
||||
# Visual Studio 2015 cache/options directory
|
||||
.vs/
|
||||
|
||||
@@ -37,7 +37,7 @@ when ODIN_ARCH == .amd64 {
|
||||
}
|
||||
}
|
||||
|
||||
@(private, default_calling_convention="c")
|
||||
@(private, default_calling_convention="none")
|
||||
foreign _ {
|
||||
@(link_name="llvm.x86.addcarry.32")
|
||||
llvm_addcarry_u32 :: proc(a: u8, b: u32, c: u32) -> (u8, u32) ---
|
||||
|
||||
@@ -21,7 +21,7 @@ when ODIN_ARCH == .amd64 {
|
||||
}
|
||||
}
|
||||
|
||||
@(private, default_calling_convention="c")
|
||||
@(private, default_calling_convention="none")
|
||||
foreign _ {
|
||||
@(link_name="llvm.x86.fxsave")
|
||||
fxsave :: proc(p: rawptr) ---
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
//+build i386, amd64
|
||||
package simd_x86
|
||||
|
||||
@(require_results, enable_target_feature="pclmulqdq")
|
||||
@(require_results, enable_target_feature="pclmul")
|
||||
_mm_clmulepi64_si128 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u8) -> __m128i {
|
||||
return pclmulqdq(a, b, u8(IMM8))
|
||||
}
|
||||
|
||||
@(private, default_calling_convention="c")
|
||||
@(private, default_calling_convention="none")
|
||||
foreign _ {
|
||||
@(link_name="llvm.x86.pclmulqdq")
|
||||
pclmulqdq :: proc(a, round_key: __m128i, #const imm8: u8) -> __m128i ---
|
||||
|
||||
@@ -11,7 +11,7 @@ __rdtscp :: #force_inline proc "c" (aux: ^u32) -> u64 {
|
||||
return rdtscp(aux)
|
||||
}
|
||||
|
||||
@(private, default_calling_convention="c")
|
||||
@(private, default_calling_convention="none")
|
||||
foreign _ {
|
||||
@(link_name="llvm.x86.rdtsc")
|
||||
rdtsc :: proc() -> u64 ---
|
||||
|
||||
@@ -30,7 +30,7 @@ _mm_sha256rnds2_epu32 :: #force_inline proc "c" (a, b, k: __m128i) -> __m128i {
|
||||
return transmute(__m128i)sha256rnds2(transmute(i32x4)a, transmute(i32x4)b, transmute(i32x4)k)
|
||||
}
|
||||
|
||||
@(private, default_calling_convention="c")
|
||||
@(private, default_calling_convention="none")
|
||||
foreign _ {
|
||||
@(link_name="llvm.x86.sha1msg1")
|
||||
sha1msg1 :: proc(a, b: i32x4) -> i32x4 ---
|
||||
|
||||
@@ -532,7 +532,7 @@ when ODIN_ARCH == .amd64 {
|
||||
}
|
||||
|
||||
|
||||
@(private, default_calling_convention="c")
|
||||
@(private, default_calling_convention="none")
|
||||
foreign _ {
|
||||
@(link_name="llvm.x86.sse.add.ss")
|
||||
addss :: proc(a, b: __m128) -> __m128 ---
|
||||
|
||||
@@ -1040,7 +1040,7 @@ when ODIN_ARCH == .amd64 {
|
||||
}
|
||||
|
||||
|
||||
@(private, default_calling_convention="c")
|
||||
@(private, default_calling_convention="none")
|
||||
foreign _ {
|
||||
@(link_name="llvm.x86.sse2.pause")
|
||||
pause :: proc() ---
|
||||
|
||||
@@ -49,7 +49,7 @@ _mm_moveldup_ps :: #force_inline proc "c" (a: __m128) -> __m128 {
|
||||
return simd.shuffle(a, a, 0, 0, 2, 2)
|
||||
}
|
||||
|
||||
@(private, default_calling_convention="c")
|
||||
@(private, default_calling_convention="none")
|
||||
foreign _ {
|
||||
@(link_name = "llvm.x86.sse3.addsub.ps")
|
||||
addsubps :: proc(a, b: __m128) -> __m128 ---
|
||||
|
||||
@@ -291,7 +291,7 @@ when ODIN_ARCH == .amd64 {
|
||||
}
|
||||
|
||||
|
||||
@(private, default_calling_convention="c")
|
||||
@(private, default_calling_convention="none")
|
||||
foreign _ {
|
||||
@(link_name = "llvm.x86.sse41.pblendvb")
|
||||
pblendvb :: proc(a, b: i8x16, mask: i8x16) -> i8x16 ---
|
||||
|
||||
@@ -104,7 +104,7 @@ when ODIN_ARCH == .amd64 {
|
||||
}
|
||||
}
|
||||
|
||||
@(private, default_calling_convention="c")
|
||||
@(private, default_calling_convention="none")
|
||||
foreign _ {
|
||||
// SSE 4.2 string and text comparison ops
|
||||
@(link_name="llvm.x86.sse42.pcmpestrm128")
|
||||
|
||||
@@ -105,7 +105,7 @@ _mm_sign_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
|
||||
|
||||
|
||||
|
||||
@(private, default_calling_convention="c")
|
||||
@(private, default_calling_convention="none")
|
||||
foreign _ {
|
||||
@(link_name = "llvm.x86.ssse3.pabs.b.128")
|
||||
pabsb128 :: proc(a: i8x16) -> u8x16 ---
|
||||
|
||||
@@ -1493,7 +1493,7 @@ gb_internal void enable_target_feature(TokenPos pos, String const &target_featur
|
||||
}
|
||||
|
||||
|
||||
gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bool with_quotes) {
|
||||
gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bool with_quotes, bool with_plus) {
|
||||
isize len = 0;
|
||||
isize i = 0;
|
||||
for (String const &feature : build_context.target_features_set) {
|
||||
@@ -1502,6 +1502,7 @@ gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bo
|
||||
}
|
||||
len += feature.len;
|
||||
if (with_quotes) len += 2;
|
||||
if (with_plus) len += 1;
|
||||
i += 1;
|
||||
}
|
||||
char *features = gb_alloc_array(allocator, char, len+1);
|
||||
@@ -1513,6 +1514,7 @@ gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bo
|
||||
}
|
||||
|
||||
if (with_quotes) features[len++] = '"';
|
||||
if (with_plus) features[len++] = '+';
|
||||
gb_memmove(features + len, feature.text, feature.len);
|
||||
len += feature.len;
|
||||
if (with_quotes) features[len++] = '"';
|
||||
|
||||
+40
-1
@@ -2531,7 +2531,46 @@ gb_internal bool lb_generate_code(lbGenerator *gen) {
|
||||
*/
|
||||
|
||||
if (build_context.target_features_set.entries.count != 0) {
|
||||
llvm_features = target_features_set_to_cstring(permanent_allocator(), false);
|
||||
// Prefix all of the features with a `+`, because we are
|
||||
// enabling additional features.
|
||||
char const *additional_features = target_features_set_to_cstring(permanent_allocator(), false, true);
|
||||
|
||||
String f_string = make_string_c(llvm_features);
|
||||
String a_string = make_string_c(additional_features);
|
||||
isize f_len = f_string.len;
|
||||
|
||||
if (f_len == 0) {
|
||||
// The common case is that llvm_features is empty, so
|
||||
// the target_features_set additions can be used as is.
|
||||
llvm_features = additional_features;
|
||||
} else {
|
||||
// The user probably specified `-microarch:native`, so
|
||||
// llvm_features is populated by LLVM's idea of what
|
||||
// the host CPU supports.
|
||||
//
|
||||
// As far as I can tell, (which is barely better than
|
||||
// wild guessing), a bitset is formed by parsing the
|
||||
// string left to right.
|
||||
//
|
||||
// So, llvm_features + ',' + additional_features, will
|
||||
// make the target_features_set override llvm_features.
|
||||
|
||||
char *tmp = gb_alloc_array(permanent_allocator(), char, f_len + 1 + a_string.len + 1);
|
||||
isize len = 0;
|
||||
|
||||
// tmp = f_string
|
||||
gb_memmove(tmp, f_string.text, f_string.len);
|
||||
len += f_string.len;
|
||||
// tmp += ','
|
||||
tmp[len++] = ',';
|
||||
// tmp += a_string
|
||||
gb_memmove(tmp + len, a_string.text, a_string.len);
|
||||
len += a_string.len;
|
||||
// tmp += NUL
|
||||
tmp[len++] = 0;
|
||||
|
||||
llvm_features = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
// GB_ASSERT_MSG(LLVMTargetHasAsmBackend(target));
|
||||
|
||||
Reference in New Issue
Block a user