Merge pull request #3024 from Yawning/fix/simd-x86

core:simd/x86: Various fixes
This commit is contained in:
gingerBill
2024-01-07 11:57:22 +00:00
committed by GitHub
14 changed files with 57 additions and 14 deletions
+2
View File
@@ -47,6 +47,8 @@ tests/core/test_linalg_glsl_math
tests/core/test_noise
tests/core/test_varint
tests/core/test_xml
tests/core/test_core_slice
tests/core/test_core_thread
tests/vendor/vendor_botan
# Visual Studio 2015 cache/options directory
.vs/
+1 -1
View File
@@ -37,7 +37,7 @@ when ODIN_ARCH == .amd64 {
}
}
@(private, default_calling_convention="c")
@(private, default_calling_convention="none")
foreign _ {
@(link_name="llvm.x86.addcarry.32")
llvm_addcarry_u32 :: proc(a: u8, b: u32, c: u32) -> (u8, u32) ---
+1 -1
View File
@@ -21,7 +21,7 @@ when ODIN_ARCH == .amd64 {
}
}
@(private, default_calling_convention="c")
@(private, default_calling_convention="none")
foreign _ {
@(link_name="llvm.x86.fxsave")
fxsave :: proc(p: rawptr) ---
+2 -2
View File
@@ -1,12 +1,12 @@
//+build i386, amd64
package simd_x86
@(require_results, enable_target_feature="pclmulqdq")
@(require_results, enable_target_feature="pclmul")
_mm_clmulepi64_si128 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u8) -> __m128i {
// Forwards both operands and the IMM8 selector, converted with u8(...), to the
// private `pclmulqdq` binding and returns its result unchanged.
return pclmulqdq(a, b, u8(IMM8))
}
@(private, default_calling_convention="c")
@(private, default_calling_convention="none")
foreign _ {
@(link_name="llvm.x86.pclmulqdq")
pclmulqdq :: proc(a, round_key: __m128i, #const imm8: u8) -> __m128i ---
+1 -1
View File
@@ -11,7 +11,7 @@ __rdtscp :: #force_inline proc "c" (aux: ^u32) -> u64 {
return rdtscp(aux)
}
@(private, default_calling_convention="c")
@(private, default_calling_convention="none")
foreign _ {
@(link_name="llvm.x86.rdtsc")
rdtsc :: proc() -> u64 ---
+1 -1
View File
@@ -30,7 +30,7 @@ _mm_sha256rnds2_epu32 :: #force_inline proc "c" (a, b, k: __m128i) -> __m128i {
return transmute(__m128i)sha256rnds2(transmute(i32x4)a, transmute(i32x4)b, transmute(i32x4)k)
}
@(private, default_calling_convention="c")
@(private, default_calling_convention="none")
foreign _ {
@(link_name="llvm.x86.sha1msg1")
sha1msg1 :: proc(a, b: i32x4) -> i32x4 ---
+1 -1
View File
@@ -532,7 +532,7 @@ when ODIN_ARCH == .amd64 {
}
@(private, default_calling_convention="c")
@(private, default_calling_convention="none")
foreign _ {
@(link_name="llvm.x86.sse.add.ss")
addss :: proc(a, b: __m128) -> __m128 ---
+1 -1
View File
@@ -1040,7 +1040,7 @@ when ODIN_ARCH == .amd64 {
}
@(private, default_calling_convention="c")
@(private, default_calling_convention="none")
foreign _ {
@(link_name="llvm.x86.sse2.pause")
pause :: proc() ---
+1 -1
View File
@@ -49,7 +49,7 @@ _mm_moveldup_ps :: #force_inline proc "c" (a: __m128) -> __m128 {
return simd.shuffle(a, a, 0, 0, 2, 2)
}
@(private, default_calling_convention="c")
@(private, default_calling_convention="none")
foreign _ {
@(link_name = "llvm.x86.sse3.addsub.ps")
addsubps :: proc(a, b: __m128) -> __m128 ---
+1 -1
View File
@@ -291,7 +291,7 @@ when ODIN_ARCH == .amd64 {
}
@(private, default_calling_convention="c")
@(private, default_calling_convention="none")
foreign _ {
@(link_name = "llvm.x86.sse41.pblendvb")
pblendvb :: proc(a, b: i8x16, mask: i8x16) -> i8x16 ---
+1 -1
View File
@@ -104,7 +104,7 @@ when ODIN_ARCH == .amd64 {
}
}
@(private, default_calling_convention="c")
@(private, default_calling_convention="none")
foreign _ {
// SSE 4.2 string and text comparison ops
@(link_name="llvm.x86.sse42.pcmpestrm128")
+1 -1
View File
@@ -105,7 +105,7 @@ _mm_sign_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
@(private, default_calling_convention="c")
@(private, default_calling_convention="none")
foreign _ {
@(link_name = "llvm.x86.ssse3.pabs.b.128")
pabsb128 :: proc(a: i8x16) -> u8x16 ---
+3 -1
View File
@@ -1493,7 +1493,7 @@ gb_internal void enable_target_feature(TokenPos pos, String const &target_featur
}
gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bool with_quotes) {
gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bool with_quotes, bool with_plus) {
isize len = 0;
isize i = 0;
for (String const &feature : build_context.target_features_set) {
@@ -1502,6 +1502,7 @@ gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bo
}
len += feature.len;
if (with_quotes) len += 2;
if (with_plus) len += 1;
i += 1;
}
char *features = gb_alloc_array(allocator, char, len+1);
@@ -1513,6 +1514,7 @@ gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bo
}
if (with_quotes) features[len++] = '"';
if (with_plus) features[len++] = '+';
gb_memmove(features + len, feature.text, feature.len);
len += feature.len;
if (with_quotes) features[len++] = '"';
+40 -1
View File
@@ -2531,7 +2531,46 @@ gb_internal bool lb_generate_code(lbGenerator *gen) {
*/
if (build_context.target_features_set.entries.count != 0) {
llvm_features = target_features_set_to_cstring(permanent_allocator(), false);
// Prefix all of the features with a `+`, because we are
// enabling additional features.
char const *additional_features = target_features_set_to_cstring(permanent_allocator(), false, true);
String f_string = make_string_c(llvm_features);
String a_string = make_string_c(additional_features);
isize f_len = f_string.len;
if (f_len == 0) {
// The common case is that llvm_features is empty, so
// the target_features_set additions can be used as is.
llvm_features = additional_features;
} else {
// The user probably specified `-microarch:native`, so
// llvm_features is populated by LLVM's idea of what
// the host CPU supports.
//
// As far as I can tell (which is barely better than
// wild guessing), a bitset is formed by parsing the
// string left to right.
//
// So, llvm_features + ',' + additional_features will
// make the target_features_set override llvm_features.
char *tmp = gb_alloc_array(permanent_allocator(), char, f_len + 1 + a_string.len + 1);
isize len = 0;
// tmp = f_string
gb_memmove(tmp, f_string.text, f_string.len);
len += f_string.len;
// tmp += ','
tmp[len++] = ',';
// tmp += a_string
gb_memmove(tmp + len, a_string.text, a_string.len);
len += a_string.len;
// tmp += NUL
tmp[len++] = 0;
llvm_features = tmp;
}
}
// GB_ASSERT_MSG(LLVMTargetHasAsmBackend(target));