From ecee0e2db2107db7e7ea3242a3431d00a9392887 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Fri, 15 Dec 2023 17:06:55 +0900 Subject: [PATCH 1/4] repo: Add more test binaries to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 1f55b7ab7..91d8328da 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,8 @@ tests/core/test_linalg_glsl_math tests/core/test_noise tests/core/test_varint tests/core/test_xml +tests/core/test_core_slice +tests/core/test_core_thread tests/vendor/vendor_botan # Visual Studio 2015 cache/options directory .vs/ From 9235e8245193fb891e46ee4a7daa3e101ff8032a Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Fri, 15 Dec 2023 16:55:53 +0900 Subject: [PATCH 2/4] core/simd/x86: Correct a target feature name --- core/simd/x86/pclmulqdq.odin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/simd/x86/pclmulqdq.odin b/core/simd/x86/pclmulqdq.odin index 692fb7ce1..31ba58fe6 100644 --- a/core/simd/x86/pclmulqdq.odin +++ b/core/simd/x86/pclmulqdq.odin @@ -1,7 +1,7 @@ //+build i386, amd64 package simd_x86 -@(require_results, enable_target_feature="pclmulqdq") +@(require_results, enable_target_feature="pclmul") _mm_clmulepi64_si128 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u8) -> __m128i { return pclmulqdq(a, b, u8(IMM8)) } From cd65a15d81b32636c5097200446cc6d6afc7199b Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Fri, 15 Dec 2023 17:29:59 +0900 Subject: [PATCH 3/4] src: `enable_target_feature` should add features, not overwrite `llvm_features` being empty is the default state, and implies the presence of certain features. Previously if any target features were explicitly enabled by the `enable_target_feature` attribute, they were added comma separated to `llvm_features`. For example: `lzcnt,popcnt,...,sse4.2,sse` This was causing LLVM to try to target a CPU that *ONLY* has the explicitly enabled features. This now will prefix explicitly enabled features with a `+`, and preserve the existing `llvm_features` string by appending to it if it is set. --- src/build_settings.cpp | 4 +++- src/llvm_backend.cpp | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 18ad8ac0d..9d909fcae 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -1493,7 +1493,7 @@ gb_internal void enable_target_feature(TokenPos pos, String const &target_featur } -gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bool with_quotes) { +gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bool with_quotes, bool with_plus) { isize len = 0; isize i = 0; for (String const &feature : build_context.target_features_set) { @@ -1502,6 +1502,7 @@ gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bo } len += feature.len; if (with_quotes) len += 2; + if (with_plus) len += 1; i += 1; } char *features = gb_alloc_array(allocator, char, len+1); @@ -1513,6 +1514,7 @@ gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bo } if (with_quotes) features[len++] = '"'; + if (with_plus) features[len++] = '+'; gb_memmove(features + len, feature.text, feature.len); len += feature.len; if (with_quotes) features[len++] = '"'; diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index ca71a0f45..b90fd8495 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -2531,7 +2531,46 @@ gb_internal bool lb_generate_code(lbGenerator *gen) { */ if (build_context.target_features_set.entries.count != 0) { - llvm_features = target_features_set_to_cstring(permanent_allocator(), false); + // Prefix all of the features with a `+`, because we are + // enabling additional features. + char const *additional_features = target_features_set_to_cstring(permanent_allocator(), false, true); + + String f_string = make_string_c(llvm_features); + String a_string = make_string_c(additional_features); + isize f_len = f_string.len; + + if (f_len == 0) { + // The common case is that llvm_features is empty, so + // the target_features_set additions can be used as is. + llvm_features = additional_features; + } else { + // The user probably specified `-microarch:native`, so + // llvm_features is populated by LLVM's idea of what + // the host CPU supports. + // + // As far as I can tell, (which is barely better than + // wild guessing), a bitset is formed by parsing the + // string left to right. + // + // So, llvm_features + ',' + additonal_features, will + // makes the target_features_set override llvm_features. + + char *tmp = gb_alloc_array(permanent_allocator(), char, f_len + 1 + a_string.len + 1); + isize len = 0; + + // tmp = f_string + gb_memmove(tmp, f_string.text, f_string.len); + len += f_string.len; + // tmp += ',' + tmp[len++] = ','; + // tmp += a_string + gb_memmove(tmp + len, a_string.text, a_string.len); + len += a_string.len; + // tmp += NUL + tmp[len++] = 0; + + llvm_features = tmp; + } } // GB_ASSERT_MSG(LLVMTargetHasAsmBackend(target)); From 8d7c37e38430fa5d8a40939dec5657e6837c69c6 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Sat, 30 Dec 2023 04:49:28 +0900 Subject: [PATCH 4/4] core/simd/x86: Use the `none` calling convention for intrinsics The LLVM intrinsics that live under `llvm.x86` are not actual functions, so trying to invoke them as such using the platform's native C calling convention causes incorrect types to be emitted in the IR. Thanks to laytanl for assistance in testing. --- core/simd/x86/adx.odin | 2 +- core/simd/x86/fxsr.odin | 2 +- core/simd/x86/pclmulqdq.odin | 2 +- core/simd/x86/rdtsc.odin | 2 +- core/simd/x86/sha.odin | 2 +- core/simd/x86/sse.odin | 2 +- core/simd/x86/sse2.odin | 2 +- core/simd/x86/sse3.odin | 2 +- core/simd/x86/sse41.odin | 2 +- core/simd/x86/sse42.odin | 2 +- core/simd/x86/ssse3.odin | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/core/simd/x86/adx.odin b/core/simd/x86/adx.odin index d03cffcff..5750ae627 100644 --- a/core/simd/x86/adx.odin +++ b/core/simd/x86/adx.odin @@ -37,7 +37,7 @@ when ODIN_ARCH == .amd64 { } } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.addcarry.32") llvm_addcarry_u32 :: proc(a: u8, b: u32, c: u32) -> (u8, u32) --- diff --git a/core/simd/x86/fxsr.odin b/core/simd/x86/fxsr.odin index cd78de7d4..a9213fed2 100644 --- a/core/simd/x86/fxsr.odin +++ b/core/simd/x86/fxsr.odin @@ -21,7 +21,7 @@ when ODIN_ARCH == .amd64 { } } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.fxsave") fxsave :: proc(p: rawptr) --- diff --git a/core/simd/x86/pclmulqdq.odin b/core/simd/x86/pclmulqdq.odin index 31ba58fe6..e827bf6b9 100644 --- a/core/simd/x86/pclmulqdq.odin +++ b/core/simd/x86/pclmulqdq.odin @@ -6,7 +6,7 @@ _mm_clmulepi64_si128 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u8) -> __m return pclmulqdq(a, b, u8(IMM8)) } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.pclmulqdq") pclmulqdq :: proc(a, round_key: __m128i, #const imm8: u8) -> __m128i --- diff --git a/core/simd/x86/rdtsc.odin b/core/simd/x86/rdtsc.odin index 54024c3f2..8a8b13c4b 100644 --- a/core/simd/x86/rdtsc.odin +++ b/core/simd/x86/rdtsc.odin @@ -11,7 +11,7 @@ __rdtscp :: #force_inline proc "c" (aux: ^u32) -> u64 { return rdtscp(aux) } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.rdtsc") rdtsc :: proc() -> u64 --- diff --git a/core/simd/x86/sha.odin b/core/simd/x86/sha.odin index f015f4b8a..bc58e8504 100644 --- a/core/simd/x86/sha.odin +++ b/core/simd/x86/sha.odin @@ -30,7 +30,7 @@ _mm_sha256rnds2_epu32 :: #force_inline proc "c" (a, b, k: __m128i) -> __m128i { return transmute(__m128i)sha256rnds2(transmute(i32x4)a, transmute(i32x4)b, transmute(i32x4)k) } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.sha1msg1") sha1msg1 :: proc(a, b: i32x4) -> i32x4 --- diff --git a/core/simd/x86/sse.odin b/core/simd/x86/sse.odin index 2b70e954f..903a43dfd 100644 --- a/core/simd/x86/sse.odin +++ b/core/simd/x86/sse.odin @@ -532,7 +532,7 @@ when ODIN_ARCH == .amd64 { } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.sse.add.ss") addss :: proc(a, b: __m128) -> __m128 --- diff --git a/core/simd/x86/sse2.odin b/core/simd/x86/sse2.odin index dd292712f..a597122f1 100644 --- a/core/simd/x86/sse2.odin +++ b/core/simd/x86/sse2.odin @@ -1040,7 +1040,7 @@ when ODIN_ARCH == .amd64 { } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.sse2.pause") pause :: proc() --- diff --git a/core/simd/x86/sse3.odin b/core/simd/x86/sse3.odin index 7a3073c18..cf5f3b2fa 100644 --- a/core/simd/x86/sse3.odin +++ b/core/simd/x86/sse3.odin @@ -49,7 +49,7 @@ _mm_moveldup_ps :: #force_inline proc "c" (a: __m128) -> __m128 { return simd.shuffle(a, a, 0, 0, 2, 2) } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name = "llvm.x86.sse3.addsub.ps") addsubps :: proc(a, b: __m128) -> __m128 --- diff --git a/core/simd/x86/sse41.odin b/core/simd/x86/sse41.odin index b35be33f2..8c306ba4c 100644 --- a/core/simd/x86/sse41.odin +++ b/core/simd/x86/sse41.odin @@ -291,7 +291,7 @@ when ODIN_ARCH == .amd64 { } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name = "llvm.x86.sse41.pblendvb") pblendvb :: proc(a, b: i8x16, mask: i8x16) -> i8x16 --- diff --git a/core/simd/x86/sse42.odin b/core/simd/x86/sse42.odin index 62b4f0478..621346342 100644 --- a/core/simd/x86/sse42.odin +++ b/core/simd/x86/sse42.odin @@ -104,7 +104,7 @@ when ODIN_ARCH == .amd64 { } } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { // SSE 4.2 string and text comparison ops @(link_name="llvm.x86.sse42.pcmpestrm128") diff --git a/core/simd/x86/ssse3.odin b/core/simd/x86/ssse3.odin index f11ef6774..0264a1c93 100644 --- a/core/simd/x86/ssse3.odin +++ b/core/simd/x86/ssse3.odin @@ -105,7 +105,7 @@ _mm_sign_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name = "llvm.x86.ssse3.pabs.b.128") pabsb128 :: proc(a: i8x16) -> u8x16 ---