diff --git a/.gitignore b/.gitignore index 1f55b7ab7..91d8328da 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,8 @@ tests/core/test_linalg_glsl_math tests/core/test_noise tests/core/test_varint tests/core/test_xml +tests/core/test_core_slice +tests/core/test_core_thread tests/vendor/vendor_botan # Visual Studio 2015 cache/options directory .vs/ diff --git a/core/simd/x86/adx.odin b/core/simd/x86/adx.odin index d03cffcff..5750ae627 100644 --- a/core/simd/x86/adx.odin +++ b/core/simd/x86/adx.odin @@ -37,7 +37,7 @@ when ODIN_ARCH == .amd64 { } } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.addcarry.32") llvm_addcarry_u32 :: proc(a: u8, b: u32, c: u32) -> (u8, u32) --- diff --git a/core/simd/x86/fxsr.odin b/core/simd/x86/fxsr.odin index cd78de7d4..a9213fed2 100644 --- a/core/simd/x86/fxsr.odin +++ b/core/simd/x86/fxsr.odin @@ -21,7 +21,7 @@ when ODIN_ARCH == .amd64 { } } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.fxsave") fxsave :: proc(p: rawptr) --- diff --git a/core/simd/x86/pclmulqdq.odin b/core/simd/x86/pclmulqdq.odin index 692fb7ce1..e827bf6b9 100644 --- a/core/simd/x86/pclmulqdq.odin +++ b/core/simd/x86/pclmulqdq.odin @@ -1,12 +1,12 @@ //+build i386, amd64 package simd_x86 -@(require_results, enable_target_feature="pclmulqdq") +@(require_results, enable_target_feature="pclmul") _mm_clmulepi64_si128 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u8) -> __m128i { return pclmulqdq(a, b, u8(IMM8)) } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.pclmulqdq") pclmulqdq :: proc(a, round_key: __m128i, #const imm8: u8) -> __m128i --- diff --git a/core/simd/x86/rdtsc.odin b/core/simd/x86/rdtsc.odin index 54024c3f2..8a8b13c4b 100644 --- a/core/simd/x86/rdtsc.odin +++ b/core/simd/x86/rdtsc.odin @@ -11,7 +11,7 @@ __rdtscp :: #force_inline proc "c" (aux: ^u32) -> u64 { return rdtscp(aux) } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.rdtsc") rdtsc :: proc() -> u64 --- diff --git a/core/simd/x86/sha.odin b/core/simd/x86/sha.odin index f015f4b8a..bc58e8504 100644 --- a/core/simd/x86/sha.odin +++ b/core/simd/x86/sha.odin @@ -30,7 +30,7 @@ _mm_sha256rnds2_epu32 :: #force_inline proc "c" (a, b, k: __m128i) -> __m128i { return transmute(__m128i)sha256rnds2(transmute(i32x4)a, transmute(i32x4)b, transmute(i32x4)k) } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.sha1msg1") sha1msg1 :: proc(a, b: i32x4) -> i32x4 --- diff --git a/core/simd/x86/sse.odin b/core/simd/x86/sse.odin index 2b70e954f..903a43dfd 100644 --- a/core/simd/x86/sse.odin +++ b/core/simd/x86/sse.odin @@ -532,7 +532,7 @@ when ODIN_ARCH == .amd64 { } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.sse.add.ss") addss :: proc(a, b: __m128) -> __m128 --- diff --git a/core/simd/x86/sse2.odin b/core/simd/x86/sse2.odin index dd292712f..a597122f1 100644 --- a/core/simd/x86/sse2.odin +++ b/core/simd/x86/sse2.odin @@ -1040,7 +1040,7 @@ when ODIN_ARCH == .amd64 { } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name="llvm.x86.sse2.pause") pause :: proc() --- diff --git a/core/simd/x86/sse3.odin b/core/simd/x86/sse3.odin index 7a3073c18..cf5f3b2fa 100644 --- a/core/simd/x86/sse3.odin +++ b/core/simd/x86/sse3.odin @@ -49,7 +49,7 @@ _mm_moveldup_ps :: #force_inline proc "c" (a: __m128) -> __m128 { return simd.shuffle(a, a, 0, 0, 2, 2) } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name = "llvm.x86.sse3.addsub.ps") addsubps :: proc(a, b: __m128) -> __m128 --- diff --git a/core/simd/x86/sse41.odin b/core/simd/x86/sse41.odin index b35be33f2..8c306ba4c 100644 --- a/core/simd/x86/sse41.odin +++ b/core/simd/x86/sse41.odin @@ -291,7 +291,7 @@ when ODIN_ARCH == .amd64 { } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name = "llvm.x86.sse41.pblendvb") pblendvb :: proc(a, b: i8x16, mask: i8x16) -> i8x16 --- diff --git a/core/simd/x86/sse42.odin b/core/simd/x86/sse42.odin index 62b4f0478..621346342 100644 --- a/core/simd/x86/sse42.odin +++ b/core/simd/x86/sse42.odin @@ -104,7 +104,7 @@ when ODIN_ARCH == .amd64 { } } -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { // SSE 4.2 string and text comparison ops @(link_name="llvm.x86.sse42.pcmpestrm128") diff --git a/core/simd/x86/ssse3.odin b/core/simd/x86/ssse3.odin index f11ef6774..0264a1c93 100644 --- a/core/simd/x86/ssse3.odin +++ b/core/simd/x86/ssse3.odin @@ -105,7 +105,7 @@ _mm_sign_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { -@(private, default_calling_convention="c") +@(private, default_calling_convention="none") foreign _ { @(link_name = "llvm.x86.ssse3.pabs.b.128") pabsb128 :: proc(a: i8x16) -> u8x16 --- diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 18ad8ac0d..9d909fcae 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -1493,7 +1493,7 @@ gb_internal void enable_target_feature(TokenPos pos, String const &target_featur } -gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bool with_quotes) { +gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bool with_quotes, bool with_plus) { isize len = 0; isize i = 0; for (String const &feature : build_context.target_features_set) { @@ -1502,6 +1502,7 @@ gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bo } len += feature.len; if (with_quotes) len += 2; + if (with_plus) len += 1; i += 1; } char *features = gb_alloc_array(allocator, char, len+1); @@ -1513,6 +1514,7 @@ gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bo } if (with_quotes) features[len++] = '"'; + if (with_plus) features[len++] = '+'; gb_memmove(features + len, feature.text, feature.len); len += feature.len; if (with_quotes) features[len++] = '"'; diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index ca71a0f45..b90fd8495 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -2531,7 +2531,46 @@ gb_internal bool lb_generate_code(lbGenerator *gen) { */ if (build_context.target_features_set.entries.count != 0) { - llvm_features = target_features_set_to_cstring(permanent_allocator(), false); + // Prefix all of the features with a `+`, because we are + // enabling additional features. + char const *additional_features = target_features_set_to_cstring(permanent_allocator(), false, true); + + String f_string = make_string_c(llvm_features); + String a_string = make_string_c(additional_features); + isize f_len = f_string.len; + + if (f_len == 0) { + // The common case is that llvm_features is empty, so + // the target_features_set additions can be used as is. + llvm_features = additional_features; + } else { + // The user probably specified `-microarch:native`, so + // llvm_features is populated by LLVM's idea of what + // the host CPU supports. + // + // As far as I can tell, (which is barely better than + // wild guessing), a bitset is formed by parsing the + // string left to right. + // + // So, llvm_features + ',' + additonal_features, will + // makes the target_features_set override llvm_features. + + char *tmp = gb_alloc_array(permanent_allocator(), char, f_len + 1 + a_string.len + 1); + isize len = 0; + + // tmp = f_string + gb_memmove(tmp, f_string.text, f_string.len); + len += f_string.len; + // tmp += ',' + tmp[len++] = ','; + // tmp += a_string + gb_memmove(tmp + len, a_string.text, a_string.len); + len += a_string.len; + // tmp += NUL + tmp[len++] = 0; + + llvm_features = tmp; + } } // GB_ASSERT_MSG(LLVMTargetHasAsmBackend(target));