diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ede32f093..1a1d18231 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -93,14 +93,14 @@ jobs: - name: Download LLVM (MacOS Intel) if: matrix.os == 'macos-13' run: | - brew install llvm@18 lua@5.4 - echo "/usr/local/opt/llvm@18/bin" >> $GITHUB_PATH + brew update + brew install llvm@20 lua@5.4 lld - name: Download LLVM (MacOS ARM) if: matrix.os == 'macos-14' run: | - brew install llvm@18 wasmtime lua@5.4 - echo "/opt/homebrew/opt/llvm@18/bin" >> $GITHUB_PATH + brew update + brew install llvm@20 wasmtime lua@5.4 lld - name: Build Odin run: ./build_odin.sh release diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 314711efb..eb67eb209 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -49,12 +49,12 @@ jobs: - uses: actions/checkout@v4 - uses: jirutka/setup-alpine@v1 with: - branch: v3.20 + branch: edge - name: (Linux) Download LLVM run: | apk add --no-cache \ - musl-dev llvm18-dev clang18 git mold lz4 \ - libxml2-static llvm18-static zlib-static zstd-static \ + musl-dev llvm20-dev clang20 git mold lz4 \ + libxml2-static llvm20-static zlib-static zstd-static \ make shell: alpine.sh --root {0} - name: build odin @@ -93,8 +93,9 @@ jobs: - uses: actions/checkout@v4 - name: Download LLVM and setup PATH run: | - brew install llvm@18 dylibbundler - echo "/usr/local/opt/llvm@18/bin" >> $GITHUB_PATH + brew update + brew install llvm@20 dylibbundler lld + - name: build odin # These -L makes the linker prioritize system libraries over LLVM libraries, this is mainly to # not link with libunwind bundled with LLVM but link with libunwind on the system. 
@@ -130,8 +131,9 @@ jobs: - uses: actions/checkout@v4 - name: Download LLVM and setup PATH run: | - brew install llvm@18 dylibbundler - echo "/opt/homebrew/opt/llvm@18/bin" >> $GITHUB_PATH + brew update + brew install llvm@20 dylibbundler lld + - name: build odin # These -L makes the linker prioritize system libraries over LLVM libraries, this is mainly to # not link with libunwind bundled with LLVM but link with libunwind on the system. diff --git a/base/runtime/os_specific_bsd.odin b/base/runtime/os_specific_bsd.odin index 5d198484b..466001ada 100644 --- a/base/runtime/os_specific_bsd.odin +++ b/base/runtime/os_specific_bsd.odin @@ -9,7 +9,7 @@ foreign libc { @(link_name="write") _unix_write :: proc(fd: i32, buf: rawptr, size: int) -> int --- - when ODIN_OS == .NetBSD { + when ODIN_OS == .NetBSD || ODIN_OS == .OpenBSD { @(link_name="__errno") __error :: proc() -> ^i32 --- } else { __error :: proc() -> ^i32 --- diff --git a/build.bat b/build.bat index a788a8c04..4c015e133 100644 --- a/build.bat +++ b/build.bat @@ -4,12 +4,12 @@ setlocal EnableDelayedExpansion where /Q cl.exe || ( set __VSCMD_ARG_NO_LOGO=1 - for /f "tokens=*" %%i in ('"C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -latest -requires Microsoft.VisualStudio.Workload.NativeDesktop -property installationPath') do set VS=%%i + for /f "tokens=*" %%i in ('"C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath') do set VS=%%i if "!VS!" equ "" ( - echo ERROR: Visual Studio installation not found + echo ERROR: MSVC installation not found exit /b 1 ) - call "!VS!\VC\Auxiliary\Build\vcvarsall.bat" amd64 || exit /b 1 + call "!VS!\Common7\Tools\vsdevcmd.bat" -arch=x64 -host_arch=x64 || exit /b 1 ) if "%VSCMD_ARG_TGT_ARCH%" neq "x64" ( @@ -152,4 +152,4 @@ if %release_mode% EQU 0 echo: & echo Debug compiler built. 
Note: run "build.bat del *.obj > NUL 2> NUL -:end_of_build \ No newline at end of file +:end_of_build diff --git a/build_odin.sh b/build_odin.sh index 773958d5f..19bb82a11 100755 --- a/build_odin.sh +++ b/build_odin.sh @@ -25,7 +25,7 @@ error() { # Brew advises people not to add llvm to their $PATH, so try and use brew to find it. if [ -z "$LLVM_CONFIG" ] && [ -n "$(command -v brew)" ]; then - if [ -n "$(command -v $(brew --prefix llvm)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm)/bin/llvm-config" + if [ -n "$(command -v $(brew --prefix llvm@20)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@20)/bin/llvm-config" elif [ -n "$(command -v $(brew --prefix llvm@19)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@19)/bin/llvm-config" elif [ -n "$(command -v $(brew --prefix llvm@18)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@18)/bin/llvm-config" elif [ -n "$(command -v $(brew --prefix llvm@17)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@17)/bin/llvm-config" diff --git a/ci/build_linux_static.sh b/ci/build_linux_static.sh index f821cbb59..2eb99116e 100755 --- a/ci/build_linux_static.sh +++ b/ci/build_linux_static.sh @@ -1,8 +1,8 @@ #!/usr/bin/env sh # Intended for use in Alpine containers, see the "nightly" Github action for a list of dependencies -CXX="clang++-18" -LLVM_CONFIG="llvm-config-18" +CXX="clang++-20" +LLVM_CONFIG="llvm-config-20" DISABLED_WARNINGS="-Wno-switch -Wno-macro-redefined -Wno-unused-value" diff --git a/core/crypto/_aes/aes.odin b/core/crypto/_aes/aes.odin index 4f52485d2..f458a12fb 100644 --- a/core/crypto/_aes/aes.odin +++ b/core/crypto/_aes/aes.odin @@ -25,4 +25,5 @@ GHASH_BLOCK_SIZE :: 16 GHASH_TAG_SIZE :: 16 // RCON is the AES keyschedule round constants. 
+@(rodata) RCON := [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36} diff --git a/core/crypto/_aes/ct64/ct64.odin b/core/crypto/_aes/ct64/ct64.odin index f198cab81..af2b42c1e 100644 --- a/core/crypto/_aes/ct64/ct64.odin +++ b/core/crypto/_aes/ct64/ct64.odin @@ -22,8 +22,6 @@ package aes_ct64 -import "base:intrinsics" - // Bitsliced AES for 64-bit general purpose (integer) registers. Each // invocation will process up to 4 blocks at a time. This implementation // is derived from the BearSSL ct64 code, and distributed under a 1-clause @@ -212,11 +210,8 @@ orthogonalize :: proc "contextless" (q: ^[8]u64) { } @(require_results) -interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check { - if len(w) < 4 { - intrinsics.trap() - } - x0, x1, x2, x3 := u64(w[0]), u64(w[1]), u64(w[2]), u64(w[3]) +interleave_in :: proc "contextless" (w0, w1, w2, w3: u32) -> (q0, q1: u64) #no_bounds_check { + x0, x1, x2, x3 := u64(w0), u64(w1), u64(w2), u64(w3) x0 |= (x0 << 16) x1 |= (x1 << 16) x2 |= (x2 << 16) diff --git a/core/crypto/_aes/ct64/ct64_enc.odin b/core/crypto/_aes/ct64/ct64_enc.odin index 36d4aebc8..bee6de722 100644 --- a/core/crypto/_aes/ct64/ct64_enc.odin +++ b/core/crypto/_aes/ct64/ct64_enc.odin @@ -22,12 +22,8 @@ package aes_ct64 -import "base:intrinsics" - add_round_key :: proc "contextless" (q: ^[8]u64, sk: []u64) #no_bounds_check { - if len(sk) < 8 { - intrinsics.trap() - } + ensure_contextless(len(sk) >= 8, "aes/ct64: invalid round key size") q[0] ~= sk[0] q[1] ~= sk[1] diff --git a/core/crypto/_aes/ct64/ct64_keysched.odin b/core/crypto/_aes/ct64/ct64_keysched.odin index 060a2c03e..0f00bba57 100644 --- a/core/crypto/_aes/ct64/ct64_keysched.odin +++ b/core/crypto/_aes/ct64/ct64_keysched.odin @@ -22,7 +22,6 @@ package aes_ct64 -import "base:intrinsics" import "core:crypto/_aes" import "core:encoding/endian" import "core:mem" @@ -42,7 +41,7 @@ sub_word :: proc "contextless" (x: u32) -> u32 { } @(private, require_results) -keysched 
:: proc(comp_skey: []u64, key: []byte) -> int { +keysched :: proc "contextless" (comp_skey: []u64, key: []byte) -> int { num_rounds, key_len := 0, len(key) switch key_len { case _aes.KEY_SIZE_128: @@ -52,7 +51,7 @@ keysched :: proc(comp_skey: []u64, key: []byte) -> int { case _aes.KEY_SIZE_256: num_rounds = _aes.ROUNDS_256 case: - panic("crypto/aes: invalid AES key size") + panic_contextless("crypto/aes: invalid AES key size") } skey: [60]u32 = --- @@ -78,7 +77,7 @@ keysched :: proc(comp_skey: []u64, key: []byte) -> int { q: [8]u64 = --- for i, j := 0, 0; i < nkf; i, j = i + 4, j + 2 { - q[0], q[4] = interleave_in(skey[i:]) + q[0], q[4] = interleave_in(skey[i], skey[i+1], skey[i+2], skey[i+3]) q[1] = q[0] q[2] = q[0] q[3] = q[0] @@ -123,57 +122,3 @@ skey_expand :: proc "contextless" (skey, comp_skey: []u64, num_rounds: int) { skey[v + 3] = (x3 << 4) - x3 } } - -orthogonalize_roundkey :: proc "contextless" (qq: []u64, key: []byte) { - if len(qq) < 8 || len(key) != 16 { - intrinsics.trap() - } - - skey: [4]u32 = --- - skey[0] = endian.unchecked_get_u32le(key[0:]) - skey[1] = endian.unchecked_get_u32le(key[4:]) - skey[2] = endian.unchecked_get_u32le(key[8:]) - skey[3] = endian.unchecked_get_u32le(key[12:]) - - q: [8]u64 = --- - q[0], q[4] = interleave_in(skey[:]) - q[1] = q[0] - q[2] = q[0] - q[3] = q[0] - q[5] = q[4] - q[6] = q[4] - q[7] = q[4] - orthogonalize(&q) - - comp_skey: [2]u64 = --- - comp_skey[0] = - (q[0] & 0x1111111111111111) | - (q[1] & 0x2222222222222222) | - (q[2] & 0x4444444444444444) | - (q[3] & 0x8888888888888888) - comp_skey[1] = - (q[4] & 0x1111111111111111) | - (q[5] & 0x2222222222222222) | - (q[6] & 0x4444444444444444) | - (q[7] & 0x8888888888888888) - - for x, u in comp_skey { - x0 := x - x1, x2, x3 := x0, x0, x0 - x0 &= 0x1111111111111111 - x1 &= 0x2222222222222222 - x2 &= 0x4444444444444444 - x3 &= 0x8888888888888888 - x1 >>= 1 - x2 >>= 2 - x3 >>= 3 - qq[u * 4 + 0] = (x0 << 4) - x0 - qq[u * 4 + 1] = (x1 << 4) - x1 - qq[u * 4 + 2] = (x2 << 4) 
- x2 - qq[u * 4 + 3] = (x3 << 4) - x3 - } - - mem.zero_explicit(&skey, size_of(skey)) - mem.zero_explicit(&q, size_of(q)) - mem.zero_explicit(&comp_skey, size_of(comp_skey)) -} diff --git a/core/crypto/_aes/ct64/ghash.odin b/core/crypto/_aes/ct64/ghash.odin index a522a481a..0c885d8ba 100644 --- a/core/crypto/_aes/ct64/ghash.odin +++ b/core/crypto/_aes/ct64/ghash.odin @@ -22,7 +22,6 @@ package aes_ct64 -import "base:intrinsics" import "core:crypto/_aes" import "core:encoding/endian" @@ -64,9 +63,8 @@ rev64 :: proc "contextless" (x: u64) -> u64 { // Note: `dst` is both an input and an output, to support easy implementation // of GCM. ghash :: proc "contextless" (dst, key, data: []byte) { - if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE { - intrinsics.trap() - } + ensure_contextless(len(dst) == _aes.GHASH_BLOCK_SIZE) + ensure_contextless(len(key) == _aes.GHASH_BLOCK_SIZE) buf := data l := len(buf) diff --git a/core/crypto/_aes/ct64/helpers.odin b/core/crypto/_aes/ct64/helpers.odin index 169271f6d..7eec5bdc4 100644 --- a/core/crypto/_aes/ct64/helpers.odin +++ b/core/crypto/_aes/ct64/helpers.odin @@ -1,60 +1,61 @@ package aes_ct64 -import "base:intrinsics" import "core:crypto/_aes" import "core:encoding/endian" -load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) { - if len(src) != _aes.BLOCK_SIZE { - intrinsics.trap() - } - - w: [4]u32 = --- - w[0] = endian.unchecked_get_u32le(src[0:]) - w[1] = endian.unchecked_get_u32le(src[4:]) - w[2] = endian.unchecked_get_u32le(src[8:]) - w[3] = endian.unchecked_get_u32le(src[12:]) - q[0], q[4] = interleave_in(w[:]) - orthogonalize(q) +@(require_results) +load_interleaved :: proc "contextless" (src: []byte) -> (u64, u64) #no_bounds_check { + w0 := endian.unchecked_get_u32le(src[0:]) + w1 := endian.unchecked_get_u32le(src[4:]) + w2 := endian.unchecked_get_u32le(src[8:]) + w3 := endian.unchecked_get_u32le(src[12:]) + return interleave_in(w0, w1, w2, w3) } -store_blockx1 :: proc "contextless" (dst: 
[]byte, q: ^[8]u64) { - if len(dst) != _aes.BLOCK_SIZE { - intrinsics.trap() - } - - orthogonalize(q) - w0, w1, w2, w3 := interleave_out(q[0], q[4]) +store_interleaved :: proc "contextless" (dst: []byte, a0, a1: u64) #no_bounds_check { + w0, w1, w2, w3 := interleave_out(a0, a1) endian.unchecked_put_u32le(dst[0:], w0) endian.unchecked_put_u32le(dst[4:], w1) endian.unchecked_put_u32le(dst[8:], w2) endian.unchecked_put_u32le(dst[12:], w3) } +@(require_results) +xor_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) { + return a0 ~ b0, a1 ~ b1 +} + +@(require_results) +and_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) { + return a0 & b0, a1 & b1 +} + +load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) { + ensure_contextless(len(src) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size") + + q[0], q[4] = #force_inline load_interleaved(src) + orthogonalize(q) +} + +store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) { + ensure_contextless(len(dst) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size") + + orthogonalize(q) + #force_inline store_interleaved(dst, q[0], q[4]) +} + load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) { - if n := len(src); n > STRIDE || n == 0 { - intrinsics.trap() - } + ensure_contextless(len(src) > 0 && len(src) <= STRIDE, "aes/ct64: invalid block(s) size") - w: [4]u32 = --- for s, i in src { - if len(s) != _aes.BLOCK_SIZE { - intrinsics.trap() - } - - w[0] = endian.unchecked_get_u32le(s[0:]) - w[1] = endian.unchecked_get_u32le(s[4:]) - w[2] = endian.unchecked_get_u32le(s[8:]) - w[3] = endian.unchecked_get_u32le(s[12:]) - q[i], q[i + 4] = interleave_in(w[:]) + ensure_contextless(len(s) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size") + q[i], q[i + 4] = #force_inline load_interleaved(s) } orthogonalize(q) } store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) { - if n := len(dst); n > STRIDE || n == 0 { - intrinsics.trap() - } + 
ensure_contextless(len(dst) > 0 && len(dst) <= STRIDE, "aes/ct64: invalid block(s) size") orthogonalize(q) for d, i in dst { @@ -62,14 +63,7 @@ store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) { if d == nil { break } - if len(d) != _aes.BLOCK_SIZE { - intrinsics.trap() - } - - w0, w1, w2, w3 := interleave_out(q[i], q[i + 4]) - endian.unchecked_put_u32le(d[0:], w0) - endian.unchecked_put_u32le(d[4:], w1) - endian.unchecked_put_u32le(d[8:], w2) - endian.unchecked_put_u32le(d[12:], w3) + ensure_contextless(len(d) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size") + #force_inline store_interleaved(d, q[i], q[i + 4]) } } diff --git a/core/crypto/_aes/hw_intel/ghash.odin b/core/crypto/_aes/hw_intel/ghash.odin index 4320dd59b..5f51b614b 100644 --- a/core/crypto/_aes/hw_intel/ghash.odin +++ b/core/crypto/_aes/hw_intel/ghash.odin @@ -52,7 +52,7 @@ GHASH_STRIDE_BYTES_HW :: GHASH_STRIDE_HW * _aes.GHASH_BLOCK_SIZE // that it is right-shifted by 1 bit. The left-shift is relatively // inexpensive, and it can be mutualised. // -// Since SSE2 opcodes do not have facilities for shitfting full 128-bit +// Since SSE2 opcodes do not have facilities for shifting full 128-bit // values with bit precision, we have to break down values into 64-bit // chunks. We number chunks from 0 to 3 in left to right order. 
@@ -155,7 +155,7 @@ square_f128 :: #force_inline proc "contextless" (kw: x86.__m128i) -> (x86.__m128 @(enable_target_feature = "sse2,ssse3,pclmul") ghash :: proc "contextless" (dst, key, data: []byte) #no_bounds_check { if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE { - intrinsics.trap() + panic_contextless("aes/ghash: invalid dst or key size") } // Note: BearSSL opts to copy the remainder into a zero-filled diff --git a/core/crypto/_blake2/blake2.odin b/core/crypto/_blake2/blake2.odin index 2ad74843b..89fbe3a7a 100644 --- a/core/crypto/_blake2/blake2.odin +++ b/core/crypto/_blake2/blake2.odin @@ -18,6 +18,8 @@ BLAKE2S_SIZE :: 32 BLAKE2B_BLOCK_SIZE :: 128 BLAKE2B_SIZE :: 64 +MAX_SIZE :: 255 + Blake2s_Context :: struct { h: [8]u32, t: [2]u32, @@ -68,13 +70,13 @@ Blake2_Tree :: struct { is_last_node: bool, } -@(private) +@(private, rodata) BLAKE2S_IV := [8]u32 { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, } -@(private) +@(private, rodata) BLAKE2B_IV := [8]u64 { 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, @@ -82,16 +84,13 @@ BLAKE2B_IV := [8]u64 { 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, } -init :: proc(ctx: ^$T, cfg: ^Blake2_Config) { +init :: proc "contextless" (ctx: ^$T, cfg: ^Blake2_Config) { when T == Blake2s_Context { max_size :: BLAKE2S_SIZE } else when T == Blake2b_Context { max_size :: BLAKE2B_SIZE } - - if cfg.size > max_size { - panic("blake2: requested output size exceeeds algorithm max") - } + ensure_contextless(cfg.size <= max_size, "blake2: requested output size exceeeds algorithm max") // To save having to allocate a scratch buffer, use the internal // data buffer (`ctx.x`), as it is exactly the correct size. 
@@ -167,8 +166,8 @@ init :: proc(ctx: ^$T, cfg: ^Blake2_Config) { ctx.is_initialized = true } -update :: proc(ctx: ^$T, p: []byte) { - assert(ctx.is_initialized) +update :: proc "contextless" (ctx: ^$T, p: []byte) { + ensure_contextless(ctx.is_initialized) p := p when T == Blake2s_Context { @@ -195,8 +194,8 @@ update :: proc(ctx: ^$T, p: []byte) { ctx.nx += copy(ctx.x[ctx.nx:], p) } -final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) { - assert(ctx.is_initialized) +final :: proc "contextless" (ctx: ^$T, hash: []byte, finalize_clone: bool = false) { + ensure_contextless(ctx.is_initialized) ctx := ctx if finalize_clone { @@ -206,24 +205,19 @@ final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) { } defer(reset(ctx)) + ensure_contextless(len(hash) >= int(ctx.size), "crypto/blake2: invalid destination digest size") when T == Blake2s_Context { - if len(hash) < int(ctx.size) { - panic("crypto/blake2s: invalid destination digest size") - } blake2s_final(ctx, hash) } else when T == Blake2b_Context { - if len(hash) < int(ctx.size) { - panic("crypto/blake2b: invalid destination digest size") - } blake2b_final(ctx, hash) } } -clone :: proc(ctx, other: ^$T) { +clone :: proc "contextless" (ctx, other: ^$T) { ctx^ = other^ } -reset :: proc(ctx: ^$T) { +reset :: proc "contextless" (ctx: ^$T) { if !ctx.is_initialized { return } diff --git a/core/crypto/_chacha20/chacha20.odin b/core/crypto/_chacha20/chacha20.odin index a907209de..1a4b5a507 100644 --- a/core/crypto/_chacha20/chacha20.odin +++ b/core/crypto/_chacha20/chacha20.odin @@ -1,6 +1,5 @@ package _chacha20 -import "base:intrinsics" import "core:encoding/endian" import "core:math/bits" import "core:mem" @@ -46,9 +45,8 @@ Context :: struct { // derivation is expected to be handled by the caller, so that the // HChaCha call can be suitably accelerated. 
init :: proc "contextless" (ctx: ^Context, key, iv: []byte, is_xchacha: bool) { - if len(key) != KEY_SIZE || len(iv) != IV_SIZE { - intrinsics.trap() - } + ensure_contextless(len(key) == KEY_SIZE, "chacha20: invalid key size") + ensure_contextless(len(iv) == IV_SIZE, "chacha20: invalid iv size") k, n := key, iv @@ -76,12 +74,10 @@ init :: proc "contextless" (ctx: ^Context, key, iv: []byte, is_xchacha: bool) { // seek seeks the (X)ChaCha20 stream counter to the specified block. seek :: proc(ctx: ^Context, block_nr: u64) { - assert(ctx._is_initialized) + ensure(ctx._is_initialized) if ctx._is_ietf_flavor { - if block_nr > MAX_CTR_IETF { - panic("crypto/chacha20: attempted to seek past maximum counter") - } + ensure(block_nr <= MAX_CTR_IETF, "crypto/chacha20: attempted to seek past maximum counter") } else { ctx._s[13] = u32(block_nr >> 32) } @@ -102,7 +98,7 @@ check_counter_limit :: proc(ctx: ^Context, nr_blocks: int) { // Enforce the maximum consumed keystream per IV. // // While all modern "standard" definitions of ChaCha20 use - // the IETF 32-bit counter, for XChaCha20 most common + // the IETF 32-bit counter, for XChaCha20 historical // implementations allow for a 64-bit counter. 
// // Honestly, the answer here is "use a MRAE primitive", but @@ -110,14 +106,14 @@ check_counter_limit :: proc(ctx: ^Context, nr_blocks: int) { ERR_CTR_EXHAUSTED :: "crypto/chacha20: maximum (X)ChaCha20 keystream per IV reached" + ctr_ok: bool if ctx._is_ietf_flavor { - if u64(ctx._s[12]) + u64(nr_blocks) > MAX_CTR_IETF { - panic(ERR_CTR_EXHAUSTED) - } + ctr_ok = u64(ctx._s[12]) + u64(nr_blocks) <= MAX_CTR_IETF } else { ctr := (u64(ctx._s[13]) << 32) | u64(ctx._s[12]) - if _, carry := bits.add_u64(ctr, u64(nr_blocks), 0); carry != 0 { - panic(ERR_CTR_EXHAUSTED) - } + _, carry := bits.add_u64(ctr, u64(nr_blocks), 0) + ctr_ok = carry == 0 } + + ensure(ctr_ok, ERR_CTR_EXHAUSTED) } diff --git a/core/crypto/_chacha20/simd128/chacha20_simd128.odin b/core/crypto/_chacha20/simd128/chacha20_simd128.odin index fe0d0d518..cf78541d1 100644 --- a/core/crypto/_chacha20/simd128/chacha20_simd128.odin +++ b/core/crypto/_chacha20/simd128/chacha20_simd128.odin @@ -29,11 +29,24 @@ when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 { // explicitly using simd.u8x16 shuffles. @(private = "file") TARGET_SIMD_FEATURES :: "sse2,ssse3" +} else when ODIN_ARCH == .riscv64 { + @(private = "file") + TARGET_SIMD_FEATURES :: "v" } else { @(private = "file") TARGET_SIMD_FEATURES :: "" } +// Some targets lack runtime feature detection, and will flat out refuse +// to load binaries that have unknown instructions. This is distinct from +// `simd.IS_EMULATED` as actually good designs support runtime feature +// detection and that constant establishes a baseline. 
+// +// See: +// - https://github.com/WebAssembly/design/issues/1161 +@(private = "file") +TARGET_IS_DESIGNED_BY_IDIOTS :: (ODIN_ARCH == .wasm64p32 || ODIN_ARCH == .wasm32) && !intrinsics.has_target_feature("simd128") + @(private = "file") _ROT_7L: simd.u32x4 : {7, 7, 7, 7} @(private = "file") @@ -205,11 +218,13 @@ _store_simd128 :: #force_inline proc "contextless" ( // is_performant returns true iff the target and current host both support // "enough" 128-bit SIMD to make this implementation performant. is_performant :: proc "contextless" () -> bool { - when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 || ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 || ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 || ODIN_ARCH == .riscv64 { when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 { req_features :: info.CPU_Features{.asimd} } else when ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 { req_features :: info.CPU_Features{.sse2, .ssse3} + } else when ODIN_ARCH == .riscv64 { + req_features :: info.CPU_Features{.V} } features, ok := info.cpu_features.? @@ -245,8 +260,17 @@ stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) // 8 blocks at a time. // - // Note: This is only worth it on Aarch64. - when ODIN_ARCH == .arm64 { + // Note: + // This uses a ton of registers so it is only worth it on targets + // that have something like 32 128-bit registers. This is currently + // all ARMv8 targets, and RISC-V Zvl128b (`V` application profile) + // targets. + // + // While our current definition of `.arm32` is 32-bit ARMv8, this + // may change in the future (ARMv7 is still relevant), and things + // like Cortex-A8/A9 do "pretend" 128-bit SIMD 64-bits at a time + // and thus need benchmarking. 
+ when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { for ; n >= 8; n = n - 8 { v0, v1, v2, v3 := s0, s1, s2, s3 @@ -354,9 +378,11 @@ stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) // 4 blocks at a time. // - // Note: The i386 target lacks the required number of registers - // for this to be performant, so it is skipped. - when ODIN_ARCH != .i386 { + // Note: This is skipped on several targets for various reasons. + // - i386 lacks the required number of registers + // - Generating code when runtime "hardware" SIMD support is impossible + // to detect is pointless, since this will be emulated using GP regs. + when ODIN_ARCH != .i386 && !TARGET_IS_DESIGNED_BY_IDIOTS { for ; n >= 4; n = n - 4 { v0, v1, v2, v3 := s0, s1, s2, s3 diff --git a/core/crypto/_chacha20/simd256/chacha20_simd256_stub.odin b/core/crypto/_chacha20/simd256/chacha20_simd256_stub.odin index ce673b42b..287ddd885 100644 --- a/core/crypto/_chacha20/simd256/chacha20_simd256_stub.odin +++ b/core/crypto/_chacha20/simd256/chacha20_simd256_stub.odin @@ -13,5 +13,5 @@ stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) } hchacha20 :: proc "contextless" (dst, key, iv: []byte) { - intrinsics.trap() + panic_contextless("crypto/chacha20: simd256 implementation unsupported") } \ No newline at end of file diff --git a/core/crypto/_edwards25519/edwards25519.odin b/core/crypto/_edwards25519/edwards25519.odin index 6495f7a3a..d6f01d497 100644 --- a/core/crypto/_edwards25519/edwards25519.odin +++ b/core/crypto/_edwards25519/edwards25519.odin @@ -11,7 +11,6 @@ See: - https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html */ -import "base:intrinsics" import "core:crypto" import field "core:crypto/_fiat/field_curve25519" import "core:mem" @@ -32,6 +31,7 @@ import "core:mem" // - The group element decoding routine takes the opinionated stance of // rejecting non-canonical encodings. 
+@(rodata) FE_D := field.Tight_Field_Element { 929955233495203, 466365720129213, @@ -39,7 +39,7 @@ FE_D := field.Tight_Field_Element { 2033849074728123, 1442794654840575, } -@(private) +@(private, rodata) FE_A := field.Tight_Field_Element { 2251799813685228, 2251799813685247, @@ -47,7 +47,7 @@ FE_A := field.Tight_Field_Element { 2251799813685247, 2251799813685247, } -@(private) +@(private, rodata) FE_D2 := field.Tight_Field_Element { 1859910466990425, 932731440258426, @@ -55,7 +55,7 @@ FE_D2 := field.Tight_Field_Element { 1815898335770999, 633789495995903, } -@(private) +@(private, rodata) GE_BASEPOINT := Group_Element { field.Tight_Field_Element { 1738742601995546, @@ -80,6 +80,7 @@ GE_BASEPOINT := Group_Element { 1821297809914039, }, } +@(rodata) GE_IDENTITY := Group_Element { field.Tight_Field_Element{0, 0, 0, 0, 0}, field.Tight_Field_Element{1, 0, 0, 0, 0}, @@ -107,9 +108,7 @@ ge_set :: proc "contextless" (ge, a: ^Group_Element) { @(require_results) ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool { - if len(b) != 32 { - intrinsics.trap() - } + ensure_contextless(len(b) == 32, "edwards25519: invalid group element size") b_ := (^[32]byte)(raw_data(b)) // Do the work in a scratch element, so that ge is unchanged on @@ -166,9 +165,7 @@ ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool { } ge_bytes :: proc "contextless" (ge: ^Group_Element, dst: []byte) { - if len(dst) != 32 { - intrinsics.trap() - } + ensure_contextless(len(dst) == 32, "edwards25519: invalid group element size") dst_ := (^[32]byte)(raw_data(dst)) // Convert the element to affine (x, y) representation. 
diff --git a/core/crypto/_edwards25519/edwards25519_scalar.odin b/core/crypto/_edwards25519/edwards25519_scalar.odin index e21fa3755..68c79a6e8 100644 --- a/core/crypto/_edwards25519/edwards25519_scalar.odin +++ b/core/crypto/_edwards25519/edwards25519_scalar.odin @@ -1,6 +1,5 @@ package _edwards25519 -import "base:intrinsics" import field "core:crypto/_fiat/field_scalar25519" import "core:mem" @@ -8,7 +7,7 @@ Scalar :: field.Montgomery_Domain_Field_Element // WARNING: This is non-canonical and only to be used when checking if // a group element is on the prime-order subgroup. -@(private) +@(private, rodata) SC_ELL := field.Non_Montgomery_Domain_Field_Element { field.ELL[0], field.ELL[1], @@ -25,17 +24,13 @@ sc_set_u64 :: proc "contextless" (sc: ^Scalar, i: u64) { @(require_results) sc_set_bytes :: proc "contextless" (sc: ^Scalar, b: []byte) -> bool { - if len(b) != 32 { - intrinsics.trap() - } + ensure_contextless(len(b) == 32, "edwards25519: invalid scalar size") b_ := (^[32]byte)(raw_data(b)) return field.fe_from_bytes(sc, b_) } sc_set_bytes_rfc8032 :: proc "contextless" (sc: ^Scalar, b: []byte) { - if len(b) != 32 { - intrinsics.trap() - } + ensure_contextless(len(b) == 32, "edwards25519: invalid scalar size") b_ := (^[32]byte)(raw_data(b)) field.fe_from_bytes_rfc8032(sc, b_) } diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin index d039bd411..6716fa158 100644 --- a/core/crypto/_fiat/field_curve25519/field51.odin +++ b/core/crypto/_fiat/field_curve25519/field51.odin @@ -42,9 +42,12 @@ import "core:math/bits" Loose_Field_Element :: distinct [5]u64 Tight_Field_Element :: distinct [5]u64 +@(rodata) FE_ZERO := Tight_Field_Element{0, 0, 0, 0, 0} +@(rodata) FE_ONE := Tight_Field_Element{1, 0, 0, 0, 0} +@(rodata) FE_SQRT_M1 := Tight_Field_Element { 1718705420411056, 234908883556509, diff --git a/core/crypto/_fiat/field_curve448/field.odin b/core/crypto/_fiat/field_curve448/field.odin new file mode 100644 
index 000000000..540d88f28 --- /dev/null +++ b/core/crypto/_fiat/field_curve448/field.odin @@ -0,0 +1,235 @@ +package field_curve448 + +import "core:mem" + +fe_relax_cast :: #force_inline proc "contextless" ( + arg1: ^Tight_Field_Element, +) -> ^Loose_Field_Element { + return (^Loose_Field_Element)(arg1) +} + +fe_tighten_cast :: #force_inline proc "contextless" ( + arg1: ^Loose_Field_Element, +) -> ^Tight_Field_Element { + return (^Tight_Field_Element)(arg1) +} + +fe_clear :: proc "contextless" ( + arg1: $T, +) where T == ^Tight_Field_Element || T == ^Loose_Field_Element { + mem.zero_explicit(arg1, size_of(arg1^)) +} + +fe_clear_vec :: proc "contextless" ( + arg1: $T, +) where T == []^Tight_Field_Element || T == []^Loose_Field_Element { + for fe in arg1 { + fe_clear(fe) + } +} + +fe_carry_mul_small :: proc "contextless" ( + out1: ^Tight_Field_Element, + arg1: ^Loose_Field_Element, + arg2: u64, +) { + arg2_ := Loose_Field_Element{arg2, 0, 0, 0, 0, 0, 0, 0} + fe_carry_mul(out1, arg1, &arg2_) +} + +fe_carry_pow2k :: proc "contextless" ( + out1: ^Tight_Field_Element, + arg1: ^Loose_Field_Element, + arg2: uint, +) { + // Special case: `arg2 == 0` yields 1, though this should never happen. NOTE(review): `arg1^(2^0)` is `arg1`, not 1 -- confirm no caller passes 0. 
+ if arg2 == 0 { + fe_one(out1) + return + } + + fe_carry_square(out1, arg1) + for _ in 1 ..< arg2 { + fe_carry_square(out1, fe_relax_cast(out1)) + } +} + +fe_carry_inv :: proc "contextless" ( + out1: ^Tight_Field_Element, + arg1: ^Loose_Field_Element, +) { + // Inversion computation is derived from the addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _110 = 2*_11 + // _111 = 1 + _110 + // _111000 = _111 << 3 + // _111111 = _111 + _111000 + // x12 = _111111 << 6 + _111111 + // x24 = x12 << 12 + x12 + // i34 = x24 << 6 + // x30 = _111111 + i34 + // x48 = i34 << 18 + x24 + // x96 = x48 << 48 + x48 + // x192 = x96 << 96 + x96 + // x222 = x192 << 30 + x30 + // x223 = 2*x222 + 1 + // return (x223 << 223 + x222) << 2 + 1 + // + // Operations: 447 squares 13 multiplies + // + // Generated by github.com/mmcloughlin/addchain v0.4.0. + + t0, t1, t2: Tight_Field_Element = ---, ---, --- + + // Step 1: t0 = x^0x2 + fe_carry_square(&t0, arg1) + + // Step 2: t0 = x^0x3 + fe_carry_mul(&t0, arg1, fe_relax_cast(&t0)) + + // t0.Sqr(t0) + fe_carry_square(&t0, fe_relax_cast(&t0)) + + // Step 4: t0 = x^0x7 + fe_carry_mul(&t0, arg1, fe_relax_cast(&t0)) + + // Step 7: t1 = x^0x38 + fe_carry_pow2k(&t1, fe_relax_cast(&t0), 3) + + // Step 8: t0 = x^0x3f + fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1)) + + // Step 14: t1 = x^0xfc0 + fe_carry_pow2k(&t1, fe_relax_cast(&t0), 6) + + // Step 15: t1 = x^0xfff + fe_carry_mul(&t1, fe_relax_cast(&t0), fe_relax_cast(&t1)) + + // Step 27: t2 = x^0xfff000 + fe_carry_pow2k(&t2, fe_relax_cast(&t1), 12) + + // Step 28: t1 = x^0xffffff + fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2)) + + // Step 34: t2 = x^0x3fffffc0 + fe_carry_pow2k(&t2, fe_relax_cast(&t1), 6) + + // Step 35: t0 = x^0x3fffffff + fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t2)) + + // Step 53: t2 = x^0xffffff000000 + fe_carry_pow2k(&t2, fe_relax_cast(&t2), 18) + + // Step 54: t1 = x^0xffffffffffff + fe_carry_mul(&t1, fe_relax_cast(&t1), 
fe_relax_cast(&t2)) + + // Step 102: t2 = x^0xffffffffffff000000000000 + fe_carry_pow2k(&t2, fe_relax_cast(&t1), 48) + + // Step 103: t1 = x^0xffffffffffffffffffffffff + fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2)) + + // Step 199: t2 = x^0xffffffffffffffffffffffff000000000000000000000000 + fe_carry_pow2k(&t2, fe_relax_cast(&t1), 96) + + // Step 200: t1 = x^0xffffffffffffffffffffffffffffffffffffffffffffffff + fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2)) + + // Step 230: t1 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffc0000000 + fe_carry_pow2k(&t1, fe_relax_cast(&t1), 30) + + // Step 231: t0 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffff + fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1)) + + // Step 232: t1 = x^0x7ffffffffffffffffffffffffffffffffffffffffffffffffffffffe + fe_carry_square(&t1, fe_relax_cast(&t0)) + + // Step 233: t1 = x^0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffff + fe_carry_mul(&t1, arg1, fe_relax_cast(&t1)) + + // Step 456: t1 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000000000000000000000000000000000000000000000000000 + fe_carry_pow2k(&t1, fe_relax_cast(&t1), 223) + + // Step 457: t0 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff + fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1)) + + // Step 459: t0 = x^0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffc + fe_carry_pow2k(&t0, fe_relax_cast(&t0), 2) + + // Step 460: z = x^0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd + fe_carry_mul(out1, arg1, fe_relax_cast(&t0)) + + fe_clear_vec([]^Tight_Field_Element{&t0, &t1, &t2}) +} + +fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) { + out1[0] = 0 + out1[1] = 0 + out1[2] = 0 + out1[3] = 0 + out1[4] = 0 + out1[5] = 0 + out1[6] = 0 + out1[7] = 0 
+}
+// fe_one sets out1 to the field element 1: limb 0 is 1 and limbs 1..7 are 0.
+fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 1
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+	out1[4] = 0
+	out1[5] = 0
+	out1[6] = 0
+	out1[7] = 0
+}
+// fe_set copies the eight limbs of arg1 into out1.
+fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	x1 := arg1[0] // all eight limbs are loaded into locals before any limb of out1 is stored
+	x2 := arg1[1]
+	x3 := arg1[2]
+	x4 := arg1[3]
+	x5 := arg1[4]
+	x6 := arg1[5]
+	x7 := arg1[6]
+	x8 := arg1[7]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+	out1[5] = x6
+	out1[6] = x7
+	out1[7] = x8
+}
+// fe_cond_swap exchanges out1 and out2 when arg1 is 1 and leaves both unchanged when arg1 is 0, using an XOR mask instead of a branch.
+@(optimization_mode = "none") // NOTE(review): presumably (with #force_no_inline) keeps the optimizer from turning the masked XOR into a branch -- confirm intent
+fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
+	mask := (u64(arg1) * 0xffffffffffffffff) // 0 when arg1 == 0, all ones when arg1 == 1; assumes arg1 is 0 or 1 -- TODO confirm with callers
+	x := (out1[0] ~ out2[0]) & mask // XOR difference of the two limbs, or 0 when not swapping; XORing it into both limbs swaps them
+	x1, y1 := out1[0] ~ x, out2[0] ~ x
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	x = (out1[3] ~ out2[3]) & mask
+	x4, y4 := out1[3] ~ x, out2[3] ~ x
+	x = (out1[4] ~ out2[4]) & mask
+	x5, y5 := out1[4] ~ x, out2[4] ~ x
+	x = (out1[5] ~ out2[5]) & mask
+	x6, y6 := out1[5] ~ x, out2[5] ~ x
+	x = (out1[6] ~ out2[6]) & mask
+	x7, y7 := out1[6] ~ x, out2[6] ~ x
+	x = (out1[7] ~ out2[7]) & mask
+	x8, y8 := out1[7] ~ x, out2[7] ~ x
+	out1[0], out2[0] = x1, y1 // results are stored only after all eight limb pairs have been computed
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+	out1[3], out2[3] = x4, y4
+	out1[4], out2[4] = x5, y5
+	out1[5], out2[5] = x6, y6
+	out1[6], out2[6] = x7, y7
+	out1[7], out2[7] = x8, y8
+}
\ No newline at end of file
diff --git a/core/crypto/_fiat/field_curve448/field51.odin b/core/crypto/_fiat/field_curve448/field51.odin
new file mode 100644
index 000000000..d8e49e04d
--- /dev/null
+++ b/core/crypto/_fiat/field_curve448/field51.odin
@@ -0,0 +1,1060 @@
+// The BSD 1-Clause License (BSD-1-Clause)
+//
+// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file)
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
+// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package field_curve448
+
+// This file provides arithmetic on the field Z/(2^448 - 2^224 - 1) using
+// unsaturated 64-bit integer arithmetic. It is derived primarily
+// from the machine-generated Golang output from the fiat-crypto project.
+//
+// While the base implementation is provably correct, this implementation
+// makes no such claims as the port and optimizations were done by hand.
+//
+// TODO:
+// * When fiat-crypto supports it, using saturated 64-bit limbs
+// instead of 56-bit limbs will be faster, though the gains are
+// minimal unless adcx/adox/mulx are used.
+
+import fiat "core:crypto/_fiat"
+import "core:math/bits"
+
+Loose_Field_Element :: distinct [8]u64 // eight u64 limbs; the output type of fe_add/fe_sub/fe_opp, which do not propagate carries
+Tight_Field_Element :: distinct [8]u64 // eight u64 limbs; the output type of fe_carry, fe_carry_mul and fe_carry_square
+
+@(rodata)
+FE_ZERO := Tight_Field_Element{0, 0, 0, 0, 0, 0, 0, 0} // the field element 0
+@(rodata)
+FE_ONE := Tight_Field_Element{1, 0, 0, 0, 0, 0, 0, 0} // the field element 1 (only limb 0 is set)
+// _addcarryx_u56 returns arg1 + arg2 + arg3 split at bit 56: out1 is the low 56 bits of the sum, out2 the bits above them.
+_addcarryx_u56 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
+	x1 := ((u64(arg1) + arg2) + arg3)
+	x2 := (x1 & 0xffffffffffffff) // low 56 bits of the sum
+	x3 := fiat.u1((x1 >> 56)) // carry; assumes arg2 and arg3 are below 2^56 so this is 0 or 1 -- TODO confirm
+	out1 = x2
+	out2 = x3
+	return
+}
+// _subborrowx_u56 returns arg2 - arg1 - arg3 split at bit 56: out1 is the low 56 bits of the difference, out2 is derived from the bits above them.
+_subborrowx_u56 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
+	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3)) // computed as i64 so a negative difference shows up in the bits above bit 56
+	x2 := fiat.u1((x1 >> 56))
+	x3 := (u64(x1) & 0xffffffffffffff) // low 56 bits of the difference
+	out1 = x3
+	out2 = (0x0 - fiat.u1(x2)) // negation of x2; presumably 1 when the difference was negative and 0 otherwise -- depends on fiat.u1, confirm
+	return
+}
+// fe_carry_mul computes out1 = arg1 * arg2 in the field named in the file header; it takes loose inputs and stores a carried (tight) result.
+fe_carry_mul :: proc "contextless" (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
+	x2, x1 := bits.mul_u64(arg1[7], arg2[7]) // bits.mul_u64 returns (hi, lo): even-numbered names hold the high word, odd-numbered the low word of each 128-bit limb product
+	x4, x3 := bits.mul_u64(arg1[7], arg2[6])
+	x6, x5 := bits.mul_u64(arg1[7], arg2[5])
+	x8, x7 := bits.mul_u64(arg1[6], arg2[7])
+	x10, x9 := bits.mul_u64(arg1[6], arg2[6])
+	x12, x11 := bits.mul_u64(arg1[5], arg2[7])
+	x14, x13 := bits.mul_u64(arg1[7], arg2[7]) // NOTE(review): some limb products are computed more than once (x1, x13, x25 are identical), apparently inherited from the generated code
+	x16, x15 := bits.mul_u64(arg1[7], arg2[6])
+	x18, x17 := bits.mul_u64(arg1[7], arg2[5])
+	x20, x19 := bits.mul_u64(arg1[6], arg2[7])
+	x22, x21 := bits.mul_u64(arg1[6], arg2[6])
+	x24, x23 := bits.mul_u64(arg1[5], arg2[7])
+	x26, x25 := bits.mul_u64(arg1[7], arg2[7])
+	x28, x27 := bits.mul_u64(arg1[7], arg2[6])
+	x30, x29 := bits.mul_u64(arg1[7], arg2[5])
+	x32, x31 := bits.mul_u64(arg1[7], arg2[4])
+	x34, x33 := bits.mul_u64(arg1[7], arg2[3])
+	x36, x35 := bits.mul_u64(arg1[7], arg2[2])
+	x38, x37 := bits.mul_u64(arg1[7], arg2[1])
+	x40, x39 := bits.mul_u64(arg1[6], arg2[7])
+	x42, x41 := bits.mul_u64(arg1[6], arg2[6])
+	x44, x43 := bits.mul_u64(arg1[6], arg2[5])
+	x46, x45 := bits.mul_u64(arg1[6], arg2[4])
+	x48, x47 :=
bits.mul_u64(arg1[6], arg2[3]) + x50, x49 := bits.mul_u64(arg1[6], arg2[2]) + x52, x51 := bits.mul_u64(arg1[5], arg2[7]) + x54, x53 := bits.mul_u64(arg1[5], arg2[6]) + x56, x55 := bits.mul_u64(arg1[5], arg2[5]) + x58, x57 := bits.mul_u64(arg1[5], arg2[4]) + x60, x59 := bits.mul_u64(arg1[5], arg2[3]) + x62, x61 := bits.mul_u64(arg1[4], arg2[7]) + x64, x63 := bits.mul_u64(arg1[4], arg2[6]) + x66, x65 := bits.mul_u64(arg1[4], arg2[5]) + x68, x67 := bits.mul_u64(arg1[4], arg2[4]) + x70, x69 := bits.mul_u64(arg1[3], arg2[7]) + x72, x71 := bits.mul_u64(arg1[3], arg2[6]) + x74, x73 := bits.mul_u64(arg1[3], arg2[5]) + x76, x75 := bits.mul_u64(arg1[2], arg2[7]) + x78, x77 := bits.mul_u64(arg1[2], arg2[6]) + x80, x79 := bits.mul_u64(arg1[1], arg2[7]) + x82, x81 := bits.mul_u64(arg1[7], arg2[4]) + x84, x83 := bits.mul_u64(arg1[7], arg2[3]) + x86, x85 := bits.mul_u64(arg1[7], arg2[2]) + x88, x87 := bits.mul_u64(arg1[7], arg2[1]) + x90, x89 := bits.mul_u64(arg1[6], arg2[5]) + x92, x91 := bits.mul_u64(arg1[6], arg2[4]) + x94, x93 := bits.mul_u64(arg1[6], arg2[3]) + x96, x95 := bits.mul_u64(arg1[6], arg2[2]) + x98, x97 := bits.mul_u64(arg1[5], arg2[6]) + x100, x99 := bits.mul_u64(arg1[5], arg2[5]) + x102, x101 := bits.mul_u64(arg1[5], arg2[4]) + x104, x103 := bits.mul_u64(arg1[5], arg2[3]) + x106, x105 := bits.mul_u64(arg1[4], arg2[7]) + x108, x107 := bits.mul_u64(arg1[4], arg2[6]) + x110, x109 := bits.mul_u64(arg1[4], arg2[5]) + x112, x111 := bits.mul_u64(arg1[4], arg2[4]) + x114, x113 := bits.mul_u64(arg1[3], arg2[7]) + x116, x115 := bits.mul_u64(arg1[3], arg2[6]) + x118, x117 := bits.mul_u64(arg1[3], arg2[5]) + x120, x119 := bits.mul_u64(arg1[2], arg2[7]) + x122, x121 := bits.mul_u64(arg1[2], arg2[6]) + x124, x123 := bits.mul_u64(arg1[1], arg2[7]) + x126, x125 := bits.mul_u64(arg1[7], arg2[0]) + x128, x127 := bits.mul_u64(arg1[6], arg2[1]) + x130, x129 := bits.mul_u64(arg1[6], arg2[0]) + x132, x131 := bits.mul_u64(arg1[5], arg2[2]) + x134, x133 := bits.mul_u64(arg1[5], 
arg2[1]) + x136, x135 := bits.mul_u64(arg1[5], arg2[0]) + x138, x137 := bits.mul_u64(arg1[4], arg2[3]) + x140, x139 := bits.mul_u64(arg1[4], arg2[2]) + x142, x141 := bits.mul_u64(arg1[4], arg2[1]) + x144, x143 := bits.mul_u64(arg1[4], arg2[0]) + x146, x145 := bits.mul_u64(arg1[3], arg2[4]) + x148, x147 := bits.mul_u64(arg1[3], arg2[3]) + x150, x149 := bits.mul_u64(arg1[3], arg2[2]) + x152, x151 := bits.mul_u64(arg1[3], arg2[1]) + x154, x153 := bits.mul_u64(arg1[3], arg2[0]) + x156, x155 := bits.mul_u64(arg1[2], arg2[5]) + x158, x157 := bits.mul_u64(arg1[2], arg2[4]) + x160, x159 := bits.mul_u64(arg1[2], arg2[3]) + x162, x161 := bits.mul_u64(arg1[2], arg2[2]) + x164, x163 := bits.mul_u64(arg1[2], arg2[1]) + x166, x165 := bits.mul_u64(arg1[2], arg2[0]) + x168, x167 := bits.mul_u64(arg1[1], arg2[6]) + x170, x169 := bits.mul_u64(arg1[1], arg2[5]) + x172, x171 := bits.mul_u64(arg1[1], arg2[4]) + x174, x173 := bits.mul_u64(arg1[1], arg2[3]) + x176, x175 := bits.mul_u64(arg1[1], arg2[2]) + x178, x177 := bits.mul_u64(arg1[1], arg2[1]) + x180, x179 := bits.mul_u64(arg1[1], arg2[0]) + x182, x181 := bits.mul_u64(arg1[0], arg2[7]) + x184, x183 := bits.mul_u64(arg1[0], arg2[6]) + x186, x185 := bits.mul_u64(arg1[0], arg2[5]) + x188, x187 := bits.mul_u64(arg1[0], arg2[4]) + x190, x189 := bits.mul_u64(arg1[0], arg2[3]) + x192, x191 := bits.mul_u64(arg1[0], arg2[2]) + x194, x193 := bits.mul_u64(arg1[0], arg2[1]) + x196, x195 := bits.mul_u64(arg1[0], arg2[0]) + x197, x198 := bits.add_u64(x43, x31, u64(0x0)) + x199, _ := bits.add_u64(x44, x32, u64(fiat.u1(x198))) + x201, x202 := bits.add_u64(x53, x197, u64(0x0)) + x203, _ := bits.add_u64(x54, x199, u64(fiat.u1(x202))) + x205, x206 := bits.add_u64(x61, x201, u64(0x0)) + x207, _ := bits.add_u64(x62, x203, u64(fiat.u1(x206))) + x209, x210 := bits.add_u64(x153, x205, u64(0x0)) + x211, _ := bits.add_u64(x154, x207, u64(fiat.u1(x210))) + x213, x214 := bits.add_u64(x163, x209, u64(0x0)) + x215, _ := bits.add_u64(x164, x211, 
u64(fiat.u1(x214))) + x217, x218 := bits.add_u64(x175, x213, u64(0x0)) + x219, _ := bits.add_u64(x176, x215, u64(fiat.u1(x218))) + x221, x222 := bits.add_u64(x189, x217, u64(0x0)) + x223, _ := bits.add_u64(x190, x219, u64(fiat.u1(x222))) + x225 := ((x221 >> 56) | ((x223 << 8) & 0xffffffffffffffff)) + x226 := (x221 & 0xffffffffffffff) + x227, x228 := bits.add_u64(x89, x81, u64(0x0)) + x229, _ := bits.add_u64(x90, x82, u64(fiat.u1(x228))) + x231, x232 := bits.add_u64(x97, x227, u64(0x0)) + x233, _ := bits.add_u64(x98, x229, u64(fiat.u1(x232))) + x235, x236 := bits.add_u64(x105, x231, u64(0x0)) + x237, _ := bits.add_u64(x106, x233, u64(fiat.u1(x236))) + x239, x240 := bits.add_u64(x125, x235, u64(0x0)) + x241, _ := bits.add_u64(x126, x237, u64(fiat.u1(x240))) + x243, x244 := bits.add_u64(x127, x239, u64(0x0)) + x245, _ := bits.add_u64(x128, x241, u64(fiat.u1(x244))) + x247, x248 := bits.add_u64(x131, x243, u64(0x0)) + x249, _ := bits.add_u64(x132, x245, u64(fiat.u1(x248))) + x251, x252 := bits.add_u64(x137, x247, u64(0x0)) + x253, _ := bits.add_u64(x138, x249, u64(fiat.u1(x252))) + x255, x256 := bits.add_u64(x145, x251, u64(0x0)) + x257, _ := bits.add_u64(x146, x253, u64(fiat.u1(x256))) + x259, x260 := bits.add_u64(x155, x255, u64(0x0)) + x261, _ := bits.add_u64(x156, x257, u64(fiat.u1(x260))) + x263, x264 := bits.add_u64(x167, x259, u64(0x0)) + x265, _ := bits.add_u64(x168, x261, u64(fiat.u1(x264))) + x267, x268 := bits.add_u64(x181, x263, u64(0x0)) + x269, _ := bits.add_u64(x182, x265, u64(fiat.u1(x268))) + x271, x272 := bits.add_u64(x25, x13, u64(0x0)) + x273, _ := bits.add_u64(x26, x14, u64(fiat.u1(x272))) + x275, x276 := bits.add_u64(x83, x271, u64(0x0)) + x277, _ := bits.add_u64(x84, x273, u64(fiat.u1(x276))) + x279, x280 := bits.add_u64(x91, x275, u64(0x0)) + x281, _ := bits.add_u64(x92, x277, u64(fiat.u1(x280))) + x283, x284 := bits.add_u64(x99, x279, u64(0x0)) + x285, _ := bits.add_u64(x100, x281, u64(fiat.u1(x284))) + x287, x288 := bits.add_u64(x107, x283, 
u64(0x0)) + x289, _ := bits.add_u64(x108, x285, u64(fiat.u1(x288))) + x291, x292 := bits.add_u64(x113, x287, u64(0x0)) + x293, _ := bits.add_u64(x114, x289, u64(fiat.u1(x292))) + x295, x296 := bits.add_u64(x129, x291, u64(0x0)) + x297, _ := bits.add_u64(x130, x293, u64(fiat.u1(x296))) + x299, x300 := bits.add_u64(x133, x295, u64(0x0)) + x301, _ := bits.add_u64(x134, x297, u64(fiat.u1(x300))) + x303, x304 := bits.add_u64(x139, x299, u64(0x0)) + x305, _ := bits.add_u64(x140, x301, u64(fiat.u1(x304))) + x307, x308 := bits.add_u64(x147, x303, u64(0x0)) + x309, _ := bits.add_u64(x148, x305, u64(fiat.u1(x308))) + x311, x312 := bits.add_u64(x157, x307, u64(0x0)) + x313, _ := bits.add_u64(x158, x309, u64(fiat.u1(x312))) + x315, x316 := bits.add_u64(x169, x311, u64(0x0)) + x317, _ := bits.add_u64(x170, x313, u64(fiat.u1(x316))) + x319, x320 := bits.add_u64(x183, x315, u64(0x0)) + x321, _ := bits.add_u64(x184, x317, u64(fiat.u1(x320))) + x323, x324 := bits.add_u64(x19, x15, u64(0x0)) + x325, _ := bits.add_u64(x20, x16, u64(fiat.u1(x324))) + x327, x328 := bits.add_u64(x27, x323, u64(0x0)) + x329, _ := bits.add_u64(x28, x325, u64(fiat.u1(x328))) + x331, x332 := bits.add_u64(x39, x327, u64(0x0)) + x333, _ := bits.add_u64(x40, x329, u64(fiat.u1(x332))) + x335, x336 := bits.add_u64(x85, x331, u64(0x0)) + x337, _ := bits.add_u64(x86, x333, u64(fiat.u1(x336))) + x339, x340 := bits.add_u64(x93, x335, u64(0x0)) + x341, _ := bits.add_u64(x94, x337, u64(fiat.u1(x340))) + x343, x344 := bits.add_u64(x101, x339, u64(0x0)) + x345, _ := bits.add_u64(x102, x341, u64(fiat.u1(x344))) + x347, x348 := bits.add_u64(x109, x343, u64(0x0)) + x349, _ := bits.add_u64(x110, x345, u64(fiat.u1(x348))) + x351, x352 := bits.add_u64(x115, x347, u64(0x0)) + x353, _ := bits.add_u64(x116, x349, u64(fiat.u1(x352))) + x355, x356 := bits.add_u64(x119, x351, u64(0x0)) + x357, _ := bits.add_u64(x120, x353, u64(fiat.u1(x356))) + x359, x360 := bits.add_u64(x135, x355, u64(0x0)) + x361, _ := bits.add_u64(x136, x357, 
u64(fiat.u1(x360))) + x363, x364 := bits.add_u64(x141, x359, u64(0x0)) + x365, _ := bits.add_u64(x142, x361, u64(fiat.u1(x364))) + x367, x368 := bits.add_u64(x149, x363, u64(0x0)) + x369, _ := bits.add_u64(x150, x365, u64(fiat.u1(x368))) + x371, x372 := bits.add_u64(x159, x367, u64(0x0)) + x373, _ := bits.add_u64(x160, x369, u64(fiat.u1(x372))) + x375, x376 := bits.add_u64(x171, x371, u64(0x0)) + x377, _ := bits.add_u64(x172, x373, u64(fiat.u1(x376))) + x379, x380 := bits.add_u64(x185, x375, u64(0x0)) + x381, _ := bits.add_u64(x186, x377, u64(fiat.u1(x380))) + x383, x384 := bits.add_u64(x21, x17, u64(0x0)) + x385, _ := bits.add_u64(x22, x18, u64(fiat.u1(x384))) + x387, x388 := bits.add_u64(x23, x383, u64(0x0)) + x389, _ := bits.add_u64(x24, x385, u64(fiat.u1(x388))) + x391, x392 := bits.add_u64(x29, x387, u64(0x0)) + x393, _ := bits.add_u64(x30, x389, u64(fiat.u1(x392))) + x395, x396 := bits.add_u64(x41, x391, u64(0x0)) + x397, _ := bits.add_u64(x42, x393, u64(fiat.u1(x396))) + x399, x400 := bits.add_u64(x51, x395, u64(0x0)) + x401, _ := bits.add_u64(x52, x397, u64(fiat.u1(x400))) + x403, x404 := bits.add_u64(x87, x399, u64(0x0)) + x405, _ := bits.add_u64(x88, x401, u64(fiat.u1(x404))) + x407, x408 := bits.add_u64(x95, x403, u64(0x0)) + x409, _ := bits.add_u64(x96, x405, u64(fiat.u1(x408))) + x411, x412 := bits.add_u64(x103, x407, u64(0x0)) + x413, _ := bits.add_u64(x104, x409, u64(fiat.u1(x412))) + x415, x416 := bits.add_u64(x111, x411, u64(0x0)) + x417, _ := bits.add_u64(x112, x413, u64(fiat.u1(x416))) + x419, x420 := bits.add_u64(x117, x415, u64(0x0)) + x421, _ := bits.add_u64(x118, x417, u64(fiat.u1(x420))) + x423, x424 := bits.add_u64(x121, x419, u64(0x0)) + x425, _ := bits.add_u64(x122, x421, u64(fiat.u1(x424))) + x427, x428 := bits.add_u64(x123, x423, u64(0x0)) + x429, _ := bits.add_u64(x124, x425, u64(fiat.u1(x428))) + x431, x432 := bits.add_u64(x143, x427, u64(0x0)) + x433, _ := bits.add_u64(x144, x429, u64(fiat.u1(x432))) + x435, x436 := 
bits.add_u64(x151, x431, u64(0x0)) + x437, _ := bits.add_u64(x152, x433, u64(fiat.u1(x436))) + x439, x440 := bits.add_u64(x161, x435, u64(0x0)) + x441, _ := bits.add_u64(x162, x437, u64(fiat.u1(x440))) + x443, x444 := bits.add_u64(x173, x439, u64(0x0)) + x445, _ := bits.add_u64(x174, x441, u64(fiat.u1(x444))) + x447, x448 := bits.add_u64(x187, x443, u64(0x0)) + x449, _ := bits.add_u64(x188, x445, u64(fiat.u1(x448))) + x451, x452 := bits.add_u64(x33, x1, u64(0x0)) + x453, _ := bits.add_u64(x34, x2, u64(fiat.u1(x452))) + x455, x456 := bits.add_u64(x45, x451, u64(0x0)) + x457, _ := bits.add_u64(x46, x453, u64(fiat.u1(x456))) + x459, x460 := bits.add_u64(x55, x455, u64(0x0)) + x461, _ := bits.add_u64(x56, x457, u64(fiat.u1(x460))) + x463, x464 := bits.add_u64(x63, x459, u64(0x0)) + x465, _ := bits.add_u64(x64, x461, u64(fiat.u1(x464))) + x467, x468 := bits.add_u64(x69, x463, u64(0x0)) + x469, _ := bits.add_u64(x70, x465, u64(fiat.u1(x468))) + x471, x472 := bits.add_u64(x165, x467, u64(0x0)) + x473, _ := bits.add_u64(x166, x469, u64(fiat.u1(x472))) + x475, x476 := bits.add_u64(x177, x471, u64(0x0)) + x477, _ := bits.add_u64(x178, x473, u64(fiat.u1(x476))) + x479, x480 := bits.add_u64(x191, x475, u64(0x0)) + x481, _ := bits.add_u64(x192, x477, u64(fiat.u1(x480))) + x483, x484 := bits.add_u64(x7, x3, u64(0x0)) + x485, _ := bits.add_u64(x8, x4, u64(fiat.u1(x484))) + x487, x488 := bits.add_u64(x35, x483, u64(0x0)) + x489, _ := bits.add_u64(x36, x485, u64(fiat.u1(x488))) + x491, x492 := bits.add_u64(x47, x487, u64(0x0)) + x493, _ := bits.add_u64(x48, x489, u64(fiat.u1(x492))) + x495, x496 := bits.add_u64(x57, x491, u64(0x0)) + x497, _ := bits.add_u64(x58, x493, u64(fiat.u1(x496))) + x499, x500 := bits.add_u64(x65, x495, u64(0x0)) + x501, _ := bits.add_u64(x66, x497, u64(fiat.u1(x500))) + x503, x504 := bits.add_u64(x71, x499, u64(0x0)) + x505, _ := bits.add_u64(x72, x501, u64(fiat.u1(x504))) + x507, x508 := bits.add_u64(x75, x503, u64(0x0)) + x509, _ := bits.add_u64(x76, 
x505, u64(fiat.u1(x508))) + x511, x512 := bits.add_u64(x179, x507, u64(0x0)) + x513, _ := bits.add_u64(x180, x509, u64(fiat.u1(x512))) + x515, x516 := bits.add_u64(x193, x511, u64(0x0)) + x517, _ := bits.add_u64(x194, x513, u64(fiat.u1(x516))) + x519, x520 := bits.add_u64(x9, x5, u64(0x0)) + x521, _ := bits.add_u64(x10, x6, u64(fiat.u1(x520))) + x523, x524 := bits.add_u64(x11, x519, u64(0x0)) + x525, _ := bits.add_u64(x12, x521, u64(fiat.u1(x524))) + x527, x528 := bits.add_u64(x37, x523, u64(0x0)) + x529, _ := bits.add_u64(x38, x525, u64(fiat.u1(x528))) + x531, x532 := bits.add_u64(x49, x527, u64(0x0)) + x533, _ := bits.add_u64(x50, x529, u64(fiat.u1(x532))) + x535, x536 := bits.add_u64(x59, x531, u64(0x0)) + x537, _ := bits.add_u64(x60, x533, u64(fiat.u1(x536))) + x539, x540 := bits.add_u64(x67, x535, u64(0x0)) + x541, _ := bits.add_u64(x68, x537, u64(fiat.u1(x540))) + x543, x544 := bits.add_u64(x73, x539, u64(0x0)) + x545, _ := bits.add_u64(x74, x541, u64(fiat.u1(x544))) + x547, x548 := bits.add_u64(x77, x543, u64(0x0)) + x549, _ := bits.add_u64(x78, x545, u64(fiat.u1(x548))) + x551, x552 := bits.add_u64(x79, x547, u64(0x0)) + x553, _ := bits.add_u64(x80, x549, u64(fiat.u1(x552))) + x555, x556 := bits.add_u64(x195, x551, u64(0x0)) + x557, _ := bits.add_u64(x196, x553, u64(fiat.u1(x556))) + x559, x560 := bits.add_u64(x225, x447, u64(0x0)) + x561 := (u64(fiat.u1(x560)) + x449) + x562 := ((x267 >> 56) | ((x269 << 8) & 0xffffffffffffffff)) + x563 := (x267 & 0xffffffffffffff) + x564, x565 := bits.add_u64(x559, x562, u64(0x0)) + x566 := (u64(fiat.u1(x565)) + x561) + x567 := ((x564 >> 56) | ((x566 << 8) & 0xffffffffffffffff)) + x568 := (x564 & 0xffffffffffffff) + x569, x570 := bits.add_u64(x555, x562, u64(0x0)) + x571 := (u64(fiat.u1(x570)) + x557) + x572, x573 := bits.add_u64(x567, x379, u64(0x0)) + x574 := (u64(fiat.u1(x573)) + x381) + x575 := ((x569 >> 56) | ((x571 << 8) & 0xffffffffffffffff)) + x576 := (x569 & 0xffffffffffffff) + x577, x578 := bits.add_u64(x575, 
x515, u64(0x0))
+	x579 := (u64(fiat.u1(x578)) + x517)
+	x580 := ((x572 >> 56) | ((x574 << 8) & 0xffffffffffffffff)) // the 128-bit accumulator (x574:x572) shifted right by 56 bits, i.e. the part carried into the next limb
+	x581 := (x572 & 0xffffffffffffff) // low 56 bits kept for this limb
+	x582, x583 := bits.add_u64(x580, x319, u64(0x0))
+	x584 := (u64(fiat.u1(x583)) + x321)
+	x585 := ((x577 >> 56) | ((x579 << 8) & 0xffffffffffffffff))
+	x586 := (x577 & 0xffffffffffffff)
+	x587, x588 := bits.add_u64(x585, x479, u64(0x0))
+	x589 := (u64(fiat.u1(x588)) + x481)
+	x590 := ((x582 >> 56) | ((x584 << 8) & 0xffffffffffffffff))
+	x591 := (x582 & 0xffffffffffffff)
+	x592 := (x590 + x563)
+	x593 := ((x587 >> 56) | ((x589 << 8) & 0xffffffffffffffff))
+	x594 := (x587 & 0xffffffffffffff)
+	x595 := (x593 + x226)
+	x596 := (x592 >> 56) // carry out of limb 7 (out1[7] = x597); it is added to limb 4 (x600) and limb 0 (x601) below
+	x597 := (x592 & 0xffffffffffffff)
+	x598 := (x595 >> 56) // carry out of limb 3 (out1[3] = x599); it is added to limb 4 (x602) below
+	x599 := (x595 & 0xffffffffffffff)
+	x600 := (x568 + x596)
+	x601 := (x576 + x596)
+	x602 := (x598 + x600)
+	x603 := fiat.u1((x602 >> 56))
+	x604 := (x602 & 0xffffffffffffff)
+	x605 := (u64(x603) + x581) // limb 5 absorbs the carry out of limb 4 without being masked again
+	x606 := fiat.u1((x601 >> 56))
+	x607 := (x601 & 0xffffffffffffff)
+	x608 := (u64(x606) + x586) // limb 1 absorbs the carry out of limb 0 without being masked again
+	out1[0] = x607
+	out1[1] = x608
+	out1[2] = x594
+	out1[3] = x599
+	out1[4] = x604
+	out1[5] = x605
+	out1[6] = x591
+	out1[7] = x597
+}
+// fe_carry_square computes out1 = arg1 * arg1 in the field named in the file header; it takes a loose input and stores a carried (tight) result.
+fe_carry_square :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+	x1 := arg1[7]
+	x2 := arg1[7]
+	x3 := (x1 * 0x2) // doubled limbs: a cross product arg1[i]*arg1[j] with i != j is formed once against the doubled limb
+	x4 := (x2 * 0x2)
+	x5 := (arg1[7] * 0x2)
+	x6 := arg1[6]
+	x7 := arg1[6]
+	x8 := (x6 * 0x2)
+	x9 := (x7 * 0x2)
+	x10 := (arg1[6] * 0x2)
+	x11 := arg1[5]
+	x12 := arg1[5]
+	x13 := (x11 * 0x2)
+	x14 := (x12 * 0x2)
+	x15 := (arg1[5] * 0x2)
+	x16 := arg1[4]
+	x17 := arg1[4]
+	x18 := (arg1[4] * 0x2)
+	x19 := (arg1[3] * 0x2)
+	x20 := (arg1[2] * 0x2)
+	x21 := (arg1[1] * 0x2)
+	x23, x22 := bits.mul_u64(arg1[7], x1) // bits.mul_u64 returns (hi, lo); here the odd-numbered name is the high word and the even-numbered one the low word
+	x25, x24 := bits.mul_u64(arg1[6], x3)
+	x27, x26 := bits.mul_u64(arg1[6], x6)
+	x29, x28 := bits.mul_u64(arg1[5], x3)
+	x31, x30 := bits.mul_u64(arg1[7], x1)
+	x33, x32 := bits.mul_u64(arg1[6], x3)
+	x35, x34 := bits.mul_u64(arg1[6], x6)
+ x37, x36 := bits.mul_u64(arg1[5], x3) + x39, x38 := bits.mul_u64(arg1[7], x2) + x41, x40 := bits.mul_u64(arg1[6], x4) + x43, x42 := bits.mul_u64(arg1[6], x7) + x45, x44 := bits.mul_u64(arg1[5], x4) + x47, x46 := bits.mul_u64(arg1[5], x9) + x49, x48 := bits.mul_u64(arg1[5], x8) + x51, x50 := bits.mul_u64(arg1[5], x12) + x53, x52 := bits.mul_u64(arg1[5], x11) + x55, x54 := bits.mul_u64(arg1[4], x4) + x57, x56 := bits.mul_u64(arg1[4], x3) + x59, x58 := bits.mul_u64(arg1[4], x9) + x61, x60 := bits.mul_u64(arg1[4], x8) + x63, x62 := bits.mul_u64(arg1[4], x14) + x65, x64 := bits.mul_u64(arg1[4], x13) + x67, x66 := bits.mul_u64(arg1[4], x17) + x69, x68 := bits.mul_u64(arg1[4], x16) + x71, x70 := bits.mul_u64(arg1[3], x4) + x73, x72 := bits.mul_u64(arg1[3], x3) + x75, x74 := bits.mul_u64(arg1[3], x9) + x77, x76 := bits.mul_u64(arg1[3], x8) + x79, x78 := bits.mul_u64(arg1[3], x14) + x81, x80 := bits.mul_u64(arg1[3], x13) + x83, x82 := bits.mul_u64(arg1[3], x18) + x85, x84 := bits.mul_u64(arg1[3], arg1[3]) + x87, x86 := bits.mul_u64(arg1[2], x4) + x89, x88 := bits.mul_u64(arg1[2], x3) + x91, x90 := bits.mul_u64(arg1[2], x9) + x93, x92 := bits.mul_u64(arg1[2], x8) + x95, x94 := bits.mul_u64(arg1[2], x15) + x97, x96 := bits.mul_u64(arg1[2], x18) + x99, x98 := bits.mul_u64(arg1[2], x19) + x101, x100 := bits.mul_u64(arg1[2], arg1[2]) + x103, x102 := bits.mul_u64(arg1[1], x4) + x105, x104 := bits.mul_u64(arg1[1], x3) + x107, x106 := bits.mul_u64(arg1[1], x10) + x109, x108 := bits.mul_u64(arg1[1], x15) + x111, x110 := bits.mul_u64(arg1[1], x18) + x113, x112 := bits.mul_u64(arg1[1], x19) + x115, x114 := bits.mul_u64(arg1[1], x20) + x117, x116 := bits.mul_u64(arg1[1], arg1[1]) + x119, x118 := bits.mul_u64(arg1[0], x5) + x121, x120 := bits.mul_u64(arg1[0], x10) + x123, x122 := bits.mul_u64(arg1[0], x15) + x125, x124 := bits.mul_u64(arg1[0], x18) + x127, x126 := bits.mul_u64(arg1[0], x19) + x129, x128 := bits.mul_u64(arg1[0], x20) + x131, x130 := bits.mul_u64(arg1[0], x21) + x133, 
x132 := bits.mul_u64(arg1[0], arg1[0]) + x134, x135 := bits.add_u64(x54, x46, u64(0x0)) + x136, _ := bits.add_u64(x55, x47, u64(fiat.u1(x135))) + x138, x139 := bits.add_u64(x114, x134, u64(0x0)) + x140, _ := bits.add_u64(x115, x136, u64(fiat.u1(x139))) + x142, x143 := bits.add_u64(x126, x138, u64(0x0)) + x144, _ := bits.add_u64(x127, x140, u64(fiat.u1(x143))) + x146 := ((x142 >> 56) | ((x144 << 8) & 0xffffffffffffffff)) + x147 := (x142 & 0xffffffffffffff) + x148, x149 := bits.add_u64(x56, x48, u64(0x0)) + x150, _ := bits.add_u64(x57, x49, u64(fiat.u1(x149))) + x152, x153 := bits.add_u64(x82, x148, u64(0x0)) + x154, _ := bits.add_u64(x83, x150, u64(fiat.u1(x153))) + x156, x157 := bits.add_u64(x94, x152, u64(0x0)) + x158, _ := bits.add_u64(x95, x154, u64(fiat.u1(x157))) + x160, x161 := bits.add_u64(x106, x156, u64(0x0)) + x162, _ := bits.add_u64(x107, x158, u64(fiat.u1(x161))) + x164, x165 := bits.add_u64(x118, x160, u64(0x0)) + x166, _ := bits.add_u64(x119, x162, u64(fiat.u1(x165))) + x168, x169 := bits.add_u64(x38, x30, u64(0x0)) + x170, _ := bits.add_u64(x39, x31, u64(fiat.u1(x169))) + x172, x173 := bits.add_u64(x52, x168, u64(0x0)) + x174, _ := bits.add_u64(x53, x170, u64(fiat.u1(x173))) + x176, x177 := bits.add_u64(x60, x172, u64(0x0)) + x178, _ := bits.add_u64(x61, x174, u64(fiat.u1(x177))) + x180, x181 := bits.add_u64(x72, x176, u64(0x0)) + x182, _ := bits.add_u64(x73, x178, u64(fiat.u1(x181))) + x184, x185 := bits.add_u64(x84, x180, u64(0x0)) + x186, _ := bits.add_u64(x85, x182, u64(fiat.u1(x185))) + x188, x189 := bits.add_u64(x96, x184, u64(0x0)) + x190, _ := bits.add_u64(x97, x186, u64(fiat.u1(x189))) + x192, x193 := bits.add_u64(x108, x188, u64(0x0)) + x194, _ := bits.add_u64(x109, x190, u64(fiat.u1(x193))) + x196, x197 := bits.add_u64(x120, x192, u64(0x0)) + x198, _ := bits.add_u64(x121, x194, u64(fiat.u1(x197))) + x200, x201 := bits.add_u64(x40, x32, u64(0x0)) + x202, _ := bits.add_u64(x41, x33, u64(fiat.u1(x201))) + x204, x205 := bits.add_u64(x64, x200, 
u64(0x0)) + x206, _ := bits.add_u64(x65, x202, u64(fiat.u1(x205))) + x208, x209 := bits.add_u64(x76, x204, u64(0x0)) + x210, _ := bits.add_u64(x77, x206, u64(fiat.u1(x209))) + x212, x213 := bits.add_u64(x88, x208, u64(0x0)) + x214, _ := bits.add_u64(x89, x210, u64(fiat.u1(x213))) + x216, x217 := bits.add_u64(x98, x212, u64(0x0)) + x218, _ := bits.add_u64(x99, x214, u64(fiat.u1(x217))) + x220, x221 := bits.add_u64(x110, x216, u64(0x0)) + x222, _ := bits.add_u64(x111, x218, u64(fiat.u1(x221))) + x224, x225 := bits.add_u64(x122, x220, u64(0x0)) + x226, _ := bits.add_u64(x123, x222, u64(fiat.u1(x225))) + x228, x229 := bits.add_u64(x36, x34, u64(0x0)) + x230, _ := bits.add_u64(x37, x35, u64(fiat.u1(x229))) + x232, x233 := bits.add_u64(x42, x228, u64(0x0)) + x234, _ := bits.add_u64(x43, x230, u64(fiat.u1(x233))) + x236, x237 := bits.add_u64(x44, x232, u64(0x0)) + x238, _ := bits.add_u64(x45, x234, u64(fiat.u1(x237))) + x240, x241 := bits.add_u64(x68, x236, u64(0x0)) + x242, _ := bits.add_u64(x69, x238, u64(fiat.u1(x241))) + x244, x245 := bits.add_u64(x80, x240, u64(0x0)) + x246, _ := bits.add_u64(x81, x242, u64(fiat.u1(x245))) + x248, x249 := bits.add_u64(x92, x244, u64(0x0)) + x250, _ := bits.add_u64(x93, x246, u64(fiat.u1(x249))) + x252, x253 := bits.add_u64(x100, x248, u64(0x0)) + x254, _ := bits.add_u64(x101, x250, u64(fiat.u1(x253))) + x256, x257 := bits.add_u64(x104, x252, u64(0x0)) + x258, _ := bits.add_u64(x105, x254, u64(fiat.u1(x257))) + x260, x261 := bits.add_u64(x112, x256, u64(0x0)) + x262, _ := bits.add_u64(x113, x258, u64(fiat.u1(x261))) + x264, x265 := bits.add_u64(x124, x260, u64(0x0)) + x266, _ := bits.add_u64(x125, x262, u64(fiat.u1(x265))) + x268, x269 := bits.add_u64(x50, x22, u64(0x0)) + x270, _ := bits.add_u64(x51, x23, u64(fiat.u1(x269))) + x272, x273 := bits.add_u64(x58, x268, u64(0x0)) + x274, _ := bits.add_u64(x59, x270, u64(fiat.u1(x273))) + x276, x277 := bits.add_u64(x70, x272, u64(0x0)) + x278, _ := bits.add_u64(x71, x274, 
u64(fiat.u1(x277))) + x280, x281 := bits.add_u64(x116, x276, u64(0x0)) + x282, _ := bits.add_u64(x117, x278, u64(fiat.u1(x281))) + x284, x285 := bits.add_u64(x128, x280, u64(0x0)) + x286, _ := bits.add_u64(x129, x282, u64(fiat.u1(x285))) + x288, x289 := bits.add_u64(x62, x24, u64(0x0)) + x290, _ := bits.add_u64(x63, x25, u64(fiat.u1(x289))) + x292, x293 := bits.add_u64(x74, x288, u64(0x0)) + x294, _ := bits.add_u64(x75, x290, u64(fiat.u1(x293))) + x296, x297 := bits.add_u64(x86, x292, u64(0x0)) + x298, _ := bits.add_u64(x87, x294, u64(fiat.u1(x297))) + x300, x301 := bits.add_u64(x130, x296, u64(0x0)) + x302, _ := bits.add_u64(x131, x298, u64(fiat.u1(x301))) + x304, x305 := bits.add_u64(x28, x26, u64(0x0)) + x306, _ := bits.add_u64(x29, x27, u64(fiat.u1(x305))) + x308, x309 := bits.add_u64(x66, x304, u64(0x0)) + x310, _ := bits.add_u64(x67, x306, u64(fiat.u1(x309))) + x312, x313 := bits.add_u64(x78, x308, u64(0x0)) + x314, _ := bits.add_u64(x79, x310, u64(fiat.u1(x313))) + x316, x317 := bits.add_u64(x90, x312, u64(0x0)) + x318, _ := bits.add_u64(x91, x314, u64(fiat.u1(x317))) + x320, x321 := bits.add_u64(x102, x316, u64(0x0)) + x322, _ := bits.add_u64(x103, x318, u64(fiat.u1(x321))) + x324, x325 := bits.add_u64(x132, x320, u64(0x0)) + x326, _ := bits.add_u64(x133, x322, u64(fiat.u1(x325))) + x328, x329 := bits.add_u64(x146, x264, u64(0x0)) + x330 := (u64(fiat.u1(x329)) + x266) + x331 := ((x164 >> 56) | ((x166 << 8) & 0xffffffffffffffff)) + x332 := (x164 & 0xffffffffffffff) + x333, x334 := bits.add_u64(x328, x331, u64(0x0)) + x335 := (u64(fiat.u1(x334)) + x330) + x336 := ((x333 >> 56) | ((x335 << 8) & 0xffffffffffffffff)) + x337 := (x333 & 0xffffffffffffff) + x338, x339 := bits.add_u64(x324, x331, u64(0x0)) + x340 := (u64(fiat.u1(x339)) + x326) + x341, x342 := bits.add_u64(x336, x224, u64(0x0)) + x343 := (u64(fiat.u1(x342)) + x226) + x344 := ((x338 >> 56) | ((x340 << 8) & 0xffffffffffffffff)) + x345 := (x338 & 0xffffffffffffff) + x346, x347 := bits.add_u64(x344, 
x300, u64(0x0))
+	x348 := (u64(fiat.u1(x347)) + x302)
+	x349 := ((x341 >> 56) | ((x343 << 8) & 0xffffffffffffffff)) // the 128-bit accumulator (x343:x341) shifted right by 56 bits, i.e. the part carried into the next limb
+	x350 := (x341 & 0xffffffffffffff) // low 56 bits kept for this limb
+	x351, x352 := bits.add_u64(x349, x196, u64(0x0))
+	x353 := (u64(fiat.u1(x352)) + x198)
+	x354 := ((x346 >> 56) | ((x348 << 8) & 0xffffffffffffffff))
+	x355 := (x346 & 0xffffffffffffff)
+	x356, x357 := bits.add_u64(x354, x284, u64(0x0))
+	x358 := (u64(fiat.u1(x357)) + x286)
+	x359 := ((x351 >> 56) | ((x353 << 8) & 0xffffffffffffffff))
+	x360 := (x351 & 0xffffffffffffff)
+	x361 := (x359 + x332)
+	x362 := ((x356 >> 56) | ((x358 << 8) & 0xffffffffffffffff))
+	x363 := (x356 & 0xffffffffffffff)
+	x364 := (x362 + x147)
+	x365 := (x361 >> 56) // carry out of limb 7 (out1[7] = x366); it is added to limb 4 (x369) and limb 0 (x370) below
+	x366 := (x361 & 0xffffffffffffff)
+	x367 := (x364 >> 56) // carry out of limb 3 (out1[3] = x368); it is added to limb 4 (x371) below
+	x368 := (x364 & 0xffffffffffffff)
+	x369 := (x337 + x365)
+	x370 := (x345 + x365)
+	x371 := (x367 + x369)
+	x372 := fiat.u1((x371 >> 56))
+	x373 := (x371 & 0xffffffffffffff)
+	x374 := (u64(x372) + x350) // limb 5 absorbs the carry out of limb 4 without being masked again
+	x375 := fiat.u1((x370 >> 56))
+	x376 := (x370 & 0xffffffffffffff)
+	x377 := (u64(x375) + x355) // limb 1 absorbs the carry out of limb 0 without being masked again
+	out1[0] = x376
+	out1[1] = x377
+	out1[2] = x363
+	out1[3] = x368
+	out1[4] = x373
+	out1[5] = x374
+	out1[6] = x360
+	out1[7] = x366
+}
+// fe_carry propagates carries between the 56-bit limbs of the loose element arg1 and stores the result in out1 as a tight element.
+fe_carry :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+	x1 := arg1[3]
+	x2 := arg1[7]
+	x3 := (x2 >> 56) // carry out of the top limb, worth 2^448; since 2^448 = 2^224 + 1 modulo the prime in the file header it is added to limb 4 (x4) and limb 0 (x5)
+	x4 := (((x1 >> 56) + arg1[4]) + x3) // limb 4 also receives the carry out of limb 3
+	x5 := (arg1[0] + x3)
+	x6 := ((x4 >> 56) + arg1[5]) // two carry chains run side by side: limbs 4 -> 5 -> 6 -> 7 and limbs 0 -> 1 -> 2 -> 3
+	x7 := ((x5 >> 56) + arg1[1])
+	x8 := ((x6 >> 56) + arg1[6])
+	x9 := ((x7 >> 56) + arg1[2])
+	x10 := ((x8 >> 56) + (x2 & 0xffffffffffffff))
+	x11 := ((x9 >> 56) + (x1 & 0xffffffffffffff))
+	x12 := fiat.u1((x10 >> 56)) // second carry out of limb 7; folded into limb 0 (x13) and limb 4 (x14) like x3
+	x13 := ((x5 & 0xffffffffffffff) + u64(x12))
+	x14 := (u64(fiat.u1((x11 >> 56))) + ((x4 & 0xffffffffffffff) + u64(x12)))
+	x15 := (x13 & 0xffffffffffffff)
+	x16 := (u64(fiat.u1((x13 >> 56))) + (x7 & 0xffffffffffffff)) // limb 1 absorbs the last carry out of limb 0 without being masked again
+	x17 := (x9 & 0xffffffffffffff)
+	x18 := (x11 & 0xffffffffffffff)
+	x19 := (x14 & 0xffffffffffffff)
+	x20 := (u64(fiat.u1((x14 >>
56))) + (x6 & 0xffffffffffffff))
+	x21 := (x8 & 0xffffffffffffff)
+	x22 := (x10 & 0xffffffffffffff)
+	out1[0] = x15
+	out1[1] = x16
+	out1[2] = x17
+	out1[3] = x18
+	out1[4] = x19
+	out1[5] = x20
+	out1[6] = x21
+	out1[7] = x22
+}
+// fe_add stores the limb-wise sum arg1 + arg2 in out1; no carries are propagated, so the result is a loose element.
+fe_add :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
+	x1 := (arg1[0] + arg2[0])
+	x2 := (arg1[1] + arg2[1])
+	x3 := (arg1[2] + arg2[2])
+	x4 := (arg1[3] + arg2[3])
+	x5 := (arg1[4] + arg2[4])
+	x6 := (arg1[5] + arg2[5])
+	x7 := (arg1[6] + arg2[6])
+	x8 := (arg1[7] + arg2[7])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+	out1[5] = x6
+	out1[6] = x7
+	out1[7] = x8
+}
+// fe_sub stores (2*p + arg1) - arg2 limb by limb in out1, where p = 2^448 - 2^224 - 1; no carries are propagated, so the result is a loose element.
+fe_sub :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
+	x1 := ((0x1fffffffffffffe + arg1[0]) - arg2[0]) // 0x1fffffffffffffe = 2*(2^56 - 1), a limb of 2*p; the bias keeps the limb from underflowing, assuming arg2 is within tight bounds -- TODO confirm bounds
+	x2 := ((0x1fffffffffffffe + arg1[1]) - arg2[1])
+	x3 := ((0x1fffffffffffffe + arg1[2]) - arg2[2])
+	x4 := ((0x1fffffffffffffe + arg1[3]) - arg2[3])
+	x5 := ((0x1fffffffffffffc + arg1[4]) - arg2[4]) // limb 4 of 2*p is 2*(2^56 - 2) because of the - 2^224 term of p (224 = 4 * 56)
+	x6 := ((0x1fffffffffffffe + arg1[5]) - arg2[5])
+	x7 := ((0x1fffffffffffffe + arg1[6]) - arg2[6])
+	x8 := ((0x1fffffffffffffe + arg1[7]) - arg2[7])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+	out1[5] = x6
+	out1[6] = x7
+	out1[7] = x8
+}
+// fe_opp stores 2*p - arg1 limb by limb in out1 (the negation of arg1 modulo p = 2^448 - 2^224 - 1); the result is a loose element.
+fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
+	x1 := (0x1fffffffffffffe - arg1[0]) // same limbs of 2*p as in fe_sub
+	x2 := (0x1fffffffffffffe - arg1[1])
+	x3 := (0x1fffffffffffffe - arg1[2])
+	x4 := (0x1fffffffffffffe - arg1[3])
+	x5 := (0x1fffffffffffffc - arg1[4])
+	x6 := (0x1fffffffffffffe - arg1[5])
+	x7 := (0x1fffffffffffffe - arg1[6])
+	x8 := (0x1fffffffffffffe - arg1[7])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+	out1[5] = x6
+	out1[6] = x7
+	out1[7] = x8
+}
+// fe_cond_assign passes every limb pair (out1[i], arg1[i]) through fiat.cmovznz_u64 with condition arg2 and stores the result in out1; presumably out1 = arg1 when arg2 is 1 and out1 is unchanged when arg2 is 0 -- cmovznz_u64 is defined elsewhere, confirm.
+@(optimization_mode = "none") // NOTE(review): presumably (with #force_no_inline) keeps the optimizer from turning the selection into a branch -- confirm intent
+fe_cond_assign :: #force_no_inline proc "contextless" (
+	out1, arg1: ^Tight_Field_Element,
+	arg2: int, // converted to fiat.u1 below; assumes callers pass 0 or 1 -- TODO confirm
+) {
+	x1 := fiat.cmovznz_u64(fiat.u1(arg2),
out1[0], arg1[0]) + x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1]) + x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2]) + x4 := fiat.cmovznz_u64(fiat.u1(arg2), out1[3], arg1[3]) + x5 := fiat.cmovznz_u64(fiat.u1(arg2), out1[4], arg1[4]) + x6 := fiat.cmovznz_u64(fiat.u1(arg2), out1[5], arg1[5]) + x7 := fiat.cmovznz_u64(fiat.u1(arg2), out1[6], arg1[6]) + x8 := fiat.cmovznz_u64(fiat.u1(arg2), out1[7], arg1[7]) + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 + out1[4] = x5 + out1[5] = x6 + out1[6] = x7 + out1[7] = x8 +} + +fe_to_bytes :: proc "contextless" (out1: ^[56]byte, arg1: ^Tight_Field_Element) { + x1, x2 := _subborrowx_u56(0x0, arg1[0], 0xffffffffffffff) + x3, x4 := _subborrowx_u56(x2, arg1[1], 0xffffffffffffff) + x5, x6 := _subborrowx_u56(x4, arg1[2], 0xffffffffffffff) + x7, x8 := _subborrowx_u56(x6, arg1[3], 0xffffffffffffff) + x9, x10 := _subborrowx_u56(x8, arg1[4], 0xfffffffffffffe) + x11, x12 := _subborrowx_u56(x10, arg1[5], 0xffffffffffffff) + x13, x14 := _subborrowx_u56(x12, arg1[6], 0xffffffffffffff) + x15, x16 := _subborrowx_u56(x14, arg1[7], 0xffffffffffffff) + x17 := fiat.cmovznz_u64(x16, u64(0x0), 0xffffffffffffffff) + x18, x19 := _addcarryx_u56(0x0, x1, (x17 & 0xffffffffffffff)) + x20, x21 := _addcarryx_u56(x19, x3, (x17 & 0xffffffffffffff)) + x22, x23 := _addcarryx_u56(x21, x5, (x17 & 0xffffffffffffff)) + x24, x25 := _addcarryx_u56(x23, x7, (x17 & 0xffffffffffffff)) + x26, x27 := _addcarryx_u56(x25, x9, (x17 & 0xfffffffffffffe)) + x28, x29 := _addcarryx_u56(x27, x11, (x17 & 0xffffffffffffff)) + x30, x31 := _addcarryx_u56(x29, x13, (x17 & 0xffffffffffffff)) + x32, _ := _addcarryx_u56(x31, x15, (x17 & 0xffffffffffffff)) + x34 := (u8(x18) & 0xff) + x35 := (x18 >> 8) + x36 := (u8(x35) & 0xff) + x37 := (x35 >> 8) + x38 := (u8(x37) & 0xff) + x39 := (x37 >> 8) + x40 := (u8(x39) & 0xff) + x41 := (x39 >> 8) + x42 := (u8(x41) & 0xff) + x43 := (x41 >> 8) + x44 := (u8(x43) & 0xff) + x45 := u8((x43 >> 8)) + x46 := (u8(x20) & 0xff) + 
x47 := (x20 >> 8) + x48 := (u8(x47) & 0xff) + x49 := (x47 >> 8) + x50 := (u8(x49) & 0xff) + x51 := (x49 >> 8) + x52 := (u8(x51) & 0xff) + x53 := (x51 >> 8) + x54 := (u8(x53) & 0xff) + x55 := (x53 >> 8) + x56 := (u8(x55) & 0xff) + x57 := u8((x55 >> 8)) + x58 := (u8(x22) & 0xff) + x59 := (x22 >> 8) + x60 := (u8(x59) & 0xff) + x61 := (x59 >> 8) + x62 := (u8(x61) & 0xff) + x63 := (x61 >> 8) + x64 := (u8(x63) & 0xff) + x65 := (x63 >> 8) + x66 := (u8(x65) & 0xff) + x67 := (x65 >> 8) + x68 := (u8(x67) & 0xff) + x69 := u8((x67 >> 8)) + x70 := (u8(x24) & 0xff) + x71 := (x24 >> 8) + x72 := (u8(x71) & 0xff) + x73 := (x71 >> 8) + x74 := (u8(x73) & 0xff) + x75 := (x73 >> 8) + x76 := (u8(x75) & 0xff) + x77 := (x75 >> 8) + x78 := (u8(x77) & 0xff) + x79 := (x77 >> 8) + x80 := (u8(x79) & 0xff) + x81 := u8((x79 >> 8)) + x82 := (u8(x26) & 0xff) + x83 := (x26 >> 8) + x84 := (u8(x83) & 0xff) + x85 := (x83 >> 8) + x86 := (u8(x85) & 0xff) + x87 := (x85 >> 8) + x88 := (u8(x87) & 0xff) + x89 := (x87 >> 8) + x90 := (u8(x89) & 0xff) + x91 := (x89 >> 8) + x92 := (u8(x91) & 0xff) + x93 := u8((x91 >> 8)) + x94 := (u8(x28) & 0xff) + x95 := (x28 >> 8) + x96 := (u8(x95) & 0xff) + x97 := (x95 >> 8) + x98 := (u8(x97) & 0xff) + x99 := (x97 >> 8) + x100 := (u8(x99) & 0xff) + x101 := (x99 >> 8) + x102 := (u8(x101) & 0xff) + x103 := (x101 >> 8) + x104 := (u8(x103) & 0xff) + x105 := u8((x103 >> 8)) + x106 := (u8(x30) & 0xff) + x107 := (x30 >> 8) + x108 := (u8(x107) & 0xff) + x109 := (x107 >> 8) + x110 := (u8(x109) & 0xff) + x111 := (x109 >> 8) + x112 := (u8(x111) & 0xff) + x113 := (x111 >> 8) + x114 := (u8(x113) & 0xff) + x115 := (x113 >> 8) + x116 := (u8(x115) & 0xff) + x117 := u8((x115 >> 8)) + x118 := (u8(x32) & 0xff) + x119 := (x32 >> 8) + x120 := (u8(x119) & 0xff) + x121 := (x119 >> 8) + x122 := (u8(x121) & 0xff) + x123 := (x121 >> 8) + x124 := (u8(x123) & 0xff) + x125 := (x123 >> 8) + x126 := (u8(x125) & 0xff) + x127 := (x125 >> 8) + x128 := (u8(x127) & 0xff) + x129 := u8((x127 >> 8)) + out1[0] = 
x34 + out1[1] = x36 + out1[2] = x38 + out1[3] = x40 + out1[4] = x42 + out1[5] = x44 + out1[6] = x45 + out1[7] = x46 + out1[8] = x48 + out1[9] = x50 + out1[10] = x52 + out1[11] = x54 + out1[12] = x56 + out1[13] = x57 + out1[14] = x58 + out1[15] = x60 + out1[16] = x62 + out1[17] = x64 + out1[18] = x66 + out1[19] = x68 + out1[20] = x69 + out1[21] = x70 + out1[22] = x72 + out1[23] = x74 + out1[24] = x76 + out1[25] = x78 + out1[26] = x80 + out1[27] = x81 + out1[28] = x82 + out1[29] = x84 + out1[30] = x86 + out1[31] = x88 + out1[32] = x90 + out1[33] = x92 + out1[34] = x93 + out1[35] = x94 + out1[36] = x96 + out1[37] = x98 + out1[38] = x100 + out1[39] = x102 + out1[40] = x104 + out1[41] = x105 + out1[42] = x106 + out1[43] = x108 + out1[44] = x110 + out1[45] = x112 + out1[46] = x114 + out1[47] = x116 + out1[48] = x117 + out1[49] = x118 + out1[50] = x120 + out1[51] = x122 + out1[52] = x124 + out1[53] = x126 + out1[54] = x128 + out1[55] = x129 +} + +fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[56]byte) { + x1 := (u64(arg1[55]) << 48) + x2 := (u64(arg1[54]) << 40) + x3 := (u64(arg1[53]) << 32) + x4 := (u64(arg1[52]) << 24) + x5 := (u64(arg1[51]) << 16) + x6 := (u64(arg1[50]) << 8) + x7 := arg1[49] + x8 := (u64(arg1[48]) << 48) + x9 := (u64(arg1[47]) << 40) + x10 := (u64(arg1[46]) << 32) + x11 := (u64(arg1[45]) << 24) + x12 := (u64(arg1[44]) << 16) + x13 := (u64(arg1[43]) << 8) + x14 := arg1[42] + x15 := (u64(arg1[41]) << 48) + x16 := (u64(arg1[40]) << 40) + x17 := (u64(arg1[39]) << 32) + x18 := (u64(arg1[38]) << 24) + x19 := (u64(arg1[37]) << 16) + x20 := (u64(arg1[36]) << 8) + x21 := arg1[35] + x22 := (u64(arg1[34]) << 48) + x23 := (u64(arg1[33]) << 40) + x24 := (u64(arg1[32]) << 32) + x25 := (u64(arg1[31]) << 24) + x26 := (u64(arg1[30]) << 16) + x27 := (u64(arg1[29]) << 8) + x28 := arg1[28] + x29 := (u64(arg1[27]) << 48) + x30 := (u64(arg1[26]) << 40) + x31 := (u64(arg1[25]) << 32) + x32 := (u64(arg1[24]) << 24) + x33 := (u64(arg1[23]) << 16) + 
x34 := (u64(arg1[22]) << 8) + x35 := arg1[21] + x36 := (u64(arg1[20]) << 48) + x37 := (u64(arg1[19]) << 40) + x38 := (u64(arg1[18]) << 32) + x39 := (u64(arg1[17]) << 24) + x40 := (u64(arg1[16]) << 16) + x41 := (u64(arg1[15]) << 8) + x42 := arg1[14] + x43 := (u64(arg1[13]) << 48) + x44 := (u64(arg1[12]) << 40) + x45 := (u64(arg1[11]) << 32) + x46 := (u64(arg1[10]) << 24) + x47 := (u64(arg1[9]) << 16) + x48 := (u64(arg1[8]) << 8) + x49 := arg1[7] + x50 := (u64(arg1[6]) << 48) + x51 := (u64(arg1[5]) << 40) + x52 := (u64(arg1[4]) << 32) + x53 := (u64(arg1[3]) << 24) + x54 := (u64(arg1[2]) << 16) + x55 := (u64(arg1[1]) << 8) + x56 := arg1[0] + x57 := (x55 + u64(x56)) + x58 := (x54 + x57) + x59 := (x53 + x58) + x60 := (x52 + x59) + x61 := (x51 + x60) + x62 := (x50 + x61) + x63 := (x48 + u64(x49)) + x64 := (x47 + x63) + x65 := (x46 + x64) + x66 := (x45 + x65) + x67 := (x44 + x66) + x68 := (x43 + x67) + x69 := (x41 + u64(x42)) + x70 := (x40 + x69) + x71 := (x39 + x70) + x72 := (x38 + x71) + x73 := (x37 + x72) + x74 := (x36 + x73) + x75 := (x34 + u64(x35)) + x76 := (x33 + x75) + x77 := (x32 + x76) + x78 := (x31 + x77) + x79 := (x30 + x78) + x80 := (x29 + x79) + x81 := (x27 + u64(x28)) + x82 := (x26 + x81) + x83 := (x25 + x82) + x84 := (x24 + x83) + x85 := (x23 + x84) + x86 := (x22 + x85) + x87 := (x20 + u64(x21)) + x88 := (x19 + x87) + x89 := (x18 + x88) + x90 := (x17 + x89) + x91 := (x16 + x90) + x92 := (x15 + x91) + x93 := (x13 + u64(x14)) + x94 := (x12 + x93) + x95 := (x11 + x94) + x96 := (x10 + x95) + x97 := (x9 + x96) + x98 := (x8 + x97) + x99 := (x6 + u64(x7)) + x100 := (x5 + x99) + x101 := (x4 + x100) + x102 := (x3 + x101) + x103 := (x2 + x102) + x104 := (x1 + x103) + out1[0] = x62 + out1[1] = x68 + out1[2] = x74 + out1[3] = x80 + out1[4] = x86 + out1[5] = x92 + out1[6] = x98 + out1[7] = x104 +} + +fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) { + x1 := arg1[0] + x2 := arg1[1] + x3 := arg1[2] + x4 := arg1[3] + x5 := arg1[4] + 
x6 := arg1[5] + x7 := arg1[6] + x8 := arg1[7] + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 + out1[4] = x5 + out1[5] = x6 + out1[6] = x7 + out1[7] = x8 +} diff --git a/core/crypto/_fiat/field_poly1305/field.odin b/core/crypto/_fiat/field_poly1305/field.odin index b12046858..caaece98e 100644 --- a/core/crypto/_fiat/field_poly1305/field.odin +++ b/core/crypto/_fiat/field_poly1305/field.odin @@ -1,6 +1,5 @@ package field_poly1305 -import "base:intrinsics" import "core:encoding/endian" import "core:mem" @@ -29,9 +28,7 @@ fe_from_bytes :: #force_inline proc "contextless" ( // makes implementing the actual MAC block processing considerably // neater. - if len(arg1) != 16 { - intrinsics.trap() - } + ensure_contextless(len(arg1) == 16, "poly1305: invalid field element size") // While it may be unwise to do deserialization here on our // own when fiat-crypto provides equivalent functionality, diff --git a/core/crypto/_fiat/field_scalar25519/field.odin b/core/crypto/_fiat/field_scalar25519/field.odin index 9b40661b7..933637c54 100644 --- a/core/crypto/_fiat/field_scalar25519/field.odin +++ b/core/crypto/_fiat/field_scalar25519/field.odin @@ -1,18 +1,17 @@ package field_scalar25519 -import "base:intrinsics" import "core:encoding/endian" import "core:math/bits" import "core:mem" -@(private) +@(private, rodata) _TWO_168 := Montgomery_Domain_Field_Element { 0x5b8ab432eac74798, 0x38afddd6de59d5d7, 0xa2c131b399411b7c, 0x6329a7ed9ce5a30, } -@(private) +@(private, rodata) _TWO_336 := Montgomery_Domain_Field_Element { 0xbd3d108e2b35ecc5, 0x5c3a3718bdf9c90b, @@ -95,9 +94,8 @@ fe_from_bytes_wide :: proc "contextless" ( @(private) _fe_from_bytes_short :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element, arg1: []byte) { // INVARIANT: len(arg1) < 32. 
- if len(arg1) >= 32 { - intrinsics.trap() - } + ensure_contextless(len(arg1) < 32, "edwards25519: oversized short scalar") + tmp: [32]byte copy(tmp[:], arg1) @@ -106,9 +104,7 @@ _fe_from_bytes_short :: proc "contextless" (out1: ^Montgomery_Domain_Field_Eleme } fe_to_bytes :: proc "contextless" (out1: []byte, arg1: ^Montgomery_Domain_Field_Element) { - if len(out1) != 32 { - intrinsics.trap() - } + ensure_contextless(len(out1) == 32, "edwards25519: oversized scalar output buffer") tmp: Non_Montgomery_Domain_Field_Element fe_from_montgomery(&tmp, arg1) diff --git a/core/crypto/_sha3/sha3.odin b/core/crypto/_sha3/sha3.odin index 2db76fce0..52b3fbda9 100644 --- a/core/crypto/_sha3/sha3.odin +++ b/core/crypto/_sha3/sha3.odin @@ -44,7 +44,7 @@ Context :: struct { is_finalized: bool, // For SHAKE (unlimited squeeze is allowed) } -@(private) +@(private, rodata) keccakf_rndc := [?]u64 { 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, @@ -56,13 +56,13 @@ keccakf_rndc := [?]u64 { 0x8000000000008080, 0x0000000080000001, 0x8000000080008008, } -@(private) +@(private, rodata) keccakf_rotc := [?]int { 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44, } -@(private) +@(private, rodata) keccakf_piln := [?]i32 { 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1, @@ -122,7 +122,7 @@ keccakf :: proc "contextless" (st: ^[25]u64) { } } -init :: proc(ctx: ^Context) { +init :: proc "contextless" (ctx: ^Context) { for i := 0; i < 25; i += 1 { ctx.st.q[i] = 0 } @@ -133,9 +133,9 @@ init :: proc(ctx: ^Context) { ctx.is_finalized = false } -update :: proc(ctx: ^Context, data: []byte) { - assert(ctx.is_initialized) - assert(!ctx.is_finalized) +update :: proc "contextless" (ctx: ^Context, data: []byte) { + ensure_contextless(ctx.is_initialized) + ensure_contextless(!ctx.is_finalized) j := ctx.pt for i := 0; i < len(data); i += 1 { @@ -149,12 +149,9 
@@ update :: proc(ctx: ^Context, data: []byte) { ctx.pt = j } -final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) { - assert(ctx.is_initialized) - - if len(hash) < ctx.mdlen { - panic("crypto/sha3: invalid destination digest size") - } +final :: proc "contextless" (ctx: ^Context, hash: []byte, finalize_clone: bool = false) { + ensure_contextless(ctx.is_initialized) + ensure_contextless(len(hash) >= ctx.mdlen, "crypto/sha3: invalid destination digest size") ctx := ctx if finalize_clone { @@ -173,11 +170,11 @@ final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) { } } -clone :: proc(ctx, other: ^Context) { +clone :: proc "contextless" (ctx, other: ^Context) { ctx^ = other^ } -reset :: proc(ctx: ^Context) { +reset :: proc "contextless" (ctx: ^Context) { if !ctx.is_initialized { return } @@ -185,9 +182,9 @@ reset :: proc(ctx: ^Context) { mem.zero_explicit(ctx, size_of(ctx^)) } -shake_xof :: proc(ctx: ^Context) { - assert(ctx.is_initialized) - assert(!ctx.is_finalized) +shake_xof :: proc "contextless" (ctx: ^Context) { + ensure_contextless(ctx.is_initialized) + ensure_contextless(!ctx.is_finalized) ctx.st.b[ctx.pt] ~= ctx.dsbyte ctx.st.b[ctx.rsiz - 1] ~= 0x80 @@ -197,9 +194,9 @@ shake_xof :: proc(ctx: ^Context) { ctx.is_finalized = true // No more absorb, unlimited squeeze. 
} -shake_out :: proc(ctx: ^Context, hash: []byte) { - assert(ctx.is_initialized) - assert(ctx.is_finalized) +shake_out :: proc "contextless" (ctx: ^Context, hash: []byte) { + ensure_contextless(ctx.is_initialized) + ensure_contextless(ctx.is_finalized) j := ctx.pt for i := 0; i < len(hash); i += 1 { diff --git a/core/crypto/_sha3/sp800_185.odin b/core/crypto/_sha3/sp800_185.odin index a96f78cc1..8390d8490 100644 --- a/core/crypto/_sha3/sp800_185.odin +++ b/core/crypto/_sha3/sp800_185.odin @@ -3,7 +3,7 @@ package _sha3 import "core:encoding/endian" import "core:math/bits" -init_cshake :: proc(ctx: ^Context, n, s: []byte, sec_strength: int) { +init_cshake :: proc "contextless" (ctx: ^Context, n, s: []byte, sec_strength: int) { ctx.mdlen = sec_strength / 8 // No domain separator is equivalent to vanilla SHAKE. @@ -18,7 +18,7 @@ init_cshake :: proc(ctx: ^Context, n, s: []byte, sec_strength: int) { bytepad(ctx, [][]byte{n, s}, rate_cshake(sec_strength)) } -final_cshake :: proc(ctx: ^Context, dst: []byte, finalize_clone: bool = false) { +final_cshake :: proc "contextless" (ctx: ^Context, dst: []byte, finalize_clone: bool = false) { ctx := ctx if finalize_clone { tmp_ctx: Context @@ -32,7 +32,7 @@ final_cshake :: proc(ctx: ^Context, dst: []byte, finalize_clone: bool = false) { shake_out(ctx, dst) } -rate_cshake :: #force_inline proc(sec_strength: int) -> int { +rate_cshake :: #force_inline proc "contextless" (sec_strength: int) -> int { switch sec_strength { case 128: return RATE_128 @@ -40,7 +40,7 @@ rate_cshake :: #force_inline proc(sec_strength: int) -> int { return RATE_256 } - panic("crypto/sha3: invalid security strength") + panic_contextless("crypto/sha3: invalid security strength") } // right_encode and left_encode are defined to support 0 <= x < 2^2040 @@ -52,10 +52,10 @@ rate_cshake :: #force_inline proc(sec_strength: int) -> int { // // Thus we support 0 <= x < 2^128. -@(private) +@(private, rodata) _PAD: [RATE_128]byte // Biggest possible value of w per spec. 
-bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) { +bytepad :: proc "contextless" (ctx: ^Context, x_strings: [][]byte, w: int) { // 1. z = left_encode(w) || X. z_hi: u64 z_lo := left_right_encode(ctx, 0, u64(w), true) @@ -70,9 +70,7 @@ bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) { // This isn't actually possible, at least with the currently // defined SP 800-185 routines. - if carry != 0 { - panic("crypto/sha3: bytepad input length overflow") - } + ensure_contextless(carry == 0, "crypto/sha3: bytepad input length overflow") } // We skip this step as we are doing a byte-oriented implementation @@ -95,7 +93,7 @@ bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) { } } -encode_string :: #force_inline proc(ctx: ^Context, s: []byte) -> (u64, u64) { +encode_string :: #force_inline proc "contextless" (ctx: ^Context, s: []byte) -> (u64, u64) { l := encode_byte_len(ctx, len(s), true) // left_encode update(ctx, s) @@ -104,13 +102,13 @@ encode_string :: #force_inline proc(ctx: ^Context, s: []byte) -> (u64, u64) { return hi, lo } -encode_byte_len :: #force_inline proc(ctx: ^Context, l: int, is_left: bool) -> u64 { +encode_byte_len :: #force_inline proc "contextless" (ctx: ^Context, l: int, is_left: bool) -> u64 { hi, lo := bits.mul_u64(u64(l), 8) return left_right_encode(ctx, hi, lo, is_left) } @(private) -left_right_encode :: proc(ctx: ^Context, hi, lo: u64, is_left: bool) -> u64 { +left_right_encode :: proc "contextless" (ctx: ^Context, hi, lo: u64, is_left: bool) -> u64 { HI_OFFSET :: 1 LO_OFFSET :: HI_OFFSET + 8 RIGHT_OFFSET :: LO_OFFSET + 8 diff --git a/core/crypto/aead/aead.odin b/core/crypto/aead/aead.odin index 9b7d810e4..c8f324929 100644 --- a/core/crypto/aead/aead.odin +++ b/core/crypto/aead/aead.odin @@ -16,7 +16,7 @@ seal_oneshot :: proc(algo: Algorithm, dst, tag, key, iv, aad, plaintext: []byte, // returning true iff the authentication was successful. If authentication // fails, the destination buffer will be zeroed. 
// +// dst and ciphertext MUST alias exactly or not at all. @(require_results) open_oneshot :: proc(algo: Algorithm, dst, key, iv, aad, ciphertext, tag: []byte, impl: Implementation = nil) -> bool { ctx: Context diff --git a/core/crypto/aead/low_level.odin b/core/crypto/aead/low_level.odin index 38a0c84ba..c80574a0d 100644 --- a/core/crypto/aead/low_level.odin +++ b/core/crypto/aead/low_level.odin @@ -1,8 +1,10 @@ package aead +import "core:crypto/aegis" import "core:crypto/aes" import "core:crypto/chacha20" import "core:crypto/chacha20poly1305" +import "core:crypto/deoxysii" import "core:reflect" // Implementation is an AEAD implementation. Most callers will not need @@ -15,7 +17,7 @@ Implementation :: union { // MAX_TAG_SIZE is the maximum size tag that can be returned by any of the // Algorithms supported via this package. -MAX_TAG_SIZE :: 16 +MAX_TAG_SIZE :: 32 // Algorithm is the algorithm identifier associated with a given Context. Algorithm :: enum { @@ -25,9 +27,14 @@ Algorithm :: enum { AES_GCM_256, CHACHA20POLY1305, XCHACHA20POLY1305, + AEGIS_128L, + AEGIS_128L_256, // AEGIS-128L (256-bit tag) + AEGIS_256, + AEGIS_256_256, // AEGIS-256 (256-bit tag) + DEOXYS_II_256, } -// ALGORITM_NAMES is the Agorithm to algorithm name string. +// ALGORITHM_NAMES is the Algorithm to algorithm name string. ALGORITHM_NAMES := [Algorithm]string { .Invalid = "Invalid", .AES_GCM_128 = "AES-GCM-128", @@ -35,6 +42,11 @@ ALGORITHM_NAMES := [Algorithm]string { .AES_GCM_256 = "AES-GCM-256", .CHACHA20POLY1305 = "chacha20poly1305", .XCHACHA20POLY1305 = "xchacha20poly1305", + .AEGIS_128L = "AEGIS-128L", + .AEGIS_128L_256 = "AEGIS-128L-256", + .AEGIS_256 = "AEGIS-256", + .AEGIS_256_256 = "AEGIS-256-256", + .DEOXYS_II_256 = "Deoxys-II-256", } // TAG_SIZES is the Algorithm to tag size in bytes. 
@@ -45,6 +57,11 @@ TAG_SIZES := [Algorithm]int { .AES_GCM_256 = aes.GCM_TAG_SIZE, .CHACHA20POLY1305 = chacha20poly1305.TAG_SIZE, .XCHACHA20POLY1305 = chacha20poly1305.TAG_SIZE, + .AEGIS_128L = aegis.TAG_SIZE_128, + .AEGIS_128L_256 = aegis.TAG_SIZE_256, + .AEGIS_256 = aegis.TAG_SIZE_128, + .AEGIS_256_256 = aegis.TAG_SIZE_256, + .DEOXYS_II_256 = deoxysii.TAG_SIZE, } // KEY_SIZES is the Algorithm to key size in bytes. @@ -55,6 +72,11 @@ KEY_SIZES := [Algorithm]int { .AES_GCM_256 = aes.KEY_SIZE_256, .CHACHA20POLY1305 = chacha20poly1305.KEY_SIZE, .XCHACHA20POLY1305 = chacha20poly1305.KEY_SIZE, + .AEGIS_128L = aegis.KEY_SIZE_128L, + .AEGIS_128L_256 = aegis.KEY_SIZE_128L, + .AEGIS_256 = aegis.KEY_SIZE_256, + .AEGIS_256_256 = aegis.KEY_SIZE_256, + .DEOXYS_II_256 = deoxysii.KEY_SIZE, } // IV_SIZES is the Algorithm to initialization vector size in bytes. @@ -67,6 +89,11 @@ IV_SIZES := [Algorithm]int { .AES_GCM_256 = aes.GCM_IV_SIZE, .CHACHA20POLY1305 = chacha20poly1305.IV_SIZE, .XCHACHA20POLY1305 = chacha20poly1305.XIV_SIZE, + .AEGIS_128L = aegis.IV_SIZE_128L, + .AEGIS_128L_256 = aegis.IV_SIZE_128L, + .AEGIS_256 = aegis.IV_SIZE_256, + .AEGIS_256_256 = aegis.IV_SIZE_256, + .DEOXYS_II_256 = deoxysii.IV_SIZE, } // Context is a concrete instantiation of a specific AEAD algorithm. @@ -75,6 +102,8 @@ Context :: struct { _impl: union { aes.Context_GCM, chacha20poly1305.Context, + aegis.Context, + deoxysii.Context, }, } @@ -86,6 +115,11 @@ _IMPL_IDS := [Algorithm]typeid { .AES_GCM_256 = typeid_of(aes.Context_GCM), .CHACHA20POLY1305 = typeid_of(chacha20poly1305.Context), .XCHACHA20POLY1305 = typeid_of(chacha20poly1305.Context), + .AEGIS_128L = typeid_of(aegis.Context), + .AEGIS_128L_256 = typeid_of(aegis.Context), + .AEGIS_256 = typeid_of(aegis.Context), + .AEGIS_256_256 = typeid_of(aegis.Context), + .DEOXYS_II_256 = typeid_of(deoxysii.Context), } // init initializes a Context with a specific AEAD Algorithm. 
@@ -94,9 +128,7 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat reset(ctx) } - if len(key) != KEY_SIZES[algorithm] { - panic("crypto/aead: invalid key size") - } + ensure(len(key) == KEY_SIZES[algorithm], "crypto/aead: invalid key size") // Directly specialize the union by setting the type ID (save a copy). reflect.set_union_variant_typeid( @@ -113,6 +145,12 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat case .XCHACHA20POLY1305: impl_ := impl != nil ? impl.(chacha20.Implementation) : chacha20.DEFAULT_IMPLEMENTATION chacha20poly1305.init_xchacha(&ctx._impl.(chacha20poly1305.Context), key, impl_) + case .AEGIS_128L, .AEGIS_128L_256, .AEGIS_256, .AEGIS_256_256: + impl_ := impl != nil ? impl.(aes.Implementation) : aes.DEFAULT_IMPLEMENTATION + aegis.init(&ctx._impl.(aegis.Context), key, impl_) + case .DEOXYS_II_256: + impl_ := impl != nil ? impl.(aes.Implementation) : aes.DEFAULT_IMPLEMENTATION + deoxysii.init(&ctx._impl.(deoxysii.Context), key, impl_) case .Invalid: panic("crypto/aead: uninitialized algorithm") case: @@ -127,11 +165,17 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat // // dst and plaintext MUST alias exactly or not at all. seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) { + ensure(len(tag) == TAG_SIZES[ctx._algo], "crypto/aead: invalid tag size") + switch &impl in ctx._impl { case aes.Context_GCM: aes.seal_gcm(&impl, dst, tag, iv, aad, plaintext) case chacha20poly1305.Context: chacha20poly1305.seal(&impl, dst, tag, iv, aad, plaintext) + case aegis.Context: + aegis.seal(&impl, dst, tag, iv, aad, plaintext) + case deoxysii.Context: + deoxysii.seal(&impl, dst, tag, iv, aad, plaintext) case: panic("crypto/aead: uninitialized algorithm") } @@ -145,11 +189,17 @@ seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) { // dst and plaintext MUST alias exactly or not at all. 
@(require_results) open_ctx :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool { + ensure(len(tag) == TAG_SIZES[ctx._algo], "crypto/aead: invalid tag size") + switch &impl in ctx._impl { case aes.Context_GCM: return aes.open_gcm(&impl, dst, iv, aad, ciphertext, tag) case chacha20poly1305.Context: return chacha20poly1305.open(&impl, dst, iv, aad, ciphertext, tag) + case aegis.Context: + return aegis.open(&impl, dst, iv, aad, ciphertext, tag) + case deoxysii.Context: + return deoxysii.open(&impl, dst, iv, aad, ciphertext, tag) case: panic("crypto/aead: uninitialized algorithm") } @@ -163,6 +213,10 @@ reset :: proc(ctx: ^Context) { aes.reset_gcm(&impl) case chacha20poly1305.Context: chacha20poly1305.reset(&impl) + case aegis.Context: + aegis.reset(&impl) + case deoxysii.Context: + deoxysii.reset(&impl) case: // Calling reset repeatedly is fine. } diff --git a/core/crypto/aegis/aegis.odin b/core/crypto/aegis/aegis.odin new file mode 100644 index 000000000..adecce91f --- /dev/null +++ b/core/crypto/aegis/aegis.odin @@ -0,0 +1,213 @@ +/* +package aegis implements the AEGIS-128L and AEGIS-256 Authenticated +Encryption with Additional Data algorithms. + +See: +- [[ https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-12.txt ]] +*/ +package aegis + +import "core:bytes" +import "core:crypto" +import "core:crypto/aes" +import "core:mem" + +// KEY_SIZE_128L is the AEGIS-128L key size in bytes. +KEY_SIZE_128L :: 16 +// KEY_SIZE_256 is the AEGIS-256 key size in bytes. +KEY_SIZE_256 :: 32 +// IV_SIZE_128L is the AEGIS-128L IV size in bytes. +IV_SIZE_128L :: 16 +// IV_SIZE_256 is the AEGIS-256 IV size in bytes. +IV_SIZE_256 :: 32 +// TAG_SIZE_128 is the AEGIS-128L or AEGIS-256 128-bit tag size in bytes. +TAG_SIZE_128 :: 16 +// TAG_SIZE_256 is the AEGIS-128L or AEGIS-256 256-bit tag size in bytes. 
+TAG_SIZE_256 :: 32 + +@(private) +_RATE_128L :: 32 +@(private) +_RATE_256 :: 16 +@(private) +_RATE_MAX :: _RATE_128L + +@(private, rodata) +_C0 := [16]byte{ + 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, + 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62, +} + +@(private, rodata) +_C1 := [16]byte { + 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, + 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd, +} + +// Context is a keyed AEGIS-128L or AEGIS-256 instance. +Context :: struct { + _key: [KEY_SIZE_256]byte, + _key_len: int, + _impl: aes.Implementation, + _is_initialized: bool, +} + +@(private) +_validate_common_slice_sizes :: proc (ctx: ^Context, tag, iv, aad, text: []byte) { + switch len(tag) { + case TAG_SIZE_128, TAG_SIZE_256: + case: + panic("crypto/aegis: invalid tag size") + } + + iv_ok: bool + switch ctx._key_len { + case KEY_SIZE_128L: + iv_ok = len(iv) == IV_SIZE_128L + case KEY_SIZE_256: + iv_ok = len(iv) == IV_SIZE_256 + } + ensure(iv_ok,"crypto/aegis: invalid IV size") + + #assert(size_of(int) == 8 || size_of(int) <= 4) + // As A_MAX and P_MAX are both defined to be 2^61 - 1 bytes, and + // the maximum length of a slice is bound by `size_of(int)`, where + // `int` is register sized, there is no need to check AAD/text + // lengths. +} + +// init initializes a Context with the provided key, for AEGIS-128L or AEGIS-256. +init :: proc(ctx: ^Context, key: []byte, impl := aes.DEFAULT_IMPLEMENTATION) { + switch len(key) { + case KEY_SIZE_128L, KEY_SIZE_256: + case: + panic("crypto/aegis: invalid key size") + } + + copy(ctx._key[:], key) + ctx._key_len = len(key) + ctx._impl = impl + if ctx._impl == .Hardware && !is_hardware_accelerated() { + ctx._impl = .Portable + } + ctx._is_initialized = true +} + +// seal encrypts the plaintext and authenticates the aad and ciphertext, +// with the provided Context and iv, stores the output in dst and tag. +// +// dst and plaintext MUST alias exactly or not at all. 
+seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) { + ensure(ctx._is_initialized) + + _validate_common_slice_sizes(ctx, tag, iv, aad, plaintext) + ensure(len(dst) == len(plaintext), "crypto/aegis: invalid destination ciphertext size") + ensure(!bytes.alias_inexactly(dst, plaintext), "crypto/aegis: dst and plaintext alias inexactly") + + switch ctx._impl { + case .Hardware: + st: State_HW + defer reset_state_hw(&st) + + init_hw(ctx, &st, iv) + + aad_len, pt_len := len(aad), len(plaintext) + if aad_len > 0 { + absorb_hw(&st, aad) + } + + if pt_len > 0 { + enc_hw(&st, dst, plaintext) + } + + finalize_hw(&st, tag, aad_len, pt_len) + case .Portable: + st: State_SW + defer reset_state_sw(&st) + + init_sw(ctx, &st, iv) + + aad_len, pt_len := len(aad), len(plaintext) + if aad_len > 0 { + absorb_sw(&st, aad) + } + + if pt_len > 0 { + enc_sw(&st, dst, plaintext) + } + + finalize_sw(&st, tag, aad_len, pt_len) + case: + panic("core/crypto/aegis: not implemented") + } +} + +// open authenticates the aad and ciphertext, and decrypts the ciphertext, +// with the provided Context, iv, and tag, and stores the output in dst, +// returning true iff the authentication was successful. If authentication +// fails, the destination buffer will be zeroed. +// +// dst and ciphertext MUST alias exactly or not at all. 
+@(require_results) +open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool { + ensure(ctx._is_initialized) + + _validate_common_slice_sizes(ctx, tag, iv, aad, ciphertext) + ensure(len(dst) == len(ciphertext), "crypto/aegis: invalid destination plaintext size") + ensure(!bytes.alias_inexactly(dst, ciphertext), "crypto/aegis: dst and ciphertext alias inexactly") + + tmp: [TAG_SIZE_256]byte + derived_tag := tmp[:len(tag)] + aad_len, ct_len := len(aad), len(ciphertext) + + switch ctx._impl { + case .Hardware: + st: State_HW + defer reset_state_hw(&st) + + init_hw(ctx, &st, iv) + + if aad_len > 0 { + absorb_hw(&st, aad) + } + + if ct_len > 0 { + dec_hw(&st, dst, ciphertext) + } + + finalize_hw(&st, derived_tag, aad_len, ct_len) + case .Portable: + st: State_SW + defer reset_state_sw(&st) + + init_sw(ctx, &st, iv) + + if aad_len > 0 { + absorb_sw(&st, aad) + } + + if ct_len > 0 { + dec_sw(&st, dst, ciphertext) + } + + finalize_sw(&st, derived_tag, aad_len, ct_len) + case: + panic("core/crypto/aegis: not implemented") + } + + if crypto.compare_constant_time(tag, derived_tag) != 1 { + mem.zero_explicit(raw_data(derived_tag), len(derived_tag)) + mem.zero_explicit(raw_data(dst), ct_len) + return false + } + + return true +} + +// reset sanitizes the Context. The Context must be +// re-initialized to be used again. +reset :: proc "contextless" (ctx: ^Context) { + mem.zero_explicit(&ctx._key, len(ctx._key)) + ctx._key_len = 0 + ctx._is_initialized = false +} diff --git a/core/crypto/aegis/aegis_impl_ct64.odin b/core/crypto/aegis/aegis_impl_ct64.odin new file mode 100644 index 000000000..4813b37ec --- /dev/null +++ b/core/crypto/aegis/aegis_impl_ct64.odin @@ -0,0 +1,452 @@ +package aegis + +import aes "core:crypto/_aes/ct64" +import "core:encoding/endian" +import "core:mem" + +// This uses the bitsliced 64-bit general purpose register SWAR AES +// round function. 
The intermediate state is stored in interleaved +// but NOT orthogonalized form, as leaving things in the orthogonalized +// format would overly complicate the update implementation. +// +// Note/perf: Per Frank Denis and a review of the specification, it is +// possible to gain slightly more performance by leaving the state in +// orthogonalized form while doing initialization, finalization, and +// absorbing AAD. This implementation opts out of those optimizations +// for the sake of simplicity. +// +// The update function leverages the parallelism (4xblocks) at once. + +@(private) +State_SW :: struct { + s0_0, s0_1: u64, + s1_0, s1_1: u64, + s2_0, s2_1: u64, + s3_0, s3_1: u64, + s4_0, s4_1: u64, + s5_0, s5_1: u64, + s6_0, s6_1: u64, + s7_0, s7_1: u64, + q_k, q_b: [8]u64, + rate: int, +} + +@(private) +init_sw :: proc "contextless" (ctx: ^Context, st: ^State_SW, iv: []byte) { + switch ctx._key_len { + case KEY_SIZE_128L: + key_0, key_1 := aes.load_interleaved(ctx._key[:16]) + iv_0, iv_1 := aes.load_interleaved(iv) + + st.s0_0, st.s0_1 = aes.xor_interleaved(key_0, key_1, iv_0, iv_1) + st.s1_0, st.s1_1 = aes.load_interleaved(_C1[:]) + st.s2_0, st.s2_1 = aes.load_interleaved(_C0[:]) + st.s3_0, st.s3_1 = st.s1_0, st.s1_1 + st.s4_0, st.s4_1 = st.s0_0, st.s0_1 + st.s5_0, st.s5_1 = aes.xor_interleaved(key_0, key_1, st.s2_0, st.s2_1) + st.s6_0, st.s6_1 = aes.xor_interleaved(key_0, key_1, st.s1_0, st.s1_1) + st.s7_0, st.s7_1 = st.s5_0, st.s5_1 + st.rate = _RATE_128L + + for _ in 0 ..< 10 { + update_sw_128l(st, iv_0, iv_1, key_0, key_1) + } + case KEY_SIZE_256: + k0_0, k0_1 := aes.load_interleaved(ctx._key[:16]) + k1_0, k1_1 := aes.load_interleaved(ctx._key[16:]) + n0_0, n0_1 := aes.load_interleaved(iv[:16]) + n1_0, n1_1 := aes.load_interleaved(iv[16:]) + + st.s0_0, st.s0_1 = aes.xor_interleaved(k0_0, k0_1, n0_0, n0_1) + st.s1_0, st.s1_1 = aes.xor_interleaved(k1_0, k1_1, n1_0, n1_1) + st.s2_0, st.s2_1 = aes.load_interleaved(_C1[:]) + st.s3_0, st.s3_1 = 
aes.load_interleaved(_C0[:]) + st.s4_0, st.s4_1 = aes.xor_interleaved(k0_0, k0_1, st.s3_0, st.s3_1) + st.s5_0, st.s5_1 = aes.xor_interleaved(k1_0, k1_1, st.s2_0, st.s2_1) + st.rate = _RATE_256 + + u0_0, u0_1, u1_0, u1_1 := st.s0_0, st.s0_1, st.s1_0, st.s1_1 + for _ in 0 ..< 4 { + update_sw_256(st, k0_0, k0_1) + update_sw_256(st, k1_0, k1_1) + update_sw_256(st, u0_0, u0_1) + update_sw_256(st, u1_0, u1_1) + } + } +} + +@(private = "file") +update_sw_128l :: proc "contextless" (st: ^State_SW, m0_0, m0_1, m1_0, m1_1: u64) { + st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s0_0, st.s0_1, m0_0, m0_1) + st.q_k[1], st.q_k[5] = st.s1_0, st.s1_1 + st.q_k[2], st.q_k[6] = st.s2_0, st.s2_1 + st.q_k[3], st.q_k[7] = st.s3_0, st.s3_1 + aes.orthogonalize(&st.q_k) + + st.q_b[0], st.q_b[4] = st.s7_0, st.s7_1 + st.q_b[1], st.q_b[5] = st.s0_0, st.s0_1 + st.q_b[2], st.q_b[6] = st.s1_0, st.s1_1 + st.q_b[3], st.q_b[7] = st.s2_0, st.s2_1 + aes.orthogonalize(&st.q_b) + + aes.sub_bytes(&st.q_b) + aes.shift_rows(&st.q_b) + aes.mix_columns(&st.q_b) + aes.add_round_key(&st.q_b, st.q_k[:]) + aes.orthogonalize(&st.q_b) + + st.s0_0, st.s0_1 = st.q_b[0], st.q_b[4] + st.s1_0, st.s1_1 = st.q_b[1], st.q_b[5] + st.s2_0, st.s2_1 = st.q_b[2], st.q_b[6] + s3_0, s3_1 := st.q_b[3], st.q_b[7] + + st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s4_0, st.s4_1, m1_0, m1_1) + st.q_k[1], st.q_k[5] = st.s5_0, st.s5_1 + st.q_k[2], st.q_k[6] = st.s6_0, st.s6_1 + st.q_k[3], st.q_k[7] = st.s7_0, st.s7_1 + aes.orthogonalize(&st.q_k) + + st.q_b[0], st.q_b[4] = st.s3_0, st.s3_1 + st.q_b[1], st.q_b[5] = st.s4_0, st.s4_1 + st.q_b[2], st.q_b[6] = st.s5_0, st.s5_1 + st.q_b[3], st.q_b[7] = st.s6_0, st.s6_1 + aes.orthogonalize(&st.q_b) + + aes.sub_bytes(&st.q_b) + aes.shift_rows(&st.q_b) + aes.mix_columns(&st.q_b) + aes.add_round_key(&st.q_b, st.q_k[:]) + aes.orthogonalize(&st.q_b) + + st.s3_0, st.s3_1 = s3_0, s3_1 + st.s4_0, st.s4_1 = st.q_b[0], st.q_b[4] + st.s5_0, st.s5_1 = st.q_b[1], st.q_b[5] + st.s6_0, st.s6_1 = st.q_b[2], 
st.q_b[6] + st.s7_0, st.s7_1 = st.q_b[3], st.q_b[7] +} + +@(private = "file") +update_sw_256 :: proc "contextless" (st: ^State_SW, m_0, m_1: u64) { + st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s0_0, st.s0_1, m_0, m_1) + st.q_k[1], st.q_k[5] = st.s1_0, st.s1_1 + st.q_k[2], st.q_k[6] = st.s2_0, st.s2_1 + st.q_k[3], st.q_k[7] = st.s3_0, st.s3_1 + aes.orthogonalize(&st.q_k) + + st.q_b[0], st.q_b[4] = st.s5_0, st.s5_1 + st.q_b[1], st.q_b[5] = st.s0_0, st.s0_1 + st.q_b[2], st.q_b[6] = st.s1_0, st.s1_1 + st.q_b[3], st.q_b[7] = st.s2_0, st.s2_1 + aes.orthogonalize(&st.q_b) + + aes.sub_bytes(&st.q_b) + aes.shift_rows(&st.q_b) + aes.mix_columns(&st.q_b) + aes.add_round_key(&st.q_b, st.q_k[:]) + aes.orthogonalize(&st.q_b) + + st.s0_0, st.s0_1 = st.q_b[0], st.q_b[4] + st.s1_0, st.s1_1 = st.q_b[1], st.q_b[5] + st.s2_0, st.s2_1 = st.q_b[2], st.q_b[6] + s3_0, s3_1 := st.q_b[3], st.q_b[7] + + st.q_k[0], st.q_k[4] = st.s4_0, st.s4_1 + st.q_k[1], st.q_k[5] = st.s5_0, st.s5_1 + aes.orthogonalize(&st.q_k) + + st.q_b[0], st.q_b[4] = st.s3_0, st.s3_1 + st.q_b[1], st.q_b[5] = st.s4_0, st.s4_1 + aes.orthogonalize(&st.q_b) + + aes.sub_bytes(&st.q_b) + aes.shift_rows(&st.q_b) + aes.mix_columns(&st.q_b) + aes.add_round_key(&st.q_b, st.q_k[:]) + aes.orthogonalize(&st.q_b) + + st.s3_0, st.s3_1 = s3_0, s3_1 + st.s4_0, st.s4_1 = st.q_b[0], st.q_b[4] + st.s5_0, st.s5_1 = st.q_b[1], st.q_b[5] +} + +@(private = "file") +absorb_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, ai: []byte) #no_bounds_check { + t0_0, t0_1 := aes.load_interleaved(ai[:16]) + t1_0, t1_1 := aes.load_interleaved(ai[16:]) + update_sw_128l(st, t0_0, t0_1, t1_0, t1_1) +} + +@(private = "file") +absorb_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, ai: []byte) { + m_0, m_1 := aes.load_interleaved(ai) + update_sw_256(st, m_0, m_1) +} + +@(private) +absorb_sw :: proc "contextless" (st: ^State_SW, aad: []byte) #no_bounds_check { + ai, l := aad, len(aad) + + switch st.rate { + case _RATE_128L: + for l >= 
_RATE_128L { + absorb_sw_128l(st, ai) + ai = ai[_RATE_128L:] + l -= _RATE_128L + } + case _RATE_256: + for l >= _RATE_256 { + absorb_sw_256(st, ai) + + ai = ai[_RATE_256:] + l -= _RATE_256 + } + } + + // Pad out the remainder with `0`s till it is rate sized. + if l > 0 { + tmp: [_RATE_MAX]byte // AAD is not confidential. + copy(tmp[:], ai) + switch st.rate { + case _RATE_128L: + absorb_sw_128l(st, tmp[:]) + case _RATE_256: + absorb_sw_256(st, tmp[:]) + } + } +} + +@(private = "file", require_results) +z_sw_128l :: proc "contextless" (st: ^State_SW) -> (u64, u64, u64, u64) { + z0_0, z0_1 := aes.and_interleaved(st.s2_0, st.s2_1, st.s3_0, st.s3_1) + z0_0, z0_1 = aes.xor_interleaved(st.s1_0, st.s1_1, z0_0, z0_1) + z0_0, z0_1 = aes.xor_interleaved(st.s6_0, st.s6_1, z0_0, z0_1) + + z1_0, z1_1 := aes.and_interleaved(st.s6_0, st.s6_1, st.s7_0, st.s7_1) + z1_0, z1_1 = aes.xor_interleaved(st.s5_0, st.s5_1, z1_0, z1_1) + z1_0, z1_1 = aes.xor_interleaved(st.s2_0, st.s2_1, z1_0, z1_1) + + return z0_0, z0_1, z1_0, z1_1 +} + +@(private = "file", require_results) +z_sw_256 :: proc "contextless" (st: ^State_SW) -> (u64, u64) { + z_0, z_1 := aes.and_interleaved(st.s2_0, st.s2_1, st.s3_0, st.s3_1) + z_0, z_1 = aes.xor_interleaved(st.s5_0, st.s5_1, z_0, z_1) + z_0, z_1 = aes.xor_interleaved(st.s4_0, st.s4_1, z_0, z_1) + return aes.xor_interleaved(st.s1_0, st.s1_1, z_0, z_1) +} + +@(private = "file") +enc_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, ci, xi: []byte) #no_bounds_check { + z0_0, z0_1, z1_0, z1_1 := z_sw_128l(st) + + t0_0, t0_1 := aes.load_interleaved(xi[:16]) + t1_0, t1_1 := aes.load_interleaved(xi[16:]) + update_sw_128l(st, t0_0, t0_1, t1_0, t1_1) + + out0_0, out0_1 := aes.xor_interleaved(t0_0, t0_1, z0_0, z0_1) + out1_0, out1_1 := aes.xor_interleaved(t1_0, t1_1, z1_0, z1_1) + aes.store_interleaved(ci[:16], out0_0, out0_1) + aes.store_interleaved(ci[16:], out1_0, out1_1) +} + +@(private = "file") +enc_sw_256 :: #force_inline proc "contextless" (st: 
^State_SW, ci, xi: []byte) #no_bounds_check { + z_0, z_1 := z_sw_256(st) + + xi_0, xi_1 := aes.load_interleaved(xi) + update_sw_256(st, xi_0, xi_1) + + ci_0, ci_1 := aes.xor_interleaved(xi_0, xi_1, z_0, z_1) + aes.store_interleaved(ci, ci_0, ci_1) +} + +@(private) +enc_sw :: proc "contextless" (st: ^State_SW, dst, src: []byte) #no_bounds_check { + ci, xi, l := dst, src, len(src) + + switch st.rate { + case _RATE_128L: + for l >= _RATE_128L { + enc_sw_128l(st, ci, xi) + ci = ci[_RATE_128L:] + xi = xi[_RATE_128L:] + l -= _RATE_128L + } + case _RATE_256: + for l >= _RATE_256 { + enc_sw_256(st, ci, xi) + ci = ci[_RATE_256:] + xi = xi[_RATE_256:] + l -= _RATE_256 + } + } + + // Pad out the remainder with `0`s till it is rate sized. + if l > 0 { + tmp: [_RATE_MAX]byte // Ciphertext is not confidential. + copy(tmp[:], xi) + switch st.rate { + case _RATE_128L: + enc_sw_128l(st, tmp[:], tmp[:]) + case _RATE_256: + enc_sw_256(st, tmp[:], tmp[:]) + } + copy(ci, tmp[:l]) + } +} + +@(private = "file") +dec_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, xi, ci: []byte) #no_bounds_check { + z0_0, z0_1, z1_0, z1_1 := z_sw_128l(st) + + t0_0, t0_1 := aes.load_interleaved(ci[:16]) + t1_0, t1_1 := aes.load_interleaved(ci[16:]) + out0_0, out0_1 := aes.xor_interleaved(t0_0, t0_1, z0_0, z0_1) + out1_0, out1_1 := aes.xor_interleaved(t1_0, t1_1, z1_0, z1_1) + + update_sw_128l(st, out0_0, out0_1, out1_0, out1_1) + aes.store_interleaved(xi[:16], out0_0, out0_1) + aes.store_interleaved(xi[16:], out1_0, out1_1) +} + +@(private = "file") +dec_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, xi, ci: []byte) #no_bounds_check { + z_0, z_1 := z_sw_256(st) + + ci_0, ci_1 := aes.load_interleaved(ci) + xi_0, xi_1 := aes.xor_interleaved(ci_0, ci_1, z_0, z_1) + + update_sw_256(st, xi_0, xi_1) + aes.store_interleaved(xi, xi_0, xi_1) +} + +@(private = "file") +dec_partial_sw_128l :: proc "contextless" (st: ^State_SW, xn, cn: []byte) #no_bounds_check { + tmp: [_RATE_128L]byte + defer 
mem.zero_explicit(&tmp, size_of(tmp)) + + z0_0, z0_1, z1_0, z1_1 := z_sw_128l(st) + copy(tmp[:], cn) + + t0_0, t0_1 := aes.load_interleaved(tmp[:16]) + t1_0, t1_1 := aes.load_interleaved(tmp[16:]) + out0_0, out0_1 := aes.xor_interleaved(t0_0, t0_1, z0_0, z0_1) + out1_0, out1_1 := aes.xor_interleaved(t1_0, t1_1, z1_0, z1_1) + + aes.store_interleaved(tmp[:16], out0_0, out0_1) + aes.store_interleaved(tmp[16:], out1_0, out1_1) + copy(xn, tmp[:]) + + for off := len(xn); off < _RATE_128L; off += 1 { + tmp[off] = 0 + } + out0_0, out0_1 = aes.load_interleaved(tmp[:16]) + out1_0, out1_1 = aes.load_interleaved(tmp[16:]) + update_sw_128l(st, out0_0, out0_1, out1_0, out1_1) +} + +@(private = "file") +dec_partial_sw_256 :: proc "contextless" (st: ^State_SW, xn, cn: []byte) #no_bounds_check { + tmp: [_RATE_256]byte + defer mem.zero_explicit(&tmp, size_of(tmp)) + + z_0, z_1 := z_sw_256(st) + copy(tmp[:], cn) + + cn_0, cn_1 := aes.load_interleaved(tmp[:]) + xn_0, xn_1 := aes.xor_interleaved(cn_0, cn_1, z_0, z_1) + + aes.store_interleaved(tmp[:], xn_0, xn_1) + copy(xn, tmp[:]) + + for off := len(xn); off < _RATE_256; off += 1 { + tmp[off] = 0 + } + xn_0, xn_1 = aes.load_interleaved(tmp[:]) + update_sw_256(st, xn_0, xn_1) +} + +@(private) +dec_sw :: proc "contextless" (st: ^State_SW, dst, src: []byte) #no_bounds_check { + xi, ci, l := dst, src, len(src) + + switch st.rate { + case _RATE_128L: + for l >= _RATE_128L { + dec_sw_128l(st, xi, ci) + xi = xi[_RATE_128L:] + ci = ci[_RATE_128L:] + l -= _RATE_128L + } + case _RATE_256: + for l >= _RATE_256 { + dec_sw_256(st, xi, ci) + xi = xi[_RATE_256:] + ci = ci[_RATE_256:] + l -= _RATE_256 + } + } + + // Process the remainder. 
+ if l > 0 { + switch st.rate { + case _RATE_128L: + dec_partial_sw_128l(st, xi, ci) + case _RATE_256: + dec_partial_sw_256(st, xi, ci) + } + } +} + +@(private) +finalize_sw :: proc "contextless" (st: ^State_SW, tag: []byte, ad_len, msg_len: int) { + tmp: [16]byte + endian.unchecked_put_u64le(tmp[0:], u64(ad_len) * 8) + endian.unchecked_put_u64le(tmp[8:], u64(msg_len) * 8) + + t_0, t_1 := aes.load_interleaved(tmp[:]) + + t0_0, t0_1, t1_0, t1_1: u64 = ---, ---, ---, --- + switch st.rate { + case _RATE_128L: + t_0, t_1 = aes.xor_interleaved(st.s2_0, st.s2_1, t_0, t_1) + for _ in 0 ..< 7 { + update_sw_128l(st, t_0, t_1, t_0, t_1) + } + + t0_0, t0_1 = aes.xor_interleaved(st.s0_0, st.s0_1, st.s1_0, st.s1_1) + t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, st.s2_0, st.s2_1) + t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, st.s3_0, st.s3_1) + + t1_0, t1_1 = aes.xor_interleaved(st.s4_0, st.s4_1, st.s5_0, st.s5_1) + t1_0, t1_1 = aes.xor_interleaved(t1_0, t1_1, st.s6_0, st.s6_1) + if len(tag) == TAG_SIZE_256 { + t1_0, t1_1 = aes.xor_interleaved(t1_0, t1_1, st.s7_0, st.s7_1) + } + case _RATE_256: + t_0, t_1 = aes.xor_interleaved(st.s3_0, st.s3_1, t_0, t_1) + for _ in 0 ..< 7 { + update_sw_256(st, t_0, t_1) + } + + t0_0, t0_1 = aes.xor_interleaved(st.s0_0, st.s0_1, st.s1_0, st.s1_1) + t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, st.s2_0, st.s2_1) + + t1_0, t1_1 = aes.xor_interleaved(st.s3_0, st.s3_1, st.s4_0, st.s4_1) + t1_0, t1_1 = aes.xor_interleaved(t1_0, t1_1, st.s5_0, st.s5_1) + } + switch len(tag) { + case TAG_SIZE_128: + t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, t1_0, t1_1) + aes.store_interleaved(tag, t0_0, t0_1) + case TAG_SIZE_256: + aes.store_interleaved(tag[:16], t0_0, t0_1) + aes.store_interleaved(tag[16:], t1_0, t1_1) + } +} + +@(private) +reset_state_sw :: proc "contextless" (st: ^State_SW) { + mem.zero_explicit(st, size_of(st^)) +} diff --git a/core/crypto/aegis/aegis_impl_hw_gen.odin b/core/crypto/aegis/aegis_impl_hw_gen.odin new file mode 100644 index 
000000000..5ec2f3d6e --- /dev/null +++ b/core/crypto/aegis/aegis_impl_hw_gen.odin @@ -0,0 +1,44 @@ +#+build !amd64 +package aegis + +@(private = "file") +ERR_HW_NOT_SUPPORTED :: "crypto/aegis: hardware implementation unsupported" + +@(private) +State_HW :: struct {} + +// is_hardware_accelerated returns true iff hardware accelerated AEGIS +// is supported. +is_hardware_accelerated :: proc "contextless" () -> bool { + return false +} + +@(private) +init_hw :: proc "contextless" (ctx: ^Context, st: ^State_HW, iv: []byte) { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} + +@(private) +absorb_hw :: proc "contextless" (st: ^State_HW, aad: []byte) { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} + +@(private) +enc_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} + +@(private) +dec_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} + +@(private) +finalize_hw :: proc "contextless" (st: ^State_HW, tag: []byte, ad_len, msg_len: int) { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} + +@(private) +reset_state_hw :: proc "contextless" (st: ^State_HW) { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} diff --git a/core/crypto/aegis/aegis_impl_hw_intel.odin b/core/crypto/aegis/aegis_impl_hw_intel.odin new file mode 100644 index 000000000..5334f3258 --- /dev/null +++ b/core/crypto/aegis/aegis_impl_hw_intel.odin @@ -0,0 +1,389 @@ +#+build amd64 +package aegis + +import "base:intrinsics" +import "core:crypto/aes" +import "core:encoding/endian" +import "core:mem" +import "core:simd/x86" + +@(private) +State_HW :: struct { + s0: x86.__m128i, + s1: x86.__m128i, + s2: x86.__m128i, + s3: x86.__m128i, + s4: x86.__m128i, + s5: x86.__m128i, + s6: x86.__m128i, + s7: x86.__m128i, + rate: int, +} + +// is_hardware_accelerated returns true iff hardware accelerated AEGIS +// is supported. 
+is_hardware_accelerated :: proc "contextless" () -> bool { + return aes.is_hardware_accelerated() +} + +@(private, enable_target_feature = "sse2,aes") +init_hw :: proc "contextless" (ctx: ^Context, st: ^State_HW, iv: []byte) { + switch ctx._key_len { + case KEY_SIZE_128L: + key := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[0])) + iv := intrinsics.unaligned_load((^x86.__m128i)(raw_data(iv))) + + st.s0 = x86._mm_xor_si128(key, iv) + st.s1 = intrinsics.unaligned_load((^x86.__m128i)(&_C1[0])) + st.s2 = intrinsics.unaligned_load((^x86.__m128i)(&_C0[0])) + st.s3 = st.s1 + st.s4 = st.s0 + st.s5 = x86._mm_xor_si128(key, st.s2) // key ^ C0 + st.s6 = x86._mm_xor_si128(key, st.s1) // key ^ C1 + st.s7 = st.s5 + st.rate = _RATE_128L + + for _ in 0 ..< 10 { + update_hw_128l(st, iv, key) + } + case KEY_SIZE_256: + k0 := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[0])) + k1 := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[16])) + n0 := intrinsics.unaligned_load((^x86.__m128i)(&iv[0])) + n1 := intrinsics.unaligned_load((^x86.__m128i)(&iv[16])) + + st.s0 = x86._mm_xor_si128(k0, n0) + st.s1 = x86._mm_xor_si128(k1, n1) + st.s2 = intrinsics.unaligned_load((^x86.__m128i)(&_C1[0])) + st.s3 = intrinsics.unaligned_load((^x86.__m128i)(&_C0[0])) + st.s4 = x86._mm_xor_si128(k0, st.s3) // k0 ^ C0 + st.s5 = x86._mm_xor_si128(k1, st.s2) // k1 ^ C1 + st.rate = _RATE_256 + + u0, u1 := st.s0, st.s1 + for _ in 0 ..< 4 { + update_hw_256(st, k0) + update_hw_256(st, k1) + update_hw_256(st, u0) + update_hw_256(st, u1) + } + } +} + +@(private = "file", enable_target_feature = "sse2,aes") +update_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, m0, m1: x86.__m128i) { + s0_ := x86._mm_aesenc_si128(st.s7, x86._mm_xor_si128(st.s0, m0)) + s1_ := x86._mm_aesenc_si128(st.s0, st.s1) + s2_ := x86._mm_aesenc_si128(st.s1, st.s2) + s3_ := x86._mm_aesenc_si128(st.s2, st.s3) + s4_ := x86._mm_aesenc_si128(st.s3, x86._mm_xor_si128(st.s4, m1)) + s5_ := x86._mm_aesenc_si128(st.s4, st.s5) + 
s6_ := x86._mm_aesenc_si128(st.s5, st.s6) + s7_ := x86._mm_aesenc_si128(st.s6, st.s7) + st.s0, st.s1, st.s2, st.s3, st.s4, st.s5, st.s6, st.s7 = s0_, s1_, s2_, s3_, s4_, s5_, s6_, s7_ +} + +@(private = "file", enable_target_feature = "sse2,aes") +update_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, m: x86.__m128i) { + s0_ := x86._mm_aesenc_si128(st.s5, x86._mm_xor_si128(st.s0, m)) + s1_ := x86._mm_aesenc_si128(st.s0, st.s1) + s2_ := x86._mm_aesenc_si128(st.s1, st.s2) + s3_ := x86._mm_aesenc_si128(st.s2, st.s3) + s4_ := x86._mm_aesenc_si128(st.s3, st.s4) + s5_ := x86._mm_aesenc_si128(st.s4, st.s5) + st.s0, st.s1, st.s2, st.s3, st.s4, st.s5 = s0_, s1_, s2_, s3_, s4_, s5_ +} + +@(private = "file", enable_target_feature = "sse2,aes") +absorb_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, ai: []byte) { + t0 := intrinsics.unaligned_load((^x86.__m128i)(&ai[0])) + t1 := intrinsics.unaligned_load((^x86.__m128i)(&ai[16])) + update_hw_128l(st, t0, t1) +} + +@(private = "file", enable_target_feature = "sse2,aes") +absorb_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, ai: []byte) { + m := intrinsics.unaligned_load((^x86.__m128i)(&ai[0])) + update_hw_256(st, m) +} + +@(private, enable_target_feature = "sse2,aes") +absorb_hw :: proc "contextless" (st: ^State_HW, aad: []byte) #no_bounds_check { + ai, l := aad, len(aad) + + switch st.rate { + case _RATE_128L: + for l >= _RATE_128L { + absorb_hw_128l(st, ai) + ai = ai[_RATE_128L:] + l -= _RATE_128L + } + case _RATE_256: + for l >= _RATE_256 { + absorb_hw_256(st, ai) + + ai = ai[_RATE_256:] + l -= _RATE_256 + } + } + + // Pad out the remainder with `0`s till it is rate sized. + if l > 0 { + tmp: [_RATE_MAX]byte // AAD is not confidential. 
+ copy(tmp[:], ai) + switch st.rate { + case _RATE_128L: + absorb_hw_128l(st, tmp[:]) + case _RATE_256: + absorb_hw_256(st, tmp[:]) + } + } +} + +@(private = "file", enable_target_feature = "sse2", require_results) +z_hw_128l :: #force_inline proc "contextless" (st: ^State_HW) -> (x86.__m128i, x86.__m128i) { + z0 := x86._mm_xor_si128( + st.s6, + x86._mm_xor_si128( + st.s1, + x86._mm_and_si128(st.s2, st.s3), + ), + ) + z1 := x86._mm_xor_si128( + st.s2, + x86._mm_xor_si128( + st.s5, + x86._mm_and_si128(st.s6, st.s7), + ), + ) + return z0, z1 +} + +@(private = "file", enable_target_feature = "sse2", require_results) +z_hw_256 :: #force_inline proc "contextless" (st: ^State_HW) -> x86.__m128i { + return x86._mm_xor_si128( + st.s1, + x86._mm_xor_si128( + st.s4, + x86._mm_xor_si128( + st.s5, + x86._mm_and_si128(st.s2, st.s3), + ), + ), + ) +} + +@(private = "file", enable_target_feature = "sse2,aes") +enc_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, ci, xi: []byte) #no_bounds_check { + z0, z1 := z_hw_128l(st) + + t0 := intrinsics.unaligned_load((^x86.__m128i)(&xi[0])) + t1 := intrinsics.unaligned_load((^x86.__m128i)(&xi[16])) + update_hw_128l(st, t0, t1) + + out0 := x86._mm_xor_si128(t0, z0) + out1 := x86._mm_xor_si128(t1, z1) + intrinsics.unaligned_store((^x86.__m128i)(&ci[0]), out0) + intrinsics.unaligned_store((^x86.__m128i)(&ci[16]), out1) +} + +@(private = "file", enable_target_feature = "sse2,aes") +enc_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, ci, xi: []byte) #no_bounds_check { + z := z_hw_256(st) + + xi_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(xi))) + update_hw_256(st, xi_) + + ci_ := x86._mm_xor_si128(xi_, z) + intrinsics.unaligned_store((^x86.__m128i)(raw_data(ci)), ci_) +} + +@(private, enable_target_feature = "sse2,aes") +enc_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) #no_bounds_check { + ci, xi, l := dst, src, len(src) + + switch st.rate { + case _RATE_128L: + for l >= _RATE_128L { + 
enc_hw_128l(st, ci, xi) + ci = ci[_RATE_128L:] + xi = xi[_RATE_128L:] + l -= _RATE_128L + } + case _RATE_256: + for l >= _RATE_256 { + enc_hw_256(st, ci, xi) + ci = ci[_RATE_256:] + xi = xi[_RATE_256:] + l -= _RATE_256 + } + } + + // Pad out the remainder with `0`s till it is rate sized. + if l > 0 { + tmp: [_RATE_MAX]byte // Ciphertext is not confidential. + copy(tmp[:], xi) + switch st.rate { + case _RATE_128L: + enc_hw_128l(st, tmp[:], tmp[:]) + case _RATE_256: + enc_hw_256(st, tmp[:], tmp[:]) + } + copy(ci, tmp[:l]) + } +} + +@(private = "file", enable_target_feature = "sse2,aes") +dec_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, xi, ci: []byte) #no_bounds_check { + z0, z1 := z_hw_128l(st) + + t0 := intrinsics.unaligned_load((^x86.__m128i)(&ci[0])) + t1 := intrinsics.unaligned_load((^x86.__m128i)(&ci[16])) + out0 := x86._mm_xor_si128(t0, z0) + out1 := x86._mm_xor_si128(t1, z1) + + update_hw_128l(st, out0, out1) + intrinsics.unaligned_store((^x86.__m128i)(&xi[0]), out0) + intrinsics.unaligned_store((^x86.__m128i)(&xi[16]), out1) +} + +@(private = "file", enable_target_feature = "sse2,aes") +dec_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, xi, ci: []byte) #no_bounds_check { + z := z_hw_256(st) + + ci_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(ci))) + xi_ := x86._mm_xor_si128(ci_, z) + + update_hw_256(st, xi_) + intrinsics.unaligned_store((^x86.__m128i)(raw_data(xi)), xi_) +} + +@(private = "file", enable_target_feature = "sse2,aes") +dec_partial_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, xn, cn: []byte) #no_bounds_check { + tmp: [_RATE_128L]byte + defer mem.zero_explicit(&tmp, size_of(tmp)) + + z0, z1 := z_hw_128l(st) + copy(tmp[:], cn) + + t0 := intrinsics.unaligned_load((^x86.__m128i)(&tmp[0])) + t1 := intrinsics.unaligned_load((^x86.__m128i)(&tmp[16])) + out0 := x86._mm_xor_si128(t0, z0) + out1 := x86._mm_xor_si128(t1, z1) + + intrinsics.unaligned_store((^x86.__m128i)(&tmp[0]), out0) + 
intrinsics.unaligned_store((^x86.__m128i)(&tmp[16]), out1) + copy(xn, tmp[:]) + + for off := len(xn); off < _RATE_128L; off += 1 { + tmp[off] = 0 + } + out0 = intrinsics.unaligned_load((^x86.__m128i)(&tmp[0])) // v0 + out1 = intrinsics.unaligned_load((^x86.__m128i)(&tmp[16])) // v1 + update_hw_128l(st, out0, out1) +} + +@(private = "file", enable_target_feature = "sse2,aes") +dec_partial_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, xn, cn: []byte) #no_bounds_check { + tmp: [_RATE_256]byte + defer mem.zero_explicit(&tmp, size_of(tmp)) + + z := z_hw_256(st) + copy(tmp[:], cn) + + cn_ := intrinsics.unaligned_load((^x86.__m128i)(&tmp[0])) + xn_ := x86._mm_xor_si128(cn_, z) + + intrinsics.unaligned_store((^x86.__m128i)(&tmp[0]), xn_) + copy(xn, tmp[:]) + + for off := len(xn); off < _RATE_256; off += 1 { + tmp[off] = 0 + } + xn_ = intrinsics.unaligned_load((^x86.__m128i)(&tmp[0])) + update_hw_256(st, xn_) +} + +@(private, enable_target_feature = "sse2,aes") +dec_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) #no_bounds_check { + xi, ci, l := dst, src, len(src) + + switch st.rate { + case _RATE_128L: + for l >= _RATE_128L { + dec_hw_128l(st, xi, ci) + xi = xi[_RATE_128L:] + ci = ci[_RATE_128L:] + l -= _RATE_128L + } + case _RATE_256: + for l >= _RATE_256 { + dec_hw_256(st, xi, ci) + xi = xi[_RATE_256:] + ci = ci[_RATE_256:] + l -= _RATE_256 + } + } + + // Process the remainder. 
+ if l > 0 { + switch st.rate { + case _RATE_128L: + dec_partial_hw_128l(st, xi, ci) + case _RATE_256: + dec_partial_hw_256(st, xi, ci) + } + } +} + +@(private, enable_target_feature = "sse2,aes") +finalize_hw :: proc "contextless" (st: ^State_HW, tag: []byte, ad_len, msg_len: int) { + tmp: [16]byte + endian.unchecked_put_u64le(tmp[0:], u64(ad_len) * 8) + endian.unchecked_put_u64le(tmp[8:], u64(msg_len) * 8) + + t := intrinsics.unaligned_load((^x86.__m128i)(&tmp[0])) + + t0, t1: x86.__m128i = ---, --- + switch st.rate { + case _RATE_128L: + t = x86._mm_xor_si128(st.s2, t) + for _ in 0 ..< 7 { + update_hw_128l(st, t, t) + } + + t0 = x86._mm_xor_si128(st.s0, st.s1) + t0 = x86._mm_xor_si128(t0, st.s2) + t0 = x86._mm_xor_si128(t0, st.s3) + + t1 = x86._mm_xor_si128(st.s4, st.s5) + t1 = x86._mm_xor_si128(t1, st.s6) + if len(tag) == TAG_SIZE_256 { + t1 = x86._mm_xor_si128(t1, st.s7) + } + case _RATE_256: + t = x86._mm_xor_si128(st.s3, t) + for _ in 0 ..< 7 { + update_hw_256(st, t) + } + + t0 = x86._mm_xor_si128(st.s0, st.s1) + t0 = x86._mm_xor_si128(t0, st.s2) + + t1 = x86._mm_xor_si128(st.s3, st.s4) + t1 = x86._mm_xor_si128(t1, st.s5) + } + switch len(tag) { + case TAG_SIZE_128: + t0 = x86._mm_xor_si128(t0, t1) + intrinsics.unaligned_store((^x86.__m128i)(&tag[0]), t0) + case TAG_SIZE_256: + intrinsics.unaligned_store((^x86.__m128i)(&tag[0]), t0) + intrinsics.unaligned_store((^x86.__m128i)(&tag[16]), t1) + } +} + +@(private) +reset_state_hw :: proc "contextless" (st: ^State_HW) { + mem.zero_explicit(st, size_of(st^)) +} diff --git a/core/crypto/aes/aes_ctr.odin b/core/crypto/aes/aes_ctr.odin index 20b75e57f..a74133235 100644 --- a/core/crypto/aes/aes_ctr.odin +++ b/core/crypto/aes/aes_ctr.odin @@ -21,9 +21,7 @@ Context_CTR :: struct { // init_ctr initializes a Context_CTR with the provided key and IV. 
init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := DEFAULT_IMPLEMENTATION) { - if len(iv) != CTR_IV_SIZE { - panic("crypto/aes: invalid CTR IV size") - } + ensure(len(iv) == CTR_IV_SIZE, "crypto/aes: invalid CTR IV size") init_impl(&ctx._impl, key, impl) ctx._off = BLOCK_SIZE @@ -36,16 +34,14 @@ init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := DEFAULT_IMPLEMENTAT // keystream, and writes the resulting output to dst. dst and src MUST // alias exactly or not at all. xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) { - assert(ctx._is_initialized) + ensure(ctx._is_initialized) src, dst := src, dst if dst_len := len(dst); dst_len < len(src) { src = src[:dst_len] } - if bytes.alias_inexactly(dst, src) { - panic("crypto/aes: dst and src alias inexactly") - } + ensure(!bytes.alias_inexactly(dst, src), "crypto/aes: dst and src alias inexactly") #no_bounds_check for remaining := len(src); remaining > 0; { // Process multiple blocks at once @@ -82,7 +78,7 @@ xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) { // keystream_bytes_ctr fills dst with the raw AES-CTR keystream output. keystream_bytes_ctr :: proc(ctx: ^Context_CTR, dst: []byte) { - assert(ctx._is_initialized) + ensure(ctx._is_initialized) dst := dst #no_bounds_check for remaining := len(dst); remaining > 0; { diff --git a/core/crypto/aes/aes_ecb.odin b/core/crypto/aes/aes_ecb.odin index 32476006c..cac62de5d 100644 --- a/core/crypto/aes/aes_ecb.odin +++ b/core/crypto/aes/aes_ecb.odin @@ -19,11 +19,9 @@ init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := DEFAULT_IMPLEMENTATION) // encrypt_ecb encrypts the BLOCK_SIZE buffer src, and writes the result to dst. 
encrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) { - assert(ctx._is_initialized) - - if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE { - panic("crypto/aes: invalid buffer size(s)") - } + ensure(ctx._is_initialized) + ensure(len(dst) == BLOCK_SIZE, "crypto/aes: invalid dst size") + ensure(len(src) == BLOCK_SIZE, "crypto/aes: invalid src size") switch &impl in ctx._impl { case ct64.Context: @@ -35,11 +33,9 @@ encrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) { // decrypt_ecb decrypts the BLOCK_SIZE buffer src, and writes the result to dst. decrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) { - assert(ctx._is_initialized) - - if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE { - panic("crypto/aes: invalid buffer size(s)") - } + ensure(ctx._is_initialized) + ensure(len(dst) == BLOCK_SIZE, "crypto/aes: invalid dst size") + ensure(len(src) == BLOCK_SIZE, "crypto/aes: invalid src size") switch &impl in ctx._impl { case ct64.Context: diff --git a/core/crypto/aes/aes_gcm.odin b/core/crypto/aes/aes_gcm.odin index 8616821ce..d349aa353 100644 --- a/core/crypto/aes/aes_gcm.odin +++ b/core/crypto/aes/aes_gcm.odin @@ -36,15 +36,11 @@ init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := DEFAULT_IMPLEMENTATION) // // dst and plaintext MUST alias exactly or not at all.
seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, iv, aad, plaintext: []byte) { - assert(ctx._is_initialized) + ensure(ctx._is_initialized) gcm_validate_common_slice_sizes(tag, iv, aad, plaintext) - if len(dst) != len(plaintext) { - panic("crypto/aes: invalid destination ciphertext size") - } - if bytes.alias_inexactly(dst, plaintext) { - panic("crypto/aes: dst and plaintext alias inexactly") - } + ensure(len(dst) == len(plaintext), "crypto/aes: invalid destination ciphertext size") + ensure(!bytes.alias_inexactly(dst, plaintext), "crypto/aes: dst and plaintext alias inexactly") if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw { gcm_seal_hw(&impl, dst, tag, iv, aad, plaintext) @@ -76,15 +72,11 @@ seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, iv, aad, plaintext: []byte) { // dst and plaintext MUST alias exactly or not at all. @(require_results) open_gcm :: proc(ctx: ^Context_GCM, dst, iv, aad, ciphertext, tag: []byte) -> bool { - assert(ctx._is_initialized) + ensure(ctx._is_initialized) gcm_validate_common_slice_sizes(tag, iv, aad, ciphertext) - if len(dst) != len(ciphertext) { - panic("crypto/aes: invalid destination plaintext size") - } - if bytes.alias_inexactly(dst, ciphertext) { - panic("crypto/aes: dst and ciphertext alias inexactly") - } + ensure(len(dst) == len(ciphertext), "crypto/aes: invalid destination plaintext size") + ensure(!bytes.alias_inexactly(dst, ciphertext), "crypto/aes: dst and ciphertext alias inexactly") if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw { return gcm_open_hw(&impl, dst, iv, aad, ciphertext, tag) @@ -122,21 +114,13 @@ reset_gcm :: proc "contextless" (ctx: ^Context_GCM) { @(private = "file") gcm_validate_common_slice_sizes :: proc(tag, iv, aad, text: []byte) { - if len(tag) != GCM_TAG_SIZE { - panic("crypto/aes: invalid GCM tag size") - } + ensure(len(tag) == GCM_TAG_SIZE, "crypto/aes: invalid GCM tag size") // The specification supports IVs in the range [1, 2^64) bits.
- if l := len(iv); l == 0 || u64(l) >= GCM_IV_SIZE_MAX { - panic("crypto/aes: invalid GCM IV size") - } + ensure(len(iv) != 0 && u64(len(iv)) < GCM_IV_SIZE_MAX, "crypto/aes: invalid GCM IV size") - if aad_len := u64(len(aad)); aad_len > GCM_A_MAX { - panic("crypto/aes: oversized GCM aad") - } - if text_len := u64(len(text)); text_len > GCM_P_MAX { - panic("crypto/aes: oversized GCM src data") - } + ensure(u64(len(aad)) <= GCM_A_MAX, "crypto/aes: oversized GCM aad") + ensure(u64(len(text)) <= GCM_P_MAX, "crypto/aes: oversized GCM data") } @(private = "file") diff --git a/core/crypto/aes/aes_gcm_hw_intel.odin b/core/crypto/aes/aes_gcm_hw_intel.odin index 4cb5ab3b2..3982d1452 100644 --- a/core/crypto/aes/aes_gcm_hw_intel.odin +++ b/core/crypto/aes/aes_gcm_hw_intel.odin @@ -235,7 +235,7 @@ gctr_hw :: proc( // BUG: Sticking this in gctr_hw (like the other implementations) crashes // the compiler. // -// src/check_expr.cpp(7892): Assertion Failure: `c->curr_proc_decl->entity` +// src/check_expr.cpp(8104): Assertion Failure: `c->curr_proc_decl->entity` @(private = "file", enable_target_feature = "sse4.1") hw_inc_ctr32 :: #force_inline proc "contextless" (src: ^x86.__m128i, ctr: u32) -> (x86.__m128i, u32) { ret := x86._mm_insert_epi32(src^, i32(intrinsics.byte_swap(ctr)), 3) diff --git a/core/crypto/blake2b/blake2b.odin b/core/crypto/blake2b/blake2b.odin index 74396b103..3b3fc6649 100644 --- a/core/crypto/blake2b/blake2b.odin +++ b/core/crypto/blake2b/blake2b.odin @@ -18,7 +18,7 @@ package blake2b import "../_blake2" // DIGEST_SIZE is the BLAKE2b digest size in bytes. -DIGEST_SIZE :: 64 +DIGEST_SIZE :: _blake2.BLAKE2B_SIZE // BLOCK_SIZE is the BLAKE2b block size in bytes. BLOCK_SIZE :: _blake2.BLAKE2B_BLOCK_SIZE @@ -27,9 +27,11 @@ BLOCK_SIZE :: _blake2.BLAKE2B_BLOCK_SIZE Context :: _blake2.Blake2b_Context // init initializes a Context with the default BLAKE2b config.
-init :: proc(ctx: ^Context) { +init :: proc(ctx: ^Context, digest_size := DIGEST_SIZE) { + ensure(digest_size >= 1 && digest_size <= DIGEST_SIZE, "crypto/blake2b: invalid digest size") + cfg: _blake2.Blake2_Config - cfg.size = _blake2.BLAKE2B_SIZE + cfg.size = u8(digest_size) _blake2.init(ctx, &cfg) } diff --git a/core/crypto/blake2s/blake2s.odin b/core/crypto/blake2s/blake2s.odin index 339ddf027..9bbd44541 100644 --- a/core/crypto/blake2s/blake2s.odin +++ b/core/crypto/blake2s/blake2s.odin @@ -18,7 +18,7 @@ package blake2s import "../_blake2" // DIGEST_SIZE is the BLAKE2s digest size in bytes. -DIGEST_SIZE :: 32 +DIGEST_SIZE :: _blake2.BLAKE2S_SIZE // BLOCK_SIZE is the BLAKE2s block size in bytes. BLOCK_SIZE :: _blake2.BLAKE2S_BLOCK_SIZE @@ -27,9 +27,11 @@ BLOCK_SIZE :: _blake2.BLAKE2S_BLOCK_SIZE Context :: _blake2.Blake2s_Context // init initializes a Context with the default BLAKE2s config. -init :: proc(ctx: ^Context) { +init :: proc(ctx: ^Context, digest_size := DIGEST_SIZE) { + ensure(digest_size >= 1 && digest_size <= DIGEST_SIZE, "crypto/blake2s: invalid digest size") + cfg: _blake2.Blake2_Config - cfg.size = _blake2.BLAKE2S_SIZE + cfg.size = u8(digest_size) _blake2.init(ctx, &cfg) } diff --git a/core/crypto/chacha20/chacha20.odin b/core/crypto/chacha20/chacha20.odin index dfab2bc65..e8d67eb3e 100644 --- a/core/crypto/chacha20/chacha20.odin +++ b/core/crypto/chacha20/chacha20.odin @@ -27,12 +27,8 @@ Context :: struct { // init inititializes a Context for ChaCha20 or XChaCha20 with the provided // key and iv.
init :: proc(ctx: ^Context, key, iv: []byte, impl := DEFAULT_IMPLEMENTATION) { - if len(key) != KEY_SIZE { - panic("crypto/chacha20: invalid (X)ChaCha20 key size") - } - if l := len(iv); l != IV_SIZE && l != XIV_SIZE { - panic("crypto/chacha20: invalid (X)ChaCha20 IV size") - } + ensure(len(key) == KEY_SIZE, "crypto/chacha20: invalid (X)ChaCha20 key size") + ensure(len(iv) == IV_SIZE || len(iv) == XIV_SIZE, "crypto/chacha20: invalid (X)ChaCha20 IV size") k, n := key, iv @@ -67,16 +63,14 @@ seek :: proc(ctx: ^Context, block_nr: u64) { // keystream, and writes the resulting output to dst. Dst and src MUST // alias exactly or not at all. xor_bytes :: proc(ctx: ^Context, dst, src: []byte) { - assert(ctx._state._is_initialized) + ensure(ctx._state._is_initialized) src, dst := src, dst if dst_len := len(dst); dst_len < len(src) { src = src[:dst_len] } - if bytes.alias_inexactly(dst, src) { - panic("crypto/chacha20: dst and src alias inexactly") - } + ensure(!bytes.alias_inexactly(dst, src), "crypto/chacha20: dst and src alias inexactly") st := &ctx._state #no_bounds_check for remaining := len(src); remaining > 0; { @@ -114,7 +108,7 @@ xor_bytes :: proc(ctx: ^Context, dst, src: []byte) { // keystream_bytes fills dst with the raw (X)ChaCha20 keystream output. 
keystream_bytes :: proc(ctx: ^Context, dst: []byte) { - assert(ctx._state._is_initialized) + ensure(ctx._state._is_initialized) dst, st := dst, &ctx._state #no_bounds_check for remaining := len(dst); remaining > 0; { diff --git a/core/crypto/chacha20poly1305/chacha20poly1305.odin b/core/crypto/chacha20poly1305/chacha20poly1305.odin index 3de2532dd..6706b3820 100644 --- a/core/crypto/chacha20poly1305/chacha20poly1305.odin +++ b/core/crypto/chacha20poly1305/chacha20poly1305.odin @@ -29,13 +29,9 @@ _P_MAX :: 64 * 0xffffffff // 64 * (2^32-1) @(private) _validate_common_slice_sizes :: proc (tag, iv, aad, text: []byte, is_xchacha: bool) { - if len(tag) != TAG_SIZE { - panic("crypto/chacha20poly1305: invalid destination tag size") - } expected_iv_len := is_xchacha ? XIV_SIZE : IV_SIZE - if len(iv) != expected_iv_len { - panic("crypto/chacha20poly1305: invalid IV size") - } + ensure(len(tag) == TAG_SIZE, "crypto/chacha20poly1305: invalid destination tag size") + ensure(len(iv) == expected_iv_len, "crypto/chacha20poly1305: invalid IV size") #assert(size_of(int) == 8 || size_of(int) <= 4) when size_of(int) == 8 { @@ -45,13 +41,11 @@ _validate_common_slice_sizes :: proc (tag, iv, aad, text: []byte, is_xchacha: bo // A_MAX is limited by size_of(int), so there is no need to // enforce it. P_MAX only needs to be checked on 64-bit targets, // for reasons that should be obvious. - if text_len := len(text); text_len > _P_MAX { - panic("crypto/chacha20poly1305: oversized src data") - } + ensure(len(text) <= _P_MAX, "crypto/chacha20poly1305: oversized src data") } } -@(private) +@(private, rodata) _PAD: [16]byte @(private) @@ -71,9 +65,7 @@ Context :: struct { // init initializes a Context with the provided key, for AEAD_CHACHA20_POLY1305. 
init :: proc(ctx: ^Context, key: []byte, impl := chacha20.DEFAULT_IMPLEMENTATION) { - if len(key) != KEY_SIZE { - panic("crypto/chacha20poly1305: invalid key size") - } + ensure(len(key) == KEY_SIZE, "crypto/chacha20poly1305: invalid key size") copy(ctx._key[:], key) ctx._impl = impl @@ -96,11 +88,11 @@ init_xchacha :: proc(ctx: ^Context, key: []byte, impl := chacha20.DEFAULT_IMPLEM // // dst and plaintext MUST alias exactly or not at all. seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) { + ensure(ctx._is_initialized) + ciphertext := dst _validate_common_slice_sizes(tag, iv, aad, plaintext, ctx._is_xchacha) - if len(ciphertext) != len(plaintext) { - panic("crypto/chacha20poly1305: invalid destination ciphertext size") - } + ensure(len(ciphertext) == len(plaintext), "crypto/chacha20poly1305: invalid destination ciphertext size") stream_ctx: chacha20.Context = --- chacha20.init(&stream_ctx, ctx._key[:],iv, ctx._impl) @@ -151,11 +143,11 @@ seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) { // dst and plaintext MUST alias exactly or not at all. @(require_results) open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool { + ensure(ctx._is_initialized) + plaintext := dst _validate_common_slice_sizes(tag, iv, aad, ciphertext, ctx._is_xchacha) - if len(ciphertext) != len(plaintext) { - panic("crypto/chacha20poly1305: invalid destination plaintext size") - } + ensure(len(ciphertext) == len(plaintext), "crypto/chacha20poly1305: invalid destination plaintext size") // Note: Unlike encrypt, this can fail early, so use defer for // sanitization rather than assuming control flow reaches certain diff --git a/core/crypto/deoxysii/deoxysii.odin b/core/crypto/deoxysii/deoxysii.odin new file mode 100644 index 000000000..cead770e2 --- /dev/null +++ b/core/crypto/deoxysii/deoxysii.odin @@ -0,0 +1,280 @@ +/* +package deoxysii implements the Deoxys-II-256 Authenticated Encryption +with Additional Data algorithm. 
+ +- [[ https://sites.google.com/view/deoxyscipher ]] +- [[ https://thomaspeyrin.github.io/web/assets/docs/papers/Jean-etal-JoC2021.pdf ]] +*/ +package deoxysii + +import "base:intrinsics" +import "core:bytes" +import "core:crypto/aes" +import "core:mem" +import "core:simd" + +// KEY_SIZE is the Deoxys-II-256 key size in bytes. +KEY_SIZE :: 32 +// IV_SIZE is the Deoxys-II-256 IV size in bytes. +IV_SIZE :: 15 // 120-bits +// TAG_SIZE is the Deoxys-II-256 tag size in bytes. +TAG_SIZE :: 16 + +@(private) +PREFIX_AD_BLOCK :: 0b0010 +@(private) +PREFIX_AD_FINAL :: 0b0110 +@(private) +PREFIX_MSG_BLOCK :: 0b0000 +@(private) +PREFIX_MSG_FINAL :: 0b0100 +@(private) +PREFIX_TAG :: 0b0001 +@(private) +PREFIX_SHIFT :: 4 + +@(private) +BC_ROUNDS :: 16 +@(private) +BLOCK_SIZE :: aes.BLOCK_SIZE + +@(private = "file") +_LFSR2_MASK :: simd.u8x16{ + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, +} +@(private = "file") +_LFSR3_MASK :: simd.u8x16{ + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +} +@(private = "file") +_LFSR_SH1 :: _LFSR2_MASK +@(private = "file") +_LFSR_SH5 :: simd.u8x16{ + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, +} +@(private = "file") +_LFSR_SH7 :: simd.u8x16{ + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, +} +@(private = "file", rodata) +_RCONS := []byte { + 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, + 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, + 0x72, +} + +// Context is a keyed Deoxys-II-256 instance. 
+Context :: struct { + _subkeys: [BC_ROUNDS+1][16]byte, + _impl: aes.Implementation, + _is_initialized: bool, +} + +@(private) +_validate_common_slice_sizes :: proc (ctx: ^Context, tag, iv, aad, text: []byte) { + ensure(len(tag) == TAG_SIZE, "crypto/deoxysii: invalid tag size") + ensure(len(iv) == IV_SIZE, "crypto/deoxysii: invalid IV size") + + #assert(size_of(int) == 8 || size_of(int) <= 4) + // For the nonce-misuse resistant mode, the total size of the + // associated data and the total size of the message do not exceed + // `16 * 2^max_l * 2^max_m bytes`, thus 2^128 bytes for all variants + // of Deoxys-II. Moreover, the maximum number of messages that can + // be handled for a same key is 2^max_m, that is 2^64 for all variants + // of Deoxys. +} + +// init initializes a Context with the provided key. +init :: proc(ctx: ^Context, key: []byte, impl := aes.DEFAULT_IMPLEMENTATION) { + ensure(len(key) == KEY_SIZE, "crypto/deoxysii: invalid key size") + + ctx._impl = impl + if ctx._impl == .Hardware && !is_hardware_accelerated() { + ctx._impl = .Portable + } + + derive_ks(ctx, key) + + ctx._is_initialized = true +} + +// seal encrypts the plaintext and authenticates the aad and ciphertext, +// with the provided Context and iv, stores the output in dst and tag. +// +// dst and plaintext MUST alias exactly or not at all. 
+seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) { + ensure(ctx._is_initialized) + + _validate_common_slice_sizes(ctx, tag, iv, aad, plaintext) + ensure(len(dst) == len(plaintext), "crypto/deoxysii: invalid destination ciphertext size") + ensure(!bytes.alias_inexactly(dst, plaintext), "crypto/deoxysii: dst and plaintext alias inexactly") + + switch ctx._impl { + case .Hardware: + e_hw(ctx, dst, tag, iv, aad, plaintext) + case .Portable: + e_ref(ctx, dst, tag, iv, aad, plaintext) + } +} + +// open authenticates the aad and ciphertext, and decrypts the ciphertext, +// with the provided Context, iv, and tag, and stores the output in dst, +// returning true iff the authentication was successful. If authentication +// fails, the destination buffer will be zeroed. +// +// dst and ciphertext MUST alias exactly or not at all. +@(require_results) +open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool { + ensure(ctx._is_initialized) + + _validate_common_slice_sizes(ctx, tag, iv, aad, ciphertext) + ensure(len(dst) == len(ciphertext), "crypto/deoxysii: invalid destination plaintext size") + ensure(!bytes.alias_inexactly(dst, ciphertext), "crypto/deoxysii: dst and ciphertext alias inexactly") + + ok: bool + switch ctx._impl { + case .Hardware: + ok = d_hw(ctx, dst, iv, aad, ciphertext, tag) + case .Portable: + ok = d_ref(ctx, dst, iv, aad, ciphertext, tag) + } + if !ok { + mem.zero_explicit(raw_data(dst), len(ciphertext)) + } + + return ok +} + +// reset sanitizes the Context. The Context must be +// re-initialized to be used again. +reset :: proc "contextless" (ctx: ^Context) { + mem.zero_explicit(&ctx._subkeys, size_of(ctx._subkeys)) // size in bytes; len() is only the subkey count (BC_ROUNDS+1) + ctx._is_initialized = false +} + +@(private = "file") +derive_ks :: proc "contextless" (ctx: ^Context, key: []byte) { + // Derive the constant component of each subtweakkey. 
+ // + // The key schedule is as thus: + // + // STK_i = TK1_i ^ TK2_i ^ TK3_i ^ RC_i + // + // TK1_i = h(TK1_(i-1)) + // TK2_i = h(LFSR2(TK2_(i-1))) + // TK3_i = h(LFSR3(TK3_(i-1))) + // + // where: + // + // KT = K || T + // W3 = KT[:16] + // W2 = KT[16:32] + // W1 = KT[32:] + // + // TK1_0 = W1 + // TK2_0 = W2 + // TK3_0 = W3 + // + // As `K` is fixed per Context, the XORs of `TK3_0 .. TK3_n`, + // `TK2_0 .. TK2_n` and RC_i can be precomputed in advance like + // thus: + // + // subkey_i = TK3_i ^ TK2_i ^ RC_i + // + // When it is time to actually call Deoxys-BC-384, it is then + // a simple matter of deriving each round subtweakkey via: + // + // TK1_0 = T (Tweak) + // STK_0 = subkey_0 ^ TK1_0 + // STK_i = subkey_i (precomputed) ^ H(TK1_(i-1)) + // + // We opt to use SIMD here and for the subtweakkey derivation + // as `H()` is typically a single vector instruction. + + tk2 := intrinsics.unaligned_load((^simd.u8x16)(raw_data(key[16:]))) + tk3 := intrinsics.unaligned_load((^simd.u8x16)(raw_data(key))) + + // subkey_0 does not apply LFSR2/3 or H. + intrinsics.unaligned_store( + (^simd.u8x16)(&ctx._subkeys[0]), + simd.bit_xor( + tk2, + simd.bit_xor( + tk3, + rcon(0), + ), + ), + ) + + // Precompute k_1 .. k_16. + for i in 1 ..< BC_ROUNDS+1 { + tk2 = h(lfsr2(tk2)) + tk3 = h(lfsr3(tk3)) + intrinsics.unaligned_store( + (^simd.u8x16)(&ctx._subkeys[i]), + simd.bit_xor( + tk2, + simd.bit_xor( + tk3, + rcon(i), + ), + ), + ) + } +} + +@(private = "file") +lfsr2 :: #force_inline proc "contextless" (tk: simd.u8x16) -> simd.u8x16 { + // LFSR2 is an application of the following LFSR to each byte of input. 
+ // (x7||x6||x5||x4||x3||x2||x1||x0) -> (x6||x5||x4||x3||x2||x1||x0||x7 ^ x5) + return simd.bit_or( + simd.shl(tk, _LFSR_SH1), + simd.bit_and( + simd.bit_xor( + simd.shr(tk, _LFSR_SH7), // x7 + simd.shr(tk, _LFSR_SH5), // x5 + ), + _LFSR2_MASK, + ), + ) +} + +@(private = "file") +lfsr3 :: #force_inline proc "contextless" (tk: simd.u8x16) -> simd.u8x16 { + // LFSR3 is an application of the following LFSR to each byte of input. + // (x7||x6||x5||x4||x3||x2||x1||x0) -> (x0 ^ x6||x7||x6||x5||x4||x3||x2||x1) + return simd.bit_or( + simd.shr(tk, _LFSR_SH1), + simd.bit_and( + simd.bit_xor( + simd.shl(tk, _LFSR_SH7), // x0 + simd.shl(tk, _LFSR_SH1), // x6 + ), + _LFSR3_MASK, + ), + ) +} + +@(private) +h :: #force_inline proc "contextless" (tk: simd.u8x16) -> simd.u8x16 { + return simd.swizzle( + tk, + 0x01, 0x06, 0x0b, 0x0c, 0x05, 0x0a, 0x0f, 0x00, + 0x09, 0x0e, 0x03, 0x04, 0x0d, 0x02, 0x07, 0x08, + ) +} + +@(private = "file") +rcon :: #force_inline proc "contextless" (rd: int) -> simd.u8x16 #no_bounds_check { + rc := _RCONS[rd] + return simd.u8x16{ + 1, 2, 4, 8, + rc, rc, rc, rc, + 0, 0, 0, 0, + 0, 0, 0, 0, + } +} \ No newline at end of file diff --git a/core/crypto/deoxysii/deoxysii_impl_ct64.odin b/core/crypto/deoxysii/deoxysii_impl_ct64.odin new file mode 100644 index 000000000..c4d0edb03 --- /dev/null +++ b/core/crypto/deoxysii/deoxysii_impl_ct64.odin @@ -0,0 +1,399 @@ +package deoxysii + +import "base:intrinsics" +import "core:crypto" +import aes "core:crypto/_aes/ct64" +import "core:encoding/endian" +import "core:mem" +import "core:simd" + +// This uses the bitsliced 64-bit general purpose register SWAR AES +// round function. The encryption pass skips orthogonalizing the +// AES round function input as it is always going to be the leading 0 +// padded IV, and doing a 64-byte copy is faster. 
+ +@(private = "file") +TWEAK_SIZE :: 16 + +@(private = "file") +State_SW :: struct { + ctx: ^Context, + q_stk, q_b: [8]u64, +} + +@(private = "file") +auth_tweak :: #force_inline proc "contextless" ( + dst: ^[TWEAK_SIZE]byte, + prefix: byte, + block_nr: int, +) { + endian.unchecked_put_u64be(dst[8:], u64(block_nr)) + endian.unchecked_put_u64le(dst[0:], u64(prefix) << PREFIX_SHIFT) // dst[0] = prefix << PREFIX_SHIFT +} + +@(private = "file") +enc_tweak :: #force_inline proc "contextless" ( + dst: ^[TWEAK_SIZE]byte, + tag: ^[TAG_SIZE]byte, + block_nr: int, +) { + tmp: [8]byte + endian.unchecked_put_u64be(tmp[:], u64(block_nr)) + + copy(dst[:], tag[:]) + dst[0] |= 0x80 + for i in 0 ..< 8 { + dst[i+8] ~= tmp[i] + } +} + +@(private = "file") +enc_plaintext :: #force_inline proc "contextless" ( + dst: ^[8]u64, + iv: []byte, +) { + tmp: [BLOCK_SIZE]byte = --- + tmp[0] = 0 + copy(tmp[1:], iv[:]) + + q_0, q_1 := aes.load_interleaved(tmp[:]) + for i in 0 ..< 4 { + dst[i], dst[i+4] = q_0, q_1 + } + aes.orthogonalize(dst) +} + +@(private = "file") +bc_x4 :: proc "contextless" ( + ctx: ^Context, + dst: []byte, + tweaks: ^[4][TWEAK_SIZE]byte, + q_stk: ^[8]u64, + q_b: ^[8]u64, // Orthogonalized + n: int, +) { + tk1s: [4]simd.u8x16 + for j in 0 ..< n { + tk1s[j] = intrinsics.unaligned_load((^simd.u8x16)(&tweaks[j])) + } + + // Deoxys-BC-384 + for i in 0 ..= BC_ROUNDS { + // Derive the round's subtweakkey + sk := intrinsics.unaligned_load((^simd.u8x16)(&ctx._subkeys[i])) + for j in 0 ..< n { + if i != 0 { + tk1s[j] = h(tk1s[j]) + } + intrinsics.unaligned_store( + (^simd.u8x16)(raw_data(dst)), + simd.bit_xor(sk, tk1s[j]), + ) + q_stk[j], q_stk[j+4] = aes.load_interleaved(dst[:]) + } + aes.orthogonalize(q_stk) + + if i != 0 { + aes.sub_bytes(q_b) + aes.shift_rows(q_b) + aes.mix_columns(q_b) + } + aes.add_round_key(q_b, q_stk[:]) + } + + aes.orthogonalize(q_b) + for i in 0 ..< n { + aes.store_interleaved(dst[i*BLOCK_SIZE:], q_b[i], q_b[i+4]) + } +} + +@(private = "file", 
require_results) +bc_absorb :: proc "contextless" ( + st: ^State_SW, + dst: []byte, + src: []byte, + tweak_prefix: byte, + stk_block_nr: int, +) -> int { + tweaks: [4][TWEAK_SIZE]byte = --- + tmp: [BLOCK_SIZE*4]byte = --- + + src, stk_block_nr := src, stk_block_nr + dst_ := intrinsics.unaligned_load((^simd.u8x16)(raw_data(dst))) + + nr_blocks := len(src) / BLOCK_SIZE + for nr_blocks > 0 { + // Derive the tweak(s), orthogonalize the plaintext + n := min(nr_blocks, 4) + for i in 0 ..< n { + auth_tweak(&tweaks[i], tweak_prefix, stk_block_nr + i) + st.q_b[i], st.q_b[i + 4] = aes.load_interleaved(src) + src = src[BLOCK_SIZE:] + } + aes.orthogonalize(&st.q_b) + + // Deoxys-BC-384 + bc_x4(st.ctx, tmp[:], &tweaks, &st.q_stk, &st.q_b, n) + + // XOR in the existing Auth/tag + for i in 0 ..< n { + dst_ = simd.bit_xor( + dst_, + intrinsics.unaligned_load((^simd.u8x16)(raw_data(tmp[i*BLOCK_SIZE:]))), + ) + } + + stk_block_nr += n + nr_blocks -= n + } + + intrinsics.unaligned_store((^simd.u8x16)(raw_data(dst)), dst_) + + mem.zero_explicit(&tweaks, size_of(tweaks)) + mem.zero_explicit(&tmp, size_of(tmp)) + + return stk_block_nr +} + +@(private = "file") +bc_final :: proc "contextless" ( + st: ^State_SW, + dst: []byte, + iv: []byte, +) { + tweaks: [4][TWEAK_SIZE]byte = --- + + tweaks[0][0] = PREFIX_TAG << PREFIX_SHIFT + copy(tweaks[0][1:], iv) + + st.q_b[0], st.q_b[4] = aes.load_interleaved(dst) + aes.orthogonalize(&st.q_b) + + bc_x4(st.ctx, dst, &tweaks, &st.q_stk, &st.q_b, 1) +} + +@(private = "file", require_results) +bc_encrypt :: proc "contextless" ( + st: ^State_SW, + dst: []byte, + src: []byte, + q_n: ^[8]u64, // Orthogonalized + tweak_tag: ^[TAG_SIZE]byte, + stk_block_nr: int, +) -> int { + tweaks: [4][TWEAK_SIZE]byte = --- + tmp: [BLOCK_SIZE*4]byte = --- + + dst, src, stk_block_nr := dst, src, stk_block_nr + + nr_blocks := len(src) / BLOCK_SIZE + for nr_blocks > 0 { + // Derive the tweak(s) + n := min(nr_blocks, 4) + for i in 0 ..< n { + enc_tweak(&tweaks[i], tweak_tag, 
stk_block_nr + i) + } + st.q_b = q_n^ // The plaintext is always `0^8 || N` + + // Deoxys-BC-384 + bc_x4(st.ctx, tmp[:], &tweaks, &st.q_stk, &st.q_b, n) + + // XOR the ciphertext + for i in 0 ..< n { + intrinsics.unaligned_store( + (^simd.u8x16)(raw_data(dst[i*BLOCK_SIZE:])), + simd.bit_xor( + intrinsics.unaligned_load((^simd.u8x16)(raw_data(src[i*BLOCK_SIZE:]))), + intrinsics.unaligned_load((^simd.u8x16)(raw_data(tmp[i*BLOCK_SIZE:]))), + ), + ) + } + + dst, src = dst[n*BLOCK_SIZE:], src[n*BLOCK_SIZE:] + stk_block_nr += n + nr_blocks -= n + } + + mem.zero_explicit(&tweaks, size_of(tweaks)) + mem.zero_explicit(&tmp, size_of(tmp)) + + return stk_block_nr +} + +@(private) +e_ref :: proc "contextless" (ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) #no_bounds_check { + st: State_SW = --- + st.ctx = ctx + + // Algorithm 3 + // + // Associated data + // A_1 || ... || A_la || A_∗ <- A where each |A_i| = n and |A_∗| < n + // Auth <- 0^n + // for i = 0 to la − 1 do + // Auth <- Auth ^ EK(0010 || i, A_i+1) + // end + // if A_∗ != nil then + // Auth <- Auth ^ EK(0110 || la, pad10∗(A_∗)) + // end + auth: [TAG_SIZE]byte + aad := aad + n := bc_absorb(&st, auth[:], aad, PREFIX_AD_BLOCK, 0) + aad = aad[n*BLOCK_SIZE:] + if l := len(aad); l > 0 { + a_star: [BLOCK_SIZE]byte + + copy(a_star[:], aad) + a_star[l] = 0x80 + + _ = bc_absorb(&st, auth[:], a_star[:], PREFIX_AD_FINAL, n) + } + + // Message authentication and tag generation + // M_1 || ... 
|| M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n + // tag <- Auth + // for j = 0 to l − 1 do + // tag <- tag ^ EK(0000 || j, M_j+1) + // end + // if M_∗ != nil then + // tag <- tag ^ EK(0100 || l, pad10∗(M_∗)) + // end + // tag <- EK(0001 || 0^4 || N, tag) + m := plaintext + n = bc_absorb(&st, auth[:], m, PREFIX_MSG_BLOCK, 0) + m = m[n*BLOCK_SIZE:] + if l := len(m); l > 0 { + m_star: [BLOCK_SIZE]byte + + copy(m_star[:], m) + m_star[l] = 0x80 + + _ = bc_absorb(&st, auth[:], m_star[:], PREFIX_MSG_FINAL, n) + } + bc_final(&st, auth[:], iv) + + // Message encryption + // for j = 0 to l − 1 do + // C_j <- M_j ^ EK(1 || tag ^ j, 0^8 || N) + // end + // if M_∗ != nil then + // C_∗ <- M_* ^ EK(1 || tag ^ l, 0^8 || N) + // end + // + // return (C_1 || ... || C_l || C_∗, tag) + q_iv: [8]u64 = --- + enc_plaintext(&q_iv, iv) + + m = plaintext + n = bc_encrypt(&st, dst, m, &q_iv, &auth, 0) + m = m[n*BLOCK_SIZE:] + if l := len(m); l > 0 { + m_star: [BLOCK_SIZE]byte + + copy(m_star[:], m) + _ = bc_encrypt(&st, m_star[:], m_star[:], &q_iv, &auth, n) + + copy(dst[n*BLOCK_SIZE:], m_star[:]) + + mem.zero_explicit(&m_star, size_of(m_star)) + } + + copy(tag, auth[:]) + + mem.zero_explicit(&st.q_stk, size_of(st.q_stk)) + mem.zero_explicit(&st.q_b, size_of(st.q_b)) +} + +@(private, require_results) +d_ref :: proc "contextless" (ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool { + st: State_SW = --- + st.ctx = ctx + + // Algorithm 4 + // + // Message decryption + // C_1 || ... 
|| C_l || C_∗ <- C where each |C_j| = n and |C_∗| < n + // for j = 0 to l − 1 do + // M_j <- C_j ^ EK(1 || tag ^ j, 0^8 || N) + // end + // if C_∗ != nil then + // M_∗ <- C_∗ ^ EK(1 || tag ^ l, 0^8 || N) + // end + q_iv: [8]u64 = --- + enc_plaintext(&q_iv, iv) + + auth: [TAG_SIZE]byte + copy(auth[:], tag) + + m := ciphertext + n := bc_encrypt(&st, dst, m, &q_iv, &auth, 0) + m = m[n*BLOCK_SIZE:] + if l := len(m); l > 0 { + m_star: [BLOCK_SIZE]byte + + copy(m_star[:], m) + _ = bc_encrypt(&st, m_star[:], m_star[:], &q_iv, &auth, n) + + copy(dst[n*BLOCK_SIZE:], m_star[:]) + + mem.zero_explicit(&m_star, size_of(m_star)) + } + + // Associated data + // A_1 || ... || Al_a || A_∗ <- A where each |Ai_| = n and |A_∗| < n + // Auth <- 0 + // for i = 0 to la − 1 do + // Auth <- Auth ^ EK(0010 || i, A_i+1) + // end + // if A∗ != nil then + // Auth <- Auth ^ EK(0110| | l_a, pad10∗(A_∗)) + // end + auth = 0 + aad := aad + n = bc_absorb(&st, auth[:], aad, PREFIX_AD_BLOCK, 0) + aad = aad[n*BLOCK_SIZE:] + if l := len(aad); l > 0 { + a_star: [BLOCK_SIZE]byte + + copy(a_star[:], aad) + a_star[l] = 0x80 + + _ = bc_absorb(&st, auth[:], a_star[:], PREFIX_AD_FINAL, n) + } + + // Message authentication and tag generation + // M_1 || ... || M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n + // tag0 <- Auth + // for j = 0 to l − 1 do + // tag0 <- tag0 ^ EK(0000 || j, M_j+1) + // end + // if M_∗ != nil then + // tag0 <- tag0 ^ EK(0100 || l, pad10∗(M_∗)) + // end + // tag0 <- EK(0001 || 0^4 || N, tag0) + m = dst[:len(ciphertext)] + n = bc_absorb(&st, auth[:], m, PREFIX_MSG_BLOCK, 0) + m = m[n*BLOCK_SIZE:] + if l := len(m); l > 0 { + m_star: [BLOCK_SIZE]byte + + copy(m_star[:], m) + m_star[l] = 0x80 + + _ = bc_absorb(&st, auth[:], m_star[:], PREFIX_MSG_FINAL, n) + + mem.zero_explicit(&m_star, size_of(m_star)) + } + bc_final(&st, auth[:], iv) + + // Tag verification + // if tag0 = tag then return (M_1 || ... 
|| M_l || M_∗) + // else return false + ok := crypto.compare_constant_time(auth[:], tag) == 1 + + mem.zero_explicit(&auth, size_of(auth)) + mem.zero_explicit(&st.q_stk, size_of(st.q_stk)) + mem.zero_explicit(&st.q_b, size_of(st.q_b)) + + return ok +} diff --git a/core/crypto/deoxysii/deoxysii_impl_hw_gen.odin b/core/crypto/deoxysii/deoxysii_impl_hw_gen.odin new file mode 100644 index 000000000..b0705ca62 --- /dev/null +++ b/core/crypto/deoxysii/deoxysii_impl_hw_gen.odin @@ -0,0 +1,21 @@ +#+build !amd64 +package deoxysii + +@(private = "file") +ERR_HW_NOT_SUPPORTED :: "crypto/deoxysii: hardware implementation unsupported" + +// is_hardware_accelerated returns true iff hardware accelerated Deoxys-II +// is supported. +is_hardware_accelerated :: proc "contextless" () -> bool { + return false +} + +@(private) +e_hw :: proc "contextless" (ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) #no_bounds_check { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} + +@(private, require_results) +d_hw :: proc "contextless" (ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} diff --git a/core/crypto/deoxysii/deoxysii_impl_hw_intel.odin b/core/crypto/deoxysii/deoxysii_impl_hw_intel.odin new file mode 100644 index 000000000..d268009a2 --- /dev/null +++ b/core/crypto/deoxysii/deoxysii_impl_hw_intel.odin @@ -0,0 +1,434 @@ +#+build amd64 +package deoxysii + +import "base:intrinsics" +import "core:crypto" +import "core:crypto/aes" +import "core:mem" +import "core:simd" +import "core:simd/x86" + +// This processes a maximum of 4 blocks at a time, as that is suitable +// for most current hardware that doesn't say "Xeon". 
+ +@(private = "file") +_BIT_ENC :: x86.__m128i{0x80, 0} +@(private = "file") +_PREFIX_AD_BLOCK :: x86.__m128i{PREFIX_AD_BLOCK << PREFIX_SHIFT, 0} +@(private = "file") +_PREFIX_AD_FINAL :: x86.__m128i{PREFIX_AD_FINAL << PREFIX_SHIFT, 0} +@(private = "file") +_PREFIX_MSG_BLOCK :: x86.__m128i{PREFIX_MSG_BLOCK << PREFIX_SHIFT, 0} +@(private = "file") +_PREFIX_MSG_FINAL :: x86.__m128i{PREFIX_MSG_FINAL << PREFIX_SHIFT, 0} + +// is_hardware_accelerated returns true iff hardware accelerated Deoxys-II +// is supported. +is_hardware_accelerated :: proc "contextless" () -> bool { + return aes.is_hardware_accelerated() +} + +@(private = "file", enable_target_feature = "sse4.1", require_results) +auth_tweak :: #force_inline proc "contextless" ( + prefix: x86.__m128i, + block_nr: int, +) -> x86.__m128i { + return x86._mm_insert_epi64(prefix, i64(intrinsics.byte_swap(u64(block_nr))), 1) +} + +@(private = "file", enable_target_feature = "sse2", require_results) +enc_tweak :: #force_inline proc "contextless" ( + tag: x86.__m128i, + block_nr: int, +) -> x86.__m128i { + return x86._mm_xor_si128( + x86._mm_or_si128(tag, _BIT_ENC), + x86.__m128i{0, i64(intrinsics.byte_swap(u64(block_nr)))}, + ) +} + +@(private = "file", enable_target_feature = "ssse3", require_results) +h_ :: #force_inline proc "contextless" (tk1: x86.__m128i) -> x86.__m128i { + return transmute(x86.__m128i)h(transmute(simd.u8x16)tk1) +} + +@(private = "file", enable_target_feature = "sse2,ssse3,aes", require_results) +bc_x4 :: #force_inline proc "contextless" ( + ctx: ^Context, + s_0, s_1, s_2, s_3: x86.__m128i, + tweak_0, tweak_1, tweak_2, tweak_3: x86.__m128i, +) -> (x86.__m128i, x86.__m128i, x86.__m128i, x86.__m128i) #no_bounds_check { + s_0, s_1, s_2, s_3 := s_0, s_1, s_2, s_3 + tk1_0, tk1_1, tk1_2, tk1_3 := tweak_0, tweak_1, tweak_2, tweak_3 + + sk := intrinsics.unaligned_load((^x86.__m128i)(&ctx._subkeys[0])) + stk_0 := x86._mm_xor_si128(tk1_0, sk) + stk_1 := x86._mm_xor_si128(tk1_1, sk) + stk_2 := 
x86._mm_xor_si128(tk1_2, sk) + stk_3 := x86._mm_xor_si128(tk1_3, sk) + + s_0 = x86._mm_xor_si128(s_0, stk_0) + s_1 = x86._mm_xor_si128(s_1, stk_1) + s_2 = x86._mm_xor_si128(s_2, stk_2) + s_3 = x86._mm_xor_si128(s_3, stk_3) + + for i in 1 ..= BC_ROUNDS { + sk = intrinsics.unaligned_load((^x86.__m128i)(&ctx._subkeys[i])) + + tk1_0 = h_(tk1_0) + tk1_1 = h_(tk1_1) + tk1_2 = h_(tk1_2) + tk1_3 = h_(tk1_3) + + stk_0 = x86._mm_xor_si128(tk1_0, sk) + stk_1 = x86._mm_xor_si128(tk1_1, sk) + stk_2 = x86._mm_xor_si128(tk1_2, sk) + stk_3 = x86._mm_xor_si128(tk1_3, sk) + + s_0 = x86._mm_aesenc_si128(s_0, stk_0) + s_1 = x86._mm_aesenc_si128(s_1, stk_1) + s_2 = x86._mm_aesenc_si128(s_2, stk_2) + s_3 = x86._mm_aesenc_si128(s_3, stk_3) + } + + return s_0, s_1, s_2, s_3 +} + +@(private = "file", enable_target_feature = "sse2,ssse3,aes", require_results) +bc_x1 :: #force_inline proc "contextless" ( + ctx: ^Context, + s: x86.__m128i, + tweak: x86.__m128i, +) -> x86.__m128i #no_bounds_check { + s, tk1 := s, tweak + + sk := intrinsics.unaligned_load((^x86.__m128i)(&ctx._subkeys[0])) + stk := x86._mm_xor_si128(tk1, sk) + + s = x86._mm_xor_si128(s, stk) + + for i in 1 ..= BC_ROUNDS { + sk = intrinsics.unaligned_load((^x86.__m128i)(&ctx._subkeys[i])) + + tk1 = h_(tk1) + + stk = x86._mm_xor_si128(tk1, sk) + + s = x86._mm_aesenc_si128(s, stk) + } + + return s +} + +@(private = "file", enable_target_feature = "sse2,ssse3,sse4.1,aes", require_results) +bc_absorb :: proc "contextless" ( + ctx: ^Context, + tag: x86.__m128i, + src: []byte, + tweak_prefix: x86.__m128i, + stk_block_nr: int, +) -> (x86.__m128i, int) #no_bounds_check { + src, stk_block_nr, tag := src, stk_block_nr, tag + + nr_blocks := len(src) / BLOCK_SIZE + for nr_blocks >= 4 { + d_0, d_1, d_2, d_3 := bc_x4( + ctx, + intrinsics.unaligned_load((^x86.__m128i)(raw_data(src))), + intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[BLOCK_SIZE:]))), + intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[2*BLOCK_SIZE:]))), + 
intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[3*BLOCK_SIZE:]))), + auth_tweak(tweak_prefix, stk_block_nr), + auth_tweak(tweak_prefix, stk_block_nr + 1), + auth_tweak(tweak_prefix, stk_block_nr + 2), + auth_tweak(tweak_prefix, stk_block_nr + 3), + ) + + tag = x86._mm_xor_si128(tag, d_0) + tag = x86._mm_xor_si128(tag, d_1) + tag = x86._mm_xor_si128(tag, d_2) + tag = x86._mm_xor_si128(tag, d_3) + + src = src[4*BLOCK_SIZE:] + stk_block_nr += 4 + nr_blocks -= 4 + } + + for nr_blocks > 0 { + d := bc_x1( + ctx, + intrinsics.unaligned_load((^x86.__m128i)(raw_data(src))), + auth_tweak(tweak_prefix, stk_block_nr), + ) + + tag = x86._mm_xor_si128(tag, d) + + src = src[BLOCK_SIZE:] + stk_block_nr += 1 + nr_blocks -= 1 + } + + return tag, stk_block_nr +} + +@(private = "file", enable_target_feature = "sse2,ssse3,aes", require_results) +bc_final :: proc "contextless" ( + ctx: ^Context, + tag: x86.__m128i, + iv: []byte, +) -> x86.__m128i { + tmp: [BLOCK_SIZE]byte + + tmp[0] = PREFIX_TAG << PREFIX_SHIFT + copy(tmp[1:], iv) + + tweak := intrinsics.unaligned_load((^x86.__m128i)(&tmp)) + + return bc_x1(ctx, tag, tweak) +} + +@(private = "file", enable_target_feature = "sse2,ssse3,aes", require_results) +bc_encrypt :: proc "contextless" ( + ctx: ^Context, + dst: []byte, + src: []byte, + iv: x86.__m128i, + tweak_tag: x86.__m128i, + stk_block_nr: int, +) -> int { + dst, src, stk_block_nr := dst, src, stk_block_nr + + nr_blocks := len(src) / BLOCK_SIZE + for nr_blocks >= 4 { + d_0, d_1, d_2, d_3 := bc_x4( + ctx, + iv, iv, iv, iv, + enc_tweak(tweak_tag, stk_block_nr), + enc_tweak(tweak_tag, stk_block_nr + 1), + enc_tweak(tweak_tag, stk_block_nr + 2), + enc_tweak(tweak_tag, stk_block_nr + 3), + ) + + intrinsics.unaligned_store( + (^x86.__m128i)(raw_data(dst)), + x86._mm_xor_si128( + d_0, + intrinsics.unaligned_load((^x86.__m128i)(raw_data(src))), + ), + ) + intrinsics.unaligned_store( + (^x86.__m128i)(raw_data(dst[BLOCK_SIZE:])), + x86._mm_xor_si128( + d_1, + 
intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[BLOCK_SIZE:]))), + ), + ) + intrinsics.unaligned_store( + (^x86.__m128i)(raw_data(dst[2*BLOCK_SIZE:])), + x86._mm_xor_si128( + d_2, + intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[2*BLOCK_SIZE:]))), + ), + ) + intrinsics.unaligned_store( + (^x86.__m128i)(raw_data(dst[3*BLOCK_SIZE:])), + x86._mm_xor_si128( + d_3, + intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[3*BLOCK_SIZE:]))), + ), + ) + + src, dst = src[4*BLOCK_SIZE:], dst[4*BLOCK_SIZE:] + stk_block_nr += 4 + nr_blocks -= 4 + } + + for nr_blocks > 0 { + d := bc_x1( + ctx, + iv, + enc_tweak(tweak_tag, stk_block_nr), + ) + + intrinsics.unaligned_store( + (^x86.__m128i)(raw_data(dst)), + x86._mm_xor_si128( + d, + intrinsics.unaligned_load((^x86.__m128i)(raw_data(src))), + ), + ) + + src, dst = src[BLOCK_SIZE:], dst[BLOCK_SIZE:] + stk_block_nr += 1 + nr_blocks -= 1 + } + + return stk_block_nr +} + +@(private) +e_hw :: proc "contextless" (ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) #no_bounds_check { + tmp: [BLOCK_SIZE]byte + copy(tmp[1:], iv) + iv_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(&tmp))) + + // Algorithm 3 + // + // Associated data + // A_1 || ... || A_la || A_∗ <- A where each |A_i| = n and |A_∗| < n + // Auth <- 0^n + // for i = 0 to la − 1 do + // Auth <- Auth ^ EK(0010 || i, A_i+1) + // end + // if A_∗ != nil then + // Auth <- Auth ^ EK(0110 || la, pad10∗(A_∗)) + // end + auth: x86.__m128i + n: int + + aad := aad + auth, n = bc_absorb(ctx, auth, aad, _PREFIX_AD_BLOCK, 0) + aad = aad[n*BLOCK_SIZE:] + if l := len(aad); l > 0 { + a_star: [BLOCK_SIZE]byte + + copy(a_star[:], aad) + a_star[l] = 0x80 + + auth, _ = bc_absorb(ctx, auth, a_star[:], _PREFIX_AD_FINAL, n) + } + + // Message authentication and tag generation + // M_1 || ... 
|| M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n + // tag <- Auth + // for j = 0 to l − 1 do + // tag <- tag ^ EK(0000 || j, M_j+1) + // end + // if M_∗ != nil then + // tag <- tag ^ EK(0100 || l, pad10∗(M_∗)) + // end + // tag <- EK(0001 || 0^4 ||N, tag) + m := plaintext + auth, n = bc_absorb(ctx, auth, m, _PREFIX_MSG_BLOCK, 0) + m = m[n*BLOCK_SIZE:] + if l := len(m); l > 0 { + m_star: [BLOCK_SIZE]byte + + copy(m_star[:], m) + m_star[l] = 0x80 + + auth, _ = bc_absorb(ctx, auth, m_star[:], _PREFIX_MSG_FINAL, n) + } + auth = bc_final(ctx, auth, iv) + + // Message encryption + // for j = 0 to l − 1 do + // C_j <- M_j ^ EK(1 || tag ^ j, 0^8 || N) + // end + // if M_∗ != nil then + // C_∗ <- M_* ^ EK(1 || tag ^ l, 0^8 || N) + // end + // + // return (C_1 || ... || C_l || C_∗, tag) + m = plaintext + n = bc_encrypt(ctx, dst, m, iv_, auth, 0) + m = m[n*BLOCK_SIZE:] + if l := len(m); l > 0 { + m_star: [BLOCK_SIZE]byte + + copy(m_star[:], m) + _ = bc_encrypt(ctx, m_star[:], m_star[:], iv_, auth, n) + + copy(dst[n*BLOCK_SIZE:], m_star[:]) + } + + intrinsics.unaligned_store((^x86.__m128i)(raw_data(tag)), auth) +} + +@(private, require_results) +d_hw :: proc "contextless" (ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool { + tmp: [BLOCK_SIZE]byte + copy(tmp[1:], iv) + iv_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(&tmp))) + + // Algorithm 4 + // + // Message decryption + // C_1 || ... 
|| C_l || C_∗ <- C where each |C_j| = n and |C_∗| < n + // for j = 0 to l − 1 do + // M_j <- C_j ^ EK(1 || tag ^ j, 0^8 || N) + // end + // if C_∗ != nil then + // M_∗ <- C_∗ ^ EK(1 || tag ^ l, 0^8 || N) + // end + auth := intrinsics.unaligned_load((^x86.__m128i)(raw_data(tag))) + + m := ciphertext + n := bc_encrypt(ctx, dst, m, iv_, auth, 0) + m = m[n*BLOCK_SIZE:] + if l := len(m); l > 0 { + m_star: [BLOCK_SIZE]byte + + copy(m_star[:], m) + _ = bc_encrypt(ctx, m_star[:], m_star[:], iv_, auth, n) + + copy(dst[n*BLOCK_SIZE:], m_star[:]) + + mem.zero_explicit(&m_star, size_of(m_star)) + } + + // Associated data + // A_1 || ... || Al_a || A_∗ <- A where each |Ai_| = n and |A_∗| < n + // Auth <- 0 + // for i = 0 to la − 1 do + // Auth <- Auth ^ EK(0010 || i, A_i+1) + // end + // if A∗ != nil then + // Auth <- Auth ^ EK(0110| | l_a, pad10∗(A_∗)) + // end + auth = x86.__m128i{0, 0} + aad := aad + auth, n = bc_absorb(ctx, auth, aad, _PREFIX_AD_BLOCK, 0) + aad = aad[BLOCK_SIZE*n:] + if l := len(aad); l > 0 { + a_star: [BLOCK_SIZE]byte + + copy(a_star[:], aad) + a_star[l] = 0x80 + + auth, _ = bc_absorb(ctx, auth, a_star[:], _PREFIX_AD_FINAL, n) + } + + // Message authentication and tag generation + // M_1 || ... || M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n + // tag0 <- Auth + // for j = 0 to l − 1 do + // tag0 <- tag0 ^ EK(0000 || j, M_j+1) + // end + // if M_∗ != nil then + // tag0 <- tag0 ^ EK(0100 || l, pad10∗(M_∗)) + // end + // tag0 <- EK(0001 || 0^4 || N, tag0) + m = dst[:len(ciphertext)] + auth, n = bc_absorb(ctx, auth, m, _PREFIX_MSG_BLOCK, 0) + m = m[n*BLOCK_SIZE:] + if l := len(m); l > 0 { + m_star: [BLOCK_SIZE]byte + + copy(m_star[:], m) + m_star[l] = 0x80 + + auth, _ = bc_absorb(ctx, auth, m_star[:], _PREFIX_MSG_FINAL, n) + } + auth = bc_final(ctx, auth, iv) + + // Tag verification + // if tag0 = tag then return (M_1 || ... 
|| M_l || M_∗) + // else return false + intrinsics.unaligned_store((^x86.__m128i)(raw_data(&tmp)), auth) + ok := crypto.compare_constant_time(tmp[:], tag) == 1 + + mem.zero_explicit(&tmp, size_of(tmp)) + + return ok +} diff --git a/core/crypto/ed25519/ed25519.odin b/core/crypto/ed25519/ed25519.odin index 460a19563..deeb80685 100644 --- a/core/crypto/ed25519/ed25519.odin +++ b/core/crypto/ed25519/ed25519.odin @@ -81,12 +81,8 @@ private_key_set_bytes :: proc(priv_key: ^Private_Key, b: []byte) -> bool { // private_key_bytes sets dst to byte-encoding of priv_key. private_key_bytes :: proc(priv_key: ^Private_Key, dst: []byte) { - if !priv_key._is_initialized { - panic("crypto/ed25519: uninitialized private key") - } - if len(dst) != PRIVATE_KEY_SIZE { - panic("crypto/ed25519: invalid destination size") - } + ensure(priv_key._is_initialized, "crypto/ed25519: uninitialized private key") + ensure(len(dst) == PRIVATE_KEY_SIZE, "crypto/ed25519: invalid destination size") copy(dst, priv_key._b[:]) } @@ -98,12 +94,8 @@ private_key_clear :: proc "contextless" (priv_key: ^Private_Key) { // sign writes the signature by priv_key over msg to sig. sign :: proc(priv_key: ^Private_Key, msg, sig: []byte) { - if !priv_key._is_initialized { - panic("crypto/ed25519: uninitialized private key") - } - if len(sig) != SIGNATURE_SIZE { - panic("crypto/ed25519: invalid destination size") - } + ensure(priv_key._is_initialized, "crypto/ed25519: uninitialized private key") + ensure(len(sig) == SIGNATURE_SIZE, "crypto/ed25519: invalid destination size") // 1. Compute the hash of the private key d, H(d) = (h_0, h_1, ..., h_2b-1) // using SHA-512 for Ed25519. H(d) may be precomputed. @@ -178,9 +170,7 @@ public_key_set_bytes :: proc "contextless" (pub_key: ^Public_Key, b: []byte) -> // public_key_set_priv sets pub_key to the public component of priv_key. 
public_key_set_priv :: proc(pub_key: ^Public_Key, priv_key: ^Private_Key) { - if !priv_key._is_initialized { - panic("crypto/ed25519: uninitialized public key") - } + ensure(priv_key._is_initialized, "crypto/ed25519: uninitialized private key") src := &priv_key._pub_key copy(pub_key._b[:], src._b[:]) @@ -191,21 +181,15 @@ public_key_set_priv :: proc(pub_key: ^Public_Key, priv_key: ^Private_Key) { // public_key_bytes sets dst to byte-encoding of pub_key. public_key_bytes :: proc(pub_key: ^Public_Key, dst: []byte) { - if !pub_key._is_initialized { - panic("crypto/ed25519: uninitialized public key") - } - if len(dst) != PUBLIC_KEY_SIZE { - panic("crypto/ed25519: invalid destination size") - } + ensure(pub_key._is_initialized, "crypto/ed25519: uninitialized public key") + ensure(len(dst) == PUBLIC_KEY_SIZE, "crypto/ed25519: invalid destination size") copy(dst, pub_key._b[:]) } // public_key_equal returns true iff pub_key is equal to other. public_key_equal :: proc(pub_key, other: ^Public_Key) -> bool { - if !pub_key._is_initialized || !other._is_initialized { - panic("crypto/ed25519: uninitialized public key") - } + ensure(pub_key._is_initialized && other._is_initialized, "crypto/ed25519: uninitialized public key") return crypto.compare_constant_time(pub_key._b[:], other._b[:]) == 1 } diff --git a/core/crypto/hmac/hmac.odin b/core/crypto/hmac/hmac.odin index 4813a9938..f74d6492f 100644 --- a/core/crypto/hmac/hmac.odin +++ b/core/crypto/hmac/hmac.odin @@ -56,7 +56,7 @@ init :: proc(ctx: ^Context, algorithm: hash.Algorithm, key: []byte) { // update adds more data to the Context. update :: proc(ctx: ^Context, data: []byte) { - assert(ctx._is_initialized) + ensure(ctx._is_initialized) hash.update(&ctx._i_hash, data) } @@ -64,13 +64,10 @@ update :: proc(ctx: ^Context, data: []byte) { // final finalizes the Context, writes the tag to dst, and calls // reset on the Context.
final :: proc(ctx: ^Context, dst: []byte) { - assert(ctx._is_initialized) - defer (reset(ctx)) - if len(dst) != ctx._tag_sz { - panic("crypto/hmac: invalid destination tag size") - } + ensure(ctx._is_initialized) + ensure(len(dst) == ctx._tag_sz, "crypto/hmac: invalid destination tag size") hash.final(&ctx._i_hash, dst) // H((k ^ ipad) || text) @@ -105,14 +102,14 @@ reset :: proc(ctx: ^Context) { // algorithm returns the Algorithm used by a Context instance. algorithm :: proc(ctx: ^Context) -> hash.Algorithm { - assert(ctx._is_initialized) + ensure(ctx._is_initialized) return hash.algorithm(&ctx._i_hash) } // tag_size returns the tag size of a Context instance in bytes. tag_size :: proc(ctx: ^Context) -> int { - assert(ctx._is_initialized) + ensure(ctx._is_initialized) return ctx._tag_sz } diff --git a/core/crypto/kmac/kmac.odin b/core/crypto/kmac/kmac.odin index e8bf42946..6f58e20a7 100644 --- a/core/crypto/kmac/kmac.odin +++ b/core/crypto/kmac/kmac.odin @@ -36,6 +36,7 @@ sum :: proc(sec_strength: int, dst, msg, key, domain_sep: []byte) { // tag is valid. verify :: proc(sec_strength: int, tag, msg, key, domain_sep: []byte, allocator := context.temp_allocator) -> bool { derived_tag := make([]byte, len(tag), allocator) + defer(delete(derived_tag, allocator)) sum(sec_strength, derived_tag, msg, key, domain_sep) @@ -59,8 +60,6 @@ init_256 :: proc(ctx: ^Context, key, domain_sep: []byte) { // update adds more data to the Context. update :: proc(ctx: ^Context, data: []byte) { - assert(ctx.is_initialized) - shake.write((^shake.Context)(ctx), data) } @@ -68,12 +67,9 @@ update :: proc(ctx: ^Context, data: []byte) { // on the Context. This routine will panic if the dst length is less than // MIN_TAG_SIZE.
final :: proc(ctx: ^Context, dst: []byte) { - assert(ctx.is_initialized) defer reset(ctx) - if len(dst) < MIN_TAG_SIZE { - panic("crypto/kmac: invalid KMAC tag_size, too short") - } + ensure(len(dst) >= MIN_TAG_SIZE, "crypto/kmac: invalid KMAC tag_size, too short") _sha3.final_cshake((^_sha3.Context)(ctx), dst) } @@ -103,14 +99,12 @@ _init_kmac :: proc(ctx: ^Context, key, s: []byte, sec_strength: int) { reset(ctx) } - if len(key) < sec_strength / 8 { - panic("crypto/kmac: invalid KMAC key, too short") - } + ensure(len(key) >= sec_strength / 8, "crypto/kmac: invalid KMAC key, too short") ctx_ := (^_sha3.Context)(ctx) _sha3.init_cshake(ctx_, N_KMAC, s, sec_strength) _sha3.bytepad(ctx_, [][]byte{key}, _sha3.rate_cshake(sec_strength)) } -@(private) +@(private, rodata) N_KMAC := []byte{'K', 'M', 'A', 'C'} diff --git a/core/crypto/legacy/keccak/keccak.odin b/core/crypto/legacy/keccak/keccak.odin index 6ca66b7ca..40fc2729f 100644 --- a/core/crypto/legacy/keccak/keccak.odin +++ b/core/crypto/legacy/keccak/keccak.odin @@ -40,37 +40,37 @@ BLOCK_SIZE_512 :: _sha3.RATE_512 Context :: distinct _sha3.Context // init_224 initializes a Context for Keccak-224. -init_224 :: proc(ctx: ^Context) { +init_224 :: proc "contextless" (ctx: ^Context) { ctx.mdlen = DIGEST_SIZE_224 _init(ctx) } // init_256 initializes a Context for Keccak-256. -init_256 :: proc(ctx: ^Context) { +init_256 :: proc "contextless" (ctx: ^Context) { ctx.mdlen = DIGEST_SIZE_256 _init(ctx) } // init_384 initializes a Context for Keccak-384. -init_384 :: proc(ctx: ^Context) { +init_384 :: proc "contextless" (ctx: ^Context) { ctx.mdlen = DIGEST_SIZE_384 _init(ctx) } // init_512 initializes a Context for Keccak-512. 
-init_512 :: proc(ctx: ^Context) { +init_512 :: proc "contextless" (ctx: ^Context) { ctx.mdlen = DIGEST_SIZE_512 _init(ctx) } @(private) -_init :: proc(ctx: ^Context) { +_init :: proc "contextless" (ctx: ^Context) { ctx.dsbyte = _sha3.DS_KECCAK _sha3.init((^_sha3.Context)(ctx)) } // update adds more data to the Context. -update :: proc(ctx: ^Context, data: []byte) { +update :: proc "contextless" (ctx: ^Context, data: []byte) { _sha3.update((^_sha3.Context)(ctx), data) } @@ -79,17 +79,17 @@ update :: proc(ctx: ^Context, data: []byte) { // // Iff finalize_clone is set, final will work on a copy of the Context, // which is useful for for calculating rolling digests. -final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) { +final :: proc "contextless" (ctx: ^Context, hash: []byte, finalize_clone: bool = false) { _sha3.final((^_sha3.Context)(ctx), hash, finalize_clone) } // clone clones the Context other into ctx. -clone :: proc(ctx, other: ^Context) { +clone :: proc "contextless" (ctx, other: ^Context) { _sha3.clone((^_sha3.Context)(ctx), (^_sha3.Context)(other)) } // reset sanitizes the Context. The Context must be re-initialized to // be used again. -reset :: proc(ctx: ^Context) { +reset :: proc "contextless" (ctx: ^Context) { _sha3.reset((^_sha3.Context)(ctx)) } diff --git a/core/crypto/legacy/md5/md5.odin b/core/crypto/legacy/md5/md5.odin index 28b47e0b3..050501d98 100644 --- a/core/crypto/legacy/md5/md5.odin +++ b/core/crypto/legacy/md5/md5.odin @@ -53,7 +53,7 @@ init :: proc(ctx: ^Context) { // update adds more data to the Context. update :: proc(ctx: ^Context, data: []byte) { - assert(ctx.is_initialized) + ensure(ctx.is_initialized) for i := 0; i < len(data); i += 1 { ctx.data[ctx.datalen] = data[i] @@ -72,11 +72,8 @@ update :: proc(ctx: ^Context, data: []byte) { // Iff finalize_clone is set, final will work on a copy of the Context, // which is useful for for calculating rolling digests. 
final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) { - assert(ctx.is_initialized) - - if len(hash) < DIGEST_SIZE { - panic("crypto/md5: invalid destination digest size") - } + ensure(ctx.is_initialized) + ensure(len(hash) >= DIGEST_SIZE, "crypto/md5: invalid destination digest size") ctx := ctx if finalize_clone { diff --git a/core/crypto/legacy/sha1/sha1.odin b/core/crypto/legacy/sha1/sha1.odin index 1025ecb5b..5a2b57005 100644 --- a/core/crypto/legacy/sha1/sha1.odin +++ b/core/crypto/legacy/sha1/sha1.odin @@ -60,7 +60,7 @@ init :: proc(ctx: ^Context) { // update adds more data to the Context. update :: proc(ctx: ^Context, data: []byte) { - assert(ctx.is_initialized) + ensure(ctx.is_initialized) for i := 0; i < len(data); i += 1 { ctx.data[ctx.datalen] = data[i] @@ -79,11 +79,8 @@ update :: proc(ctx: ^Context, data: []byte) { // Iff finalize_clone is set, final will work on a copy of the Context, // which is useful for for calculating rolling digests. final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) { - assert(ctx.is_initialized) - - if len(hash) < DIGEST_SIZE { - panic("crypto/sha1: invalid destination digest size") - } + ensure(ctx.is_initialized) + ensure(len(hash) >= DIGEST_SIZE, "crypto/sha1: invalid destination digest size") ctx := ctx if finalize_clone { diff --git a/core/crypto/poly1305/poly1305.odin b/core/crypto/poly1305/poly1305.odin index ea0e6c907..3dd915da7 100644 --- a/core/crypto/poly1305/poly1305.odin +++ b/core/crypto/poly1305/poly1305.odin @@ -60,9 +60,7 @@ Context :: struct { // init initializes a Context with the specified key. The key SHOULD be // unique and MUST be unpredictable for each invocation. 
init :: proc(ctx: ^Context, key: []byte) { - if len(key) != KEY_SIZE { - panic("crypto/poly1305: invalid key size") - } + ensure(len(key) == KEY_SIZE, "crypto/poly1305: invalid key size") // r = le_bytes_to_num(key[0..15]) // r = clamp(r) (r &= 0xffffffc0ffffffc0ffffffc0fffffff) @@ -85,7 +83,7 @@ init :: proc(ctx: ^Context, key: []byte) { // update adds more data to the Context. update :: proc(ctx: ^Context, data: []byte) { - assert(ctx._is_initialized) + ensure(ctx._is_initialized) msg := data msg_len := len(data) @@ -124,12 +122,10 @@ update :: proc(ctx: ^Context, data: []byte) { // final finalizes the Context, writes the tag to dst, and calls // reset on the Context. final :: proc(ctx: ^Context, dst: []byte) { - assert(ctx._is_initialized) defer reset(ctx) - if len(dst) != TAG_SIZE { - panic("poly1305: invalid destination tag size") - } + ensure(ctx._is_initialized) + ensure(len(dst) == TAG_SIZE, "poly1305: invalid destination tag size") // Process remaining block if ctx._leftover > 0 { diff --git a/core/crypto/ristretto255/ristretto255.odin b/core/crypto/ristretto255/ristretto255.odin index 7b0944e33..20a002900 100644 --- a/core/crypto/ristretto255/ristretto255.odin +++ b/core/crypto/ristretto255/ristretto255.odin @@ -16,7 +16,7 @@ ELEMENT_SIZE :: 32 // group element. 
WIDE_ELEMENT_SIZE :: 64 -@(private) +@(private, rodata) FE_NEG_ONE := field.Tight_Field_Element { 2251799813685228, 2251799813685247, @@ -24,7 +24,7 @@ FE_NEG_ONE := field.Tight_Field_Element { 2251799813685247, 2251799813685247, } -@(private) +@(private, rodata) FE_INVSQRT_A_MINUS_D := field.Tight_Field_Element { 278908739862762, 821645201101625, @@ -32,7 +32,7 @@ FE_INVSQRT_A_MINUS_D := field.Tight_Field_Element { 1777959178193151, 2118520810568447, } -@(private) +@(private, rodata) FE_ONE_MINUS_D_SQ := field.Tight_Field_Element { 1136626929484150, 1998550399581263, @@ -40,7 +40,7 @@ FE_ONE_MINUS_D_SQ := field.Tight_Field_Element { 118527312129759, 45110755273534, } -@(private) +@(private, rodata) FE_D_MINUS_ONE_SQUARED := field.Tight_Field_Element { 1507062230895904, 1572317787530805, @@ -48,7 +48,7 @@ FE_D_MINUS_ONE_SQUARED := field.Tight_Field_Element { 317374165784489, 1572899562415810, } -@(private) +@(private, rodata) FE_SQRT_AD_MINUS_ONE := field.Tight_Field_Element { 2241493124984347, 425987919032274, @@ -76,7 +76,7 @@ ge_clear :: proc "contextless" (ge: ^Group_Element) { // ge_set sets `ge = a`. ge_set :: proc(ge, a: ^Group_Element) { - _ge_assert_initialized([]^Group_Element{a}) + _ge_ensure_initialized([]^Group_Element{a}) grp.ge_set(&ge._p, &a._p) ge._is_initialized = true @@ -199,9 +199,7 @@ ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool { // ge_set_wide_bytes sets ge to the result of deriving a ristretto255 // group element, from a wide (512-bit) byte string. ge_set_wide_bytes :: proc(ge: ^Group_Element, b: []byte) { - if len(b) != WIDE_ELEMENT_SIZE { - panic("crypto/ristretto255: invalid wide input size") - } + ensure(len(b) == WIDE_ELEMENT_SIZE, "crypto/ristretto255: invalid wide input size") // The element derivation function on an input string b proceeds as // follows: @@ -222,10 +220,8 @@ ge_set_wide_bytes :: proc(ge: ^Group_Element, b: []byte) { // ge_bytes sets dst to the canonical encoding of ge. 
ge_bytes :: proc(ge: ^Group_Element, dst: []byte) { - _ge_assert_initialized([]^Group_Element{ge}) - if len(dst) != ELEMENT_SIZE { - panic("crypto/ristretto255: invalid destination size") - } + _ge_ensure_initialized([]^Group_Element{ge}) + ensure(len(dst) == ELEMENT_SIZE, "crypto/ristretto255: invalid destination size") x0, y0, z0, t0 := &ge._p.x, &ge._p.y, &ge._p.z, &ge._p.t @@ -306,7 +302,7 @@ ge_bytes :: proc(ge: ^Group_Element, dst: []byte) { // ge_add sets `ge = a + b`. ge_add :: proc(ge, a, b: ^Group_Element) { - _ge_assert_initialized([]^Group_Element{a, b}) + _ge_ensure_initialized([]^Group_Element{a, b}) grp.ge_add(&ge._p, &a._p, &b._p) ge._is_initialized = true @@ -314,7 +310,7 @@ ge_add :: proc(ge, a, b: ^Group_Element) { // ge_double sets `ge = a + a`. ge_double :: proc(ge, a: ^Group_Element) { - _ge_assert_initialized([]^Group_Element{a}) + _ge_ensure_initialized([]^Group_Element{a}) grp.ge_double(&ge._p, &a._p) ge._is_initialized = true @@ -322,7 +318,7 @@ ge_double :: proc(ge, a: ^Group_Element) { // ge_negate sets `ge = -a`. ge_negate :: proc(ge, a: ^Group_Element) { - _ge_assert_initialized([]^Group_Element{a}) + _ge_ensure_initialized([]^Group_Element{a}) grp.ge_negate(&ge._p, &a._p) ge._is_initialized = true @@ -330,7 +326,7 @@ ge_negate :: proc(ge, a: ^Group_Element) { // ge_scalarmult sets `ge = A * sc`. ge_scalarmult :: proc(ge, A: ^Group_Element, sc: ^Scalar) { - _ge_assert_initialized([]^Group_Element{A}) + _ge_ensure_initialized([]^Group_Element{A}) grp.ge_scalarmult(&ge._p, &A._p, sc) ge._is_initialized = true @@ -344,7 +340,7 @@ ge_scalarmult_generator :: proc "contextless" (ge: ^Group_Element, sc: ^Scalar) // ge_scalarmult_vartime sets `ge = A * sc` in variable time. 
ge_scalarmult_vartime :: proc(ge, A: ^Group_Element, sc: ^Scalar) { - _ge_assert_initialized([]^Group_Element{A}) + _ge_ensure_initialized([]^Group_Element{A}) grp.ge_scalarmult_vartime(&ge._p, &A._p, sc) ge._is_initialized = true @@ -358,7 +354,7 @@ ge_double_scalarmult_generator_vartime :: proc( A: ^Group_Element, b: ^Scalar, ) { - _ge_assert_initialized([]^Group_Element{A}) + _ge_ensure_initialized([]^Group_Element{A}) grp.ge_double_scalarmult_basepoint_vartime(&ge._p, a, &A._p, b) ge._is_initialized = true @@ -367,7 +363,7 @@ ge_double_scalarmult_generator_vartime :: proc( // ge_cond_negate sets `ge = a` iff `ctrl == 0` and `ge = -a` iff `ctrl == 1`. // Behavior for all other values of ctrl are undefined, ge_cond_negate :: proc(ge, a: ^Group_Element, ctrl: int) { - _ge_assert_initialized([]^Group_Element{a}) + _ge_ensure_initialized([]^Group_Element{a}) grp.ge_cond_negate(&ge._p, &a._p, ctrl) ge._is_initialized = true @@ -376,7 +372,7 @@ ge_cond_negate :: proc(ge, a: ^Group_Element, ctrl: int) { // ge_cond_assign sets `ge = ge` iff `ctrl == 0` and `ge = a` iff `ctrl == 1`. // Behavior for all other values of ctrl are undefined, ge_cond_assign :: proc(ge, a: ^Group_Element, ctrl: int) { - _ge_assert_initialized([]^Group_Element{ge, a}) + _ge_ensure_initialized([]^Group_Element{ge, a}) grp.ge_cond_assign(&ge._p, &a._p, ctrl) } @@ -384,7 +380,7 @@ ge_cond_assign :: proc(ge, a: ^Group_Element, ctrl: int) { // ge_cond_select sets `ge = a` iff `ctrl == 0` and `ge = b` iff `ctrl == 1`. // Behavior for all other values of ctrl are undefined, ge_cond_select :: proc(ge, a, b: ^Group_Element, ctrl: int) { - _ge_assert_initialized([]^Group_Element{a, b}) + _ge_ensure_initialized([]^Group_Element{a, b}) grp.ge_cond_select(&ge._p, &a._p, &b._p, ctrl) ge._is_initialized = true @@ -393,7 +389,7 @@ ge_cond_select :: proc(ge, a, b: ^Group_Element, ctrl: int) { // ge_equal returns 1 iff `a == b`, and 0 otherwise. 
@(require_results) ge_equal :: proc(a, b: ^Group_Element) -> int { - _ge_assert_initialized([]^Group_Element{a, b}) + _ge_ensure_initialized([]^Group_Element{a, b}) // CT_EQ(x1 * y2, y1 * x2) | CT_EQ(y1 * y2, x1 * x2) ax_by, ay_bx, ay_by, ax_bx: field.Tight_Field_Element = ---, ---, ---, --- @@ -501,10 +497,8 @@ ge_map :: proc "contextless" (ge: ^Group_Element, b: []byte) { } @(private) -_ge_assert_initialized :: proc(ges: []^Group_Element) { +_ge_ensure_initialized :: proc(ges: []^Group_Element) { for ge in ges { - if !ge._is_initialized { - panic("crypto/ristretto255: uninitialized group element") - } + ensure(ge._is_initialized, "crypto/ristretto255: uninitialized group element") } } diff --git a/core/crypto/ristretto255/ristretto255_scalar.odin b/core/crypto/ristretto255/ristretto255_scalar.odin index 1ecb490e0..75844b3f4 100644 --- a/core/crypto/ristretto255/ristretto255_scalar.odin +++ b/core/crypto/ristretto255/ristretto255_scalar.odin @@ -42,9 +42,7 @@ sc_set_bytes :: proc(sc: ^Scalar, b: []byte) -> bool { // scalar, from a wide (512-bit) byte string by interpreting b as a // little-endian value, and reducing it mod the group order. sc_set_bytes_wide :: proc(sc: ^Scalar, b: []byte) { - if len(b) != WIDE_SCALAR_SIZE { - panic("crypto/ristretto255: invalid wide input size") - } + ensure(len(b) == WIDE_SCALAR_SIZE, "crypto/ristretto255: invalid wide input size") b_ := (^[WIDE_SCALAR_SIZE]byte)(raw_data(b)) grp.sc_set_bytes_wide(sc, b_) @@ -52,9 +50,7 @@ sc_set_bytes_wide :: proc(sc: ^Scalar, b: []byte) { // sc_bytes sets dst to the canonical encoding of sc. 
sc_bytes :: proc(sc: ^Scalar, dst: []byte) { - if len(dst) != SCALAR_SIZE { - panic("crypto/ristretto255: invalid destination size") - } + ensure(len(dst) == SCALAR_SIZE, "crypto/ristretto255: invalid destination size") grp.sc_bytes(dst, sc) } diff --git a/core/crypto/sha2/sha2.odin b/core/crypto/sha2/sha2.odin index 4230851ab..bf9b81601 100644 --- a/core/crypto/sha2/sha2.odin +++ b/core/crypto/sha2/sha2.odin @@ -15,9 +15,9 @@ package sha2 zhibog, dotbmp: Initial implementation. */ -import "core:encoding/endian" +@(require) import "core:encoding/endian" import "core:math/bits" -import "core:mem" +@(require) import "core:mem" // DIGEST_SIZE_224 is the SHA-224 digest size in bytes. DIGEST_SIZE_224 :: 28 @@ -158,7 +158,7 @@ _init :: proc(ctx: ^$T) { // update adds more data to the Context. update :: proc(ctx: ^$T, data: []byte) { - assert(ctx.is_initialized) + ensure(ctx.is_initialized) when T == Context_256 { CURR_BLOCK_SIZE :: BLOCK_SIZE_256 @@ -194,11 +194,8 @@ update :: proc(ctx: ^$T, data: []byte) { // Iff finalize_clone is set, final will work on a copy of the Context, // which is useful for for calculating rolling digests. 
final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) { - assert(ctx.is_initialized) - - if len(hash) * 8 < ctx.md_bits { - panic("crypto/sha2: invalid destination digest size") - } + ensure(ctx.is_initialized) + ensure(len(hash) * 8 >= ctx.md_bits, "crypto/sha2: invalid destination digest size") ctx := ctx if finalize_clone { @@ -238,7 +235,7 @@ final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) { endian.unchecked_put_u64be(pad[8:], length_lo) update(ctx, pad[0:16]) } - assert(ctx.bitlength == 0) + assert(ctx.bitlength == 0) // Check for bugs when T == Context_256 { for i := 0; i < ctx.md_bits / 32; i += 1 { @@ -270,8 +267,8 @@ reset :: proc(ctx: ^$T) { SHA2 implementation */ -@(private) -sha256_k := [64]u32 { +@(private, rodata) +SHA256_K := [64]u32 { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -290,8 +287,8 @@ sha256_k := [64]u32 { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, } -@(private) -sha512_k := [80]u64 { +@(private, rodata) +SHA512_K := [80]u64 { 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, 0x3956c25bf348b538, 0x59f111f1b605d019, @@ -334,6 +331,11 @@ sha512_k := [80]u64 { 0x5fcb6fab3ad6faec, 0x6c44198c4a475817, } +@(private) +SHA256_ROUNDS :: 64 +@(private) +SHA512_ROUNDS :: 80 + @(private) SHA256_CH :: #force_inline proc "contextless" (x, y, z: u32) -> u32 { return (x & y) ~ (~x & z) @@ -395,22 +397,29 @@ SHA512_F4 :: #force_inline proc "contextless" (x: u64) -> u64 { } @(private) -sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) { +sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) #no_bounds_check { when T == Context_256 { - w: [64]u32 + if is_hardware_accelerated_256() { + sha256_transf_hw(ctx, data) + return + } + + w: [SHA256_ROUNDS]u32 wv: [8]u32 t1, t2: u32 + CURR_BLOCK_SIZE :: BLOCK_SIZE_256 } else when T == Context_512 { - w: [80]u64 + w: [SHA512_ROUNDS]u64 
wv: [8]u64 t1, t2: u64 + CURR_BLOCK_SIZE :: BLOCK_SIZE_512 } data := data for len(data) >= CURR_BLOCK_SIZE { - for i := 0; i < 16; i += 1 { + for i in 0 ..< 16 { when T == Context_256 { w[i] = endian.unchecked_get_u32be(data[i * 4:]) } else when T == Context_512 { @@ -419,22 +428,22 @@ sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) { } when T == Context_256 { - for i := 16; i < 64; i += 1 { + for i in 16 ..< SHA256_ROUNDS { w[i] = SHA256_F4(w[i - 2]) + w[i - 7] + SHA256_F3(w[i - 15]) + w[i - 16] } } else when T == Context_512 { - for i := 16; i < 80; i += 1 { + for i in 16 ..< SHA512_ROUNDS { w[i] = SHA512_F4(w[i - 2]) + w[i - 7] + SHA512_F3(w[i - 15]) + w[i - 16] } } - for i := 0; i < 8; i += 1 { + for i in 0 ..< 8 { wv[i] = ctx.h[i] } when T == Context_256 { - for i := 0; i < 64; i += 1 { - t1 = wv[7] + SHA256_F2(wv[4]) + SHA256_CH(wv[4], wv[5], wv[6]) + sha256_k[i] + w[i] + for i in 0 ..< SHA256_ROUNDS { + t1 = wv[7] + SHA256_F2(wv[4]) + SHA256_CH(wv[4], wv[5], wv[6]) + SHA256_K[i] + w[i] t2 = SHA256_F1(wv[0]) + SHA256_MAJ(wv[0], wv[1], wv[2]) wv[7] = wv[6] wv[6] = wv[5] @@ -446,8 +455,8 @@ sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) { wv[0] = t1 + t2 } } else when T == Context_512 { - for i := 0; i < 80; i += 1 { - t1 = wv[7] + SHA512_F2(wv[4]) + SHA512_CH(wv[4], wv[5], wv[6]) + sha512_k[i] + w[i] + for i in 0 ..< SHA512_ROUNDS { + t1 = wv[7] + SHA512_F2(wv[4]) + SHA512_CH(wv[4], wv[5], wv[6]) + SHA512_K[i] + w[i] t2 = SHA512_F1(wv[0]) + SHA512_MAJ(wv[0], wv[1], wv[2]) wv[7] = wv[6] wv[6] = wv[5] @@ -460,7 +469,7 @@ sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) { } } - for i := 0; i < 8; i += 1 { + for i in 0 ..< 8 { ctx.h[i] += wv[i] } diff --git a/core/crypto/sha2/sha2_impl_hw_gen.odin b/core/crypto/sha2/sha2_impl_hw_gen.odin new file mode 100644 index 000000000..85c7f8b28 --- /dev/null +++ b/core/crypto/sha2/sha2_impl_hw_gen.odin @@ -0,0 +1,15 @@ +#+build !amd64 +package sha2 + +@(private = "file") 
+ERR_HW_NOT_SUPPORTED :: "crypto/sha2: hardware implementation unsupported" + +// is_hardware_accelerated_256 returns true iff hardware accelerated +// SHA-224/SHA-256 is supported. +is_hardware_accelerated_256 :: proc "contextless" () -> bool { + return false +} + +sha256_transf_hw :: proc "contextless" (ctx: ^Context_256, data: []byte) { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} diff --git a/core/crypto/sha2/sha2_impl_hw_intel.odin b/core/crypto/sha2/sha2_impl_hw_intel.odin new file mode 100644 index 000000000..f16f353df --- /dev/null +++ b/core/crypto/sha2/sha2_impl_hw_intel.odin @@ -0,0 +1,260 @@ +#+build amd64 +package sha2 + +// Based on the public domain code by Jeffrey Walton, though +// realistically, there only is one sensible way to write this +// and Intel's whitepaper covers it. +// +// See: https://github.com/noloader/SHA-Intrinsics + +import "base:intrinsics" +import "core:simd" +import "core:simd/x86" +import "core:sys/info" + +@(private = "file") +MASK :: x86.__m128i{0x0405060700010203, 0x0c0d0e0f08090a0b} + +@(private = "file") +K_0 :: simd.u64x2{0x71374491428a2f98, 0xe9b5dba5b5c0fbcf} +@(private = "file") +K_1 :: simd.u64x2{0x59f111f13956c25b, 0xab1c5ed5923f82a4} +@(private = "file") +K_2 :: simd.u64x2{0x12835b01d807aa98, 0x550c7dc3243185be} +@(private = "file") +K_3 :: simd.u64x2{0x80deb1fe72be5d74, 0xc19bf1749bdc06a7} +@(private = "file") +K_4 :: simd.u64x2{0xefbe4786e49b69c1, 0x240ca1cc0fc19dc6} +@(private = "file") +K_5 :: simd.u64x2{0x4a7484aa2de92c6f, 0x76f988da5cb0a9dc} +@(private = "file") +K_6 :: simd.u64x2{0xa831c66d983e5152, 0xbf597fc7b00327c8} +@(private = "file") +K_7 :: simd.u64x2{0xd5a79147c6e00bf3, 0x1429296706ca6351} +@(private = "file") +K_8 :: simd.u64x2{0x2e1b213827b70a85, 0x53380d134d2c6dfc} +@(private = "file") +K_9 :: simd.u64x2{0x766a0abb650a7354, 0x92722c8581c2c92e} +@(private = "file") +K_10 :: simd.u64x2{0xa81a664ba2bfe8a1, 0xc76c51a3c24b8b70} +@(private = "file") +K_11 :: simd.u64x2{0xd6990624d192e819, 
0x106aa070f40e3585} +@(private = "file") +K_12 :: simd.u64x2{0x1e376c0819a4c116, 0x34b0bcb52748774c} +@(private = "file") +K_13 :: simd.u64x2{0x4ed8aa4a391c0cb3, 0x682e6ff35b9cca4f} +@(private = "file") +K_14 :: simd.u64x2{0x78a5636f748f82ee, 0x8cc7020884c87814} +@(private = "file") +K_15 :: simd.u64x2{0xa4506ceb90befffa, 0xc67178f2bef9a3f7} + + +// is_hardware_accelerated_256 returns true iff hardware accelerated +// SHA-224/SHA-256 is supported. +is_hardware_accelerated_256 :: proc "contextless" () -> bool { + features, ok := info.cpu_features.? + if !ok { + return false + } + + req_features :: info.CPU_Features{ + .sse2, + .ssse3, + .sse41, + .sha, + } + return features >= req_features +} + +@(private, enable_target_feature="sse2,ssse3,sse4.1,sha") +sha256_transf_hw :: proc "contextless" (ctx: ^Context_256, data: []byte) #no_bounds_check { + // Load the state + tmp := intrinsics.unaligned_load((^x86.__m128i)(&ctx.h[0])) + state_1 := intrinsics.unaligned_load((^x86.__m128i)(&ctx.h[4])) + + tmp = x86._mm_shuffle_epi32(tmp, 0xb1) // CDAB + state_1 = x86._mm_shuffle_epi32(state_1, 0x1b) // EFGH + state_0 := x86._mm_alignr_epi8(tmp, state_1, 8) // ABEF + // state_1 = x86._mm_blend_epi16(state_1, tmp, 0xf0) // CDGH + state_1 = kludge_mm_blend_epi16_0xf0(state_1, tmp) + + data := data + for len(data) >= BLOCK_SIZE_256 { + state_0_save, state_1_save := state_0, state_1 + + // Rounds 0-3 + msg := intrinsics.unaligned_load((^x86.__m128i)(raw_data(data))) + msg_0 := x86._mm_shuffle_epi8(msg, MASK) + msg = x86._mm_add_epi32(msg_0, x86.__m128i(K_0)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + msg = x86._mm_shuffle_epi32(msg, 0xe) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + + // Rounds 4-7 + msg_1 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(data[16:]))) + msg_1 = x86._mm_shuffle_epi8(msg_1, MASK) + msg = x86._mm_add_epi32(msg_1, x86.__m128i(K_1)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + msg = 
x86._mm_shuffle_epi32(msg, 0xe) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + msg_0 = x86._mm_sha256msg1_epu32(msg_0, msg_1) + + // Rounds 8-11 + msg_2 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(data[32:]))) + msg_2 = x86._mm_shuffle_epi8(msg_2, MASK) + msg = x86._mm_add_epi32(msg_2, x86.__m128i(K_2)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + msg = x86._mm_shuffle_epi32(msg, 0xe) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + msg_1 = x86._mm_sha256msg1_epu32(msg_1, msg_2) + + // Rounds 12-15 + msg_3 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(data[48:]))) + msg_3 = x86._mm_shuffle_epi8(msg_3, MASK) + msg = x86._mm_add_epi32(msg_3, x86.__m128i(K_3)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + tmp = x86._mm_alignr_epi8(msg_3, msg_2, 4) + msg_0 = x86._mm_add_epi32(msg_0, tmp) + msg_0 = x86._mm_sha256msg2_epu32(msg_0, msg_3) + msg = x86._mm_shuffle_epi32(msg, 0x0e) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + msg_2 = x86._mm_sha256msg1_epu32(msg_2, msg_3) + + // Rounds 16-19 + msg = x86._mm_add_epi32(msg_0, x86.__m128i(K_4)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + tmp = x86._mm_alignr_epi8(msg_0, msg_3, 4) + msg_1 = x86._mm_add_epi32(msg_1, tmp) + msg_1 = x86._mm_sha256msg2_epu32(msg_1, msg_0) + msg = x86._mm_shuffle_epi32(msg, 0x0e) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + msg_3 = x86._mm_sha256msg1_epu32(msg_3, msg_0) + + // Rounds 20-23 + msg = x86._mm_add_epi32(msg_1, x86.__m128i(K_5)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + tmp = x86._mm_alignr_epi8(msg_1, msg_0, 4) + msg_2 = x86._mm_add_epi32(msg_2, tmp) + msg_2 = x86._mm_sha256msg2_epu32(msg_2, msg_1) + msg = x86._mm_shuffle_epi32(msg, 0x0e) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + msg_0 = x86._mm_sha256msg1_epu32(msg_0, msg_1) + + // Rounds 24-27 + msg = x86._mm_add_epi32(msg_2, x86.__m128i(K_6)) + state_1 = 
x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + tmp = x86._mm_alignr_epi8(msg_2, msg_1, 4) + msg_3 = x86._mm_add_epi32(msg_3, tmp) + msg_3 = x86._mm_sha256msg2_epu32(msg_3, msg_2) + msg = x86._mm_shuffle_epi32(msg, 0x0e) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + msg_1 = x86._mm_sha256msg1_epu32(msg_1, msg_2) + + // Rounds 28-31 + msg = x86._mm_add_epi32(msg_3, x86.__m128i(K_7)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + tmp = x86._mm_alignr_epi8(msg_3, msg_2, 4) + msg_0 = x86._mm_add_epi32(msg_0, tmp) + msg_0 = x86._mm_sha256msg2_epu32(msg_0, msg_3) + msg = x86._mm_shuffle_epi32(msg, 0x0e) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + msg_2 = x86._mm_sha256msg1_epu32(msg_2, msg_3) + + // Rounds 32-35 + msg = x86._mm_add_epi32(msg_0, x86.__m128i(K_8)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + tmp = x86._mm_alignr_epi8(msg_0, msg_3, 4) + msg_1 = x86._mm_add_epi32(msg_1, tmp) + msg_1 = x86._mm_sha256msg2_epu32(msg_1, msg_0) + msg = x86._mm_shuffle_epi32(msg, 0x0e) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + msg_3 = x86._mm_sha256msg1_epu32(msg_3, msg_0) + + // Rounds 36-39 + msg = x86._mm_add_epi32(msg_1, x86.__m128i(K_9)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + tmp = x86._mm_alignr_epi8(msg_1, msg_0, 4) + msg_2 = x86._mm_add_epi32(msg_2, tmp) + msg_2 = x86._mm_sha256msg2_epu32(msg_2, msg_1) + msg = x86._mm_shuffle_epi32(msg, 0x0e) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + msg_0 = x86._mm_sha256msg1_epu32(msg_0, msg_1) + + // Rounds 40-43 + msg = x86._mm_add_epi32(msg_2, x86.__m128i(K_10)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + tmp = x86._mm_alignr_epi8(msg_2, msg_1, 4) + msg_3 = x86._mm_add_epi32(msg_3, tmp) + msg_3 = x86._mm_sha256msg2_epu32(msg_3, msg_2) + msg = x86._mm_shuffle_epi32(msg, 0x0e) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + msg_1 = x86._mm_sha256msg1_epu32(msg_1, 
msg_2) + + // Rounds 44-47 + msg = x86._mm_add_epi32(msg_3, x86.__m128i(K_11)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + tmp = x86._mm_alignr_epi8(msg_3, msg_2, 4) + msg_0 = x86._mm_add_epi32(msg_0, tmp) + msg_0 = x86._mm_sha256msg2_epu32(msg_0, msg_3) + msg = x86._mm_shuffle_epi32(msg, 0x0e) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + msg_2 = x86._mm_sha256msg1_epu32(msg_2, msg_3) + + // Rounds 48-51 + msg = x86._mm_add_epi32(msg_0, x86.__m128i(K_12)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + tmp = x86._mm_alignr_epi8(msg_0, msg_3, 4) + msg_1 = x86._mm_add_epi32(msg_1, tmp) + msg_1 = x86._mm_sha256msg2_epu32(msg_1, msg_0) + msg = x86._mm_shuffle_epi32(msg, 0x0e) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + msg_3 = x86._mm_sha256msg1_epu32(msg_3, msg_0) + + // Rounds 52-55 + msg = x86._mm_add_epi32(msg_1, x86.__m128i(K_13)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + tmp = x86._mm_alignr_epi8(msg_1, msg_0, 4) + msg_2 = x86._mm_add_epi32(msg_2, tmp) + msg_2 = x86._mm_sha256msg2_epu32(msg_2, msg_1) + msg = x86._mm_shuffle_epi32(msg, 0x0e) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + + /* Rounds 56-59 */ + msg = x86._mm_add_epi32(msg_2, x86.__m128i(K_14)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + tmp = x86._mm_alignr_epi8(msg_2, msg_1, 4) + msg_3 = x86._mm_add_epi32(msg_3, tmp) + msg_3 = x86._mm_sha256msg2_epu32(msg_3, msg_2) + msg = x86._mm_shuffle_epi32(msg, 0x0e) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + + // Rounds 60-63 + msg = x86._mm_add_epi32(msg_3, x86.__m128i(K_15)) + state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg) + msg = x86._mm_shuffle_epi32(msg, 0x0e) + state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg) + + state_0 = x86._mm_add_epi32(state_0, state_0_save) + state_1 = x86._mm_add_epi32(state_1, state_1_save) + + data = data[BLOCK_SIZE_256:] + } + + // Write back the updated state 
+ tmp = x86._mm_shuffle_epi32(state_0, 0x1b) // FEBA + state_1 = x86._mm_shuffle_epi32(state_1, 0xb1) // DCHG + // state_0 = x86._mm_blend_epi16(tmp, state_1, 0xf0) // DCBA + state_0 = kludge_mm_blend_epi16_0xf0(tmp, state_1) + state_1 = x86._mm_alignr_epi8(state_1, tmp, 8) // ABEF + + intrinsics.unaligned_store((^x86.__m128i)(&ctx.h[0]), state_0) + intrinsics.unaligned_store((^x86.__m128i)(&ctx.h[4]), state_1) +} + +@(private = "file") +kludge_mm_blend_epi16_0xf0 :: #force_inline proc "contextless"(a, b: x86.__m128i) -> x86.__m128i { + // HACK HACK HACK: LLVM got rid of `llvm.x86.sse41.pblendw`. + a_ := simd.to_array(a) + b_ := simd.to_array(b) + return x86.__m128i{a_[0], b_[1]} +} diff --git a/core/crypto/siphash/siphash.odin b/core/crypto/siphash/siphash.odin index c145ab3f0..f9fe50cb0 100644 --- a/core/crypto/siphash/siphash.odin +++ b/core/crypto/siphash/siphash.odin @@ -219,18 +219,14 @@ verify_4_8 :: proc { */ init :: proc(ctx: ^Context, key: []byte, c_rounds, d_rounds: int) { - if len(key) != KEY_SIZE { - panic("crypto/siphash; invalid key size") - } + ensure(len(key) == KEY_SIZE,"crypto/siphash; invalid key size") ctx.c_rounds = c_rounds ctx.d_rounds = d_rounds is_valid_setting := (ctx.c_rounds == 1 && ctx.d_rounds == 3) || (ctx.c_rounds == 2 && ctx.d_rounds == 4) || (ctx.c_rounds == 4 && ctx.d_rounds == 8) - if !is_valid_setting { - panic("crypto/siphash: incorrect rounds set up") - } + ensure(is_valid_setting, "crypto/siphash: incorrect rounds set up") ctx.k0 = endian.unchecked_get_u64le(key[:8]) ctx.k1 = endian.unchecked_get_u64le(key[8:]) ctx.v0 = 0x736f6d6570736575 ~ ctx.k0 @@ -245,7 +241,7 @@ init :: proc(ctx: ^Context, key: []byte, c_rounds, d_rounds: int) { } update :: proc(ctx: ^Context, data: []byte) { - assert(ctx.is_initialized, "crypto/siphash: context is not initialized") + ensure(ctx.is_initialized) data := data ctx.total_length += len(data) @@ -269,7 +265,7 @@ update :: proc(ctx: ^Context, data: []byte) { } final :: proc(ctx: ^Context, dst: 
^u64) { - assert(ctx.is_initialized, "crypto/siphash: context is not initialized") + ensure(ctx.is_initialized) tmp: [BLOCK_SIZE]byte copy(tmp[:], ctx.buf[:ctx.last_block]) @@ -336,9 +332,8 @@ _get_byte :: #force_inline proc "contextless" (byte_num: byte, into: u64) -> byt @(private) _collect_output :: #force_inline proc(dst: []byte, hash: u64) { - if len(dst) < DIGEST_SIZE { - panic("crypto/siphash: invalid tag size") - } + ensure(len(dst) >= DIGEST_SIZE, "crypto/siphash: invalid tag size") + dst[0] = _get_byte(7, hash) dst[1] = _get_byte(6, hash) dst[2] = _get_byte(5, hash) diff --git a/core/crypto/sm3/sm3.odin b/core/crypto/sm3/sm3.odin index f910d735b..6487c5e8c 100644 --- a/core/crypto/sm3/sm3.odin +++ b/core/crypto/sm3/sm3.odin @@ -53,7 +53,7 @@ init :: proc(ctx: ^Context) { // update adds more data to the Context. update :: proc(ctx: ^Context, data: []byte) { - assert(ctx.is_initialized) + ensure(ctx.is_initialized) data := data ctx.length += u64(len(data)) @@ -83,11 +83,8 @@ update :: proc(ctx: ^Context, data: []byte) { // Iff finalize_clone is set, final will work on a copy of the Context, // which is useful for for calculating rolling digests. 
final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) { - assert(ctx.is_initialized) - - if len(hash) < DIGEST_SIZE { - panic("crypto/sm3: invalid destination digest size") - } + ensure(ctx.is_initialized) + ensure(len(hash) >= DIGEST_SIZE, "crypto/sm3: invalid destination digest size") ctx := ctx if finalize_clone { @@ -110,7 +107,7 @@ final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) { length <<= 3 endian.unchecked_put_u64be(pad[:], length) update(ctx, pad[0:8]) - assert(ctx.bitlength == 0) + assert(ctx.bitlength == 0) // Check for bugs for i := 0; i < DIGEST_SIZE / 4; i += 1 { endian.unchecked_put_u32be(hash[i * 4:], ctx.state[i]) @@ -136,7 +133,7 @@ reset :: proc(ctx: ^Context) { SM3 implementation */ -@(private) +@(private, rodata) IV := [8]u32 { 0x7380166f, 0x4914b2b9, 0x172442d7, 0xda8a0600, 0xa96f30bc, 0x163138aa, 0xe38dee4d, 0xb0fb0e4e, diff --git a/core/crypto/x25519/x25519.odin b/core/crypto/x25519/x25519.odin index 412a767b8..6805c3ff8 100644 --- a/core/crypto/x25519/x25519.odin +++ b/core/crypto/x25519/x25519.odin @@ -15,7 +15,7 @@ SCALAR_SIZE :: 32 // POINT_SIZE is the size of a X25519 point (public key/shared secret) in bytes. POINT_SIZE :: 32 -@(private) +@(private, rodata) _BASE_POINT: [32]byte = {9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} @(private) @@ -101,15 +101,9 @@ _scalarmult :: proc "contextless" (out, scalar, point: ^[32]byte) { // scalarmult "multiplies" the provided scalar and point, and writes the // resulting point to dst. 
scalarmult :: proc(dst, scalar, point: []byte) { - if len(scalar) != SCALAR_SIZE { - panic("crypto/x25519: invalid scalar size") - } - if len(point) != POINT_SIZE { - panic("crypto/x25519: invalid point size") - } - if len(dst) != POINT_SIZE { - panic("crypto/x25519: invalid destination point size") - } + ensure(len(scalar) == SCALAR_SIZE, "crypto/x25519: invalid scalar size") + ensure(len(point) == POINT_SIZE, "crypto/x25519: invalid point size") + ensure(len(dst) == POINT_SIZE, "crypto/x25519: invalid destination point size") // "clamp" the scalar e: [32]byte = --- diff --git a/core/crypto/x448/x448.odin b/core/crypto/x448/x448.odin new file mode 100644 index 000000000..43c5d25e0 --- /dev/null +++ b/core/crypto/x448/x448.odin @@ -0,0 +1,155 @@ +/* +package x448 implements the X448 (aka curve448) Elliptic-Curve +Diffie-Hellman key exchange protocol. + +See: +- [[ https://www.rfc-editor.org/rfc/rfc7748 ]] +*/ +package x448 + +import field "core:crypto/_fiat/field_curve448" +import "core:mem" + +// SCALAR_SIZE is the size of a X448 scalar (private key) in bytes. +SCALAR_SIZE :: 56 +// POINT_SIZE is the size of a X448 point (public key/shared secret) in bytes. +POINT_SIZE :: 56 + +@(private, rodata) +_BASE_POINT: [56]byte = { + 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +} + +@(private) +_scalar_bit :: #force_inline proc "contextless" (s: ^[56]byte, i: int) -> u8 { + if i < 0 { + return 0 + } + return (s[i >> 3] >> uint(i & 7)) & 1 +} + +@(private) +_scalarmult :: proc "contextless" (out, scalar, point: ^[56]byte) { + // Montgomery pseudo-multiplication, using the RFC 7748 formula. 
+ t1, t2: field.Loose_Field_Element = ---, --- + + // x_1 = u + // x_2 = 1 + // z_2 = 0 + // x_3 = u + // z_3 = 1 + x1: field.Tight_Field_Element = --- + field.fe_from_bytes(&x1, point) + + x2, x3, z2, z3: field.Tight_Field_Element = ---, ---, ---, --- + field.fe_one(&x2) + field.fe_zero(&z2) + field.fe_set(&x3, &x1) + field.fe_one(&z3) + + // swap = 0 + swap: int + + // For t = bits-1 down to 0: + for t := 448 - 1; t >= 0; t -= 1 { + // k_t = (k >> t) & 1 + k_t := int(_scalar_bit(scalar, t)) + // swap ^= k_t + swap ~= k_t + // Conditional swap; see text below. + // (x_2, x_3) = cswap(swap, x_2, x_3) + field.fe_cond_swap(&x2, &x3, swap) + // (z_2, z_3) = cswap(swap, z_2, z_3) + field.fe_cond_swap(&z2, &z3, swap) + // swap = k_t + swap = k_t + + // Note: This deliberately omits reductions after add/sub operations + // if the result is only ever used as the input to a mul/square since + // the implementations of those can deal with non-reduced inputs. + // + // fe_tighten_cast is only used to store a fully reduced + // output in a Loose_Field_Element, or to provide such a + // Loose_Field_Element as a Tight_Field_Element argument. 
+ + // A = x_2 + z_2 + field.fe_add(&t1, &x2, &z2) + // B = x_2 - z_2 + field.fe_sub(&t2, &x2, &z2) + // D = x_3 - z_3 + field.fe_sub(field.fe_relax_cast(&z2), &x3, &z3) // (z2 unreduced) + // DA = D * A + field.fe_carry_mul(&x2, field.fe_relax_cast(&z2), &t1) + // C = x_3 + z_3 + field.fe_add(field.fe_relax_cast(&z3), &x3, &z3) // (z3 unreduced) + // CB = C * B + field.fe_carry_mul(&x3, &t2, field.fe_relax_cast(&z3)) + // z_3 = x_1 * (DA - CB)^2 + field.fe_sub(field.fe_relax_cast(&z3), &x2, &x3) // (z3 unreduced) + field.fe_carry_square(&z3, field.fe_relax_cast(&z3)) + field.fe_carry_mul(&z3, field.fe_relax_cast(&x1), field.fe_relax_cast(&z3)) + // x_3 = (DA + CB)^2 + field.fe_add(field.fe_relax_cast(&z2), &x2, &x3) // (z2 unreduced) + field.fe_carry_square(&x3, field.fe_relax_cast(&z2)) + + // AA = A^2 + field.fe_carry_square(&z2, &t1) + // BB = B^2 + field.fe_carry_square(field.fe_tighten_cast(&t1), &t2) // (t1 reduced) + // x_2 = AA * BB + field.fe_carry_mul(&x2, field.fe_relax_cast(&z2), &t1) + // E = AA - BB + field.fe_sub(&t2, &z2, field.fe_tighten_cast(&t1)) // (t1 (input) is reduced) + // z_2 = E * (AA + a24 * E) + field.fe_carry_mul_small(field.fe_tighten_cast(&t1), &t2, 39081) // (t1 reduced) + field.fe_add(&t1, &z2, field.fe_tighten_cast(&t1)) // (t1 (input) is reduced) + field.fe_carry_mul(&z2, &t2, &t1) + } + + // Conditional swap; see text below. + // (x_2, x_3) = cswap(swap, x_2, x_3) + field.fe_cond_swap(&x2, &x3, swap) + // (z_2, z_3) = cswap(swap, z_2, z_3) + field.fe_cond_swap(&z2, &z3, swap) + + // Return x_2 * (z_2^(p - 2)) + field.fe_carry_inv(&z2, field.fe_relax_cast(&z2)) + field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2)) + field.fe_to_bytes(out, &x2) + + field.fe_clear_vec([]^field.Tight_Field_Element{&x1, &x2, &x3, &z2, &z3}) + field.fe_clear_vec([]^field.Loose_Field_Element{&t1, &t2}) +} + +// scalarmult "multiplies" the provided scalar and point, and writes the +// resulting point to dst. 
+scalarmult :: proc(dst, scalar, point: []byte) { + ensure(len(scalar) == SCALAR_SIZE, "crypto/x448: invalid scalar size") + ensure(len(point) == POINT_SIZE, "crypto/x448: invalid point size") + ensure(len(dst) == POINT_SIZE, "crypto/x448: invalid destination point size") + + // "clamp" the scalar + e: [56]byte = --- + copy_slice(e[:], scalar) + e[0] &= 252 + e[55] |= 128 + + p: [56]byte = --- + copy_slice(p[:], point) + + d: [56]byte = --- + _scalarmult(&d, &e, &p) + copy_slice(dst, d[:]) + + mem.zero_explicit(&e, size_of(e)) + mem.zero_explicit(&d, size_of(d)) +} + +// scalarmult_basepoint "multiplies" the provided scalar with the X448 +// base point and writes the resulting point to dst. +scalarmult_basepoint :: proc(dst, scalar: []byte) { + scalarmult(dst, scalar, _BASE_POINT[:]) +} diff --git a/core/debug/trace/trace_windows.odin b/core/debug/trace/trace_windows.odin index c9868e338..96507714c 100644 --- a/core/debug/trace/trace_windows.odin +++ b/core/debug/trace/trace_windows.odin @@ -49,7 +49,9 @@ _resolve :: proc(ctx: ^Context, frame: Frame, allocator: runtime.Allocator) -> ( data: [size_of(win32.SYMBOL_INFOW) + size_of([256]win32.WCHAR)]byte symbol := (^win32.SYMBOL_INFOW)(&data[0]) - symbol.SizeOfStruct = size_of(symbol) + // The value of SizeOfStruct must be the size of the whole struct, + // not just the size of the pointer + symbol.SizeOfStruct = size_of(symbol^) symbol.MaxNameLen = 255 if win32.SymFromAddrW(ctx.impl.hProcess, win32.DWORD64(frame), &{}, symbol) { fl.procedure, _ = win32.wstring_to_utf8(&symbol.Name[0], -1, allocator) diff --git a/core/mem/alloc.odin b/core/mem/alloc.odin index 6dcfb7888..48cc39245 100644 --- a/core/mem/alloc.odin +++ b/core/mem/alloc.odin @@ -888,6 +888,34 @@ make_aligned :: proc( return runtime.make_aligned(T, len, alignment, allocator, loc) } + +/* +Allocate a new slice with alignment for allocators that might not support the +specified alignment requirement. 
+ +This procedure allocates a new slice of type `T` with length `len`, aligned +on a boundary specified by `alignment` from an allocator specified by +`allocator`, and returns the allocated slice. + +The user should `delete` the returned `original_data` slice, not the typed `slice`. +*/ +@(require_results) +make_over_aligned :: proc( + $T: typeid/[]$E, + #any_int len: int, + alignment: int, + allocator: runtime.Allocator, + loc := #caller_location, +) -> (slice: T, original_data: []byte, err: Allocator_Error) { + size := size_of(E)*len + alignment-1 + original_data, err = runtime.make([]byte, size, allocator, loc) + if err == nil { + ptr := align_forward(raw_data(original_data), uintptr(alignment)) + slice = ([^]E)(ptr)[:len] + } + return +} + /* Allocate a new slice. diff --git a/core/mem/mem.odin b/core/mem/mem.odin index b2a7158a1..5789309f7 100644 --- a/core/mem/mem.odin +++ b/core/mem/mem.odin @@ -171,16 +171,15 @@ If the return value is: The comparison is performed as follows: 1. Each byte, upto `min(len(a), len(b))` bytes is compared between `a` and `b`. - - If the byte in slice `a` is smaller than a byte in slice `b`, then comparison - stops and this procedure returns `-1`. - - If the byte in slice `a` is bigger than a byte in slice `b`, then comparison - stops and this procedure returns `+1`. - - Otherwise the comparison continues until `min(len(a), len(b))` are compared. -2. If all the bytes in the range are equal, then the lengths of the slices are - compared. - - If the length of slice `a` is smaller than the length of slice `b`, then `-1` is returned. - - If the length of slice `b` is smaller than the length of slice `b`, then `+1` is returned. - - Otherwise `0` is returned. + - If the byte in slice `a` is smaller than a byte in slice `b`, then comparison + stops and this procedure returns `-1`. + - If the byte in slice `a` is bigger than a byte in slice `b`, then comparison + stops and this procedure returns `+1`. 
+ - Otherwise the comparison continues until `min(len(a), len(b))` are compared. +2. If all the bytes in the range are equal, then the lengths of the slices are compared. + - If the length of slice `a` is smaller than the length of slice `b`, then `-1` is returned. + - If the length of slice `b` is smaller than the length of slice `a`, then `+1` is returned. + - Otherwise `0` is returned. */ @(require_results) compare :: proc "contextless" (a, b: []byte) -> int { @@ -207,11 +206,11 @@ If the return value is: The comparison is performed as follows: 1. Each byte, upto `n` bytes is compared between `a` and `b`. - - If the byte in `a` is smaller than a byte in `b`, then comparison stops - and this procedure returns `-1`. - - If the byte in `a` is bigger than a byte in `b`, then comparison stops - and this procedure returns `+1`. - - Otherwise the comparison continues until `n` bytes are compared. + - If the byte in `a` is smaller than a byte in `b`, then comparison stops + and this procedure returns `-1`. + - If the byte in `a` is bigger than a byte in `b`, then comparison stops + and this procedure returns `+1`. + - Otherwise the comparison continues until `n` bytes are compared. 2. If all the bytes in the range are equal, this procedure returns `0`. */ @(require_results) @@ -233,11 +232,11 @@ If the return value is: The comparison is performed as follows: 1. Each byte, upto `n` bytes is compared between `a` and `b`. - - If the byte in `a` is smaller than a byte in `b`, then comparison stops - and this procedure returns `-1`. - - If the byte in `a` is bigger than a byte in `b`, then comparison stops - and this procedure returns `+1`. - - Otherwise the comparison continues until `n` bytes are compared. + - If the byte in `a` is smaller than a byte in `b`, then comparison stops + and this procedure returns `-1`. + - If the byte in `a` is bigger than a byte in `b`, then comparison stops + and this procedure returns `+1`. 
+ - Otherwise the comparison continues until `n` bytes are compared. 2. If all the bytes in the range are equal, this procedure returns `0`. */ @(require_results) diff --git a/core/net/errors_darwin.odin b/core/net/errors_darwin.odin index 2905b44bc..ccf1e0f7f 100644 --- a/core/net/errors_darwin.odin +++ b/core/net/errors_darwin.odin @@ -21,188 +21,191 @@ package net */ import "core:c" -import "core:os" +import "core:sys/posix" + +@(private) +ESHUTDOWN :: 58 Create_Socket_Error :: enum c.int { None = 0, - Family_Not_Supported_For_This_Socket = c.int(os.EAFNOSUPPORT), - No_Socket_Descriptors_Available = c.int(os.EMFILE), - No_Buffer_Space_Available = c.int(os.ENOBUFS), - No_Memory_Available_Available = c.int(os.ENOMEM), - Protocol_Unsupported_By_System = c.int(os.EPROTONOSUPPORT), - Wrong_Protocol_For_Socket = c.int(os.EPROTONOSUPPORT), - Family_And_Socket_Type_Mismatch = c.int(os.EPROTONOSUPPORT), + Family_Not_Supported_For_This_Socket = c.int(posix.EAFNOSUPPORT), + No_Socket_Descriptors_Available = c.int(posix.EMFILE), + No_Buffer_Space_Available = c.int(posix.ENOBUFS), + No_Memory_Available = c.int(posix.ENOMEM), + Protocol_Unsupported_By_System = c.int(posix.EPROTONOSUPPORT), + Wrong_Protocol_For_Socket = c.int(posix.EPROTONOSUPPORT), + Family_And_Socket_Type_Mismatch = c.int(posix.EPROTONOSUPPORT), } Dial_Error :: enum c.int { None = 0, Port_Required = -1, // Attempted to dial an endpointing without a port being set. 
- Address_In_Use = c.int(os.EADDRINUSE), - In_Progress = c.int(os.EINPROGRESS), - Cannot_Use_Any_Address = c.int(os.EADDRNOTAVAIL), - Wrong_Family_For_Socket = c.int(os.EAFNOSUPPORT), - Refused = c.int(os.ECONNREFUSED), - Is_Listening_Socket = c.int(os.EACCES), - Already_Connected = c.int(os.EISCONN), - Network_Unreachable = c.int(os.ENETUNREACH), // Device is offline - Host_Unreachable = c.int(os.EHOSTUNREACH), // Remote host cannot be reached - No_Buffer_Space_Available = c.int(os.ENOBUFS), - Not_Socket = c.int(os.ENOTSOCK), - Timeout = c.int(os.ETIMEDOUT), + Address_In_Use = c.int(posix.EADDRINUSE), + In_Progress = c.int(posix.EINPROGRESS), + Cannot_Use_Any_Address = c.int(posix.EADDRNOTAVAIL), + Wrong_Family_For_Socket = c.int(posix.EAFNOSUPPORT), + Refused = c.int(posix.ECONNREFUSED), + Is_Listening_Socket = c.int(posix.EACCES), + Already_Connected = c.int(posix.EISCONN), + Network_Unreachable = c.int(posix.ENETUNREACH), // Device is offline + Host_Unreachable = c.int(posix.EHOSTUNREACH), // Remote host cannot be reached + No_Buffer_Space_Available = c.int(posix.ENOBUFS), + Not_Socket = c.int(posix.ENOTSOCK), + Timeout = c.int(posix.ETIMEDOUT), // TODO: we may need special handling for this; maybe make a socket a struct with metadata? - Would_Block = c.int(os.EWOULDBLOCK), + Would_Block = c.int(posix.EWOULDBLOCK), } Bind_Error :: enum c.int { None = 0, Privileged_Port_Without_Root = -1, // Attempted to bind to a port less than 1024 without root access. - Address_In_Use = c.int(os.EADDRINUSE), // Another application is currently bound to this endpoint. - Given_Nonlocal_Address = c.int(os.EADDRNOTAVAIL), // The address is not a local address on this machine. - Broadcast_Disabled = c.int(os.EACCES), // To bind a UDP socket to the broadcast address, the appropriate socket option must be set. - Address_Family_Mismatch = c.int(os.EFAULT), // The address family of the address does not match that of the socket. 
- Already_Bound = c.int(os.EINVAL), // The socket is already bound to an address. - No_Ports_Available = c.int(os.ENOBUFS), // There are not enough ephemeral ports available. + Address_In_Use = c.int(posix.EADDRINUSE), // Another application is currently bound to this endpoint. + Given_Nonlocal_Address = c.int(posix.EADDRNOTAVAIL), // The address is not a local address on this machine. + Broadcast_Disabled = c.int(posix.EACCES), // To bind a UDP socket to the broadcast address, the appropriate socket option must be set. + Address_Family_Mismatch = c.int(posix.EFAULT), // The address family of the address does not match that of the socket. + Already_Bound = c.int(posix.EINVAL), // The socket is already bound to an address. + No_Ports_Available = c.int(posix.ENOBUFS), // There are not enough ephemeral ports available. } Listen_Error :: enum c.int { None = 0, - Address_In_Use = c.int(os.EADDRINUSE), - Already_Connected = c.int(os.EISCONN), - No_Socket_Descriptors_Available = c.int(os.EMFILE), - No_Buffer_Space_Available = c.int(os.ENOBUFS), - Nonlocal_Address = c.int(os.EADDRNOTAVAIL), - Not_Socket = c.int(os.ENOTSOCK), - Listening_Not_Supported_For_This_Socket = c.int(os.EOPNOTSUPP), + Address_In_Use = c.int(posix.EADDRINUSE), + Already_Connected = c.int(posix.EISCONN), + No_Socket_Descriptors_Available = c.int(posix.EMFILE), + No_Buffer_Space_Available = c.int(posix.ENOBUFS), + Nonlocal_Address = c.int(posix.EADDRNOTAVAIL), + Not_Socket = c.int(posix.ENOTSOCK), + Listening_Not_Supported_For_This_Socket = c.int(posix.EOPNOTSUPP), } Accept_Error :: enum c.int { None = 0, // TODO(tetra): Is this error actually possible here? Or is like Linux, in which case we can remove it. 
- Reset = c.int(os.ECONNRESET), - Not_Listening = c.int(os.EINVAL), - No_Socket_Descriptors_Available_For_Client_Socket = c.int(os.EMFILE), - No_Buffer_Space_Available = c.int(os.ENOBUFS), - Not_Socket = c.int(os.ENOTSOCK), - Not_Connection_Oriented_Socket = c.int(os.EOPNOTSUPP), + Reset = c.int(posix.ECONNRESET), + Not_Listening = c.int(posix.EINVAL), + No_Socket_Descriptors_Available_For_Client_Socket = c.int(posix.EMFILE), + No_Buffer_Space_Available = c.int(posix.ENOBUFS), + Not_Socket = c.int(posix.ENOTSOCK), + Not_Connection_Oriented_Socket = c.int(posix.EOPNOTSUPP), // TODO: we may need special handling for this; maybe make a socket a struct with metadata? - Would_Block = c.int(os.EWOULDBLOCK), + Would_Block = c.int(posix.EWOULDBLOCK), } TCP_Recv_Error :: enum c.int { None = 0, - Shutdown = c.int(os.ESHUTDOWN), - Not_Connected = c.int(os.ENOTCONN), + Shutdown = ESHUTDOWN, + Not_Connected = c.int(posix.ENOTCONN), // TODO(tetra): Is this error actually possible here? - Connection_Broken = c.int(os.ENETRESET), - Not_Socket = c.int(os.ENOTSOCK), - Aborted = c.int(os.ECONNABORTED), + Connection_Broken = c.int(posix.ENETRESET), + Not_Socket = c.int(posix.ENOTSOCK), + Aborted = c.int(posix.ECONNABORTED), // TODO(tetra): Determine when this is different from the syscall returning n=0 and maybe normalize them? - Connection_Closed = c.int(os.ECONNRESET), - Offline = c.int(os.ENETDOWN), - Host_Unreachable = c.int(os.EHOSTUNREACH), - Interrupted = c.int(os.EINTR), + Connection_Closed = c.int(posix.ECONNRESET), + Offline = c.int(posix.ENETDOWN), + Host_Unreachable = c.int(posix.EHOSTUNREACH), + Interrupted = c.int(posix.EINTR), // NOTE: No, really. Presumably this means something different for nonblocking sockets... - Timeout = c.int(os.EWOULDBLOCK), + Timeout = c.int(posix.EWOULDBLOCK), } UDP_Recv_Error :: enum c.int { None = 0, - Buffer_Too_Small = c.int(os.EMSGSIZE), // The buffer is too small to fit the entire message, and the message was truncated. 
When this happens, the rest of message is lost. - Not_Socket = c.int(os.ENOTSOCK), // The so-called socket is not an open socket. - Not_Descriptor = c.int(os.EBADF), // The so-called socket is, in fact, not even a valid descriptor. - Bad_Buffer = c.int(os.EFAULT), // The buffer did not point to a valid location in memory. - Interrupted = c.int(os.EINTR), // A signal occurred before any data was transmitted. See signal(7). + Buffer_Too_Small = c.int(posix.EMSGSIZE), // The buffer is too small to fit the entire message, and the message was truncated. When this happens, the rest of message is lost. + Not_Socket = c.int(posix.ENOTSOCK), // The so-called socket is not an open socket. + Not_Descriptor = c.int(posix.EBADF), // The so-called socket is, in fact, not even a valid descriptor. + Bad_Buffer = c.int(posix.EFAULT), // The buffer did not point to a valid location in memory. + Interrupted = c.int(posix.EINTR), // A signal occurred before any data was transmitted. See signal(7). // The send timeout duration passed before all data was sent. See Socket_Option.Send_Timeout. // NOTE: No, really. Presumably this means something different for nonblocking sockets... - Timeout = c.int(os.EWOULDBLOCK), - Socket_Not_Bound = c.int(os.EINVAL), // The socket must be bound for this operation, but isn't. + Timeout = c.int(posix.EWOULDBLOCK), + Socket_Not_Bound = c.int(posix.EINVAL), // The socket must be bound for this operation, but isn't. } TCP_Send_Error :: enum c.int { None = 0, - Aborted = c.int(os.ECONNABORTED), - Connection_Closed = c.int(os.ECONNRESET), - Not_Connected = c.int(os.ENOTCONN), - Shutdown = c.int(os.ESHUTDOWN), + Aborted = c.int(posix.ECONNABORTED), + Connection_Closed = c.int(posix.ECONNRESET), + Not_Connected = c.int(posix.ENOTCONN), + Shutdown = ESHUTDOWN, // The send queue was full. // This is usually a transient issue. // // This also shouldn't normally happen on Linux, as data is dropped if it // doesn't fit in the send queue. 
- No_Buffer_Space_Available = c.int(os.ENOBUFS), - Offline = c.int(os.ENETDOWN), - Host_Unreachable = c.int(os.EHOSTUNREACH), - Interrupted = c.int(os.EINTR), // A signal occurred before any data was transmitted. See signal(7). + No_Buffer_Space_Available = c.int(posix.ENOBUFS), + Offline = c.int(posix.ENETDOWN), + Host_Unreachable = c.int(posix.EHOSTUNREACH), + Interrupted = c.int(posix.EINTR), // A signal occurred before any data was transmitted. See signal(7). // NOTE: No, really. Presumably this means something different for nonblocking sockets... // The send timeout duration passed before all data was sent. See Socket_Option.Send_Timeout. - Timeout = c.int(os.EWOULDBLOCK), - Not_Socket = c.int(os.ENOTSOCK), // The so-called socket is not an open socket. + Timeout = c.int(posix.EWOULDBLOCK), + Not_Socket = c.int(posix.ENOTSOCK), // The so-called socket is not an open socket. } // TODO UDP_Send_Error :: enum c.int { None = 0, - Message_Too_Long = c.int(os.EMSGSIZE), // The message is larger than the maximum UDP packet size. No data was sent. + Message_Too_Long = c.int(posix.EMSGSIZE), // The message is larger than the maximum UDP packet size. No data was sent. // TODO: not sure what the exact circumstances for this is yet - Network_Unreachable = c.int(os.ENETUNREACH), - No_Outbound_Ports_Available = c.int(os.EAGAIN), // There are no more emphemeral outbound ports available to bind the socket to, in order to send. + Network_Unreachable = c.int(posix.ENETUNREACH), + No_Outbound_Ports_Available = c.int(posix.EAGAIN), // There are no more ephemeral outbound ports available to bind the socket to, in order to send. // The send timeout duration passed before all data was sent. See Socket_Option.Send_Timeout. // NOTE: No, really. Presumably this means something different for nonblocking sockets... - Timeout = c.int(os.EWOULDBLOCK), - Not_Socket = c.int(os.ENOTSOCK), // The so-called socket is not an open socket. 
- Not_Descriptor = c.int(os.EBADF), // The so-called socket is, in fact, not even a valid descriptor. - Bad_Buffer = c.int(os.EFAULT), // The buffer did not point to a valid location in memory. - Interrupted = c.int(os.EINTR), // A signal occurred before any data was transmitted. See signal(7). + Timeout = c.int(posix.EWOULDBLOCK), + Not_Socket = c.int(posix.ENOTSOCK), // The so-called socket is not an open socket. + Not_Descriptor = c.int(posix.EBADF), // The so-called socket is, in fact, not even a valid descriptor. + Bad_Buffer = c.int(posix.EFAULT), // The buffer did not point to a valid location in memory. + Interrupted = c.int(posix.EINTR), // A signal occurred before any data was transmitted. See signal(7). // The send queue was full. // This is usually a transient issue. // // This also shouldn't normally happen on Linux, as data is dropped if it // doesn't fit in the send queue. - No_Buffer_Space_Available = c.int(os.ENOBUFS), - No_Memory_Available = c.int(os.ENOMEM), // No memory was available to properly manage the send queue. + No_Buffer_Space_Available = c.int(posix.ENOBUFS), + No_Memory_Available = c.int(posix.ENOMEM), // No memory was available to properly manage the send queue. 
} Shutdown_Manner :: enum c.int { - Receive = c.int(os.SHUT_RD), - Send = c.int(os.SHUT_WR), - Both = c.int(os.SHUT_RDWR), + Receive = c.int(posix.SHUT_RD), + Send = c.int(posix.SHUT_WR), + Both = c.int(posix.SHUT_RDWR), } Shutdown_Error :: enum c.int { None = 0, - Aborted = c.int(os.ECONNABORTED), - Reset = c.int(os.ECONNRESET), - Offline = c.int(os.ENETDOWN), - Not_Connected = c.int(os.ENOTCONN), - Not_Socket = c.int(os.ENOTSOCK), - Invalid_Manner = c.int(os.EINVAL), + Aborted = c.int(posix.ECONNABORTED), + Reset = c.int(posix.ECONNRESET), + Offline = c.int(posix.ENETDOWN), + Not_Connected = c.int(posix.ENOTCONN), + Not_Socket = c.int(posix.ENOTSOCK), + Invalid_Manner = c.int(posix.EINVAL), } Socket_Option_Error :: enum c.int { None = 0, - Offline = c.int(os.ENETDOWN), - Timeout_When_Keepalive_Set = c.int(os.ENETRESET), - Invalid_Option_For_Socket = c.int(os.ENOPROTOOPT), - Reset_When_Keepalive_Set = c.int(os.ENOTCONN), - Not_Socket = c.int(os.ENOTSOCK), + Offline = c.int(posix.ENETDOWN), + Timeout_When_Keepalive_Set = c.int(posix.ENETRESET), + Invalid_Option_For_Socket = c.int(posix.ENOPROTOOPT), + Reset_When_Keepalive_Set = c.int(posix.ENOTCONN), + Not_Socket = c.int(posix.ENOTSOCK), } Set_Blocking_Error :: enum c.int { None = 0, // TODO: Add errors for `set_blocking` -} \ No newline at end of file +} diff --git a/core/net/interface_darwin.odin b/core/net/interface_darwin.odin index 4921bc3fe..9aa6cbd1a 100644 --- a/core/net/interface_darwin.odin +++ b/core/net/interface_darwin.odin @@ -20,60 +20,57 @@ package net Feoramund: FreeBSD platform code */ -import "core:os" import "core:strings" +import "core:sys/posix" + +foreign import lib "system:System.framework" @(private) _enumerate_interfaces :: proc(allocator := context.allocator) -> (interfaces: []Network_Interface, err: Network_Error) { context.allocator = allocator - head: ^os.ifaddrs - - if res := os._getifaddrs(&head); res < 0 { + head: ^ifaddrs + if getifaddrs(&head) != .OK { return {}, 
.Unable_To_Enumerate_Network_Interfaces } + defer freeifaddrs(head) - /* - Unlike Windows, *nix regrettably doesn't return all it knows about an interface in one big struct. - We're going to have to iterate over a list and coalesce information as we go. - */ - ifaces: map[string]^Network_Interface + ifaces: map[string]Network_Interface defer delete(ifaces) for ifaddr := head; ifaddr != nil; ifaddr = ifaddr.next { adapter_name := string(ifaddr.name) - /* - Check if we have seen this interface name before so we can reuse the `Network_Interface`. - Else, create a new one. - */ - if adapter_name not_in ifaces { - ifaces[adapter_name] = new(Network_Interface) - ifaces[adapter_name].adapter_name = strings.clone(adapter_name) + key_ptr, iface, inserted, mem_err := map_entry(&ifaces, adapter_name) + if mem_err == nil && inserted { + key_ptr^, mem_err = strings.clone(adapter_name) + iface.adapter_name = key_ptr^ + } + if mem_err != nil { + return {}, .Unable_To_Enumerate_Network_Interfaces } - iface := ifaces[adapter_name] address: Address netmask: Netmask - if ifaddr.address != nil { - switch int(ifaddr.address.family) { - case os.AF_INET, os.AF_INET6: - address = _sockaddr_basic_to_endpoint(ifaddr.address).address + if ifaddr.addr != nil { + #partial switch ifaddr.addr.sa_family { + case .INET, .INET6: + address = _sockaddr_basic_to_endpoint(ifaddr.addr).address } } if ifaddr.netmask != nil { - switch int(ifaddr.netmask.family) { - case os.AF_INET, os.AF_INET6: + #partial switch ifaddr.netmask.sa_family { + case .INET, .INET6: netmask = Netmask(_sockaddr_basic_to_endpoint(ifaddr.netmask).address) } } - if ifaddr.broadcast_or_dest != nil && .BROADCAST in ifaddr.flags { - switch int(ifaddr.broadcast_or_dest.family) { - case os.AF_INET, os.AF_INET6: - broadcast := _sockaddr_basic_to_endpoint(ifaddr.broadcast_or_dest).address + if ifaddr.dstaddr != nil && .BROADCAST in ifaddr.flags { + #partial switch ifaddr.dstaddr.sa_family { + case .INET, .INET6: + broadcast := 
_sockaddr_basic_to_endpoint(ifaddr.dstaddr).address append(&iface.multicast, broadcast) } } @@ -105,18 +102,51 @@ _enumerate_interfaces :: proc(allocator := context.allocator) -> (interfaces: [] iface.link.state = state } - /* - Free the OS structures. - */ - os._freeifaddrs(head) - - /* - Turn the map into a slice to return. - */ - _interfaces := make([dynamic]Network_Interface, 0, allocator) + interfaces = make([]Network_Interface, len(ifaces)) + i: int for _, iface in ifaces { - append(&_interfaces, iface^) - free(iface) + interfaces[i] = iface + i += 1 } - return _interfaces[:], {} + return interfaces, nil +} + +@(private) +IF_Flag :: enum u32 { + UP, + BROADCAST, + DEBUG, + LOOPBACK, + POINTTOPOINT, + NOTRAILERS, + RUNNING, + NOARP, + PROMISC, + ALLMULTI, + OACTIVE, + SIMPLEX, + LINK0, + LINK1, + LINK2, + MULTICAST, +} + +@(private) +IF_Flags :: bit_set[IF_Flag; u32] + +@(private) +ifaddrs :: struct { + next: ^ifaddrs, + name: cstring, + flags: IF_Flags, + addr: ^posix.sockaddr, + netmask: ^posix.sockaddr, + dstaddr: ^posix.sockaddr, + data: rawptr, +} + +@(private) +foreign lib { + getifaddrs :: proc(ifap: ^^ifaddrs) -> posix.result --- + freeifaddrs :: proc(ifp: ^ifaddrs) --- } diff --git a/core/net/socket_darwin.odin b/core/net/socket_darwin.odin index 27927e973..a132a6a95 100644 --- a/core/net/socket_darwin.odin +++ b/core/net/socket_darwin.odin @@ -21,44 +21,45 @@ package net */ import "core:c" -import "core:os" import "core:sys/posix" import "core:time" Socket_Option :: enum c.int { - Broadcast = c.int(os.SO_BROADCAST), - Reuse_Address = c.int(os.SO_REUSEADDR), - Keep_Alive = c.int(os.SO_KEEPALIVE), - Out_Of_Bounds_Data_Inline = c.int(os.SO_OOBINLINE), - TCP_Nodelay = c.int(os.TCP_NODELAY), - Linger = c.int(os.SO_LINGER), - Receive_Buffer_Size = c.int(os.SO_RCVBUF), - Send_Buffer_Size = c.int(os.SO_SNDBUF), - Receive_Timeout = c.int(os.SO_RCVTIMEO), - Send_Timeout = c.int(os.SO_SNDTIMEO), + Broadcast = c.int(posix.Sock_Option.BROADCAST), + Reuse_Address 
= c.int(posix.Sock_Option.REUSEADDR), + Keep_Alive = c.int(posix.Sock_Option.KEEPALIVE), + Out_Of_Bounds_Data_Inline = c.int(posix.Sock_Option.OOBINLINE), + TCP_Nodelay = c.int(posix.TCP_NODELAY), + Linger = c.int(posix.Sock_Option.LINGER), + Receive_Buffer_Size = c.int(posix.Sock_Option.RCVBUF), + Send_Buffer_Size = c.int(posix.Sock_Option.SNDBUF), + Receive_Timeout = c.int(posix.Sock_Option.RCVTIMEO), + Send_Timeout = c.int(posix.Sock_Option.SNDTIMEO), } @(private) _create_socket :: proc(family: Address_Family, protocol: Socket_Protocol) -> (socket: Any_Socket, err: Network_Error) { - c_type, c_protocol, c_family: int + c_type: posix.Sock + c_protocol: posix.Protocol + c_family: posix.AF switch family { - case .IP4: c_family = os.AF_INET - case .IP6: c_family = os.AF_INET6 + case .IP4: c_family = .INET + case .IP6: c_family = .INET6 case: unreachable() } switch protocol { - case .TCP: c_type = os.SOCK_STREAM; c_protocol = os.IPPROTO_TCP - case .UDP: c_type = os.SOCK_DGRAM; c_protocol = os.IPPROTO_UDP + case .TCP: c_type = .STREAM; c_protocol = .TCP + case .UDP: c_type = .DGRAM; c_protocol = .UDP case: unreachable() } - sock, sock_err := os.socket(c_family, c_type, c_protocol) - if sock_err != nil { - err = Create_Socket_Error(os.is_platform_error(sock_err) or_else -1) + sock := posix.socket(c_family, c_type, c_protocol) + if sock < 0 { + err = Create_Socket_Error(posix.errno()) return } @@ -86,10 +87,10 @@ _dial_tcp_from_endpoint :: proc(endpoint: Endpoint, options := default_tcp_optio _ = set_option(skt, .Reuse_Address, true) sockaddr := _endpoint_to_sockaddr(endpoint) - res := os.connect(os.Socket(skt), (^os.SOCKADDR)(&sockaddr), i32(sockaddr.len)) - if res != nil { + if posix.connect(posix.FD(skt), (^posix.sockaddr)(&sockaddr), posix.socklen_t(sockaddr.ss_len)) != .OK { + errno := posix.errno() close(skt) - return {}, Dial_Error(os.is_platform_error(res) or_else -1) + return {}, Dial_Error(errno) } return @@ -102,14 +103,15 @@ MAX_PRIVILEGED_PORT :: 1023 _bind 
:: proc(skt: Any_Socket, ep: Endpoint) -> (err: Network_Error) { sockaddr := _endpoint_to_sockaddr(ep) s := any_socket_to_socket(skt) - res := os.bind(os.Socket(s), (^os.SOCKADDR)(&sockaddr), i32(sockaddr.len)) - if res != nil { - if res == os.EACCES && ep.port <= MAX_PRIVILEGED_PORT { + if posix.bind(posix.FD(s), (^posix.sockaddr)(&sockaddr), posix.socklen_t(sockaddr.ss_len)) != .OK { + errno := posix.errno() + if errno == .EACCES && ep.port <= MAX_PRIVILEGED_PORT { err = .Privileged_Port_Without_Root } else { - err = Bind_Error(os.is_platform_error(res) or_else -1) + err = Bind_Error(errno) } } + return } @@ -131,9 +133,8 @@ _listen_tcp :: proc(interface_endpoint: Endpoint, backlog := 1000) -> (skt: TCP_ bind(sock, interface_endpoint) or_return - res := os.listen(os.Socket(skt), backlog) - if res != nil { - err = Listen_Error(os.is_platform_error(res) or_else -1) + if posix.listen(posix.FD(skt), i32(backlog)) != .OK { + err = Listen_Error(posix.errno()) return } @@ -144,34 +145,34 @@ _listen_tcp :: proc(interface_endpoint: Endpoint, backlog := 1000) -> (skt: TCP_ _bound_endpoint :: proc(sock: Any_Socket) -> (ep: Endpoint, err: Network_Error) { addr: posix.sockaddr_storage addr_len := posix.socklen_t(size_of(addr)) - res := posix.getsockname(posix.FD(any_socket_to_socket(sock)), (^posix.sockaddr)(&addr), &addr_len) - if res != .OK { + if posix.getsockname(posix.FD(any_socket_to_socket(sock)), (^posix.sockaddr)(&addr), &addr_len) != .OK { err = Listen_Error(posix.errno()) return } - ep = _sockaddr_to_endpoint((^os.SOCKADDR_STORAGE_LH)(&addr)) + + ep = _sockaddr_to_endpoint(&addr) return } @(private) _accept_tcp :: proc(sock: TCP_Socket, options := default_tcp_options) -> (client: TCP_Socket, source: Endpoint, err: Network_Error) { - sockaddr: os.SOCKADDR_STORAGE_LH - sockaddrlen := c.int(size_of(sockaddr)) - - client_sock, client_sock_err := os.accept(os.Socket(sock), cast(^os.SOCKADDR) &sockaddr, &sockaddrlen) - if client_sock_err != nil { - err = 
Accept_Error(os.is_platform_error(client_sock_err) or_else -1) + addr: posix.sockaddr_storage + addr_len := posix.socklen_t(size_of(addr)) + client_sock := posix.accept(posix.FD(sock), (^posix.sockaddr)(&addr), &addr_len) + if client_sock < 0 { + err = Accept_Error(posix.errno()) return } + client = TCP_Socket(client_sock) - source = _sockaddr_to_endpoint(&sockaddr) + source = _sockaddr_to_endpoint(&addr) return } @(private) _close :: proc(skt: Any_Socket) { s := any_socket_to_socket(skt) - os.close(os.Handle(os.Socket(s))) + posix.close(posix.FD(s)) } @(private) @@ -179,11 +180,13 @@ _recv_tcp :: proc(skt: TCP_Socket, buf: []byte) -> (bytes_read: int, err: Networ if len(buf) <= 0 { return } - res, res_err := os.recv(os.Socket(skt), buf, 0) - if res_err != nil { - err = TCP_Recv_Error(os.is_platform_error(res_err) or_else -1) + + res := posix.recv(posix.FD(skt), raw_data(buf), len(buf), {}) + if res < 0 { + err = TCP_Recv_Error(posix.errno()) return } + return int(res), nil } @@ -193,11 +196,11 @@ _recv_udp :: proc(skt: UDP_Socket, buf: []byte) -> (bytes_read: int, remote_endp return } - from: os.SOCKADDR_STORAGE_LH - fromsize := c.int(size_of(from)) - res, res_err := os.recvfrom(os.Socket(skt), buf, 0, cast(^os.SOCKADDR) &from, &fromsize) - if res_err != nil { - err = UDP_Recv_Error(os.is_platform_error(res_err) or_else -1) + from: posix.sockaddr_storage + fromsize := posix.socklen_t(size_of(from)) + res := posix.recvfrom(posix.FD(skt), raw_data(buf), len(buf), {}, (^posix.sockaddr)(&from), &fromsize) + if res < 0 { + err = UDP_Recv_Error(posix.errno()) return } @@ -211,15 +214,19 @@ _send_tcp :: proc(skt: TCP_Socket, buf: []byte) -> (bytes_written: int, err: Net for bytes_written < len(buf) { limit := min(int(max(i32)), len(buf) - bytes_written) remaining := buf[bytes_written:][:limit] - res, res_err := os.send(os.Socket(skt), remaining, os.MSG_NOSIGNAL) - if res_err == os.EPIPE { - // EPIPE arises if the socket has been closed remotely. 
- err = TCP_Send_Error.Connection_Closed - return - } else if res_err != nil { - err = TCP_Send_Error(os.is_platform_error(res_err) or_else -1) + res := posix.send(posix.FD(skt), raw_data(remaining), len(remaining), {.NOSIGNAL}) + if res < 0 { + errno := posix.errno() + if errno == .EPIPE { + // EPIPE arises if the socket has been closed remotely. + err = TCP_Send_Error.Connection_Closed + return + } + + err = TCP_Send_Error(errno) return } + bytes_written += int(res) } return @@ -231,15 +238,19 @@ _send_udp :: proc(skt: UDP_Socket, buf: []byte, to: Endpoint) -> (bytes_written: for bytes_written < len(buf) { limit := min(1<<31, len(buf) - bytes_written) remaining := buf[bytes_written:][:limit] - res, res_err := os.sendto(os.Socket(skt), remaining, os.MSG_NOSIGNAL, cast(^os.SOCKADDR)&toaddr, i32(toaddr.len)) - if res_err == os.EPIPE { - // EPIPE arises if the socket has been closed remotely. - err = UDP_Send_Error.Not_Socket - return - } else if res_err != nil { - err = UDP_Send_Error(os.is_platform_error(res_err) or_else -1) + res := posix.sendto(posix.FD(skt), raw_data(remaining), len(remaining), {.NOSIGNAL}, (^posix.sockaddr)(&toaddr), posix.socklen_t(toaddr.ss_len)) + if res < 0 { + errno := posix.errno() + if errno == .EPIPE { + // EPIPE arises if the socket has been closed remotely. 
+ err = UDP_Send_Error.Not_Socket + return + } + + err = UDP_Send_Error(errno) return } + bytes_written += int(res) } return @@ -248,26 +259,25 @@ _send_udp :: proc(skt: UDP_Socket, buf: []byte, to: Endpoint) -> (bytes_written: @(private) _shutdown :: proc(skt: Any_Socket, manner: Shutdown_Manner) -> (err: Network_Error) { s := any_socket_to_socket(skt) - res := os.shutdown(os.Socket(s), int(manner)) - if res != nil { - return Shutdown_Error(os.is_platform_error(res) or_else -1) + if posix.shutdown(posix.FD(s), posix.Shut(manner)) != .OK { + err = Shutdown_Error(posix.errno()) } return } @(private) _set_option :: proc(s: Any_Socket, option: Socket_Option, value: any, loc := #caller_location) -> Network_Error { - level := os.SOL_SOCKET if option != .TCP_Nodelay else os.IPPROTO_TCP + level := posix.SOL_SOCKET if option != .TCP_Nodelay else posix.IPPROTO_TCP // NOTE(tetra, 2022-02-15): On Linux, you cannot merely give a single byte for a bool; // it _has_ to be a b32. // I haven't tested if you can give more than that. bool_value: b32 - int_value: i32 - timeval_value: os.Timeval + int_value: posix.socklen_t + timeval_value: posix.timeval ptr: rawptr - len: os.socklen_t + len: posix.socklen_t switch option { case @@ -302,8 +312,8 @@ _set_option :: proc(s: Any_Socket, option: Socket_Option, value: any, loc := #ca t := value.(time.Duration) or_else panic("set_option() value must be a time.Duration here", loc) micros := i64(time.duration_microseconds(t)) - timeval_value.microseconds = int(micros % 1e6) - timeval_value.seconds = (micros - i64(timeval_value.microseconds)) / 1e6 + timeval_value.tv_usec = posix.suseconds_t(micros % 1e6) + timeval_value.tv_sec = posix.time_t(micros - i64(timeval_value.tv_usec)) / 1e6 ptr = &timeval_value len = size_of(timeval_value) @@ -312,12 +322,12 @@ _set_option :: proc(s: Any_Socket, option: Socket_Option, value: any, loc := #ca .Send_Buffer_Size: // TODO: check for out of range values and return .Value_Out_Of_Range? 
switch i in value { - case i8, u8: i2 := i; int_value = os.socklen_t((^u8)(&i2)^) - case i16, u16: i2 := i; int_value = os.socklen_t((^u16)(&i2)^) - case i32, u32: i2 := i; int_value = os.socklen_t((^u32)(&i2)^) - case i64, u64: i2 := i; int_value = os.socklen_t((^u64)(&i2)^) - case i128, u128: i2 := i; int_value = os.socklen_t((^u128)(&i2)^) - case int, uint: i2 := i; int_value = os.socklen_t((^uint)(&i2)^) + case i8, u8: i2 := i; int_value = posix.socklen_t((^u8)(&i2)^) + case i16, u16: i2 := i; int_value = posix.socklen_t((^u16)(&i2)^) + case i32, u32: i2 := i; int_value = posix.socklen_t((^u32)(&i2)^) + case i64, u64: i2 := i; int_value = posix.socklen_t((^u64)(&i2)^) + case i128, u128: i2 := i; int_value = posix.socklen_t((^u128)(&i2)^) + case int, uint: i2 := i; int_value = posix.socklen_t((^uint)(&i2)^) case: panic("set_option() value must be an integer here", loc) } @@ -326,9 +336,8 @@ _set_option :: proc(s: Any_Socket, option: Socket_Option, value: any, loc := #ca } skt := any_socket_to_socket(s) - res := os.setsockopt(os.Socket(skt), int(level), int(option), ptr, len) - if res != nil { - return Socket_Option_Error(os.is_platform_error(res) or_else -1) + if posix.setsockopt(posix.FD(skt), i32(level), posix.Sock_Option(option), ptr, len) != .OK { + return Socket_Option_Error(posix.errno()) } return nil @@ -338,42 +347,42 @@ _set_option :: proc(s: Any_Socket, option: Socket_Option, value: any, loc := #ca _set_blocking :: proc(socket: Any_Socket, should_block: bool) -> (err: Network_Error) { socket := any_socket_to_socket(socket) - flags, getfl_err := os.fcntl(int(socket), os.F_GETFL, 0) - if getfl_err != nil { - return Set_Blocking_Error(os.is_platform_error(getfl_err) or_else -1) + flags_ := posix.fcntl(posix.FD(socket), .GETFL, 0) + if flags_ < 0 { + return Set_Blocking_Error(posix.errno()) } + flags := transmute(posix.O_Flags)flags_ if should_block { - flags &~= int(os.O_NONBLOCK) + flags -= {.NONBLOCK} } else { - flags |= int(os.O_NONBLOCK) + flags += 
{.NONBLOCK} } - _, setfl_err := os.fcntl(int(socket), os.F_SETFL, flags) - if setfl_err != nil { - return Set_Blocking_Error(os.is_platform_error(setfl_err) or_else -1) + if posix.fcntl(posix.FD(socket), .SETFL, flags) < 0 { + return Set_Blocking_Error(posix.errno()) } return nil } @private -_endpoint_to_sockaddr :: proc(ep: Endpoint) -> (sockaddr: os.SOCKADDR_STORAGE_LH) { +_endpoint_to_sockaddr :: proc(ep: Endpoint) -> (sockaddr: posix.sockaddr_storage) { switch a in ep.address { case IP4_Address: - (^os.sockaddr_in)(&sockaddr)^ = os.sockaddr_in { + (^posix.sockaddr_in)(&sockaddr)^ = posix.sockaddr_in { sin_port = u16be(ep.port), - sin_addr = transmute(os.in_addr) a, - sin_family = u8(os.AF_INET), - sin_len = size_of(os.sockaddr_in), + sin_addr = transmute(posix.in_addr)a, + sin_family = .INET, + sin_len = size_of(posix.sockaddr_in), } return case IP6_Address: - (^os.sockaddr_in6)(&sockaddr)^ = os.sockaddr_in6 { + (^posix.sockaddr_in6)(&sockaddr)^ = posix.sockaddr_in6 { sin6_port = u16be(ep.port), - sin6_addr = transmute(os.in6_addr) a, - sin6_family = u8(os.AF_INET6), - sin6_len = size_of(os.sockaddr_in6), + sin6_addr = transmute(posix.in6_addr)a, + sin6_family = .INET6, + sin6_len = size_of(posix.sockaddr_in6), } return } @@ -381,21 +390,21 @@ _endpoint_to_sockaddr :: proc(ep: Endpoint) -> (sockaddr: os.SOCKADDR_STORAGE_LH } @private -_sockaddr_to_endpoint :: proc(native_addr: ^os.SOCKADDR_STORAGE_LH) -> (ep: Endpoint) { - switch native_addr.family { - case u8(os.AF_INET): - addr := cast(^os.sockaddr_in) native_addr +_sockaddr_to_endpoint :: proc(native_addr: ^posix.sockaddr_storage) -> (ep: Endpoint) { + #partial switch native_addr.ss_family { + case .INET: + addr := cast(^posix.sockaddr_in)native_addr port := int(addr.sin_port) ep = Endpoint { - address = IP4_Address(transmute([4]byte) addr.sin_addr), - port = port, + address = IP4_Address(transmute([4]byte)addr.sin_addr), + port = port, } - case u8(os.AF_INET6): - addr := cast(^os.sockaddr_in6) native_addr + 
case .INET6: + addr := cast(^posix.sockaddr_in6)native_addr port := int(addr.sin6_port) ep = Endpoint { - address = IP6_Address(transmute([8]u16be) addr.sin6_addr), - port = port, + address = IP6_Address(transmute([8]u16be)addr.sin6_addr), + port = port, } case: panic("native_addr is neither IP4 or IP6 address") @@ -404,21 +413,21 @@ _sockaddr_to_endpoint :: proc(native_addr: ^os.SOCKADDR_STORAGE_LH) -> (ep: Endp } @(private) -_sockaddr_basic_to_endpoint :: proc(native_addr: ^os.SOCKADDR) -> (ep: Endpoint) { - switch u16(native_addr.family) { - case u16(os.AF_INET): - addr := cast(^os.sockaddr_in) native_addr +_sockaddr_basic_to_endpoint :: proc(native_addr: ^posix.sockaddr) -> (ep: Endpoint) { + #partial switch native_addr.sa_family { + case .INET: + addr := cast(^posix.sockaddr_in)native_addr port := int(addr.sin_port) ep = Endpoint { - address = IP4_Address(transmute([4]byte) addr.sin_addr), - port = port, + address = IP4_Address(transmute([4]byte)addr.sin_addr), + port = port, } - case u16(os.AF_INET6): - addr := cast(^os.sockaddr_in6) native_addr + case .INET6: + addr := cast(^posix.sockaddr_in6)native_addr port := int(addr.sin6_port) ep = Endpoint { - address = IP6_Address(transmute([8]u16be) addr.sin6_addr), - port = port, + address = IP6_Address(transmute([8]u16be)addr.sin6_addr), + port = port, } case: panic("native_addr is neither IP4 or IP6 address") diff --git a/core/os/dir_unix.odin b/core/os/dir_unix.odin index f06bf8b37..c3dd844ef 100644 --- a/core/os/dir_unix.odin +++ b/core/os/dir_unix.odin @@ -5,9 +5,10 @@ import "core:strings" @(require_results) read_dir :: proc(fd: Handle, n: int, allocator := context.allocator) -> (fi: []File_Info, err: Error) { - dupfd := _dup(fd) or_return + context.allocator = allocator - dirp := _fdopendir(dupfd) or_return + dupfd := _dup(fd) or_return + dirp := _fdopendir(dupfd) or_return defer _closedir(dirp) dirpath := absolute_path_from_handle(dupfd) or_return diff --git a/core/os/os2/path.odin b/core/os/os2/path.odin 
index 9231307f5..47ac0236d 100644 --- a/core/os/os2/path.odin +++ b/core/os/os2/path.odin @@ -2,12 +2,18 @@ package os2 import "base:runtime" -import "core:path/filepath" +import "core:strings" Path_Separator :: _Path_Separator // OS-Specific Path_Separator_String :: _Path_Separator_String // OS-Specific Path_List_Separator :: _Path_List_Separator // OS-Specific +#assert(_Path_Separator <= rune(0x7F), "The system-specific path separator rune is expected to be within the 7-bit ASCII character set.") + +/* +Return true if `c` is a character used to separate paths into directory and +file hierarchies on the current system. +*/ @(require_results) is_path_separator :: proc(c: byte) -> bool { return _is_path_separator(c) @@ -15,22 +21,42 @@ is_path_separator :: proc(c: byte) -> bool { mkdir :: make_directory +/* +Make a new directory. + +If `name` is relative, it will be relative to the process's current working directory. +*/ make_directory :: proc(name: string, perm: int = 0o755) -> Error { return _mkdir(name, perm) } mkdir_all :: make_directory_all +/* +Make a new directory, creating new intervening directories when needed. + +If `path` is relative, it will be relative to the process's current working directory. +*/ make_directory_all :: proc(path: string, perm: int = 0o755) -> Error { return _mkdir_all(path, perm) } +/* +Delete `path` and all files and directories inside of `path` if it is a directory. + +If `path` is relative, it will be relative to the process's current working directory. +*/ remove_all :: proc(path: string) -> Error { return _remove_all(path) } getwd :: get_working_directory +/* +Get the working directory of the current process.
+ +*Allocates Using Provided Allocator* +*/ @(require_results) get_working_directory :: proc(allocator: runtime.Allocator) -> (dir: string, err: Error) { return _get_working_directory(allocator) @@ -38,16 +64,399 @@ get_working_directory :: proc(allocator: runtime.Allocator) -> (dir: string, err setwd :: set_working_directory +/* +Change the working directory of the current process. + +*Allocates Using Provided Allocator* +*/ set_working_directory :: proc(dir: string) -> (err: Error) { return _set_working_directory(dir) } +/* +Get the path for the currently running executable. + +*Allocates Using Provided Allocator* +*/ +@(require_results) get_executable_path :: proc(allocator: runtime.Allocator) -> (path: string, err: Error) { return _get_executable_path(allocator) } +/* +Get the directory for the currently running executable. + +*Allocates Using Provided Allocator* +*/ +@(require_results) get_executable_directory :: proc(allocator: runtime.Allocator) -> (path: string, err: Error) { path = _get_executable_path(allocator) or_return - path, _ = filepath.split(path) + path, _ = split_path(path) return } + +/* +Compare two paths for exactness without normalization. + +This procedure takes into account case-sensitivity on differing systems. +*/ +@(require_results) +are_paths_identical :: proc(a, b: string) -> (identical: bool) { + return _are_paths_identical(a, b) +} + +/* +Normalize a path. + +*Allocates Using Provided Allocator* + +This will remove duplicate separators and unneeded references to the current or +parent directory. +*/ +@(require_results) +clean_path :: proc(path: string, allocator: runtime.Allocator) -> (cleaned: string, err: Error) { + if path == "" || path == "." { + return strings.clone(".", allocator) + } + + TEMP_ALLOCATOR_GUARD() + + // The extra byte is to simplify appending path elements by letting the + // loop to end each with a separator. We'll trim the last one when we're done. 
+ buffer := make([]u8, len(path) + 1, temp_allocator()) or_return + + // This is the only point where Windows and POSIX differ, as Windows has + // alphabet-based volumes for root paths. + rooted, start := _clean_path_handle_start(path, buffer) + + head, buffer_i := start, start + for i, j := start, start; i <= len(path); i += 1 { + if i == len(path) || _is_path_separator(path[i]) { + elem := path[j:i] + j = i + 1 + + switch elem { + case "", ".": + // Skip duplicate path separators and current directory references. + case "..": + if !rooted && buffer_i == head { + // Only allow accessing further parent directories when the path is relative. + buffer[buffer_i] = '.' + buffer[buffer_i+1] = '.' + buffer[buffer_i+2] = _Path_Separator + buffer_i += 3 + head = buffer_i + } else { + // Roll back to the last separator or the head of the buffer. + back_to := head + // `buffer_i` will be equal to 1 + the last set byte, so + // skipping two bytes avoids the final separator we just + // added. + for k := buffer_i-2; k >= head; k -= 1 { + if _is_path_separator(buffer[k]) { + back_to = k + 1 + break + } + } + buffer_i = back_to + } + case: + // Copy the path element verbatim and add a separator. + copy(buffer[buffer_i:], elem) + buffer_i += len(elem) + buffer[buffer_i] = _Path_Separator + buffer_i += 1 + } + } + } + + // Trim the final separator. + // NOTE: No need to check if the last byte is a separator, as we always add it. + if buffer_i > start { + buffer_i -= 1 + } + + if buffer_i == 0 { + return strings.clone(".", allocator) + } + + compact := make([]u8, buffer_i, allocator) or_return + copy(compact, buffer) // NOTE(bill): buffer[:buffer_i] is redundant here + return string(compact), nil +} + +/* +Return true if `path` is an absolute path as opposed to a relative one. +*/ +@(require_results) +is_absolute_path :: proc(path: string) -> bool { + return _is_absolute_path(path) +} + +/* +Get the absolute path to `path` with respect to the process's current directory. 
+ +*Allocates Using Provided Allocator* +*/ +@(require_results) +get_absolute_path :: proc(path: string, allocator: runtime.Allocator) -> (absolute_path: string, err: Error) { + return _get_absolute_path(path, allocator) +} + +/* +Get the relative path needed to change directories from `base` to `target`. + +*Allocates Using Provided Allocator* + +The result is such that `join_path(base, get_relative_path(base, target))` is equivalent to `target`. + +NOTE: This procedure expects both `base` and `target` to be normalized first, +which can be done by calling `clean_path` on them if needed. + +This procedure will return an `Invalid_Path` error if `base` begins with a +reference to the parent directory (`".."`). Use `get_working_directory` with +`join_path` to construct absolute paths for both arguments instead. +*/ +@(require_results) +get_relative_path :: proc(base, target: string, allocator: runtime.Allocator) -> (path: string, err: Error) { + if _are_paths_identical(base, target) { + return strings.clone(".", allocator) + } + if base == "." { + return strings.clone(target, allocator) + } + + // This is the first point where Windows and POSIX differ, as Windows has + // alphabet-based volumes for root paths. + if !_get_relative_path_handle_start(base, target) { + return "", .Invalid_Path + } + if strings.has_prefix(base, "..") && (len(base) == 2 || _is_path_separator(base[2])) { + // We could do the work for the user of getting absolute paths for both + // arguments, but that could make something costly (repeatedly + // normalizing paths) convenient, when it would be better for the user + // to store already-finalized paths and operate on those instead. + return "", .Invalid_Path + } + + // This is the other point where Windows and POSIX differ, as Windows is + // case-insensitive. 
+ common := _get_common_path_len(base, target) + + // Get the result of splitting `base` and `target` on _Path_Separator, + // comparing them up to their most common elements, then count how many + // unshared parts are in the split `base`. + seps := 0 + size := 0 + if len(base)-common > 0 { + seps = 1 + size = 2 + } + // This range skips separators on the ends of the string. + for i in common+1..<len(base)-1 { + if _is_path_separator(base[i]) { + seps += 1 + size += 3 + } + } + + // Handle the rest of the size calculations. + trailing := target[common:] + if len(trailing) > 0 { + // Account for leading separators on the target after cutting the common part. + // (i.e. base == `/home`, target == `/home/a`) + if _is_path_separator(trailing[0]) { + trailing = trailing[1:] + } + size += len(trailing) + if seps > 0 { + size += 1 + } + } + if trailing == "." { + trailing = "" + size -= 2 + } + + // Build the string. + buf := make([]u8, size, allocator) or_return + n := 0 + if seps > 0 { + buf[0] = '.' + buf[1] = '.' + n = 2 + } + for _ in 1..<seps { + buf[n] = _Path_Separator + buf[n+1] = '.' + buf[n+2] = '.' + n += 3 + } + if len(trailing) > 0 { + if seps > 0 { + buf[n] = _Path_Separator + n += 1 + } + copy(buf[n:], trailing) + } + + path = string(buf) + + return +} + +/* +Split a path into a directory hierarchy and a filename. + +For example, `split_path("/home/foo/bar.tar.gz")` will return `"/home/foo"` and `"bar.tar.gz"`. +*/ +@(require_results) +split_path :: proc(path: string) -> (dir, filename: string) { + return _split_path(path) +} + +/* +Join all `elems` with the system's path separator and normalize the result. + +*Allocates Using Provided Allocator* + +For example, `join_path({"/home", "foo", "bar.txt"})` will result in `"/home/foo/bar.txt"`. +*/ +@(require_results) +join_path :: proc(elems: []string, allocator: runtime.Allocator) -> (joined: string, err: Error) { + for e, i in elems { + if e != "" { + TEMP_ALLOCATOR_GUARD() + p := strings.join(elems[i:], Path_Separator_String, temp_allocator()) or_return + return clean_path(p, allocator) + } + } + return "", nil +} + +/* +Split a filename from its extension. + +This procedure splits on the last separator.
+ +If the filename begins with a separator, such as `".readme.txt"`, the separator +will be included in the filename, resulting in `".readme"` and `"txt"`. + +For example, `split_filename("foo.tar.gz")` will return `"foo.tar"` and `"gz"`. +*/ +@(require_results) +split_filename :: proc(filename: string) -> (base, ext: string) { + i := strings.last_index_byte(filename, '.') + if i <= 0 { + return filename, "" + } + return filename[:i], filename[i+1:] +} + +/* +Split a filename from its extension. + +This procedure splits on the first separator. + +If the filename begins with a separator, such as `".readme.txt.gz"`, the separator +will be included in the filename, resulting in `".readme"` and `"txt.gz"`. + +For example, `split_filename_all("foo.tar.gz")` will return `"foo"` and `"tar.gz"`. +*/ +@(require_results) +split_filename_all :: proc(filename: string) -> (base, ext: string) { + i := strings.index_byte(filename, '.') + if i == 0 { + j := strings.index_byte(filename[1:], '.') + if j != -1 { + j += 1 + } + i = j + } + if i == -1 { + return filename, "" + } + return filename[:i], filename[i+1:] +} + +/* +Join `base` and `ext` with the system's filename extension separator. + +*Allocates Using Provided Allocator* + +For example, `join_filename("foo", "tar.gz")` will result in `"foo.tar.gz"`. +*/ +@(require_results) +join_filename :: proc(base: string, ext: string, allocator: runtime.Allocator) -> (joined: string, err: Error) { + if len(base) == 0 { + return strings.clone(ext, allocator) + } else if len(ext) == 0 { + return strings.clone(base, allocator) + } + + buf := make([]u8, len(base) + 1 + len(ext), allocator) or_return + copy(buf, base) + buf[len(base)] = '.' + copy(buf[1+len(base):], ext) + + return string(buf), nil +} + +/* +Split a string that is separated by a system-specific separator, typically used +for environment variables specifying multiple directories. 
+ +*Allocates Using Provided Allocator* + +For example, there is the "PATH" environment variable on POSIX systems which +this procedure can split into separate entries. +*/ +@(require_results) +split_path_list :: proc(path: string, allocator: runtime.Allocator) -> (list: []string, err: Error) { + if path == "" { + return nil, nil + } + + start: int + quote: bool + + start, quote = 0, false + count := 0 + + for i := 0; i < len(path); i += 1 { + c := path[i] + switch { + case c == '"': + quote = !quote + case c == Path_List_Separator && !quote: + count += 1 + } + } + + start, quote = 0, false + list = make([]string, count + 1, allocator) or_return + index := 0 + for i := 0; i < len(path); i += 1 { + c := path[i] + switch { + case c == '"': + quote = !quote + case c == Path_List_Separator && !quote: + list[index] = path[start:i] + index += 1 + start = i + 1 + } + } + assert(index == count) + list[index] = path[start:] + + for s0, i in list { + s, new := strings.replace_all(s0, `"`, ``, allocator) + if !new { + s = strings.clone(s, allocator) or_return + } + list[i] = s + } + + return list, nil +} diff --git a/core/os/os2/path_linux.odin b/core/os/os2/path_linux.odin index e3e7f8a7c..410b4cb28 100644 --- a/core/os/os2/path_linux.odin +++ b/core/os/os2/path_linux.odin @@ -14,7 +14,7 @@ _Path_List_Separator :: ':' _OPENDIR_FLAGS : linux.Open_Flags : {.NONBLOCK, .DIRECTORY, .LARGEFILE, .CLOEXEC} _is_path_separator :: proc(c: byte) -> bool { - return c == '/' + return c == _Path_Separator } _mkdir :: proc(path: string, perm: int) -> Error { diff --git a/core/os/os2/path_posix.odin b/core/os/os2/path_posix.odin index e6b95c0d4..39bd0a188 100644 --- a/core/os/os2/path_posix.odin +++ b/core/os/os2/path_posix.odin @@ -3,7 +3,6 @@ package os2 import "base:runtime" -import "core:path/filepath" import "core:sys/posix" @@ -35,11 +34,11 @@ _mkdir_all :: proc(path: string, perm: int) -> Error { return .Exist } - clean_path := filepath.clean(path, temp_allocator()) + clean_path := 
clean_path(path, temp_allocator()) or_return return internal_mkdir_all(clean_path, perm) internal_mkdir_all :: proc(path: string, perm: int) -> Error { - dir, file := filepath.split(path) + dir, file := split_path(path) if file != path && dir != "/" { if len(dir) > 1 && dir[len(dir) - 1] == '/' { dir = dir[:len(dir) - 1] diff --git a/core/os/os2/path_posixfs.odin b/core/os/os2/path_posixfs.odin new file mode 100644 index 000000000..8f9d43d63 --- /dev/null +++ b/core/os/os2/path_posixfs.odin @@ -0,0 +1,78 @@ +#+private +#+build linux, darwin, netbsd, freebsd, openbsd, wasi +package os2 + +// This implementation is for all systems that have POSIX-compliant filesystem paths. + +import "base:runtime" +import "core:strings" +import "core:sys/posix" + +_are_paths_identical :: proc(a, b: string) -> (identical: bool) { + return a == b +} + +_clean_path_handle_start :: proc(path: string, buffer: []u8) -> (rooted: bool, start: int) { + // Preserve rooted paths. + if _is_path_separator(path[0]) { + rooted = true + buffer[0] = _Path_Separator + start = 1 + } + return +} + +_is_absolute_path :: proc(path: string) -> bool { + return len(path) > 0 && _is_path_separator(path[0]) +} + +_get_absolute_path :: proc(path: string, allocator: runtime.Allocator) -> (absolute_path: string, err: Error) { + rel := path + if rel == "" { + rel = "." 
+ } + TEMP_ALLOCATOR_GUARD() + rel_cstr := strings.clone_to_cstring(rel, temp_allocator()) or_return + path_ptr := posix.realpath(rel_cstr, nil) + if path_ptr == nil { + return "", Platform_Error(posix.errno()) + } + defer posix.free(path_ptr) + + path_str := strings.clone(string(path_ptr), allocator) or_return + return path_str, nil +} + +_get_relative_path_handle_start :: proc(base, target: string) -> bool { + base_rooted := len(base) > 0 && _is_path_separator(base[0]) + target_rooted := len(target) > 0 && _is_path_separator(target[0]) + return base_rooted == target_rooted +} + +_get_common_path_len :: proc(base, target: string) -> int { + i := 0 + end := min(len(base), len(target)) + for j in 0..=end { + if j == end || _is_path_separator(base[j]) { + if base[i:j] == target[i:j] { + i = j + } else { + break + } + } + } + return i +} + +_split_path :: proc(path: string) -> (dir, file: string) { + i := len(path) - 1 + for i >= 0 && !_is_path_separator(path[i]) { + i -= 1 + } + if i == 0 { + return path[:i+1], path[i+1:] + } else if i > 0 { + return path[:i], path[i+1:] + } + return "", path +} diff --git a/core/os/os2/path_wasi.odin b/core/os/os2/path_wasi.odin index 1c4fafa17..7aee8fcc0 100644 --- a/core/os/os2/path_wasi.odin +++ b/core/os/os2/path_wasi.odin @@ -3,7 +3,6 @@ package os2 import "base:runtime" -import "core:path/filepath" import "core:sync" import "core:sys/wasm/wasi" @@ -35,11 +34,11 @@ _mkdir_all :: proc(path: string, perm: int) -> Error { return .Exist } - clean_path := filepath.clean(path, temp_allocator()) + clean_path := clean_path(path, temp_allocator()) or_return return internal_mkdir_all(clean_path) internal_mkdir_all :: proc(path: string) -> Error { - dir, file := filepath.split(path) + dir, file := split_path(path) if file != path && dir != "/" { if len(dir) > 1 && dir[len(dir) - 1] == '/' { dir = dir[:len(dir) - 1] diff --git a/core/os/os2/path_windows.odin b/core/os/os2/path_windows.odin index 041a4d1e3..dd9b7748c 100644 --- a/core/os/os2/path_windows.odin +++
b/core/os/os2/path_windows.odin @@ -1,8 +1,9 @@ #+private package os2 -import win32 "core:sys/windows" import "base:runtime" +import "core:strings" +import win32 "core:sys/windows" _Path_Separator :: '\\' _Path_Separator_String :: "\\" @@ -217,7 +218,7 @@ _fix_long_path_internal :: proc(path: string) -> string { return path } - if !_is_abs(path) { // relative path + if !_is_absolute_path(path) { // relative path return path } @@ -257,3 +258,93 @@ _fix_long_path_internal :: proc(path: string) -> string { return string(path_buf[:w]) } + +_are_paths_identical :: strings.equal_fold + +_clean_path_handle_start :: proc(path: string, buffer: []u8) -> (rooted: bool, start: int) { + // Preserve rooted paths. + start = _volume_name_len(path) + if start > 0 { + rooted = true + if len(path) > start && _is_path_separator(path[start]) { + // Take `C:` to `C:\`. + start += 1 + } + copy(buffer, path[:start]) + } + return +} + +_is_absolute_path :: proc(path: string) -> bool { + if _is_reserved_name(path) { + return true + } + l := _volume_name_len(path) + if l == 0 { + return false + } + + path := path + path = path[l:] + if path == "" { + return false + } + return _is_path_separator(path[0]) +} + +_get_absolute_path :: proc(path: string, allocator: runtime.Allocator) -> (absolute_path: string, err: Error) { + rel := path + if rel == "" { + rel = "." 
+ } + TEMP_ALLOCATOR_GUARD() + rel_utf16 := win32.utf8_to_utf16(rel, temp_allocator()) + n := win32.GetFullPathNameW(raw_data(rel_utf16), 0, nil, nil) + if n == 0 { + return "", Platform_Error(win32.GetLastError()) + } + + buf := make([]u16, n, temp_allocator()) or_return + n = win32.GetFullPathNameW(raw_data(rel_utf16), u32(n), raw_data(buf), nil) + if n == 0 { + return "", Platform_Error(win32.GetLastError()) + } + + return win32.utf16_to_utf8(buf, allocator) +} + +_get_relative_path_handle_start :: proc(base, target: string) -> bool { + base_root := base[:_volume_name_len(base)] + target_root := target[:_volume_name_len(target)] + return strings.equal_fold(base_root, target_root) +} + +_get_common_path_len :: proc(base, target: string) -> int { + i := 0 + end := min(len(base), len(target)) + for j in 0..=end { + if j == end || _is_path_separator(base[j]) { + if strings.equal_fold(base[i:j], target[i:j]) { + i = j + } else { + break + } + } + } + return i +} + +_split_path :: proc(path: string) -> (dir, file: string) { + vol_len := _volume_name_len(path) + + i := len(path) - 1 + for i >= vol_len && !_is_path_separator(path[i]) { + i -= 1 + } + if i == vol_len { + return path[:i+1], path[i+1:] + } else if i > vol_len { + return path[:i], path[i+1:] + } + return "", path +} diff --git a/core/os/os2/process_linux.odin b/core/os/os2/process_linux.odin index 632bde6ba..6d654008b 100644 --- a/core/os/os2/process_linux.odin +++ b/core/os/os2/process_linux.odin @@ -10,7 +10,6 @@ import "core:slice" import "core:strings" import "core:strconv" import "core:sys/linux" -import "core:path/filepath" PIDFD_UNASSIGNED :: ~uintptr(0) @@ -205,7 +204,7 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator info.executable_path = strings.clone(cmdline[:terminator], allocator) or_return info.fields += {.Executable_Path} } else if cwd_err == nil { - info.executable_path = filepath.join({ cwd, cmdline[:terminator] }, allocator) or_return + 
info.executable_path = join_path({ cwd, cmdline[:terminator] }, allocator) or_return info.fields += {.Executable_Path} } else { break cmdline_if @@ -407,7 +406,7 @@ _process_start :: proc(desc: Process_Desc) -> (process: Process, err: Error) { executable_name := desc.command[0] if strings.index_byte(executable_name, '/') < 0 { path_env := get_env("PATH", temp_allocator()) - path_dirs := filepath.split_list(path_env, temp_allocator()) or_return + path_dirs := split_path_list(path_env, temp_allocator()) or_return exe_builder := strings.builder_make(temp_allocator()) or_return diff --git a/core/os/os2/process_posix.odin b/core/os/os2/process_posix.odin index 3fa429cbe..cd451781f 100644 --- a/core/os/os2/process_posix.odin +++ b/core/os/os2/process_posix.odin @@ -6,7 +6,6 @@ import "base:runtime" import "core:time" import "core:strings" -import "core:path/filepath" import kq "core:sys/kqueue" import "core:sys/posix" @@ -62,7 +61,7 @@ _process_start :: proc(desc: Process_Desc) -> (process: Process, err: Error) { exe_name := desc.command[0] if strings.index_byte(exe_name, '/') < 0 { path_env := get_env("PATH", temp_allocator()) - path_dirs := filepath.split_list(path_env, temp_allocator()) + path_dirs := split_path_list(path_env, temp_allocator()) or_return found: bool for dir in path_dirs { diff --git a/core/os/os2/stat.odin b/core/os/os2/stat.odin index d0a5a659d..7d76902eb 100644 --- a/core/os/os2/stat.odin +++ b/core/os/os2/stat.odin @@ -1,7 +1,6 @@ package os2 import "base:runtime" -import "core:path/filepath" import "core:strings" import "core:time" @@ -25,7 +24,7 @@ File_Info :: struct { file_info_clone :: proc(fi: File_Info, allocator: runtime.Allocator) -> (cloned: File_Info, err: runtime.Allocator_Error) { cloned = fi cloned.fullpath = strings.clone(fi.fullpath, allocator) or_return - cloned.name = filepath.base(cloned.fullpath) + _, cloned.name = split_path(cloned.fullpath) return } diff --git a/core/os/os2/stat_linux.odin b/core/os/os2/stat_linux.odin index 
0433c1a61..7bff08f29 100644 --- a/core/os/os2/stat_linux.odin +++ b/core/os/os2/stat_linux.odin @@ -4,7 +4,6 @@ package os2 import "core:time" import "base:runtime" import "core:sys/linux" -import "core:path/filepath" _fstat :: proc(f: ^File, allocator: runtime.Allocator) -> (File_Info, Error) { impl := (^File_Impl)(f.impl) @@ -42,7 +41,7 @@ _fstat_internal :: proc(fd: linux.Fd, allocator: runtime.Allocator) -> (fi: File creation_time = time.Time{i64(s.ctime.time_sec) * i64(time.Second) + i64(s.ctime.time_nsec)}, // regular stat does not provide this } fi.creation_time = fi.modification_time - fi.name = filepath.base(fi.fullpath) + _, fi.name = split_path(fi.fullpath) return } diff --git a/core/os/os2/stat_posix.odin b/core/os/os2/stat_posix.odin index 88029c1f5..260dc7b52 100644 --- a/core/os/os2/stat_posix.odin +++ b/core/os/os2/stat_posix.odin @@ -4,13 +4,12 @@ package os2 import "base:runtime" -import "core:path/filepath" import "core:sys/posix" import "core:time" internal_stat :: proc(stat: posix.stat_t, fullpath: string) -> (fi: File_Info) { fi.fullpath = fullpath - fi.name = filepath.base(fi.fullpath) + _, fi.name = split_path(fi.fullpath) fi.inode = u128(stat.st_ino) fi.size = i64(stat.st_size) @@ -104,7 +103,7 @@ _lstat :: proc(name: string, allocator: runtime.Allocator) -> (fi: File_Info, er // NOTE: This might not be correct when given "/symlink/foo.txt", // you would want that to resolve "/symlink", but not resolve "foo.txt". 
- fullpath := filepath.clean(name, temp_allocator()) + fullpath := clean_path(name, temp_allocator()) or_return assert(len(fullpath) > 0) switch { case fullpath[0] == '/': diff --git a/core/os/os2/stat_wasi.odin b/core/os/os2/stat_wasi.odin index 2992c6267..bf18d8273 100644 --- a/core/os/os2/stat_wasi.odin +++ b/core/os/os2/stat_wasi.odin @@ -3,13 +3,12 @@ package os2 import "base:runtime" -import "core:path/filepath" import "core:sys/wasm/wasi" import "core:time" internal_stat :: proc(stat: wasi.filestat_t, fullpath: string) -> (fi: File_Info) { fi.fullpath = fullpath - fi.name = filepath.base(fi.fullpath) + _, fi.name = split_path(fi.fullpath) fi.inode = u128(stat.ino) fi.size = i64(stat.size) diff --git a/core/os/os2/stat_windows.odin b/core/os/os2/stat_windows.odin index 31f5d9e88..7d8dd3843 100644 --- a/core/os/os2/stat_windows.odin +++ b/core/os/os2/stat_windows.odin @@ -315,57 +315,37 @@ _is_UNC :: proc(path: string) -> bool { } _volume_name_len :: proc(path: string) -> int { - if ODIN_OS == .Windows { - if len(path) < 2 { - return 0 - } - c := path[0] - if path[1] == ':' { - switch c { - case 'a'..='z', 'A'..='Z': - return 2 - } + if len(path) < 2 { + return 0 + } + c := path[0] + if path[1] == ':' { + switch c { + case 'a'..='z', 'A'..='Z': + return 2 } + } - // URL: https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx - if l := len(path); l >= 5 && _is_path_separator(path[0]) && _is_path_separator(path[1]) && - !_is_path_separator(path[2]) && path[2] != '.' { - for n := 3; n < l-1; n += 1 { - if _is_path_separator(path[n]) { - n += 1 - if !_is_path_separator(path[n]) { - if path[n] == '.' { - break - } + // URL: https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx + if l := len(path); l >= 5 && _is_path_separator(path[0]) && _is_path_separator(path[1]) && + !_is_path_separator(path[2]) && path[2] != '.' 
{ + for n := 3; n < l-1; n += 1 { + if _is_path_separator(path[n]) { + n += 1 + if !_is_path_separator(path[n]) { + if path[n] == '.' { + break } - for ; n < l; n += 1 { - if _is_path_separator(path[n]) { - break - } - } - return n } - break + for ; n < l; n += 1 { + if _is_path_separator(path[n]) { + break + } + } + return n } + break } } return 0 } - -_is_abs :: proc(path: string) -> bool { - if _is_reserved_name(path) { - return true - } - l := _volume_name_len(path) - if l == 0 { - return false - } - - path := path - path = path[l:] - if path == "" { - return false - } - return is_path_separator(path[0]) -} - diff --git a/core/os/os_openbsd.odin b/core/os/os_openbsd.odin index 3c377968c..6548a57dc 100644 --- a/core/os/os_openbsd.odin +++ b/core/os/os_openbsd.odin @@ -343,7 +343,7 @@ AT_REMOVEDIR :: 0x08 @(default_calling_convention="c") foreign libc { - @(link_name="__error") __error :: proc() -> ^c.int --- + @(link_name="__errno") __error :: proc() -> ^c.int --- @(link_name="fork") _unix_fork :: proc() -> pid_t --- @(link_name="getthrid") _unix_getthrid :: proc() -> int --- diff --git a/core/simd/x86/ssse3.odin b/core/simd/x86/ssse3.odin index 07c846e7b..03ba5dcfb 100644 --- a/core/simd/x86/ssse3.odin +++ b/core/simd/x86/ssse3.odin @@ -21,7 +21,7 @@ _mm_abs_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i { _mm_shuffle_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { return transmute(__m128i)pshufb128(transmute(u8x16)a, transmute(u8x16)b) } -@(require_results, enable_target_feature="ssse3") +@(require_results, enable_target_feature="sse2,ssse3") _mm_alignr_epi8 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u32) -> __m128i { shift :: IMM8 diff --git a/core/sys/freebsd/syscalls.odin b/core/sys/freebsd/syscalls.odin index 83b51138a..405d1e47c 100644 --- a/core/sys/freebsd/syscalls.odin +++ b/core/sys/freebsd/syscalls.odin @@ -21,6 +21,7 @@ SYS_close : uintptr : 6 SYS_getpid : uintptr : 20 SYS_recvfrom : uintptr : 29 SYS_accept : uintptr : 30 
+SYS_getpeername: uintptr : 31 SYS_getsockname: uintptr : 32 SYS_fcntl : uintptr : 92 SYS_fsync : uintptr : 95 @@ -202,24 +203,36 @@ accept_nil :: proc "contextless" (s: Fd) -> (Fd, Errno) { accept :: proc { accept_T, accept_nil } +getsockname_or_peername :: proc "contextless" (s: Fd, sockaddr: ^$T, is_peer: bool) -> Errno { + // sockaddr must contain a valid pointer, or this will segfault because + // we're telling the syscall that there's memory available to write to. + addrlen: socklen_t = size_of(T) + + result, ok := intrinsics.syscall_bsd( + is_peer ? SYS_getpeername : SYS_getsockname, + cast(uintptr)s, + cast(uintptr)sockaddr, + cast(uintptr)&addrlen) + + if !ok { + return cast(Errno)result + } + + return nil +} + +// Get name of connected peer +// +// The getpeername() system call appeared in 4.2BSD. +getpeername :: proc "contextless" (s: Fd, sockaddr: ^$T) -> Errno { + return getsockname_or_peername(s, sockaddr, true) +} + // Get socket name. // // The getsockname() system call appeared in 4.2BSD. getsockname :: proc "contextless" (s: Fd, sockaddr: ^$T) -> Errno { - // sockaddr must contain a valid pointer, or this will segfault because - // we're telling the syscall that there's memory available to write to. - addrlen: socklen_t = size_of(T) - - result, ok := intrinsics.syscall_bsd(SYS_getsockname, - cast(uintptr)s, - cast(uintptr)sockaddr, - cast(uintptr)&addrlen) - - if !ok { - return cast(Errno)result - } - - return nil + return getsockname_or_peername(s, sockaddr, false) } // Synchronize changes to a file. diff --git a/core/sys/info/cpu_intel.odin b/core/sys/info/cpu_intel.odin index 95b53dda0..c8b8282fe 100644 --- a/core/sys/info/cpu_intel.odin +++ b/core/sys/info/cpu_intel.odin @@ -23,6 +23,7 @@ CPU_Feature :: enum u64 { popcnt, // Hamming weight instruction POPCNT. 
rdrand, // RDRAND instruction (on-chip random number generator) rdseed, // RDSEED instruction (on-chip random number generator) + sha, // SHA Extensions (SHA-1, SHA-224, SHA-256) sse2, // Streaming SIMD extension 2 (always available on amd64) sse3, // Streaming SIMD extension 3 ssse3, // Supplemental streaming SIMD extension 3 @@ -115,6 +116,7 @@ init_cpu_features :: proc "c" () { _, ebx7, ecx7, edx7 := cpuid(7, 0) try_set(&set, .bmi1, 3, ebx7) + try_set(&set, .sha, 29, ebx7) if os_supports_avx { try_set(&set, .avx2, 5, ebx7) } diff --git a/core/sys/linux/sys.odin b/core/sys/linux/sys.odin index 5fc4a0efa..985623e85 100644 --- a/core/sys/linux/sys.odin +++ b/core/sys/linux/sys.odin @@ -1,3 +1,4 @@ +#+build linux #+no-instrumentation package linux diff --git a/core/sys/wasm/js/odin.js b/core/sys/wasm/js/odin.js index b3a49523b..d5faa5210 100644 --- a/core/sys/wasm/js/odin.js +++ b/core/sys/wasm/js/odin.js @@ -1325,18 +1325,20 @@ function odinSetupDefaultImports(wasmMemoryInterface, consoleElement, memory) { } else if (!line.includes("\n")) { currentLine[isError] = currentLine[isError].concat(line); } else { - let lines = line.split("\n"); + let lines = line.trimEnd().split("\n"); let printLast = lines.length > 1 && line.endsWith("\n"); println(currentLine[isError].concat(lines[0])); currentLine[isError] = ""; for (let i = 1; i < lines.length-1; i++) { println(lines[i]); } - let last = lines[lines.length-1]; - if (printLast) { - println(last); - } else { - currentLine[isError] = last; + if (lines.length > 1) { + let last = lines[lines.length-1]; + if (printLast) { + println(last); + } else { + currentLine[isError] = last; + } } } diff --git a/examples/all/all_main.odin b/examples/all/all_main.odin index 4a8a198d3..0e7648f96 100644 --- a/examples/all/all_main.odin +++ b/examples/all/all_main.odin @@ -26,12 +26,14 @@ import topological_sort "core:container/topological_sort" import crypto "core:crypto" import aead "core:crypto/aead" +import aegis "core:crypto/aegis" 
import aes "core:crypto/aes" import blake2b "core:crypto/blake2b" import blake2s "core:crypto/blake2s" import chacha20 "core:crypto/chacha20" import chacha20poly1305 "core:crypto/chacha20poly1305" import crypto_hash "core:crypto/hash" +import deoxysii "core:crypto/deoxysii" import ed25519 "core:crypto/ed25519" import hkdf "core:crypto/hkdf" import hmac "core:crypto/hmac" @@ -48,6 +50,7 @@ import shake "core:crypto/shake" import sm3 "core:crypto/sm3" import tuplehash "core:crypto/tuplehash" import x25519 "core:crypto/x25519" +import x448 "core:crypto/x448" import pe "core:debug/pe" import trace "core:debug/trace" @@ -169,11 +172,13 @@ _ :: topological_sort _ :: crypto _ :: crypto_hash _ :: aead +_ :: aegis _ :: aes _ :: blake2b _ :: blake2s _ :: chacha20 _ :: chacha20poly1305 +_ :: deoxysii _ :: ed25519 _ :: hmac _ :: hkdf @@ -190,6 +195,7 @@ _ :: shake _ :: sm3 _ :: tuplehash _ :: x25519 +_ :: x448 _ :: pe _ :: trace _ :: dynlib diff --git a/shell.nix b/shell.nix index 040c7696e..33e90018c 100644 --- a/shell.nix +++ b/shell.nix @@ -4,9 +4,9 @@ pkgs.mkShell { nativeBuildInputs = with pkgs; [ git which - clang_17 - llvmPackages_17.llvm - llvmPackages_17.bintools + clang_20 + llvmPackages_20.llvm + llvmPackages_20.bintools ]; shellHook="CXX=clang++"; } diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 6bee10674..1f5aba254 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -171,6 +171,7 @@ struct TargetMetrics { enum Subtarget : u32 { Subtarget_Default, Subtarget_iOS, + Subtarget_Android, Subtarget_COUNT, }; @@ -178,6 +179,7 @@ enum Subtarget : u32 { gb_global String subtarget_strings[Subtarget_COUNT] = { str_lit(""), str_lit("ios"), + str_lit("android"), }; @@ -204,20 +206,25 @@ enum BuildModeKind { BuildMode_COUNT, }; -enum CommandKind : u32 { +enum CommandKind : u64 { Command_run = 1<<0, Command_build = 1<<1, - Command_check = 1<<3, - Command_doc = 1<<5, - Command_version = 1<<6, - Command_test = 1<<7, + Command_check = 1<<2, + 
Command_doc = 1<<3, + Command_version = 1<<4, + Command_test = 1<<5, - Command_strip_semicolon = 1<<8, - Command_bug_report = 1<<9, + Command_strip_semicolon = 1<<6, + Command_bug_report = 1<<7, + + Command_bundle_android = 1<<8, + Command_bundle_macos = 1<<9, + Command_bundle_ios = 1<<10, + Command_bundle_orca = 1<<11, Command__does_check = Command_run|Command_build|Command_check|Command_doc|Command_test|Command_strip_semicolon, Command__does_build = Command_run|Command_build|Command_test, - Command_all = ~(u32)0, + Command_all = ~(CommandKind)0, }; gb_global char const *odin_command_strings[32] = { @@ -228,6 +235,11 @@ gb_global char const *odin_command_strings[32] = { "version", "test", "strip-semicolon", + "", + "bundle android", + "bundle macos", + "bundle ios", + "bundle orca", }; @@ -527,6 +539,22 @@ struct BuildContext { String minimum_os_version_string; bool minimum_os_version_string_given; + + + int ODIN_ANDROID_API_LEVEL; + + String ODIN_ANDROID_SDK; + + String ODIN_ANDROID_NDK; + String ODIN_ANDROID_NDK_TOOLCHAIN; + String ODIN_ANDROID_NDK_TOOLCHAIN_LIB; + String ODIN_ANDROID_NDK_TOOLCHAIN_LIB_LEVEL; + String ODIN_ANDROID_NDK_TOOLCHAIN_SYSROOT; + + String ODIN_ANDROID_JAR_SIGNER; + String android_keystore; + String android_keystore_alias; + String android_manifest; }; gb_global BuildContext build_context = {0}; @@ -946,6 +974,14 @@ gb_internal bool is_arch_x86(void) { gb_global String const WIN32_SEPARATOR_STRING = {cast(u8 *)"\\", 1}; gb_global String const NIX_SEPARATOR_STRING = {cast(u8 *)"/", 1}; +gb_global String const SEPARATOR_STRING = +#if defined(GB_SYSTEM_WINDOWS) + WIN32_SEPARATOR_STRING; +#else + NIX_SEPARATOR_STRING; +#endif + + gb_global String const WASM_MODULE_NAME_SEPARATOR = str_lit(".."); gb_internal String internal_odin_root_dir(void); @@ -1461,6 +1497,103 @@ gb_internal bool has_ansi_terminal_colours(void) { return build_context.has_ansi_terminal_colours && !json_errors(); } +gb_internal void init_android_values(bool with_sdk) { + 
auto *bc = &build_context; + { // Android SDK/API Level + String default_level = str_lit("34"); + if (!bc->minimum_os_version_string_given) { + bc->minimum_os_version_string = default_level; + } + BigInt level = {}; + bool success = false; + big_int_from_string(&level, bc->minimum_os_version_string, &success); + if (!success) { + gb_printf_err("Warning: Invalid -minimum-os-version:%.*s for -subtarget:Android, defaulting to %.*s\n", LIT(bc->minimum_os_version_string), LIT(default_level)); + bc->minimum_os_version_string = default_level; + big_int_from_string(&level, bc->minimum_os_version_string, &success); + GB_ASSERT(success); + } + + i64 new_level = big_int_to_i64(&level); + + if (new_level >= 21) { + bc->ODIN_ANDROID_API_LEVEL = cast(int)new_level; + } else { + gb_printf_err("Warning: Invalid -minimum-os-version:%.*s for -subtarget:Android, defaulting to %.*s\n", LIT(bc->minimum_os_version_string), LIT(default_level)); + bc->ODIN_ANDROID_API_LEVEL = atoi(cast(char const *)default_level.text); + } + } + bc->ODIN_ANDROID_NDK = normalize_path(permanent_allocator(), make_string_c(gb_get_env("ODIN_ANDROID_NDK", permanent_allocator())), NIX_SEPARATOR_STRING); + bc->ODIN_ANDROID_NDK_TOOLCHAIN = normalize_path(permanent_allocator(), make_string_c(gb_get_env("ODIN_ANDROID_NDK_TOOLCHAIN", permanent_allocator())), NIX_SEPARATOR_STRING); + bc->ODIN_ANDROID_SDK = normalize_path(permanent_allocator(), make_string_c(gb_get_env("ODIN_ANDROID_SDK", permanent_allocator())), NIX_SEPARATOR_STRING); + + #if defined(GB_SYSTEM_WINDOWS) + if (bc->ODIN_ANDROID_SDK.len == 0) { + bc->ODIN_ANDROID_SDK = normalize_path(permanent_allocator(), + path_to_fullpath(permanent_allocator(), str_lit("%LocalAppData%/Android/Sdk"), nullptr), + NIX_SEPARATOR_STRING); + } + #endif + + if (bc->ODIN_ANDROID_NDK.len != 0 && bc->ODIN_ANDROID_NDK_TOOLCHAIN.len == 0) { + String arch = str_lit("x86_64"); + #if defined (GB_CPU_ARM) + // TODO(bill): this is a complete guess + arch = str_lit("aarch64"); + #endif 
+ #if defined(GB_SYSTEM_WINDOWS) + bc->ODIN_ANDROID_NDK_TOOLCHAIN = concatenate4_strings(temporary_allocator(), bc->ODIN_ANDROID_NDK, str_lit("toolchains/llvm/prebuilt/"), str_lit("windows-"), arch); + #elif defined(GB_SYSTEM_OSX) + // TODO(bill): is this name even correct? + bc->ODIN_ANDROID_NDK_TOOLCHAIN = concatenate4_strings(temporary_allocator(), bc->ODIN_ANDROID_NDK, str_lit("toolchains/llvm/prebuilt/"), str_lit("darwin-"), arch); + #elif defined(GB_SYSTEM_LINUX) + bc->ODIN_ANDROID_NDK_TOOLCHAIN = concatenate4_strings(temporary_allocator(), bc->ODIN_ANDROID_NDK, str_lit("toolchains/llvm/prebuilt/"), str_lit("linux-"), arch); + #endif + + bc->ODIN_ANDROID_NDK_TOOLCHAIN = normalize_path(permanent_allocator(), bc->ODIN_ANDROID_NDK_TOOLCHAIN, NIX_SEPARATOR_STRING); + } + + if (bc->ODIN_ANDROID_NDK.len == 0 && !with_sdk) { + gb_printf_err("Error: ODIN_ANDROID_NDK not set\n"); + gb_exit(1); + + } + + if (bc->ODIN_ANDROID_NDK_TOOLCHAIN.len == 0 && !with_sdk) { + gb_printf_err("Error: ODIN_ANDROID_NDK_TOOLCHAIN not set\n"); + gb_exit(1); + } + + bc->ODIN_ANDROID_NDK_TOOLCHAIN_LIB = concatenate_strings(permanent_allocator(), bc->ODIN_ANDROID_NDK_TOOLCHAIN, str_lit("sysroot/usr/lib/aarch64-linux-android/")); + + char buf[32] = {}; + gb_snprintf(buf, gb_size_of(buf), "%d/", bc->ODIN_ANDROID_API_LEVEL); + bc->ODIN_ANDROID_NDK_TOOLCHAIN_LIB_LEVEL = concatenate_strings(permanent_allocator(), bc->ODIN_ANDROID_NDK_TOOLCHAIN_LIB, make_string_c(buf)); + + bc->ODIN_ANDROID_NDK_TOOLCHAIN_SYSROOT = concatenate_strings(permanent_allocator(), bc->ODIN_ANDROID_NDK_TOOLCHAIN, str_lit("sysroot/")); + + + bc->ODIN_ANDROID_JAR_SIGNER = normalize_path(permanent_allocator(), make_string_c(gb_get_env("ODIN_ANDROID_JAR_SIGNER", permanent_allocator())), NIX_SEPARATOR_STRING); + if (with_sdk) { + if (bc->ODIN_ANDROID_SDK.len == 0) { + gb_printf_err("Error: ODIN_ANDROID_SDK not set, which is required for -build-mode:executable for -subtarget:android"); + gb_exit(1); + } + if (bc->ODIN_ANDROID_JAR_SIGNER.len 
== 0) { + gb_printf_err("Error: ODIN_ANDROID_JAR_SIGNER not set, which is required for -build-mode:executable for -subtarget:android"); + gb_exit(1); + } + if (bc->android_keystore.len == 0) { + gb_printf_err("Error: -android-keystore: has not been set\n"); + gb_exit(1); + } + if (bc->android_keystore_alias.len == 0) { + gb_printf_err("Error: -android-keystore_alias: has not been set\n"); + gb_exit(1); + } + } +} + gb_internal bool has_asm_extension(String const &path) { String ext = path_extension(path); if (ext == ".asm") { @@ -1652,6 +1785,15 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta default: GB_PANIC("Unknown architecture for darwin"); } + } else if (metrics->os == TargetOs_linux && subtarget == Subtarget_Android) { + switch (metrics->arch) { + case TargetArch_arm64: + bc->metrics.target_triplet = str_lit("aarch64-none-linux-android"); + bc->reloc_mode = RelocMode_PIC; + break; + default: + GB_PANIC("Unknown architecture for -subtarget:android"); + } } if (bc->metrics.os == TargetOs_windows) { @@ -1706,6 +1848,8 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta if (subtarget == Subtarget_Default) { bc->metrics.target_triplet = concatenate_strings(permanent_allocator(), bc->metrics.target_triplet, bc->minimum_os_version_string); } + } else if (selected_subtarget == Subtarget_Android) { + init_android_values(bc->build_mode == BuildMode_Executable); } if (!bc->custom_optimization_level) { @@ -1749,6 +1893,30 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta if (bc->metrics.os == TargetOs_freestanding) { bc->ODIN_DEFAULT_TO_NIL_ALLOCATOR = !bc->ODIN_DEFAULT_TO_PANIC_ALLOCATOR; } + + if (subtarget == Subtarget_Android) { + switch (build_context.build_mode) { + case BuildMode_DynamicLibrary: + case BuildMode_Object: + case BuildMode_Assembly: + case BuildMode_LLVM_IR: + break; + default: + case BuildMode_Executable: + case BuildMode_StaticLibrary: + if 
((build_context.command_kind & Command__does_build) != 0) { + gb_printf_err("Unsupported -build-mode for -subtarget:android\n"); + gb_printf_err("\tCurrently only supporting: \n"); + // gb_printf_err("\t\texe\n"); + gb_printf_err("\t\tshared\n"); + gb_printf_err("\t\tobject\n"); + gb_printf_err("\t\tassembly\n"); + gb_printf_err("\t\tllvm-ir\n"); + gb_exit(1); + } + break; + } + } } #if defined(GB_SYSTEM_WINDOWS) @@ -1947,7 +2115,10 @@ gb_internal bool init_build_paths(String init_filename) { output_extension = make_string(nullptr, 0); String const single_file_extension = str_lit(".odin"); - if (build_context.metrics.os == TargetOs_windows) { + if (selected_subtarget == Subtarget_Android) { + // NOTE(bill): It's always shared! + output_extension = STR_LIT("so"); + } else if (build_context.metrics.os == TargetOs_windows) { output_extension = STR_LIT("exe"); } else if (build_context.cross_compiling && selected_target_metrics->metrics == &target_essence_amd64) { // Do nothing: we don't want the .bin extension diff --git a/src/bundle_command.cpp b/src/bundle_command.cpp new file mode 100644 index 000000000..b3bca2b51 --- /dev/null +++ b/src/bundle_command.cpp @@ -0,0 +1,209 @@ +i32 bundle_android(String init_directory); + +i32 bundle(String init_directory) { + switch (build_context.command_kind) { + case Command_bundle_android: + return bundle_android(init_directory); + } + gb_printf_err("Unknown odin package \n"); + return 1; +} + + +i32 bundle_android(String original_init_directory) { + TEMPORARY_ALLOCATOR_GUARD(); + + i32 result = 0; + init_android_values(/*with_sdk*/true); + + bool init_directory_ok = false; + String init_directory = path_to_fullpath(temporary_allocator(), original_init_directory, &init_directory_ok); + if (!init_directory_ok) { + gb_printf_err("Error: '%.*s' is not a valid directory", LIT(original_init_directory)); + return 1; + } + init_directory = normalize_path(temporary_allocator(), init_directory, NIX_SEPARATOR_STRING); + + int const 
ODIN_ANDROID_API_LEVEL = build_context.ODIN_ANDROID_API_LEVEL; + + String android_sdk_build_tools = concatenate3_strings(temporary_allocator(), + build_context.ODIN_ANDROID_SDK, str_lit("build-tools"), NIX_SEPARATOR_STRING); + + Array<FileInfo> list = {}; + ReadDirectoryError rd_err = read_directory(android_sdk_build_tools, &list); + defer (array_free(&list)); + + switch (rd_err) { + case ReadDirectory_InvalidPath: + gb_printf_err("Invalid path: %.*s\n", LIT(android_sdk_build_tools)); + return 1; + case ReadDirectory_NotExists: + gb_printf_err("Path does not exist: %.*s\n", LIT(android_sdk_build_tools)); + return 1; + case ReadDirectory_Permission: + gb_printf_err("Permission denied whilst reading path %.*s\n", LIT(android_sdk_build_tools)); + return 1; + case ReadDirectory_NotDir: + gb_printf_err("Expected a directory for a package, got a file: %.*s\n", LIT(android_sdk_build_tools)); + return 1; + case ReadDirectory_Empty: + gb_printf_err("Empty directory: %.*s\n", LIT(android_sdk_build_tools)); + return 1; + case ReadDirectory_Unknown: + gb_printf_err("Unknown error whilst reading path %.*s\n", LIT(android_sdk_build_tools)); + return 1; + } + + auto possible_valid_dirs = array_make<FileInfo>(heap_allocator(), 0, list.count); + defer (array_free(&possible_valid_dirs)); + + + for (FileInfo fi : list) if (fi.is_dir) { + bool all_numbers = true; + for (isize i = 0; i < fi.name.len; i++) { + u8 c = fi.name[i]; + if ('0' <= c && c <= '9') { + // true + } else if (i == 0) { + all_numbers = false; + } else if (c == '.') { + break; + } else { + all_numbers = false; + } + } + + if (all_numbers) { + array_add(&possible_valid_dirs, fi); + } + } + + if (possible_valid_dirs.count == 0) { + gb_printf_err("Unable to find any Android SDK/API Level in %.*s\n", LIT(android_sdk_build_tools)); + return 1; + } + + int *dir_numbers = gb_alloc_array(temporary_allocator(), int, possible_valid_dirs.count); + + char buf[1024] = {}; + for_array(i, possible_valid_dirs) { + FileInfo fi = possible_valid_dirs[i]; +
isize n = gb_min(gb_size_of(buf)-1, fi.name.len); + memcpy(buf, fi.name.text, n); + buf[n] = 0; + + dir_numbers[i] = atoi(buf); + } + + isize closest_number_idx = -1; + for (isize i = 0; i < possible_valid_dirs.count; i++) { + if (dir_numbers[i] >= ODIN_ANDROID_API_LEVEL) { + if (closest_number_idx < 0) { + closest_number_idx = i; + } else if (dir_numbers[i] < dir_numbers[closest_number_idx]) { + closest_number_idx = i; + } + } + } + + if (closest_number_idx < 0) { + gb_printf_err("Unable to find any Android SDK/API Level in %.*s meeting the minimum API level of %d\n", LIT(android_sdk_build_tools), ODIN_ANDROID_API_LEVEL); + return 1; + } + + String api_number = possible_valid_dirs[closest_number_idx].name; + + android_sdk_build_tools = concatenate_strings(temporary_allocator(), android_sdk_build_tools, api_number); + String android_sdk_platforms = concatenate_strings(temporary_allocator(), + build_context.ODIN_ANDROID_SDK, + make_string_c(gb_bprintf("platforms/android-%d/", dir_numbers[closest_number_idx])) + ); + + android_sdk_build_tools = normalize_path(temporary_allocator(), android_sdk_build_tools, NIX_SEPARATOR_STRING); + android_sdk_platforms = normalize_path(temporary_allocator(), android_sdk_platforms, NIX_SEPARATOR_STRING); + + gbString cmd = gb_string_make(heap_allocator(), ""); + defer (gb_string_free(cmd)); + + + String current_directory = normalize_path(temporary_allocator(), get_working_directory(temporary_allocator()), NIX_SEPARATOR_STRING); + defer (set_working_directory(current_directory)); + + if (current_directory.len != 0) { + bool ok = set_working_directory(init_directory); + if (!ok) { + gb_printf_err("Error: Unable to currectly set the current working directory to '%.*s'\n", LIT(init_directory)); + } + } + + String output_filename = str_lit("test"); + String output_apk = path_remove_extension(output_filename); + + TIME_SECTION("Android aapt"); + { + TEMPORARY_ALLOCATOR_GUARD(); + gb_string_clear(cmd); + + String manifest = {}; + if 
(build_context.android_manifest.len != 0) { + manifest = concatenate_strings(temporary_allocator(), current_directory, build_context.android_manifest); + } else { + manifest = concatenate_strings(temporary_allocator(), init_directory, str_lit("AndroidManifest.xml")); + } + + cmd = gb_string_append_length(cmd, android_sdk_build_tools.text, android_sdk_build_tools.len); + cmd = gb_string_appendc(cmd, "aapt"); + cmd = gb_string_appendc(cmd, " package -f"); + if (manifest.len != 0) { + cmd = gb_string_append_fmt(cmd, " -M \"%.*s\"", LIT(manifest)); + } + cmd = gb_string_append_fmt(cmd, " -I \"%.*sandroid.jar\"", LIT(android_sdk_platforms)); + cmd = gb_string_append_fmt(cmd, " -F \"%.*s.apk-build\"", LIT(output_apk)); + + result = system_exec_command_line_app("android-aapt", cmd); + if (result) { + return result; + } + } + + TIME_SECTION("Android jarsigner"); + { + TEMPORARY_ALLOCATOR_GUARD(); + gb_string_clear(cmd); + + cmd = gb_string_append_length(cmd, build_context.ODIN_ANDROID_JAR_SIGNER.text, build_context.ODIN_ANDROID_JAR_SIGNER.len); + cmd = gb_string_append_fmt(cmd, " -storepass android"); + if (build_context.android_keystore.len != 0) { + String keystore = concatenate_strings(temporary_allocator(), current_directory, build_context.android_keystore); + cmd = gb_string_append_fmt(cmd, " -keystore \"%.*s\"", LIT(keystore)); + } + cmd = gb_string_append_fmt(cmd, " \"%.*s.apk-build\"", LIT(output_apk)); + if (build_context.android_keystore_alias.len != 0) { + String keystore_alias = build_context.android_keystore_alias; + cmd = gb_string_append_fmt(cmd, " \"%.*s\"", LIT(keystore_alias)); + } + + result = system_exec_command_line_app("android-jarsigner", cmd); + if (result) { + return result; + } + } + + TIME_SECTION("Android zipalign"); + { + TEMPORARY_ALLOCATOR_GUARD(); + gb_string_clear(cmd); + + cmd = gb_string_append_length(cmd, android_sdk_build_tools.text, android_sdk_build_tools.len); + cmd = gb_string_appendc(cmd, "zipalign"); + cmd = gb_string_appendc(cmd, 
" -f 4"); + cmd = gb_string_append_fmt(cmd, " \"%.*s.apk-build\" \"%.*s.apk\"", LIT(output_apk), LIT(output_apk)); + + result = system_exec_command_line_app("android-zipalign", cmd); + if (result) { + return result; + } + } + + return 0; +} diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index d9b04e35f..05f0ac7d7 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -645,6 +645,13 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan break; } + if (!are_types_identical(x.type, y.type)) { + gbString tx = type_to_string(x.type); + gbString ty = type_to_string(y.type); + error(call, "Mismatched types to '%.*s', '%s' vs '%s'", LIT(builtin_name), tx, ty); + gb_string_free(ty); + gb_string_free(tx); + } Type *vt = base_type(x.type); GB_ASSERT(vt->kind == Type_SimdVector); @@ -1675,12 +1682,16 @@ gb_internal bool check_builtin_procedure_directive(CheckerContext *c, Operand *o } if (ce->args.count > 0) { Ast *arg = ce->args[0]; - Operand o = {}; - Entity *e = check_ident(c, &o, arg, nullptr, nullptr, true); - if (e == nullptr || (e->flags & EntityFlag_Param) == 0) { - error(ce->args[0], "'#caller_expression' expected a valid earlier parameter name"); + if (arg->kind != Ast_Ident) { + error(arg, "'#caller_expression' expected an identifier"); + } else { + Operand o = {}; + Entity *e = check_ident(c, &o, arg, nullptr, nullptr, true); + if (e == nullptr || (e->flags & EntityFlag_Param) == 0) { + error(arg, "'#caller_expression' expected a valid earlier parameter name"); + } + arg->Ident.entity = e; } - arg->Ident.entity = e; } operand->type = t_string; diff --git a/src/check_decl.cpp b/src/check_decl.cpp index 5607ea725..250e8b854 100644 --- a/src/check_decl.cpp +++ b/src/check_decl.cpp @@ -628,6 +628,10 @@ gb_internal void check_const_decl(CheckerContext *ctx, Entity *e, Ast *type_expr Operand x = {}; x.type = entity->type; x.mode = Addressing_Variable; + if (entity->kind == Entity_Constant) { + x.mode = 
Addressing_Constant; + x.value = entity->Constant.value; + } if (!check_is_assignable_to(ctx, &x, e->type)) { gbString expr_str = expr_to_string(init); gbString op_type_str = type_to_string(entity->type); diff --git a/src/check_expr.cpp b/src/check_expr.cpp index b07fcf1d3..da193a4cc 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -8979,8 +8979,14 @@ gb_internal ExprKind check_or_else_expr(CheckerContext *c, Operand *o, Ast *node o->expr = node; return Expr_Expr; } + + Type *left_type = nullptr; + Type *right_type = nullptr; + check_or_else_split_types(c, &x, name, &left_type, &right_type); + add_type_and_value(c, arg, x.mode, x.type, x.value); + bool y_is_diverging = false; - check_expr_base(c, &y, default_value, x.type); + check_expr_base(c, &y, default_value, left_type); switch (y.mode) { case Addressing_NoValue: if (is_diverging_expr(y.expr)) { @@ -9005,11 +9011,6 @@ gb_internal ExprKind check_or_else_expr(CheckerContext *c, Operand *o, Ast *node return Expr_Expr; } - Type *left_type = nullptr; - Type *right_type = nullptr; - check_or_else_split_types(c, &x, name, &left_type, &right_type); - add_type_and_value(c, arg, x.mode, x.type, x.value); - if (left_type != nullptr) { if (!y_is_diverging) { check_assignment(c, &y, left_type, name); diff --git a/src/checker.cpp b/src/checker.cpp index 9d822073f..c44c6ce5b 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -1149,6 +1149,7 @@ gb_internal void init_universal(void) { GlobalEnumValue values[Subtarget_COUNT] = { {"Default", Subtarget_Default}, {"iOS", Subtarget_iOS}, + {"Android", Subtarget_Android}, }; auto fields = add_global_enum_type(str_lit("Odin_Platform_Subtarget_Type"), values, gb_count_of(values)); diff --git a/src/linker.cpp b/src/linker.cpp index cf2ef638d..5c0fe446f 100644 --- a/src/linker.cpp +++ b/src/linker.cpp @@ -7,19 +7,15 @@ struct LinkerData { Array output_temp_paths; String output_base; String output_name; -#if defined(GB_SYSTEM_OSX) - b8 needs_system_library_linked; -#endif + 
bool needs_system_library_linked; }; gb_internal i32 system_exec_command_line_app(char const *name, char const *fmt, ...); gb_internal bool system_exec_command_line_app_output(char const *command, gbString *output); -#if defined(GB_SYSTEM_OSX) gb_internal void linker_enable_system_library_linking(LinkerData *ld) { - ld->needs_system_library_linked = 1; + ld->needs_system_library_linked = true; } -#endif gb_internal void linker_data_init(LinkerData *ld, CheckerInfo *info, String const &init_fullpath) { gbAllocator ha = heap_allocator(); @@ -28,9 +24,7 @@ gb_internal void linker_data_init(LinkerData *ld, CheckerInfo *info, String cons array_init(&ld->foreign_libraries, ha, 0, 1024); ptr_set_init(&ld->foreign_libraries_set, 1024); -#if defined(GB_SYSTEM_OSX) - ld->needs_system_library_linked = 0; -#endif + ld->needs_system_library_linked = false; if (build_context.out_filepath.len == 0) { ld->output_name = remove_directory_from_path(init_fullpath); @@ -136,6 +130,9 @@ gb_internal i32 linker_stage(LinkerData *gen) { return result; } + bool is_cross_linking = false; + bool is_android = false; + if (build_context.cross_compiling && selected_target_metrics->metrics == &target_essence_amd64) { #if defined(GB_SYSTEM_UNIX) result = system_exec_command_line_app("linker", "x86_64-essence-gcc \"%.*s.o\" -o \"%.*s\" %.*s %.*s", @@ -147,22 +144,29 @@ gb_internal i32 linker_stage(LinkerData *gen) { ); #endif } else if (build_context.cross_compiling && build_context.different_os) { - gb_printf_err("Linking for cross compilation for this platform is not yet supported (%.*s %.*s)\n", - LIT(target_os_names[build_context.metrics.os]), - LIT(target_arch_names[build_context.metrics.arch]) - ); - build_context.keep_object_files = true; + switch (selected_subtarget) { + case Subtarget_Android: + is_cross_linking = true; + is_android = true; + goto try_cross_linking; + default: + gb_printf_err("Linking for cross compilation for this platform is not yet supported (%.*s %.*s)\n", + 
LIT(target_os_names[build_context.metrics.os]), + LIT(target_arch_names[build_context.metrics.arch]) + ); + build_context.keep_object_files = true; + break; + } } else { +try_cross_linking:; + #if defined(GB_SYSTEM_WINDOWS) - bool is_windows = true; + bool is_windows = build_context.metrics.os == TargetOs_windows; #else bool is_windows = false; #endif - #if defined(GB_SYSTEM_OSX) - bool is_osx = true; - #else - bool is_osx = false; - #endif + + bool is_osx = build_context.metrics.os == TargetOs_darwin; if (is_windows) { @@ -414,23 +418,27 @@ gb_internal i32 linker_stage(LinkerData *gen) { } else { timings_start_section(timings, str_lit("ld-link")); + int const ODIN_ANDROID_API_LEVEL = build_context.ODIN_ANDROID_API_LEVEL; + + String ODIN_ANDROID_NDK = build_context.ODIN_ANDROID_NDK; + String ODIN_ANDROID_NDK_TOOLCHAIN = build_context.ODIN_ANDROID_NDK_TOOLCHAIN; + String ODIN_ANDROID_NDK_TOOLCHAIN_LIB = build_context.ODIN_ANDROID_NDK_TOOLCHAIN_LIB; + String ODIN_ANDROID_NDK_TOOLCHAIN_LIB_LEVEL = build_context.ODIN_ANDROID_NDK_TOOLCHAIN_LIB_LEVEL; + String ODIN_ANDROID_NDK_TOOLCHAIN_SYSROOT = build_context.ODIN_ANDROID_NDK_TOOLCHAIN_SYSROOT; + // Link using `clang`, unless overridden by `ODIN_CLANG_PATH` environment variable. 
const char* clang_path = gb_get_env("ODIN_CLANG_PATH", permanent_allocator()); if (clang_path == NULL) { clang_path = "clang"; } - // NOTE(vassvik): get cwd, for used for local shared libs linking, since those have to be relative to the exe - char cwd[256]; - #if !defined(GB_SYSTEM_WINDOWS) - getcwd(&cwd[0], 256); - #endif - //printf("%s\n", cwd); - // NOTE(vassvik): needs to add the root to the library search paths, so that the full filenames of the library // files can be passed with -l: - gbString lib_str = gb_string_make(heap_allocator(), "-L/"); + gbString lib_str = gb_string_make(heap_allocator(), ""); defer (gb_string_free(lib_str)); + #if !defined(GB_SYSTEM_WINDOWS) + lib_str = gb_string_appendc(lib_str, "-L/ "); + #endif StringSet asm_files = {}; string_set_init(&asm_files, 64); @@ -496,19 +504,20 @@ gb_internal i32 linker_stage(LinkerData *gen) { } String obj_format; - #if defined(GB_ARCH_64_BIT) - if (is_osx) { - obj_format = str_lit("macho64"); + if (build_context.metrics.ptr_size == 8) { + if (is_osx) { + obj_format = str_lit("macho64"); + } else { + obj_format = str_lit("elf64"); + } } else { - obj_format = str_lit("elf64"); + GB_ASSERT(build_context.metrics.ptr_size == 4); + if (is_osx) { + obj_format = str_lit("macho32"); + } else { + obj_format = str_lit("elf32"); + } } - #elif defined(GB_ARCH_32_BIT) - if (is_osx) { - obj_format = str_lit("macho32"); - } else { - obj_format = str_lit("elf32"); - } - #endif // GB_ARCH_*_BIT if (build_context.metrics.arch == TargetArch_riscv64) { result = system_exec_command_line_app("clang", @@ -618,6 +627,78 @@ gb_internal i32 linker_stage(LinkerData *gen) { gbString object_files = gb_string_make(heap_allocator(), ""); defer (gb_string_free(object_files)); + + + if (is_android) { // NOTE(bill): glue code needed for Android + TIME_SECTION("Android Native App Glue Compile"); + + String android_glue_object = {}; + String android_glue_static_lib = {}; + + char hash_buf[64] = {}; + gb_snprintf(hash_buf, 
gb_size_of(hash_buf), "%p", &hash_buf); + String hash = make_string_c(hash_buf); + + String temp_dir = normalize_path(temporary_allocator(), temporary_directory(temporary_allocator()), NIX_SEPARATOR_STRING); + android_glue_object = concatenate4_strings(temporary_allocator(), temp_dir, str_lit("android_native_app_glue-"), hash, str_lit(".o")); + android_glue_static_lib = concatenate4_strings(permanent_allocator(), temp_dir, str_lit("libandroid_native_app_glue-"), hash, str_lit(".a")); + + gbString glue = gb_string_make(heap_allocator(), clang_path); + defer (gb_string_free(glue)); + + glue = gb_string_append_fmt(glue, " --target=aarch64-linux-android%d ", ODIN_ANDROID_API_LEVEL); + glue = gb_string_appendc(glue, "-c \""); + glue = gb_string_append_length(glue, ODIN_ANDROID_NDK.text, ODIN_ANDROID_NDK.len); + glue = gb_string_appendc(glue, "sources/android/native_app_glue/android_native_app_glue.c"); + glue = gb_string_appendc(glue, "\" "); + glue = gb_string_appendc(glue, "-o \""); + glue = gb_string_append_length(glue, android_glue_object.text, android_glue_object.len); + glue = gb_string_appendc(glue, "\" "); + + glue = gb_string_appendc(glue, "\"-I"); + glue = gb_string_append_length(glue, ODIN_ANDROID_NDK_TOOLCHAIN.text, ODIN_ANDROID_NDK_TOOLCHAIN.len); + glue = gb_string_appendc(glue, "sysroot/usr/include/"); + glue = gb_string_appendc(glue, "\" "); + + glue = gb_string_appendc(glue, "\"-I"); + glue = gb_string_append_length(glue, ODIN_ANDROID_NDK_TOOLCHAIN.text, ODIN_ANDROID_NDK_TOOLCHAIN.len); + glue = gb_string_appendc(glue, "sysroot/usr/include/aarch64-linux-android/"); + glue = gb_string_appendc(glue, "\" "); + + + glue = gb_string_appendc(glue, "-Wno-macro-redefined "); + + result = system_exec_command_line_app("android-native-app-glue-compile", glue); + if (result) { + return result; + } + + TIME_SECTION("Android Native App Glue ar"); + + gbString ar = gb_string_make_length(heap_allocator(), ODIN_ANDROID_NDK_TOOLCHAIN.text, 
ODIN_ANDROID_NDK_TOOLCHAIN.len); + defer (gb_string_free(ar)); + + ar = gb_string_appendc(ar, "bin/llvm-ar"); + + ar = gb_string_appendc(ar, " rcs "); + + ar = gb_string_appendc(ar, "\""); + ar = gb_string_append_length(ar, android_glue_static_lib.text, android_glue_static_lib.len); + ar = gb_string_appendc(ar, "\" "); + + ar = gb_string_appendc(ar, "\""); + ar = gb_string_append_length(ar, android_glue_object.text, android_glue_object.len); + ar = gb_string_appendc(ar, "\" "); + + result = system_exec_command_line_app("android-native-app-glue-ar", ar); + if (result) { + return result; + } + + object_files = gb_string_append_fmt(object_files, "\"%.*s\" ", LIT(android_glue_static_lib)); + } + + for (String object_path : gen->output_object_paths) { object_files = gb_string_append_fmt(object_files, "\"%.*s\" ", LIT(object_path)); } @@ -661,7 +742,9 @@ gb_internal i32 linker_stage(LinkerData *gen) { link_settings = gb_string_appendc(link_settings, "-Wl,-init,'_odin_entry_point' "); link_settings = gb_string_appendc(link_settings, "-Wl,-fini,'_odin_exit_point' "); } - + } else if (is_android) { + // Always shared even in android! + link_settings = gb_string_appendc(link_settings, "-shared "); } if (build_context.build_mode == BuildMode_Executable && build_context.reloc_mode == RelocMode_PIC) { @@ -670,6 +753,7 @@ gb_internal i32 linker_stage(LinkerData *gen) { if (build_context.metrics.os != TargetOs_openbsd && build_context.metrics.os != TargetOs_haiku && build_context.metrics.arch != TargetArch_riscv64 + && !is_android ) { // OpenBSD and Haiku default to PIE executable. do not pass -no-pie for it. link_settings = gb_string_appendc(link_settings, "-no-pie "); @@ -701,6 +785,29 @@ gb_internal i32 linker_stage(LinkerData *gen) { // This points the linker to where the entry point is link_settings = gb_string_appendc(link_settings, "-e _main "); } + } else if (build_context.metrics.os == TargetOs_openbsd) { + // OpenBSD ports install shared libraries in /usr/local/lib. 
Also, we must explicitly link libpthread. + platform_lib_str = gb_string_appendc(platform_lib_str, "-lpthread -Wl,-L/usr/local/lib "); + // Until the LLVM back-end can be adapted to emit endbr64 instructions on amd64, we + // need to pass -z nobtcfi in order to allow the resulting program to run under + // OpenBSD 7.4 and newer. Once support is added at compile time, this can be dropped. + platform_lib_str = gb_string_appendc(platform_lib_str, "-Wl,-z,nobtcfi "); + } + + if (is_android) { + GB_ASSERT(ODIN_ANDROID_NDK_TOOLCHAIN_LIB.len != 0); + GB_ASSERT(ODIN_ANDROID_NDK_TOOLCHAIN_LIB_LEVEL.len != 0); + GB_ASSERT(ODIN_ANDROID_NDK_TOOLCHAIN_SYSROOT.len != 0); + + platform_lib_str = gb_string_appendc(platform_lib_str, "\"-L"); + platform_lib_str = gb_string_append_length(platform_lib_str, ODIN_ANDROID_NDK_TOOLCHAIN_LIB_LEVEL.text, ODIN_ANDROID_NDK_TOOLCHAIN_LIB_LEVEL.len); + platform_lib_str = gb_string_appendc(platform_lib_str, "\" "); + + platform_lib_str = gb_string_appendc(platform_lib_str, "\"--sysroot="); + platform_lib_str = gb_string_append_length(platform_lib_str, ODIN_ANDROID_NDK_TOOLCHAIN_SYSROOT.text, ODIN_ANDROID_NDK_TOOLCHAIN_SYSROOT.len); + platform_lib_str = gb_string_appendc(platform_lib_str, "\" "); + + link_settings = gb_string_appendc(link_settings, "-u ANativeActivity_onCreate "); } if (!build_context.no_rpath) { @@ -709,24 +816,31 @@ gb_internal i32 linker_stage(LinkerData *gen) { if (build_context.metrics.os == TargetOs_darwin) { link_settings = gb_string_appendc(link_settings, "-Wl,-rpath,@loader_path "); } else { - link_settings = gb_string_appendc(link_settings, "-Wl,-rpath,\\$ORIGIN "); + if (is_android) { + // ignore + } else { + link_settings = gb_string_appendc(link_settings, "-Wl,-rpath,\\$ORIGIN "); + } } } if (!build_context.no_crt) { - platform_lib_str = gb_string_appendc(platform_lib_str, "-lm "); + lib_str = gb_string_appendc(lib_str, "-lm "); if (build_context.metrics.os == TargetOs_darwin) { // NOTE: adding this causes a warning 
about duplicate libraries, I think it is // automatically assumed/added by clang when you don't do `-nostdlib`. - // platform_lib_str = gb_string_appendc(platform_lib_str, "-lSystem "); + // lib_str = gb_string_appendc(lib_str, "-lSystem "); } else { - platform_lib_str = gb_string_appendc(platform_lib_str, "-lc "); + lib_str = gb_string_appendc(lib_str, "-lc "); } } gbString link_command_line = gb_string_make(heap_allocator(), clang_path); defer (gb_string_free(link_command_line)); + if (is_android) { + link_command_line = gb_string_append_fmt(link_command_line, " --target=aarch64-linux-android%d ", ODIN_ANDROID_API_LEVEL); + } link_command_line = gb_string_appendc(link_command_line, " -Wno-unused-command-line-argument "); link_command_line = gb_string_appendc(link_command_line, object_files); link_command_line = gb_string_append_fmt(link_command_line, " -o \"%.*s\" ", LIT(output_filename)); @@ -736,6 +850,11 @@ gb_internal i32 linker_stage(LinkerData *gen) { link_command_line = gb_string_append_fmt(link_command_line, " %.*s ", LIT(build_context.extra_linker_flags)); link_command_line = gb_string_append_fmt(link_command_line, " %s ", link_settings); + + if (is_android) { + TIME_SECTION("Linking"); + } + if (build_context.linker_choice == Linker_lld) { link_command_line = gb_string_append_fmt(link_command_line, " -fuse-ld=lld"); result = system_exec_command_line_app("lld-link", link_command_line); diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 6f3abc607..396b94f98 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -1126,30 +1126,51 @@ gb_internal lbProcedure *lb_create_objc_names(lbModule *main_module) { return p; } -gb_internal void lb_finalize_objc_names(lbProcedure *p) { +gb_internal void lb_finalize_objc_names(lbGenerator *gen, lbProcedure *p) { if (p == nullptr) { return; } lbModule *m = p->module; + GB_ASSERT(m == &p->module->gen->default_module); TEMPORARY_ALLOCATOR_GUARD(); + StringSet handled = {}; + string_set_init(&handled); + 
defer (string_set_destroy(&handled)); + auto args = array_make(temporary_allocator(), 1); LLVMSetLinkage(p->value, LLVMInternalLinkage); lb_begin_procedure_body(p); - for (auto const &entry : m->objc_classes) { - String name = entry.key; - args[0] = lb_const_value(m, t_cstring, exact_value_string(name)); - lbValue ptr = lb_emit_runtime_call(p, "objc_lookUpClass", args); - lb_addr_store(p, entry.value.local_module_addr, ptr); + + auto register_thing = [&handled, &m, &args](lbProcedure *p, lbObjCGlobal const &g, char const *call) { + if (!string_set_update(&handled, g.name)) { + lbAddr addr = {}; + lbValue *found = string_map_get(&m->members, g.global_name); + if (found) { + addr = lb_addr(*found); + } else { + lbValue v = {}; + LLVMTypeRef t = lb_type(m, g.type); + v.value = LLVMAddGlobal(m->mod, t, g.global_name); + v.type = alloc_type_pointer(g.type); + addr = lb_addr(v); + LLVMSetInitializer(v.value, LLVMConstNull(t)); + } + + args[0] = lb_const_value(m, t_cstring, exact_value_string(g.name)); + lbValue ptr = lb_emit_runtime_call(p, call, args); + lb_addr_store(p, addr, ptr); + } + }; + + for (lbObjCGlobal g = {}; mpsc_dequeue(&gen->objc_classes, &g); /**/) { + register_thing(p, g, "objc_lookUpClass"); } - for (auto const &entry : m->objc_selectors) { - String name = entry.key; - args[0] = lb_const_value(m, t_cstring, exact_value_string(name)); - lbValue ptr = lb_emit_runtime_call(p, "sel_registerName", args); - lb_addr_store(p, entry.value.local_module_addr, ptr); + for (lbObjCGlobal g = {}; mpsc_dequeue(&gen->objc_selectors, &g); /**/) { + register_thing(p, g, "sel_registerName"); } lb_end_procedure_body(p); @@ -2637,7 +2658,7 @@ gb_internal bool lb_generate_code(lbGenerator *gen) { if (gen->objc_names) { TIME_SECTION("Finalize objc names"); - lb_finalize_objc_names(gen->objc_names); + lb_finalize_objc_names(gen, gen->objc_names); } if (build_context.ODIN_DEBUG) { diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 4fd1b8d1a..3e01ada5f 100644 --- 
a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -143,11 +143,6 @@ struct lbPadType { LLVMTypeRef type; }; -struct lbObjcRef { - Entity * entity; - lbAddr local_module_addr; -}; - struct lbModule { LLVMModuleRef mod; LLVMContextRef ctx; @@ -198,8 +193,9 @@ struct lbModule { RecursiveMutex debug_values_mutex; PtrMap debug_values; - StringMap objc_classes; - StringMap objc_selectors; + + StringMap objc_classes; + StringMap objc_selectors; PtrMap map_cell_info_map; // address of runtime.Map_Info PtrMap map_info_map; // address of runtime.Map_Cell_Info @@ -218,6 +214,13 @@ struct lbEntityCorrection { char const *cname; }; +struct lbObjCGlobal { + lbModule *module; + gbString global_name; + String name; + Type * type; +}; + struct lbGenerator : LinkerData { CheckerInfo *info; @@ -235,6 +238,8 @@ struct lbGenerator : LinkerData { lbProcedure *objc_names; MPSCQueue entities_to_correct_linkage; + MPSCQueue objc_selectors; + MPSCQueue objc_classes; }; diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index b916c0017..9401e4d55 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -33,7 +33,7 @@ gb_internal bool lb_is_elem_const(Ast *elem, Type *elem_type) { gb_internal bool lb_is_const_nil(lbValue value) { LLVMValueRef v = value.value; - if (LLVMIsConstant(v)) { + if (v != nullptr && LLVMIsConstant(v)) { if (LLVMIsAConstantAggregateZero(v)) { return true; } else if (LLVMIsAConstantPointerNull(v)) { @@ -1125,10 +1125,11 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bo visited[index] = true; } else { if (!visited[index]) { - values[index] = lb_const_value(m, f->type, {}, false).value; + values[index] = lb_const_value(m, f->type, {}, /*allow_local*/false, is_rodata).value; visited[index] = true; } + unsigned idx_list_len = cast(unsigned)sel.index.count-1; unsigned *idx_list = gb_alloc_array(temporary_allocator(), unsigned, idx_list_len); @@ -1139,6 +1140,7 @@ gb_internal lbValue 
lb_const_value(lbModule *m, Type *type, ExactValue value, bo i32 index = sel.index[j]; Type *cvt = base_type(cv_type); + if (cvt->kind == Type_Struct) { if (cvt->Struct.is_raw_union) { // sanity check which should have been caught by `lb_is_nested_possibly_constant` @@ -1164,8 +1166,40 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bo } if (is_constant) { LLVMValueRef elem_value = lb_const_value(m, tav.type, tav.value, allow_local, is_rodata).value; - if (LLVMIsConstant(elem_value)) { + if (LLVMIsConstant(elem_value) && LLVMIsConstant(values[index])) { values[index] = llvm_const_insert_value(m, values[index], elem_value, idx_list, idx_list_len); + } else if (is_local) { + #if 1 + lbProcedure *p = m->curr_procedure; + GB_ASSERT(p != nullptr); + if (LLVMIsConstant(values[index])) { + lbAddr addr = lb_add_local_generated(p, f->type, false); + lb_addr_store(p, addr, lbValue{values[index], f->type}); + values[index] = lb_addr_load(p, addr).value; + } + + GB_ASSERT(LLVMIsALoadInst(values[index])); + + LLVMValueRef ptr = LLVMGetOperand(values[index], 0); + + LLVMValueRef *indices = gb_alloc_array(temporary_allocator(), LLVMValueRef, idx_list_len); + LLVMTypeRef lt_u32 = lb_type(m, t_u32); + for (unsigned i = 0; i < idx_list_len; i++) { + indices[i] = LLVMConstInt(lt_u32, idx_list[i], false); + } + + ptr = LLVMBuildGEP2(p->builder, lb_type(m, f->type), ptr, indices, idx_list_len, ""); + ptr = LLVMBuildPointerCast(p->builder, ptr, lb_type(m, alloc_type_pointer(tav.type)), ""); + + if (LLVMIsALoadInst(elem_value)) { + i64 sz = type_size_of(tav.type); + LLVMValueRef src = LLVMGetOperand(elem_value, 0); + lb_mem_copy_non_overlapping(p, {ptr, t_rawptr}, {src, t_rawptr}, lb_const_int(m, t_int, sz), false); + } else { + LLVMBuildStore(p->builder, elem_value, ptr); + } + #endif + is_constant = false; } else { is_constant = false; } @@ -1205,7 +1239,7 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bo LLVMValueRef 
val = values[i]; if (!LLVMIsConstant(val)) { GB_ASSERT(is_local); - GB_ASSERT(LLVMGetInstructionOpcode(val) == LLVMLoad); + GB_ASSERT(LLVMIsALoadInst(val)); is_constant = false; } } @@ -1237,7 +1271,15 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bo LLVMValueRef val = old_values[i]; if (!LLVMIsConstant(val)) { LLVMValueRef dst = LLVMBuildStructGEP2(p->builder, llvm_addr_type(p->module, v.addr), v.addr.value, cast(unsigned)i, ""); + // if (LLVMIsALoadInst(val)) { + // Type *ptr_type = v.addr.type; + // i64 sz = type_size_of(type_deref(ptr_type)); + + // LLVMValueRef src = LLVMGetOperand(val, 0); + // lb_mem_copy_non_overlapping(p, {dst, ptr_type}, {src, ptr_type}, lb_const_int(m, t_int, sz), false); + // } else { LLVMBuildStore(p->builder, val, dst); + // } } } return lb_addr_load(p, v); diff --git a/src/llvm_backend_debug.cpp b/src/llvm_backend_debug.cpp index 926daaae4..53c007d8d 100644 --- a/src/llvm_backend_debug.cpp +++ b/src/llvm_backend_debug.cpp @@ -1089,7 +1089,7 @@ gb_internal void lb_add_debug_local_variable(lbProcedure *p, LLVMValueRef ptr, T #if LLVM_VERSION_MAJOR <= 18 LLVMDIBuilderInsertDeclareAtEnd(m->debug_builder, storage, var_info, llvm_expr, llvm_debug_loc, block); #else - LLVMDIBuilderInsertDbgValueRecordAtEnd(m->debug_builder, storage, var_info, llvm_expr, llvm_debug_loc, block); + LLVMDIBuilderInsertDeclareRecordAtEnd(m->debug_builder, storage, var_info, llvm_expr, llvm_debug_loc, block); #endif } diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index ea3db33f4..0c82180ec 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -3493,7 +3493,8 @@ gb_internal lbValue lb_build_expr_internal(lbProcedure *p, Ast *expr) { if (tv.value.kind != ExactValue_Invalid) { // NOTE(bill): Short on constant values - return lb_const_value(p->module, type, tv.value); + bool allow_local = true; + return lb_const_value(p->module, type, tv.value, allow_local); } else if (tv.mode == 
Addressing_Type) { // NOTE(bill, 2023-01-16): is this correct? I hope so at least return lb_typeid(m, tv.type); diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index b7f70893f..ce2c70661 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -171,6 +171,8 @@ gb_internal bool lb_init_generator(lbGenerator *gen, Checker *c) { } mpsc_init(&gen->entities_to_correct_linkage, heap_allocator()); + mpsc_init(&gen->objc_selectors, heap_allocator()); + mpsc_init(&gen->objc_classes, heap_allocator()); return true; } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index ad14e4fcd..304db75bc 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2972,6 +2972,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu constraints = gb_string_appendc(constraints, "}"); } + constraints = gb_string_appendc(constraints, ",~{memory}"); + inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); } break; @@ -3034,6 +3036,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu constraints = gb_string_appendc(constraints, "}"); } + constraints = gb_string_appendc(constraints, ",~{memory}"); + inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); } break; @@ -3059,6 +3063,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu constraints = gb_string_appendc(constraints, "}"); } + constraints = gb_string_appendc(constraints, ",~{memory}"); + inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); } else { char asm_string[] = "svc #0"; @@ -3078,6 +3084,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu constraints = gb_string_appendc(constraints, "}"); } + constraints = gb_string_appendc(constraints, ",~{memory}"); + inline_asm = 
llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); } } @@ -3104,6 +3112,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu constraints = gb_string_appendc(constraints, "}"); } + constraints = gb_string_appendc(constraints, ",~{memory}"); + inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); } break; diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index e06369be3..bfeebfcbe 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -2094,41 +2094,65 @@ gb_internal void lb_set_wasm_export_attributes(LLVMValueRef value, String export } - gb_internal lbAddr lb_handle_objc_find_or_register_selector(lbProcedure *p, String const &name) { - lbObjcRef *found = string_map_get(&p->module->objc_selectors, name); + lbModule *m = p->module; + lbAddr *found = string_map_get(&m->objc_selectors, name); if (found) { - return found->local_module_addr; + return *found; } lbModule *default_module = &p->module->gen->default_module; - Entity *entity = {}; - if (default_module != p->module) { - found = string_map_get(&default_module->objc_selectors, name); - if (found) { - entity = found->entity; - } + gbString global_name = gb_string_make(permanent_allocator(), "__$objc_SEL::"); + global_name = gb_string_append_length(global_name, name.text, name.len); + + LLVMTypeRef t = lb_type(m, t_objc_SEL); + lbValue g = {}; + g.value = LLVMAddGlobal(m->mod, t, global_name); + g.type = alloc_type_pointer(t_objc_SEL); + + if (default_module == m) { + LLVMSetInitializer(g.value, LLVMConstNull(t)); + lb_add_member(m, make_string_c(global_name), g); + } else { + LLVMSetLinkage(g.value, LLVMExternalLinkage); } - if (!entity) { - gbString global_name = gb_string_make(permanent_allocator(), "__$objc_SEL::"); - global_name = gb_string_append_length(global_name, name.text, name.len); + mpsc_enqueue(&m->gen->objc_selectors, lbObjCGlobal{m, 
global_name, name, t_objc_SEL}); - lbAddr default_addr = lb_add_global_generated_with_name(default_module, t_objc_SEL, {}, - make_string(cast(u8 const *)global_name, gb_string_length(global_name)), - &entity); - string_map_set(&default_module->objc_selectors, name, lbObjcRef{entity, default_addr}); + lbAddr addr = lb_addr(g); + string_map_set(&m->objc_selectors, name, addr); + return addr; +} + +gb_internal lbAddr lb_handle_objc_find_or_register_class(lbProcedure *p, String const &name) { + lbModule *m = p->module; + lbAddr *found = string_map_get(&m->objc_classes, name); + if (found) { + return *found; } - lbValue ptr = lb_find_value_from_entity(p->module, entity); - lbAddr local_addr = lb_addr(ptr); + lbModule *default_module = &p->module->gen->default_module; - if (default_module != p->module) { - string_map_set(&p->module->objc_selectors, name, lbObjcRef{entity, local_addr}); + gbString global_name = gb_string_make(permanent_allocator(), "__$objc_Class::"); + global_name = gb_string_append_length(global_name, name.text, name.len); + + LLVMTypeRef t = lb_type(m, t_objc_Class); + lbValue g = {}; + g.value = LLVMAddGlobal(m->mod, t, global_name); + g.type = alloc_type_pointer(t_objc_Class); + + if (default_module == m) { + LLVMSetInitializer(g.value, LLVMConstNull(t)); + lb_add_member(m, make_string_c(global_name), g); + } else { + LLVMSetLinkage(g.value, LLVMExternalLinkage); } + mpsc_enqueue(&m->gen->objc_classes, lbObjCGlobal{m, global_name, name, t_objc_Class}); - return local_addr; + lbAddr addr = lb_addr(g); + string_map_set(&m->objc_classes, name, addr); + return addr; } gb_internal lbValue lb_handle_objc_find_selector(lbProcedure *p, Ast *expr) { @@ -2157,41 +2181,6 @@ gb_internal lbValue lb_handle_objc_register_selector(lbProcedure *p, Ast *expr) return lb_addr_load(p, dst); } -gb_internal lbAddr lb_handle_objc_find_or_register_class(lbProcedure *p, String const &name) { - lbObjcRef *found = string_map_get(&p->module->objc_classes, name); - if (found) { - 
return found->local_module_addr; - } - - lbModule *default_module = &p->module->gen->default_module; - Entity *entity = {}; - - if (default_module != p->module) { - found = string_map_get(&default_module->objc_classes, name); - if (found) { - entity = found->entity; - } - } - - if (!entity) { - gbString global_name = gb_string_make(permanent_allocator(), "__$objc_Class::"); - global_name = gb_string_append_length(global_name, name.text, name.len); - - lbAddr default_addr = lb_add_global_generated_with_name(default_module, t_objc_Class, {}, - make_string(cast(u8 const *)global_name, gb_string_length(global_name)), - &entity); - string_map_set(&default_module->objc_classes, name, lbObjcRef{entity, default_addr}); - } - - lbValue ptr = lb_find_value_from_entity(p->module, entity); - lbAddr local_addr = lb_addr(ptr); - - if (default_module != p->module) { - string_map_set(&p->module->objc_classes, name, lbObjcRef{entity, local_addr}); - } - - return local_addr; -} gb_internal lbValue lb_handle_objc_find_class(lbProcedure *p, Ast *expr) { ast_node(ce, CallExpr, expr); diff --git a/src/main.cpp b/src/main.cpp index 289a6150a..c19bbde22 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -74,6 +74,7 @@ gb_global Timings global_timings = {0}; #include "cached.cpp" #include "linker.cpp" +#include "bundle_command.cpp" #if defined(GB_SYSTEM_WINDOWS) && defined(ODIN_TILDE_BACKEND) #define ALLOW_TILDE 1 @@ -408,6 +409,10 @@ enum BuildFlagKind { BuildFlag_Subsystem, #endif + BuildFlag_AndroidKeystore, + BuildFlag_AndroidKeystoreAlias, + BuildFlag_AndroidManifest, + BuildFlag_COUNT, }; @@ -426,12 +431,12 @@ struct BuildFlag { BuildFlagKind kind; String name; BuildFlagParamKind param_kind; - u32 command_support; + u64 command_support; bool allow_multiple; }; -gb_internal void add_flag(Array *build_flags, BuildFlagKind kind, String name, BuildFlagParamKind param_kind, u32 command_support, bool allow_multiple=false) { +gb_internal void add_flag(Array *build_flags, BuildFlagKind kind, 
String name, BuildFlagParamKind param_kind, u64 command_support, bool allow_multiple=false) { BuildFlag flag = {kind, name, param_kind, command_support, allow_multiple}; array_add(build_flags, flag); } @@ -567,7 +572,7 @@ gb_internal bool parse_build_flags(Array args) { add_flag(&build_flags, BuildFlag_Microarch, str_lit("microarch"), BuildFlagParam_String, Command__does_build); add_flag(&build_flags, BuildFlag_TargetFeatures, str_lit("target-features"), BuildFlagParam_String, Command__does_build); add_flag(&build_flags, BuildFlag_StrictTargetFeatures, str_lit("strict-target-features"), BuildFlagParam_None, Command__does_build); - add_flag(&build_flags, BuildFlag_MinimumOSVersion, str_lit("minimum-os-version"), BuildFlagParam_String, Command__does_build); + add_flag(&build_flags, BuildFlag_MinimumOSVersion, str_lit("minimum-os-version"), BuildFlagParam_String, Command__does_build | Command_bundle_android); add_flag(&build_flags, BuildFlag_RelocMode, str_lit("reloc-mode"), BuildFlagParam_String, Command__does_build); add_flag(&build_flags, BuildFlag_DisableRedZone, str_lit("disable-red-zone"), BuildFlagParam_None, Command__does_build); @@ -624,9 +629,20 @@ gb_internal bool parse_build_flags(Array args) { add_flag(&build_flags, BuildFlag_Subsystem, str_lit("subsystem"), BuildFlagParam_String, Command__does_build); #endif + add_flag(&build_flags, BuildFlag_AndroidKeystore, str_lit("android-keystore"), BuildFlagParam_String, Command_bundle_android); + add_flag(&build_flags, BuildFlag_AndroidKeystoreAlias, str_lit("android-keystore-alias"), BuildFlagParam_String, Command_bundle_android); + add_flag(&build_flags, BuildFlag_AndroidManifest, str_lit("android-manifest"), BuildFlagParam_String, Command_bundle_android); - GB_ASSERT(args.count >= 3); - Array flag_args = array_slice(args, 3, args.count); + + Array flag_args = {}; + + if (build_context.command_kind == Command_bundle_android) { + GB_ASSERT(args.count >= 4); + flag_args = array_slice(args, 4, args.count); + } else 
{ + GB_ASSERT(args.count >= 3); + flag_args = array_slice(args, 3, args.count); + } bool set_flags[BuildFlag_COUNT] = {}; @@ -1105,8 +1121,9 @@ gb_internal bool parse_build_flags(Array args) { String str = value.value_string; bool found = false; - if (selected_target_metrics->metrics->os != TargetOs_darwin) { - gb_printf_err("-subtarget can only be used with darwin based targets at the moment\n"); + if (selected_target_metrics->metrics->os != TargetOs_darwin && + selected_target_metrics->metrics->os != TargetOs_linux ) { + gb_printf_err("-subtarget can only be used with darwin and linux based targets at the moment\n"); bad_flags = true; break; } @@ -1637,6 +1654,20 @@ gb_internal bool parse_build_flags(Array args) { } #endif + case BuildFlag_AndroidKeystore: + GB_ASSERT(value.kind == ExactValue_String); + build_context.android_keystore = value.value_string; + break; + + case BuildFlag_AndroidKeystoreAlias: + GB_ASSERT(value.kind == ExactValue_String); + build_context.android_keystore_alias = value.value_string; + break; + + case BuildFlag_AndroidManifest: + GB_ASSERT(value.kind == ExactValue_String); + build_context.android_manifest = value.value_string; + break; } } @@ -1652,8 +1683,8 @@ gb_internal bool parse_build_flags(Array args) { gb_printf_err("'%.*s' is supported with the following commands:\n", LIT(name)); gb_printf_err("\t"); i32 count = 0; - for (u32 i = 0; i < 32; i++) { - if (found_bf.command_support & (1< 0) { gb_printf_err(", "); } @@ -2236,6 +2267,10 @@ gb_internal void print_show_help(String const arg0, String command, String optio } else if (command == "strip-semicolon") { print_usage_line(1, "strip-semicolon"); print_usage_line(2, "Parses and type checks .odin file(s) and then removes unneeded semicolons from the entire project."); + } else if (command == "bundle") { + print_usage_line(1, "bundle Bundles a directory in a specific layout for that platform"); + print_usage_line(2, "Supported platforms:"); + print_usage_line(3, "android"); } bool 
doc = command == "doc"; @@ -2245,6 +2280,7 @@ gb_internal void print_show_help(String const arg0, String command, String optio bool strip_semicolon = command == "strip-semicolon"; bool check_only = command == "check" || strip_semicolon; bool check = run_or_build || check_only; + bool bundle = command == "bundle"; if (command == "help") { doc = true; @@ -2512,13 +2548,15 @@ gb_internal void print_show_help(String const arg0, String command, String optio } } - if (run_or_build) { + if (run_or_build || bundle) { if (print_flag("-minimum-os-version:")) { print_usage_line(2, "Sets the minimum OS version targeted by the application."); print_usage_line(2, "Default: -minimum-os-version:11.0.0"); print_usage_line(2, "Only used when target is Darwin, if given, linking mismatched versions will emit a warning."); } + } + if (run_or_build) { if (print_flag("-no-bounds-check")) { print_usage_line(2, "Disables bounds checking program wide."); } @@ -3299,6 +3337,19 @@ int main(int arg_count, char const **arg_ptr) { print_show_help(args[0], args[1], args[2]); return 0; } + } else if (command == "bundle") { + if (args.count < 4) { + usage(args[0]); + return 1; + } + if (args[2] == "android") { + build_context.command_kind = Command_bundle_android; + } else { + gb_printf_err("Unknown package command: '%.*s'\n", LIT(args[2])); + usage(args[0]); + return 1; + } + init_filename = args[3]; } else if (command == "root") { gb_printf("%.*s", LIT(odin_root_dir())); return 0; @@ -3333,10 +3384,14 @@ int main(int arg_count, char const **arg_ptr) { } if (!single_file_package) { - gb_printf_err("ERROR: `%.*s %.*s` takes a package as its first argument.\n", LIT(args[0]), LIT(command)); + gb_printf_err("ERROR: `%.*s %.*s` takes a package/directory as its first argument.\n", LIT(args[0]), LIT(command)); if (init_filename == "-file") { gb_printf_err("Did you mean `%.*s %.*s -file`?\n", LIT(args[0]), LIT(command)); } else { + if (!gb_file_exists(cast(const char*)init_filename.text)) { + 
gb_printf_err("The file '%.*s' was not found.\n", LIT(init_filename)); + return 1; + } gb_printf_err("Did you mean `%.*s %.*s %.*s -file`?\n", LIT(args[0]), LIT(command), LIT(init_filename)); } @@ -3370,6 +3425,10 @@ int main(int arg_count, char const **arg_ptr) { return 0; } + if (command == "bundle") { + return bundle(init_filename); + } + // NOTE(bill): add 'shared' directory if it is not already set if (!find_library_collection_path(str_lit("shared"), nullptr)) { add_library_collection(str_lit("shared"), diff --git a/src/path.cpp b/src/path.cpp index 2c08ddd98..12f8d3d4e 100644 --- a/src/path.cpp +++ b/src/path.cpp @@ -30,28 +30,80 @@ gb_internal String remove_directory_from_path(String const &s) { } -// NOTE(Mark Naughton): getcwd as String -#if !defined(GB_SYSTEM_WINDOWS) -gb_internal String get_current_directory(void) { - char cwd[256]; - getcwd(cwd, 256); +#if defined(GB_SYSTEM_WINDOWS) +gb_global SRWLOCK cwd_lock; - return make_string_c(cwd); +String get_working_directory(gbAllocator allocator) { + AcquireSRWLockExclusive(&cwd_lock); + + TEMPORARY_ALLOCATOR_GUARD(); + + DWORD sz_utf16 = GetCurrentDirectoryW(0, nullptr); + wchar_t *dir_buf_wstr = gb_alloc_array(temporary_allocator(), wchar_t, sz_utf16); + if (dir_buf_wstr == nullptr) { + ReleaseSRWLockExclusive(&cwd_lock); + return {}; + } + + DWORD n = GetCurrentDirectoryW(sz_utf16, dir_buf_wstr); + GB_ASSERT(n+1 == sz_utf16); + ReleaseSRWLockExclusive(&cwd_lock); + + + isize buf_len = sz_utf16*4; + u8 *buf = gb_alloc_array(allocator, u8, buf_len); + gb_ucs2_to_utf8(buf, buf_len, cast(u16 *)dir_buf_wstr); + + return make_string_c(cast(char const *)buf); +} + +bool set_working_directory(String dir) { + bool ok = false; + TEMPORARY_ALLOCATOR_GUARD(); + + char const *cdir = alloc_cstring(temporary_allocator(), dir); + wchar_t *wstr = gb__alloc_utf8_to_ucs2(temporary_allocator(), cdir, nullptr); + + AcquireSRWLockExclusive(&cwd_lock); + + ok = SetCurrentDirectoryW(wstr); + + ReleaseSRWLockExclusive(&cwd_lock); 
+ + return ok; } #else -gb_internal String get_current_directory(void) { - gbAllocator a = heap_allocator(); - wchar_t cwd[256]; - GetCurrentDirectoryW(256, cwd); +String get_working_directory(gbAllocator allocator) { + TEMPORARY_ALLOCATOR_GUARD(); - String16 wstr = make_string16_c(cwd); + auto buf = array_make(temporary_allocator()); + size_t size = PATH_MAX; - return string16_to_string(a, wstr); + char const *cwd; + for (; cwd == nullptr; size *= 2) { + array_resize(&buf, size); + + cwd = getcwd(buf.data, buf.count); + if (cwd == nullptr && errno != ERANGE) { + return {}; + } + } + + return copy_string(allocator, make_string_c(cwd)); } + +bool set_working_directory(String dir) { + TEMPORARY_ALLOCATOR_GUARD(); + char const *cdir = alloc_cstring(temporary_allocator(), dir); + return !chdir(cdir); +} + #endif + + gb_internal bool path_is_directory(String path); gb_internal String directory_from_path(String const &s) { diff --git a/src/string.cpp b/src/string.cpp index b001adf0e..88b679540 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -273,6 +273,15 @@ gb_internal String path_extension(String const &str, bool include_dot = true) { return substring(str, include_dot ? 
pos : pos + 1, str.len); } + +gb_internal String path_remove_extension(String const &str) { + isize pos = string_extension_position(str); + if (pos < 0) { + return str; + } + return substring(str, 0, pos); +} + gb_internal String string_trim_whitespace(String str) { while (str.len > 0 && rune_is_whitespace(str[str.len-1])) { str.len--; diff --git a/tests/benchmark/crypto/benchmark_aead.odin b/tests/benchmark/crypto/benchmark_aead.odin new file mode 100644 index 000000000..bfd888a43 --- /dev/null +++ b/tests/benchmark/crypto/benchmark_aead.odin @@ -0,0 +1,96 @@ +package benchmark_core_crypto + +import "base:runtime" +import "core:crypto" +import "core:testing" +import "core:text/table" +import "core:time" + +import "core:crypto/aead" + +@(private = "file") +ITERS :: 10000 +@(private = "file") +SIZES := []int{64, 1024, 65536} + +@(test) +benchmark_crypto_aead :: proc(t: ^testing.T) { + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + + tbl: table.Table + table.init(&tbl) + defer table.destroy(&tbl) + + table.caption(&tbl, "AEAD") + table.aligned_header_of_values(&tbl, .Right, "Algorithm", "Size", "Time", "Throughput") + + for algo, i in aead.Algorithm { + if algo == .Invalid { + continue + } + if i > 1 { + table.row(&tbl) + } + + algo_name := aead.ALGORITHM_NAMES[algo] + key_sz := aead.KEY_SIZES[algo] + + key := make([]byte, key_sz, context.temp_allocator) + crypto.rand_bytes(key) + + // TODO: Benchmark all available imlementations? 
+ ctx: aead.Context + aead.init(&ctx, algo, key) + + for sz, _ in SIZES { + options := &time.Benchmark_Options{ + rounds = ITERS, + bytes = aead.IV_SIZES[algo] + sz, + setup = setup_sized_buf, + bench = do_bench_aead, + teardown = teardown_sized_buf, + } + context.user_ptr = &ctx + + err := time.benchmark(options, context.allocator) + testing.expect(t, err == nil) + + time_per_iter := options.duration / ITERS + table.aligned_row_of_values( + &tbl, + .Right, + algo_name, + table.format(&tbl, "%d", sz), + table.format(&tbl, "%8M", time_per_iter), + table.format(&tbl, "%5.3f MiB/s", options.megabytes_per_second), + ) + } + } + + log_table(&tbl) +} + +@(private = "file") +do_bench_aead :: proc( + options: ^time.Benchmark_Options, + allocator := context.allocator, +) -> ( + err: time.Benchmark_Error, +) { + tag_: [aead.MAX_TAG_SIZE]byte + + ctx := (^aead.Context)(context.user_ptr) + iv_sz := aead.iv_size(ctx) + + iv := options.input[:iv_sz] + buf := options.input[iv_sz:] + tag := tag_[:aead.tag_size(ctx)] + + for _ in 0 ..= options.rounds { + aead.seal_ctx(ctx, buf, tag, iv, nil, buf) + } + options.count = options.rounds + options.processed = options.rounds * (options.bytes - iv_sz) + + return +} diff --git a/tests/benchmark/crypto/benchmark_crypto.odin b/tests/benchmark/crypto/benchmark_crypto.odin deleted file mode 100644 index b139ea669..000000000 --- a/tests/benchmark/crypto/benchmark_crypto.odin +++ /dev/null @@ -1,415 +0,0 @@ -package benchmark_core_crypto - -import "base:runtime" -import "core:encoding/hex" -import "core:fmt" -import "core:log" -import "core:strings" -import "core:testing" -import "core:time" - -import "core:crypto/aes" -import "core:crypto/chacha20" -import "core:crypto/chacha20poly1305" -import "core:crypto/ed25519" -import "core:crypto/poly1305" -import "core:crypto/x25519" - -// Cryptographic primitive benchmarks. 
- -@(test) -benchmark_crypto :: proc(t: ^testing.T) { - runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() - - str: strings.Builder - strings.builder_init(&str, context.allocator) - defer { - log.info(strings.to_string(str)) - strings.builder_destroy(&str) - } - - { - name := "AES256-CTR 64 bytes" - options := &time.Benchmark_Options { - rounds = 1_000, - bytes = 64, - setup = _setup_sized_buf, - bench = _benchmark_aes256_ctr, - teardown = _teardown_sized_buf, - } - - err := time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - benchmark_print(&str, name, options) - - name = "AES256-CTR 1024 bytes" - options.bytes = 1024 - err = time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - benchmark_print(&str, name, options) - - name = "AES256-CTR 65536 bytes" - options.bytes = 65536 - err = time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - benchmark_print(&str, name, options) - } - { - name := "ChaCha20 64 bytes" - options := &time.Benchmark_Options { - rounds = 1_000, - bytes = 64, - setup = _setup_sized_buf, - bench = _benchmark_chacha20, - teardown = _teardown_sized_buf, - } - - err := time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - benchmark_print(&str, name, options) - - name = "ChaCha20 1024 bytes" - options.bytes = 1024 - err = time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - benchmark_print(&str, name, options) - - name = "ChaCha20 65536 bytes" - options.bytes = 65536 - err = time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - benchmark_print(&str, name, options) - } - { - name := "Poly1305 64 zero bytes" - options := &time.Benchmark_Options { - rounds = 1_000, - bytes = 64, - setup = _setup_sized_buf, - bench = _benchmark_poly1305, - teardown = _teardown_sized_buf, - } - - err := time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - 
benchmark_print(&str, name, options) - - name = "Poly1305 1024 zero bytes" - options.bytes = 1024 - err = time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - benchmark_print(&str, name, options) - } - { - name := "chacha20poly1305 64 bytes" - options := &time.Benchmark_Options { - rounds = 1_000, - bytes = 64, - setup = _setup_sized_buf, - bench = _benchmark_chacha20poly1305, - teardown = _teardown_sized_buf, - } - - err := time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - benchmark_print(&str, name, options) - - name = "chacha20poly1305 1024 bytes" - options.bytes = 1024 - err = time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - benchmark_print(&str, name, options) - - name = "chacha20poly1305 65536 bytes" - options.bytes = 65536 - err = time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - benchmark_print(&str, name, options) - } - { - name := "AES256-GCM 64 bytes" - options := &time.Benchmark_Options { - rounds = 1_000, - bytes = 64, - setup = _setup_sized_buf, - bench = _benchmark_aes256_gcm, - teardown = _teardown_sized_buf, - } - - key := [aes.KEY_SIZE_256]byte { - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - } - ctx: aes.Context_GCM - aes.init_gcm(&ctx, key[:]) - - context.user_ptr = &ctx - - err := time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - benchmark_print(&str, name, options) - - name = "AES256-GCM 1024 bytes" - options.bytes = 1024 - err = time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - benchmark_print(&str, name, options) - - name = "AES256-GCM 65536 bytes" - options.bytes = 65536 - err = time.benchmark(options, context.allocator) - testing.expect(t, err == nil, name) - benchmark_print(&str, name, options) 
- } - { - iters :: 10000 - - priv_str := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe" - priv_bytes, _ := hex.decode(transmute([]byte)(priv_str), context.temp_allocator) - priv_key: ed25519.Private_Key - start := time.now() - for i := 0; i < iters; i = i + 1 { - ok := ed25519.private_key_set_bytes(&priv_key, priv_bytes) - assert(ok, "private key should deserialize") - } - elapsed := time.since(start) - fmt.sbprintfln(&str, - "ed25519.private_key_set_bytes: ~%f us/op", - time.duration_microseconds(elapsed) / iters, - ) - - pub_bytes := priv_key._pub_key._b[:] // "I know what I am doing" - pub_key: ed25519.Public_Key - start = time.now() - for i := 0; i < iters; i = i + 1 { - ok := ed25519.public_key_set_bytes(&pub_key, pub_bytes[:]) - assert(ok, "public key should deserialize") - } - elapsed = time.since(start) - fmt.sbprintfln(&str, - "ed25519.public_key_set_bytes: ~%f us/op", - time.duration_microseconds(elapsed) / iters, - ) - - msg := "Got a job for you, 621." - sig_bytes: [ed25519.SIGNATURE_SIZE]byte - msg_bytes := transmute([]byte)(msg) - start = time.now() - for i := 0; i < iters; i = i + 1 { - ed25519.sign(&priv_key, msg_bytes, sig_bytes[:]) - } - elapsed = time.since(start) - fmt.sbprintfln(&str, - "ed25519.sign: ~%f us/op", - time.duration_microseconds(elapsed) / iters, - ) - - start = time.now() - for i := 0; i < iters; i = i + 1 { - ok := ed25519.verify(&pub_key, msg_bytes, sig_bytes[:]) - assert(ok, "signature should validate") - } - elapsed = time.since(start) - fmt.sbprintfln(&str, - "ed25519.verify: ~%f us/op", - time.duration_microseconds(elapsed) / iters, - ) - } - { - point_str := "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef" - scalar_str := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe" - - point, _ := hex.decode(transmute([]byte)(point_str), context.temp_allocator) - scalar, _ := hex.decode(transmute([]byte)(scalar_str), context.temp_allocator) - out: [x25519.POINT_SIZE]byte = --- - - 
iters :: 10000 - start := time.now() - for i := 0; i < iters; i = i + 1 { - x25519.scalarmult(out[:], scalar[:], point[:]) - } - elapsed := time.since(start) - - fmt.sbprintfln(&str, - "x25519.scalarmult: ~%f us/op", - time.duration_microseconds(elapsed) / iters, - ) - } -} - -@(private) -_setup_sized_buf :: proc( - options: ^time.Benchmark_Options, - allocator := context.allocator, -) -> ( - err: time.Benchmark_Error, -) { - assert(options != nil) - - options.input = make([]u8, options.bytes, allocator) - return nil if len(options.input) == options.bytes else .Allocation_Error -} - -@(private) -_teardown_sized_buf :: proc( - options: ^time.Benchmark_Options, - allocator := context.allocator, -) -> ( - err: time.Benchmark_Error, -) { - assert(options != nil) - - delete(options.input) - return nil -} - -@(private) -_benchmark_chacha20 :: proc( - options: ^time.Benchmark_Options, - allocator := context.allocator, -) -> ( - err: time.Benchmark_Error, -) { - buf := options.input - key := [chacha20.KEY_SIZE]byte { - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - } - iv := [chacha20.IV_SIZE]byte { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - } - - ctx: chacha20.Context = --- - chacha20.init(&ctx, key[:], iv[:]) - - for _ in 0 ..= options.rounds { - chacha20.xor_bytes(&ctx, buf, buf) - } - options.count = options.rounds - options.processed = options.rounds * options.bytes - return nil -} - -@(private) -_benchmark_poly1305 :: proc( - options: ^time.Benchmark_Options, - allocator := context.allocator, -) -> ( - err: time.Benchmark_Error, -) { - buf := options.input - key := [poly1305.KEY_SIZE]byte { - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 
0xef, - } - - tag: [poly1305.TAG_SIZE]byte = --- - for _ in 0 ..= options.rounds { - poly1305.sum(tag[:], buf, key[:]) - } - options.count = options.rounds - options.processed = options.rounds * options.bytes - //options.hash = u128(h) - return nil -} - -@(private) -_benchmark_chacha20poly1305 :: proc( - options: ^time.Benchmark_Options, - allocator := context.allocator, -) -> ( - err: time.Benchmark_Error, -) { - buf := options.input - key := [chacha20.KEY_SIZE]byte { - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - } - iv := [chacha20.IV_SIZE]byte { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - } - - ctx: chacha20poly1305.Context = --- - chacha20poly1305.init(&ctx, key[:]) // Basically 0 overhead. - - tag: [chacha20poly1305.TAG_SIZE]byte = --- - - for _ in 0 ..= options.rounds { - chacha20poly1305.seal(&ctx, buf, tag[:], iv[:], nil, buf) - } - options.count = options.rounds - options.processed = options.rounds * options.bytes - return nil -} - -@(private) -_benchmark_aes256_ctr :: proc( - options: ^time.Benchmark_Options, - allocator := context.allocator, -) -> ( - err: time.Benchmark_Error, -) { - buf := options.input - key := [aes.KEY_SIZE_256]byte { - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, - } - iv := [aes.CTR_IV_SIZE]byte { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - } - - ctx: aes.Context_CTR = --- - aes.init_ctr(&ctx, key[:], iv[:]) - - for _ in 0 ..= options.rounds { - aes.xor_bytes_ctr(&ctx, buf, buf) - } - options.count = options.rounds - options.processed = options.rounds * options.bytes - return nil -} - -_benchmark_aes256_gcm :: proc( - options: 
^time.Benchmark_Options, - allocator := context.allocator, -) -> ( - err: time.Benchmark_Error, -) { - buf := options.input - iv: [aes.GCM_IV_SIZE]byte - tag: [aes.GCM_TAG_SIZE]byte = --- - - ctx := (^aes.Context_GCM)(context.user_ptr) - - for _ in 0 ..= options.rounds { - aes.seal_gcm(ctx, buf, tag[:], iv[:], nil, buf) - } - options.count = options.rounds - options.processed = options.rounds * options.bytes - return nil -} - -@(private) -benchmark_print :: proc(str: ^strings.Builder, name: string, options: ^time.Benchmark_Options, loc := #caller_location) { - fmt.sbprintfln(str, "[%v] %v rounds, %v bytes processed in %v ns\n\t\t%5.3f rounds/s, %5.3f MiB/s\n", - name, - options.rounds, - options.processed, - time.duration_nanoseconds(options.duration), - options.rounds_per_second, - options.megabytes_per_second, - ) -} diff --git a/tests/benchmark/crypto/benchmark_ecc.odin b/tests/benchmark/crypto/benchmark_ecc.odin new file mode 100644 index 000000000..16ca798dc --- /dev/null +++ b/tests/benchmark/crypto/benchmark_ecc.odin @@ -0,0 +1,163 @@ +package benchmark_core_crypto + +import "base:runtime" +import "core:encoding/hex" +import "core:testing" +import "core:text/table" +import "core:time" + +import "core:crypto/ed25519" +import "core:crypto/x25519" +import "core:crypto/x448" + +@(private = "file") +ECDH_ITERS :: 10000 +@(private = "file") +DSA_ITERS :: 10000 + +@(test) +benchmark_crypto_ecc :: proc(t: ^testing.T) { + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + + bench_ecdh() + bench_dsa() +} + +@(private = "file") +bench_ecdh :: proc() { + tbl: table.Table + table.init(&tbl) + defer table.destroy(&tbl) + + table.caption(&tbl, "ECDH") + table.aligned_header_of_values(&tbl, .Right, "Algorithm", "Scalar-Basepoint", "Scalar-Point") + + append_tbl := proc(tbl: ^table.Table, algo_name: string, bp, sc: time.Duration) { + table.aligned_row_of_values( + tbl, + .Right, + algo_name, + table.format(tbl, "%8M", bp), + table.format(tbl, "%8M", sc), + ) + } + + scalar_bp, 
scalar := bench_x25519() + append_tbl(&tbl, "X25519", scalar_bp, scalar) + + scalar_bp, scalar = bench_x448() + append_tbl(&tbl, "X448", scalar_bp, scalar) + + log_table(&tbl) +} + +@(private = "file") +bench_x25519 :: proc() -> (bp, sc: time.Duration) { + point_str := "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef" + scalar_str := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe" + + point, _ := hex.decode(transmute([]byte)(point_str), context.temp_allocator) + scalar, _ := hex.decode(transmute([]byte)(scalar_str), context.temp_allocator) + out: [x25519.POINT_SIZE]byte = --- + + start := time.tick_now() + for _ in 0 ..< ECDH_ITERS { + x25519.scalarmult_basepoint(out[:], scalar[:]) + } + bp = time.tick_since(start) / ECDH_ITERS + + start = time.tick_now() + for _ in 0 ..< ECDH_ITERS { + x25519.scalarmult(out[:], scalar[:], point[:]) + } + sc = time.tick_since(start) / ECDH_ITERS + + return +} + +@(private = "file") +bench_x448 :: proc() -> (bp, sc: time.Duration) { + point_str := "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef" + scalar_str := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe" + + point, _ := hex.decode(transmute([]byte)(point_str), context.temp_allocator) + scalar, _ := hex.decode(transmute([]byte)(scalar_str), context.temp_allocator) + out: [x448.POINT_SIZE]byte = --- + + start := time.tick_now() + for _ in 0 ..< ECDH_ITERS { + x448.scalarmult_basepoint(out[:], scalar[:]) + } + bp = time.tick_since(start) / ECDH_ITERS + + start = time.tick_now() + for _ in 0 ..< ECDH_ITERS { + x448.scalarmult(out[:], scalar[:], point[:]) + } + sc = time.tick_since(start) / ECDH_ITERS + + return +} + +@(private = "file") +bench_dsa :: proc() { + tbl: table.Table + table.init(&tbl) + defer table.destroy(&tbl) + + table.caption(&tbl, "ECDSA/EdDSA") + table.aligned_header_of_values(&tbl, .Right, 
"Algorithm", "Op", "Time") + + append_tbl := proc(tbl: ^table.Table, algo_name, op: string, t: time.Duration) { + table.aligned_row_of_values( + tbl, + .Right, + algo_name, + op, + table.format(tbl, "%8M", t), + ) + } + + sk, sig, verif := bench_ed25519() + append_tbl(&tbl, "ed25519", "private_key_set_bytes", sk) + append_tbl(&tbl, "ed25519", "sign", sig) + append_tbl(&tbl, "ed25519", "verify", verif) + + log_table(&tbl) +} + +@(private = "file") +bench_ed25519 :: proc() -> (sk, sig, verif: time.Duration) { + priv_str := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe" + priv_bytes, _ := hex.decode(transmute([]byte)(priv_str), context.temp_allocator) + priv_key: ed25519.Private_Key + start := time.tick_now() + for _ in 0 ..< DSA_ITERS { + ok := ed25519.private_key_set_bytes(&priv_key, priv_bytes) + assert(ok, "private key should deserialize") + } + sk = time.tick_since(start) / DSA_ITERS + + pub_bytes := priv_key._pub_key._b[:] // "I know what I am doing" + pub_key: ed25519.Public_Key + ok := ed25519.public_key_set_bytes(&pub_key, pub_bytes[:]) + assert(ok, "public key should deserialize") + + msg := "Got a job for you, 621." 
+ sig_bytes: [ed25519.SIGNATURE_SIZE]byte + msg_bytes := transmute([]byte)(msg) + start = time.tick_now() + for _ in 0 ..< DSA_ITERS { + ed25519.sign(&priv_key, msg_bytes, sig_bytes[:]) + } + sig = time.tick_since(start) / DSA_ITERS + + start = time.tick_now() + for _ in 0 ..< DSA_ITERS { + ok = ed25519.verify(&pub_key, msg_bytes, sig_bytes[:]) + assert(ok, "signature should validate") + } + verif = time.tick_since(start) / DSA_ITERS + + return +} diff --git a/tests/benchmark/crypto/benchmark_hash.odin b/tests/benchmark/crypto/benchmark_hash.odin new file mode 100644 index 000000000..f9c560e6d --- /dev/null +++ b/tests/benchmark/crypto/benchmark_hash.odin @@ -0,0 +1,101 @@ +package benchmark_core_crypto + +import "base:runtime" +import "core:testing" +import "core:text/table" +import "core:time" + +import "core:crypto/hash" + +@(private = "file") +ITERS :: 10000 +@(private = "file") +SIZES := []int{64, 1024, 65536} + +@(test) +benchmark_crypto_hash :: proc(t: ^testing.T) { + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + + tbl: table.Table + table.init(&tbl) + defer table.destroy(&tbl) + + table.caption(&tbl, "Hash") + table.aligned_header_of_values(&tbl, .Right, "Algorithm", "Size", "Time", "Throughput") + + for algo, i in hash.Algorithm { + // Skip the sentinel value, and uncommon algorithms + #partial switch algo { + case .Invalid: + continue + case .Legacy_KECCAK_224, .Legacy_KECCAK_256, .Legacy_KECCAK_384, .Legacy_KECCAK_512: + // Skip: Legacy and not worth using over SHA3 + continue + case .Insecure_MD5, .Insecure_SHA1: + // Skip: Legacy and not worth using at all + continue + case .SHA224, .SHA384, .SHA3_224, .SHA3_384: + // Skip: Uncommon SHA2/SHA3 variants + continue + case .SM3: + // Skip: Liberty Prime is online. All systems nominal. + // Weapons hot. Mission: the destruction of any and + // all Chinese communists. 
+ continue + } + if i > 1 { + table.row(&tbl) + } + + algo_name := hash.ALGORITHM_NAMES[algo] + + for sz, _ in SIZES { + options := &time.Benchmark_Options{ + rounds = ITERS, + bytes = sz, + setup = setup_sized_buf, + bench = do_bench_hash, + teardown = teardown_sized_buf, + } + tmp := algo + context.user_ptr = &tmp + + err := time.benchmark(options, context.allocator) + testing.expect(t, err == nil) + + time_per_iter := options.duration / ITERS + table.aligned_row_of_values( + &tbl, + .Right, + algo_name, + table.format(&tbl, "%d", sz), + table.format(&tbl, "%8M", time_per_iter), + table.format(&tbl, "%5.3f MiB/s", options.megabytes_per_second), + ) + } + } + + log_table(&tbl) +} + +@(private = "file") +do_bench_hash :: proc( + options: ^time.Benchmark_Options, + allocator := context.allocator, +) -> ( + err: time.Benchmark_Error, +) { + digest_: [hash.MAX_DIGEST_SIZE]byte + + buf := options.input + algo := (^hash.Algorithm)(context.user_ptr)^ + digest := digest_[:hash.DIGEST_SIZES[algo]] + + for _ in 0 ..= options.rounds { + hash.hash_bytes_to_buffer(algo, buf, digest) + } + options.count = options.rounds + options.processed = options.rounds * (options.bytes) + + return +} diff --git a/tests/benchmark/crypto/benchmark_mac.odin b/tests/benchmark/crypto/benchmark_mac.odin new file mode 100644 index 000000000..a0d2cae90 --- /dev/null +++ b/tests/benchmark/crypto/benchmark_mac.odin @@ -0,0 +1,191 @@ +package benchmark_core_crypto + +import "base:runtime" +import "core:testing" +import "core:text/table" +import "core:time" + +import "core:crypto/hmac" +import "core:crypto/kmac" +import "core:crypto/poly1305" + +@(private = "file") +ITERS :: 10000 +@(private = "file") +SIZES := []int{64, 1024, 65536} +@(private = "file") +KMAC_KEY_SIZES := []int{128, 256} + +@(test) +benchmark_crypto_mac :: proc(t: ^testing.T) { + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + + tbl: table.Table + table.init(&tbl) + defer table.destroy(&tbl) + + table.caption(&tbl, "MAC") + 
table.aligned_header_of_values(&tbl, .Right, "Algorithm", "Size", "Time", "Throughput") + + { + for sz, _ in SIZES { + options := &time.Benchmark_Options{ + rounds = ITERS, + bytes = sz, + setup = setup_sized_buf, + bench = do_bench_hmac_sha_256, + teardown = teardown_sized_buf, + } + + err := time.benchmark(options, context.allocator) + testing.expect(t, err == nil) + + time_per_iter := options.duration / ITERS + table.aligned_row_of_values( + &tbl, + .Right, + "HMAC-SHA256", + table.format(&tbl, "%d", sz), + table.format(&tbl, "%8M", time_per_iter), + table.format(&tbl, "%5.3f MiB/s", options.megabytes_per_second), + ) + } + } + + table.row(&tbl) + + for key_sz, i in KMAC_KEY_SIZES { + if i > 0 { + table.row(&tbl) + } + + for sz, _ in SIZES { + options := &time.Benchmark_Options{ + rounds = ITERS, + bytes = sz, + processed = key_sz, // Pls ignore. + setup = setup_sized_buf, + bench = do_bench_kmac, + teardown = teardown_sized_buf, + } + + err := time.benchmark(options, context.allocator) + testing.expect(t, err == nil) + + time_per_iter := options.duration / ITERS + table.aligned_row_of_values( + &tbl, + .Right, + table.format(&tbl, "KMAC%d", key_sz), + table.format(&tbl, "%d", sz), + table.format(&tbl, "%8M", time_per_iter), + table.format(&tbl, "%5.3f MiB/s", options.megabytes_per_second), + ) + } + } + + table.row(&tbl) + + { + for sz, _ in SIZES { + options := &time.Benchmark_Options{ + rounds = ITERS, + bytes = sz, + setup = setup_sized_buf, + bench = do_bench_poly1305, + teardown = teardown_sized_buf, + } + + err := time.benchmark(options, context.allocator) + testing.expect(t, err == nil) + + time_per_iter := options.duration / ITERS + table.aligned_row_of_values( + &tbl, + .Right, + "poly1305", + table.format(&tbl, "%d", sz), + table.format(&tbl, "%8M", time_per_iter), + table.format(&tbl, "%5.3f MiB/s", options.megabytes_per_second), + ) + } + } + + log_table(&tbl) +} + +@(private = "file") +do_bench_hmac_sha_256 :: proc( + options: 
^time.Benchmark_Options,
+	allocator := context.allocator,
+) -> (
+	err: time.Benchmark_Error,
+) {
+	buf := options.input
+	key := [32]byte {
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+	}
+
+	tag: [32]byte = --- // left uninitialized; hmac.sum is handed the whole array as its output
+	for _ in 0 ..< options.rounds { // half-open range: exactly `rounds` MACs, matching count/processed below
+		hmac.sum(.SHA256, tag[:], buf, key[:])
+	}
+	options.count = options.rounds
+	options.processed = options.rounds * options.bytes
+
+	return
+}
+
+@(private = "file") // Benchmark body: one kmac.sum over options.input per round, with a fixed key.
+do_bench_kmac :: proc(
+	options: ^time.Benchmark_Options,
+	allocator := context.allocator,
+) -> (
+	err: time.Benchmark_Error,
+) {
+	buf := options.input
+	key := [kmac.MIN_KEY_SIZE_256]byte {
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+	}
+	sec_strength := options.processed // on entry `processed` carries the security strength; it is overwritten below
+
+	tag: [32]byte = ---
+	for _ in 0 ..< options.rounds { // half-open range: exactly `rounds` MACs, matching count/processed below
+		kmac.sum(sec_strength, tag[:sec_strength/8], buf, key[:], nil)
+	}
+	options.count = options.rounds
+	options.processed = options.rounds * options.bytes
+
+	return
+}
+
+@(private = "file") // Benchmark body: one poly1305.sum over options.input per round, with a fixed key.
+do_bench_poly1305 :: proc(
+	options: ^time.Benchmark_Options,
+	allocator := context.allocator,
+) -> (
+	err: time.Benchmark_Error,
+) {
+	buf := options.input
+	key := [poly1305.KEY_SIZE]byte {
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+	}
+
+	tag: [poly1305.TAG_SIZE]byte = ---
+	for _ in 0 ..< options.rounds { // half-open range: exactly `rounds` MACs, matching count/processed below
+		poly1305.sum(tag[:], buf, key[:])
+	}
+	options.count = options.rounds
+	options.processed = options.rounds * options.bytes
+
+	return
+}
diff --git a/tests/benchmark/crypto/benchmark_stream.odin b/tests/benchmark/crypto/benchmark_stream.odin new file mode
100644 index 000000000..38c5a87c6 --- /dev/null +++ b/tests/benchmark/crypto/benchmark_stream.odin @@ -0,0 +1,145 @@ +package benchmark_core_crypto + +import "base:runtime" +import "core:crypto" +import "core:testing" +import "core:text/table" +import "core:time" + +import "core:crypto/aes" +import "core:crypto/chacha20" + +@(private = "file") +ITERS :: 10000 +@(private = "file") +SIZES := []int{64, 1024, 65536} +@(private = "file") +AES_CTR_KEY_SIZES := []int{128, 192, 256} + +@(test) +benchmark_crypto_stream :: proc(t: ^testing.T) { + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + + tbl: table.Table + table.init(&tbl) + defer table.destroy(&tbl) + + table.caption(&tbl, "Stream Cipher") + table.aligned_header_of_values(&tbl, .Right, "Algorithm", "Size", "Time", "Throughput") + + for key_sz, i in AES_CTR_KEY_SIZES { + if i > 0 { + table.row(&tbl) + } + + key := make([]byte, key_sz/8, context.temp_allocator) + iv := make([]byte, aes.CTR_IV_SIZE, context.temp_allocator) + crypto.rand_bytes(key) + crypto.rand_bytes(iv) + + ctx: aes.Context_CTR + aes.init_ctr(&ctx, key, iv) + + for sz, _ in SIZES { + options := &time.Benchmark_Options{ + rounds = ITERS, + bytes = sz, + setup = setup_sized_buf, + bench = do_bench_aes_ctr, + teardown = teardown_sized_buf, + } + context.user_ptr = &ctx + + err := time.benchmark(options, context.allocator) + testing.expect(t, err == nil) + + time_per_iter := options.duration / ITERS + table.aligned_row_of_values( + &tbl, + .Right, + table.format(&tbl, "AES%d-CTR", key_sz), + table.format(&tbl, "%d", sz), + table.format(&tbl, "%8M", time_per_iter), + table.format(&tbl, "%5.3f MiB/s", options.megabytes_per_second), + ) + } + } + + table.row(&tbl) + + { + key := make([]byte, chacha20.KEY_SIZE, context.temp_allocator) + iv := make([]byte, chacha20.IV_SIZE, context.temp_allocator) + crypto.rand_bytes(key) + crypto.rand_bytes(iv) + + ctx: chacha20.Context + chacha20.init(&ctx, key, iv) + + for sz, _ in SIZES { + options := 
&time.Benchmark_Options{ + rounds = ITERS, + bytes = sz, + setup = setup_sized_buf, + bench = do_bench_chacha20, + teardown = teardown_sized_buf, + } + context.user_ptr = &ctx + + err := time.benchmark(options, context.allocator) + testing.expect(t, err == nil) + + time_per_iter := options.duration / ITERS + table.aligned_row_of_values( + &tbl, + .Right, + "chacha20", + table.format(&tbl, "%d", sz), + table.format(&tbl, "%8M", time_per_iter), + table.format(&tbl, "%5.3f MiB/s", options.megabytes_per_second), + ) + } + } + + log_table(&tbl) +}
+
+@(private = "file") // Benchmark body: XORs options.input in place once per round using the AES-CTR context in context.user_ptr.
+do_bench_aes_ctr :: proc(
+	options: ^time.Benchmark_Options,
+	allocator := context.allocator,
+) -> (
+	err: time.Benchmark_Error,
+) {
+	ctx := (^aes.Context_CTR)(context.user_ptr) // context.user_ptr must point at an initialized aes.Context_CTR
+
+	buf := options.input
+
+	for _ in 0 ..< options.rounds { // half-open range: exactly `rounds` passes, matching count/processed below
+		aes.xor_bytes_ctr(ctx, buf, buf)
+	}
+	options.count = options.rounds
+	options.processed = options.rounds * options.bytes
+
+	return
+}
+
+@(private = "file") // Benchmark body: XORs options.input in place once per round using the ChaCha20 context in context.user_ptr.
+do_bench_chacha20 :: proc(
+	options: ^time.Benchmark_Options,
+	allocator := context.allocator,
+) -> (
+	err: time.Benchmark_Error,
+) {
+	ctx := (^chacha20.Context)(context.user_ptr) // context.user_ptr must point at an initialized chacha20.Context
+
+	buf := options.input
+
+	for _ in 0 ..< options.rounds { // half-open range: exactly `rounds` passes, matching count/processed below
+		chacha20.xor_bytes(ctx, buf, buf)
+	}
+	options.count = options.rounds
+	options.processed = options.rounds * options.bytes
+
+	return
+}
diff --git a/tests/benchmark/crypto/benchmark_utils.odin b/tests/benchmark/crypto/benchmark_utils.odin new file mode 100644 index 000000000..6609adbf7 --- /dev/null +++ b/tests/benchmark/crypto/benchmark_utils.odin @@ -0,0 +1,50 @@ +package benchmark_core_crypto + +import "core:crypto" +import "core:fmt" +import "core:log" +import "core:strings" +import "core:text/table" +import "core:time" + +@(private) +log_table :: #force_inline proc(tbl: ^table.Table) { + sb := strings.builder_make() + defer strings.builder_destroy(&sb) + + wr := strings.to_writer(&sb) + + fmt.sbprintln(&sb) + table.write_plain_table(wr, tbl) +
log.info(strings.to_string(sb))
+}
+
+@(private) // Benchmark setup: allocates options.bytes of input from `allocator` and fills it with random bytes.
+setup_sized_buf :: proc(
+	options: ^time.Benchmark_Options,
+	allocator := context.allocator,
+) -> (
+	err: time.Benchmark_Error,
+) {
+	assert(options != nil)
+
+	options.input = make([]u8, options.bytes, allocator)
+	if len(options.input) > 0 { // nothing to randomize for a zero-length buffer
+		crypto.rand_bytes(options.input)
+	}
+	return nil if len(options.input) == options.bytes else .Allocation_Error // a short slice means the allocation failed
+}
+
+@(private) // Benchmark teardown: releases the buffer created by setup_sized_buf.
+teardown_sized_buf :: proc(
+	options: ^time.Benchmark_Options,
+	allocator := context.allocator,
+) -> (
+	err: time.Benchmark_Error,
+) {
+	assert(options != nil)
+
+	delete(options.input, allocator) // free with the allocator setup_sized_buf allocated from, not context.allocator
+	return nil
+}
diff --git a/tests/core/crypto/test_core_crypto_aead.odin b/tests/core/crypto/test_core_crypto_aead.odin index 90eedc0b2..961311cd6 100644 --- a/tests/core/crypto/test_core_crypto_aead.odin +++ b/tests/core/crypto/test_core_crypto_aead.odin @@ -1,7 +1,10 @@ package test_core_crypto import "base:runtime" +import "core:crypto/aes" +import "core:crypto/aegis" import "core:crypto/aead" +import "core:crypto/deoxysii" import "core:encoding/hex" import "core:testing" @@ -17,6 +20,14 @@ test_aead :: proc(t: ^testing.T) { for impl in supported_chacha_impls() { append(&chacha_impls, impl) } + aegis_impls := make([dynamic]aead.Implementation, context.temp_allocator) + for impl in supported_aegis_impls() { + append(&aegis_impls, impl) + } + deoxysii_impls := make([dynamic]aead.Implementation, context.temp_allocator) + for impl in supported_deoxysii_impls() { + append(&deoxysii_impls, impl) + } impls := [aead.Algorithm][dynamic]aead.Implementation{ .Invalid = nil, .AES_GCM_128 = aes_impls, @@ -24,6 +35,11 @@ test_aead :: proc(t: ^testing.T) { .AES_GCM_256 = aes_impls, .CHACHA20POLY1305 = chacha_impls, .XCHACHA20POLY1305 = chacha_impls, + .AEGIS_128L = aegis_impls, + .AEGIS_128L_256 = aegis_impls, + .AEGIS_256 = aegis_impls, + .AEGIS_256_256 = aegis_impls, + .DEOXYS_II_256 = deoxysii_impls, } test_vectors := []struct{ @@ -224,6 +240,263 @@ test_aead ::
proc(t: ^testing.T) { "bd6d179d3e83d43b9576579493c0e939572a1700252bfaccbed2902c21396cbb731c7f1b0b4aa6440bf3a82f4eda7e39ae64c6708c54c216cb96b72e1213b4522f8c9ba40db5d945b11b69b982c1bb9e3f3fac2bc369488f76b2383565d3fff921f9664c97637da9768812f615c68b13b52e", "c0875924c1c7987947deafd8780acf49", }, + // AEGIS-128L + // https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-11.txt + { + .AEGIS_128L, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "", + "00000000000000000000000000000000", + "c1c0e58bd913006feba00f4b3cc3594e", + "abe0ece80c24868a226a35d16bdae37a", + }, + { + .AEGIS_128L_256, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "", + "00000000000000000000000000000000", + "c1c0e58bd913006feba00f4b3cc3594e", + "25835bfbb21632176cf03840687cb968cace4617af1bd0f7d064c639a5c79ee4", + }, + { + .AEGIS_128L, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "", + "", + "", + "c2b879a67def9d74e6c14f708bbcc9b4", + }, + { + .AEGIS_128L_256, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "", + "", + "", + "1360dc9db8ae42455f6e5b6a9d488ea4f2184c4e12120249335c4ee84bafe25d", + }, + { + .AEGIS_128L, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "79d94593d8c2119d7e8fd9b8fc77845c5c077a05b2528b6ac54b563aed8efe84", + "cc6f3372f6aa1bb82388d695c3962d9a", + }, + { + .AEGIS_128L_256, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "79d94593d8c2119d7e8fd9b8fc77845c5c077a05b2528b6ac54b563aed8efe84", + "022cb796fe7e0ae1197525ff67e309484cfbab6528ddef89f17d74ef8ecd82b3", + }, + { + .AEGIS_128L, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d", + 
"79d94593d8c2119d7e8fd9b8fc77", + "5c04b3dba849b2701effbe32c7f0fab7", + }, + { + .AEGIS_128L_256, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d", + "79d94593d8c2119d7e8fd9b8fc77", + "86f1b80bfb463aba711d15405d094baf4a55a15dbfec81a76f35ed0b9c8b04ac", + }, + { + .AEGIS_128L, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829", + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637", + "b31052ad1cca4e291abcf2df3502e6bdb1bfd6db36798be3607b1f94d34478aa7ede7f7a990fec10", + "7542a745733014f9474417b337399507", + }, + { + .AEGIS_128L_256, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829", + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637", + "b31052ad1cca4e291abcf2df3502e6bdb1bfd6db36798be3607b1f94d34478aa7ede7f7a990fec10", + "b91e2947a33da8bee89b6794e647baf0fc835ff574aca3fc27c33be0db2aff98", + }, + // AEGIS-256 + // https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-11.txt + { + .AEGIS_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "", + "00000000000000000000000000000000", + "754fc3d8c973246dcc6d741412a4b236", + "3fe91994768b332ed7f570a19ec5896e", + }, + { + .AEGIS_256_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "", + "00000000000000000000000000000000", + "754fc3d8c973246dcc6d741412a4b236", + "1181a1d18091082bf0266f66297d167d2e68b845f61a3b0527d31fc7b7b89f13", + }, + { + .AEGIS_256, + "1001000000000000000000000000000000000000000000000000000000000000", + 
"1000020000000000000000000000000000000000000000000000000000000000", + "", + "", + "", + "e3def978a0f054afd1e761d7553afba3", + }, + { + .AEGIS_256_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "", + "", + "", + "6a348c930adbd654896e1666aad67de989ea75ebaa2b82fb588977b1ffec864a", + }, + { + .AEGIS_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "f373079ed84b2709faee373584585d60accd191db310ef5d8b11833df9dec711", + "8d86f91ee606e9ff26a01b64ccbdd91d", + }, + { + .AEGIS_256_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "f373079ed84b2709faee373584585d60accd191db310ef5d8b11833df9dec711", + "b7d28d0c3c0ebd409fd22b44160503073a547412da0854bfb9723020dab8da1a", + }, + { + .AEGIS_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d", + "f373079ed84b2709faee37358458", + "c60b9c2d33ceb058f96e6dd03c215652", + }, + { + .AEGIS_256_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d", + "f373079ed84b2709faee37358458", + "8c1cc703c81281bee3f6d9966e14948b4a175b2efbdc31e61a98b4465235c2d9", + }, + { + .AEGIS_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + 
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829", + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637", + "57754a7d09963e7c787583a2e7b859bb24fa1e04d49fd550b2511a358e3bca252a9b1b8b30cc4a67", + "ab8a7d53fd0e98d727accca94925e128", + }, + { + .AEGIS_256_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829", + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637", + "57754a7d09963e7c787583a2e7b859bb24fa1e04d49fd550b2511a358e3bca252a9b1b8b30cc4a67", + "a3aca270c006094d71c20e6910b5161c0826df233d08919a566ec2c05990f734", + }, + // Deoxys-II-256 + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "", + "", + "", + "2b97bd77712f0cde975309959dfe1d7c", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "", + "", + "54708ae5565a71f147bdb94d7ba3aed7", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "f495c9c03d29989695d98ff5d430650125805c1e0576d06f26cbda42b1f82238b8", + "", + "", + "3277689dc4208cc1ff59d15434a1baf1", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "9da20db1c2781f6669257d87e2a4d9be1970f7581bef2c995e1149331e5e8cc1", + "92ce3aec3a4b72ff9eab71c2a93492fa", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "", + 
"15cd77732f9d0c4c6e581ef400876ad9188c5b8850ebd38224da95d7cdc99f7acc", + "e5ffd2abc5b459a73667756eda6443ede86c0883fc51dd75d22bb14992c684618c", + "5fa78d57308f19d0252072ee39df5ecc", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "000102030405060708090a0b0c0d0e0f", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "109f8a168b36dfade02628a9e129d5257f03cc7912aefa79729b67b186a2b08f", + "6549f9bf10acba0a451dbb2484a60d90", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "000102030405060708090a0b0c0d0e0f10", + "422857fb165af0a35c03199fb895604dca9cea6d788954962c419e0d5c225c0327", + "7d772203fa38be296d8d20d805163130c69aba8cb16ed845c2296c61a8f34b394e", + "0b3f10e3933c78190b24b33008bf80e9", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + 
"3290bb8441279dc6083a43e9048c3dc08966ab30d7a6b35759e7a13339f124918f3b5ab1affa65e6c0e3680eb33a6ec82424ab1ce5a40b8654e13d845c29b13896a1466a75fc875acba4527ded37ed00c600a357c9a6e586c74cf3d85cd3258c813218f319d12b82480e5124ff19ec00bda1fbb8bd25eeb3de9fcbf3296deba250caf7e9f4ef0be1918e24221dd0be888c59c166ad761d7b58462a1b1d44b04265b45827172c133dd5b6c870b9af7b21368d12a88f4efa1751047543d584382d9ec22e7550d50ecddba27d1f65453f1f3398de54ee8c1f4ac8e16f5523d89641e99a632380af0f0b1e6b0e192ec29bf1d8714978ff9fbfb93604142393e9a82c3aaebbbe15e3b4e5cfd18bdfe309315c9f9f830deebe2edcdc24f8eca90fda49f6646e789c5041fb5be933fa843278e95f3a54f8eb41f14777ea949d5ea442b01249e64816151a325769e264ed4acd5c3f21700ca755d5bc0c2c5f9453419510bc74f2d71621dcecb9efc9c24791b4bb560fb70a8231521d6560af89d8d50144d9c080863f043781153bcd59030e60bd17a6d7aa083211b67b581fa4f74cce4d030d1e8f9429fd725c110040d41eb6989ffb1595c72cbe3c9b78a8ab80d71a6a5283da77b89cae295bb13c14fbe466b617f4da8ad60b085e2ea153f6713ae0046aa31e0ba44e43ef36a111bf05c073a4e3624cd35f63a546f9142b35aa81b8826d", + 
"83dab23b1379e090755c99079cfe918cb737e989f2d720ccaff493a744927644fec3653211fa75306a83486e5c34ecfe63870c97251a73e4b9033ae374809711b211ed5d293a592e466a81170f1d85750b5ca025ccd4579947edbae9ec132bfb1a7233ad79fae30006a6699f143893861b975226ed9d3cfb8a240be232fbf4e83755d59d20bc2faa2ea5e5b0428427485cca5e76a89fe32bdd59ab4177ad7cb1899c101e3c4f7535129591390ebdf30140846078b13867bbb2efd6cf434afe356eb18d716b21fd664c26c908496534bf2cde6d6b897799016594fb6d9f830ae5f44ccec26d42ff0d1a21b80cdbe8c8c170a5f766fad884abcc781b5b8ebc0f559bfeaa4557b04d977d51411a7f47bf437d0280cf9f92bc4f9cd6226337a492320851955adae2cafea22a89c3132dd252e4728328eda05555dff3241404341b8aa502d45c456113af42a8e91a85e4b4e9555028982ec3d144722af0eb04a6d3b8127c3040629de53f5fd187048198e8f8e8cc857afcbae45c693fec12fc2149d5e7587d0121b1717d0147f6979f75e8f085293f705c3399a6cc8df7057bf481e6c374edf0a0af7479f858045357b7fe21021c3fabdaf012652bf2e5db257bd9490ce637a81477bd3f9814a2198fdb9afa9344321f2393798670e588c47a1924d592cda3eb5a96754dfd92d87ee1ffa9d4ee586c85d7518c5d2db57d0451c33de0", + 
"88294fcef65a1bdfd7baaa472816c64ef5bef2622b88c1ec5a739396157ef4935f3aa76449e391c32da28ee2857f399ac3dd95aed30cfb26cc0063cd4cd8f7431108176fbf370123856662b000a8348e5925fbb97c9ec0c737758330a7983f06b51590c1d2f5e5faaf0eb58e34e19e5fc85cec03d3926dd46a79ba7026e83dec24e07484c9103dd0cdb0edb505500caca5e1d5dbc71348cf00648821488ebaab7f9d84bbbf91b3c521dbef30110e7bd94f8dad5ab8e0cc5411ca9682d210d5d80c0c4bdbba8181789a4273d6deb80899fdcd976ca6f3a9770b54305f586a04256cfbeb4c11254e88559f294db3b9a94b80ab9f9a02cb4c0748de0af7818685521691dba5738be546dba13a56016fb8635af9dff50f25d1b17ad21707db2640a76a741e65e559b2afaaec0f37e18436bf02008f84dbd7b2698687a22376b65dc7524fca8a28709eee3f3caee3b28ed1173d1e08ee849e2ca63d2c90d555755c8fbafd5d2f4b37f06a1dbd6852ee2ffcfe79d510152e98fc4f3094f740a4aede9ee378b606d34576776bf5f1269f5385a84b3928433bfca177550ccfcd22cd0331bbc595e38c2758b2662476fa66354c4e84c7b360405aa3f5b2a48621bdca1a90c69b21789c91b5b8c568e3c741d99e22f6d7e26f2abed045f1d578b782ab4a5cf2af636d842b3012e180e4b045d8d15b057b69c92398a517053daf9be7c2935e", + "a616f0c218e18b526cf2a3f8c115e262", + }, } for v, _ in test_vectors { algo_name := aead.ALGORITHM_NAMES[v.algo] @@ -337,3 +610,23 @@ test_aead :: proc(t: ^testing.T) { } } } + +supported_aegis_impls :: proc() -> [dynamic]aes.Implementation { + impls := make([dynamic]aes.Implementation, 0, 2, context.temp_allocator) + append(&impls, aes.Implementation.Portable) + if aegis.is_hardware_accelerated() { + append(&impls, aes.Implementation.Hardware) + } + + return impls +} + +supported_deoxysii_impls :: proc() -> [dynamic]aes.Implementation { + impls := make([dynamic]aes.Implementation, 0, 2, context.temp_allocator) + append(&impls, aes.Implementation.Portable) + if deoxysii.is_hardware_accelerated() { + append(&impls, aes.Implementation.Hardware) + } + + return impls +} diff --git a/tests/core/crypto/test_core_crypto_ecc25519.odin b/tests/core/crypto/test_core_crypto_edwards.odin similarity index 91% rename from tests/core/crypto/test_core_crypto_ecc25519.odin 
rename to tests/core/crypto/test_core_crypto_edwards.odin index fec4fa38e..61933c00f 100644 --- a/tests/core/crypto/test_core_crypto_ecc25519.odin +++ b/tests/core/crypto/test_core_crypto_edwards.odin @@ -7,6 +7,7 @@ import field "core:crypto/_fiat/field_curve25519" import "core:crypto/ed25519" import "core:crypto/ristretto255" import "core:crypto/x25519" +import "core:crypto/x448" @(test) test_sqrt_ratio_m1 :: proc(t: ^testing.T) { @@ -684,6 +685,68 @@ test_x25519 :: proc(t: ^testing.T) { } } +@(test) +test_x448 :: proc(t: ^testing.T) { + // Local copy of this so that the base point doesn't need to be exported. + _BASE_POINT: [56]byte = { + 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + } + + test_vectors := []struct { + scalar: string, + point: string, + product: string, + } { + // Test vectors from RFC 7748 + { + "3d262fddf9ec8e88495266fea19a34d28882acef045104d0d1aae121700a779c984c24f8cdd78fbff44943eba368f54b29259a4f1c600ad3", + "06fce640fa3487bfda5f6cf2d5263f8aad88334cbd07437f020f08f9814dc031ddbdc38c19c6da2583fa5429db94ada18aa7a7fb4ef8a086", + "ce3e4ff95a60dc6697da1db1d85e6afbdf79b50a2412d7546d5f239fe14fbaadeb445fc66a01b0779d98223961111e21766282f73dd96b6f", + }, + { + "203d494428b8399352665ddca42f9de8fef600908e0d461cb021f8c538345dd77c3e4806e25f46d3315c44e0a5b4371282dd2c8d5be3095f", + "0fbcc2f993cd56d3305b0b7d9e55d4c1a8fb5dbb52f8e9a1e9b6201b165d015894e56c4d3570bee52fe205e28a78b91cdfbde71ce8d157db", + "884a02576239ff7a2f2f63b2db6a9ff37047ac13568e1e30fe63c4a7ad1b3ee3a5700df34321d62077e63633c575c1c954514e99da7c179d", + }, + } + for v, _ in test_vectors { + scalar, _ := hex.decode(transmute([]byte)(v.scalar), context.temp_allocator) + point, _ := hex.decode(transmute([]byte)(v.point), context.temp_allocator) + + derived_point: [x448.POINT_SIZE]byte + x448.scalarmult(derived_point[:], scalar[:], point[:]) + derived_point_str := 
string(hex.encode(derived_point[:], context.temp_allocator)) + + testing.expectf( + t, + derived_point_str == v.product, + "Expected %s for %s * %s, but got %s instead", + v.product, + v.scalar, + v.point, + derived_point_str, + ) + + // Abuse the test vectors to sanity-check the scalar-basepoint multiply. + p1, p2: [x448.POINT_SIZE]byte + x448.scalarmult_basepoint(p1[:], scalar[:]) + x448.scalarmult(p2[:], scalar[:], _BASE_POINT[:]) + p1_str := string(hex.encode(p1[:], context.temp_allocator)) + p2_str := string(hex.encode(p2[:], context.temp_allocator)) + testing.expectf( + t, + p1_str == p2_str, + "Expected %s for %s * basepoint, but got %s instead", + p2_str, + v.scalar, + p1_str, + ) + } +} + @(private) ge_str :: proc(ge: ^ristretto255.Group_Element) -> string { b: [ristretto255.ELEMENT_SIZE]byte diff --git a/tests/core/os/os2/dir.odin b/tests/core/os/os2/dir.odin index 7077e9ae2..8ef333219 100644 --- a/tests/core/os/os2/dir.odin +++ b/tests/core/os/os2/dir.odin @@ -2,27 +2,27 @@ package tests_core_os_os2 import os "core:os/os2" import "core:log" -import "core:path/filepath" import "core:slice" import "core:testing" import "core:strings" @(test) test_read_dir :: proc(t: ^testing.T) { - path := filepath.join({#directory, "../dir"}) + path, err_join := os.join_path({#directory, "../dir"}, context.allocator) defer delete(path) - fis, err := os.read_all_directory_by_path(path, context.allocator) + fis, err_read := os.read_all_directory_by_path(path, context.allocator) defer os.file_info_slice_delete(fis, context.allocator) slice.sort_by_key(fis, proc(fi: os.File_Info) -> string { return fi.name }) - if err == .Unsupported { + if err_read == .Unsupported { log.warn("os2 directory functionality is unsupported, skipping test") return } - testing.expect_value(t, err, nil) + testing.expect_value(t, err_join, nil) + testing.expect_value(t, err_read, nil) testing.expect_value(t, len(fis), 2) testing.expect_value(t, fis[0].name, "b.txt") @@ -34,8 +34,9 @@ test_read_dir :: 
proc(t: ^testing.T) { @(test) test_walker :: proc(t: ^testing.T) { - path := filepath.join({#directory, "../dir"}) + path, err := os.join_path({#directory, "../dir"}, context.allocator) defer delete(path) + testing.expect_value(t, err, nil) w := os.walker_create(path) defer os.walker_destroy(&w) @@ -45,11 +46,12 @@ test_walker :: proc(t: ^testing.T) { @(test) test_walker_file :: proc(t: ^testing.T) { - path := filepath.join({#directory, "../dir"}) + path, err_join := os.join_path({#directory, "../dir"}, context.allocator) defer delete(path) + testing.expect_value(t, err_join, nil) - f, err := os.open(path) - testing.expect_value(t, err, nil) + f, err_open := os.open(path) + testing.expect_value(t, err_open, nil) defer os.close(f) w := os.walker_create(f) @@ -64,10 +66,18 @@ test_walker_internal :: proc(t: ^testing.T, w: ^os.Walker) { path: string, } + joined_1, err_joined_1 := os.join_path({"dir", "b.txt"}, context.allocator) + joined_2, err_joined_2 := os.join_path({"dir", "sub"}, context.allocator) + joined_3, err_joined_3 := os.join_path({"dir", "sub", ".gitkeep"}, context.allocator) + + testing.expect_value(t, err_joined_1, nil) + testing.expect_value(t, err_joined_2, nil) + testing.expect_value(t, err_joined_3, nil) + expected := [?]Seen{ - {.Regular, filepath.join({"dir", "b.txt"})}, - {.Directory, filepath.join({"dir", "sub"})}, - {.Regular, filepath.join({"dir", "sub", ".gitkeep"})}, + {.Regular, joined_1}, + {.Directory, joined_2}, + {.Regular, joined_3}, } seen: [dynamic]Seen diff --git a/tests/core/os/os2/file.odin b/tests/core/os/os2/file.odin index c4df74f4a..0152a2008 100644 --- a/tests/core/os/os2/file.odin +++ b/tests/core/os/os2/file.odin @@ -2,11 +2,13 @@ package tests_core_os_os2 import os "core:os/os2" import "core:testing" -import "core:path/filepath" @(test) test_clone :: proc(t: ^testing.T) { - f, err := os.open(filepath.join({#directory, "file.odin"}, context.temp_allocator)) + joined, err := os.join_path({#directory, "file.odin"}, 
context.temp_allocator) + testing.expect_value(t, err, nil) + f: ^os.File + f, err = os.open(joined) testing.expect_value(t, err, nil) testing.expect(t, f != nil) diff --git a/tests/core/os/os2/path.odin b/tests/core/os/os2/path.odin index b91f43368..2cf1f1f1c 100644 --- a/tests/core/os/os2/path.odin +++ b/tests/core/os/os2/path.odin @@ -2,7 +2,6 @@ package tests_core_os_os2 import os "core:os/os2" import "core:log" -import "core:path/filepath" import "core:testing" import "core:strings" @@ -17,6 +16,351 @@ test_executable :: proc(t: ^testing.T) { testing.expect_value(t, err, nil) testing.expect(t, len(path) > 0) - testing.expect(t, filepath.is_abs(path)) - testing.expectf(t, strings.contains(path, filepath.base(os.args[0])), "expected the executable path to contain the base of os.args[0] which is %q", filepath.base(os.args[0])) + testing.expect(t, os.is_absolute_path(path)) + _, filename := os.split_path(os.args[0]) + testing.expectf(t, strings.contains(path, filename), "expected the executable path to contain the base of os.args[0] which is %q", filename) +} + +posix_to_dos_path :: proc(path: string) -> string { + if len(path) == 0 { + return path + } + path := path + path, _ = strings.replace_all(path, `/`, `\`, context.temp_allocator) + if path[0] == '\\' { + path = strings.concatenate({"C:", path}, context.temp_allocator) + } + return path +} + +@(test) +test_clean_path :: proc(t: ^testing.T) { + Test_Case :: struct{ + path: string, + expected: string, + } + + test_cases := [?]Test_Case { + {`../../foo/../../`, `../../..`}, + {`../../foo/..`, `../..`}, + {`../../foo`, `../../foo`}, + {`../..`, `../..`}, + {`.././foo`, `../foo`}, + {`..`, `..`}, + {`.`, `.`}, + {`.foo`, `.foo`}, + {`/../../foo/../../`, `/`}, + {`/../`, `/`}, + {`/..`, `/`}, + {`/`, `/`}, + {`//home/foo/bar/../../`, `/home`}, + {`/a/../..`, `/`}, + {`/a/../`, `/`}, + {`/a/あ`, `/a/あ`}, + {`/a/あ/..`, `/a`}, + {`/あ/a/..`, `/あ`}, + {`/あ/a/../あ`, `/あ/あ`}, + {`/home/../`, `/`}, + {`/home/..`, `/`}, + 
{`/home/foo/../../usr`, `/usr`}, + {`/home/foo/../..`, `/`}, + {`/home/foo/../`, `/home`}, + {``, `.`}, + {`a/..`, `.`}, + {`a`, `a`}, + {`abc//.//../foo`, `foo`}, + {`foo`, `foo`}, + {`home/foo/bar/../../`, `home`}, + } + + when ODIN_OS == .Windows { + for &tc in test_cases { + tc.path = posix_to_dos_path(tc.path) + tc.expected = posix_to_dos_path(tc.expected) + } + } + + for tc in test_cases { + joined, err := os.clean_path(tc.path, context.temp_allocator) + testing.expectf(t, joined == tc.expected && err == nil, "expected clean_path(%q) -> %q; got: %q, %v", tc.path, tc.expected, joined, err) + } +} + +@(test) +test_is_absolute_path :: proc(t: ^testing.T) { + when ODIN_OS == .Windows { + testing.expect(t, os.is_absolute_path(`C:\Windows`)) + } else { + testing.expect(t, os.is_absolute_path("/home")) + } + testing.expect(t, !os.is_absolute_path("home")) +} + +@(test) +test_get_relative_path :: proc(t: ^testing.T) { + Test_Case :: struct { + base, target: string, + expected: string, + } + + Fail_Case :: struct { + base, target: string, + } + + test_cases := [?]Test_Case { + {"", "foo", "foo"}, + {".", "foo", "foo"}, + {"/", "/", "."}, + {"/", "/home/alice/bert", "home/alice/bert"}, + {"/a", "/b", "../b"}, + {"/あ", "/あ/a", "a"}, + {"/a", "/a/あ", "あ"}, + {"/あ", "/い", "../い"}, + {"/a", "/usr", "../usr"}, + {"/home", "/", ".."}, + {"/home", "/home/alice/bert", "alice/bert"}, + {"/home/foo", "/", "../.."}, + {"/home/foo", "/home", ".."}, + {"/home/foo", "/home/alice/bert", "../alice/bert"}, + {"/home/foo", "/home/foo", "."}, + {"/home/foo", "/home/foo/bar", "bar"}, + {"/home/foo/bar", "/home", "../.."}, + {"/home/foo/bar", "/home/alice/bert", "../../alice/bert"}, + {"/home/foo/bar/bert", "/home/alice/bert", "../../../alice/bert"}, + {"/www", "/mount", "../mount"}, + {"foo", ".", ".."}, + {"foo", "bar", "../bar"}, + {"foo", "bar", "../bar"}, + {"foo", "../bar", "../../bar"}, + {"foo", "foo", "."}, + {"foo", "foo/bar", "bar"}, + {"home/foo/bar", "home/alice/bert", 
"../../alice/bert"}, + } + + fail_cases := [?]Fail_Case { + {"", "/home"}, + {"/home", ""}, + {"..", ""}, + } + + when ODIN_OS == .Windows { + for &tc in test_cases { + tc.base = posix_to_dos_path(tc.base) + tc.target = posix_to_dos_path(tc.target) + // Make one part all capitals to test case-insensitivity. + tc.target = strings.to_upper(tc.target, context.temp_allocator) + tc.expected = posix_to_dos_path(tc.expected) + } + for &tc in fail_cases { + tc.base = posix_to_dos_path(tc.base) + tc.target = posix_to_dos_path(tc.target) + } + } + + for tc in test_cases { + result, err := os.get_relative_path(tc.base, tc.target, context.temp_allocator) + joined, err2 := os.join_path({tc.base, result}, context.temp_allocator) + + when ODIN_OS == .Windows { + passed := strings.equal_fold(result, tc.expected) && err == nil + join_guaranteed := strings.equal_fold(joined, tc.target) && err2 == nil + } else { + passed := result == tc.expected && err == nil + join_guaranteed := joined == tc.target && err2 == nil + } + testing.expectf(t, passed, "expected get_relative_path(%q, %q) -> %q; got %q, %v", tc.base, tc.target, tc.expected, result, err) + testing.expectf(t, join_guaranteed, "join_path({{%q, %q}}) guarantee of get_relative_path(%q, %q) failed; got %q, %v instead", tc.base, result, tc.base, tc.target, joined, err2) + } + + for tc in fail_cases { + result, err := os.get_relative_path(tc.base, tc.target, context.temp_allocator) + testing.expectf(t, result == "" && err != nil, "expected get_relative_path(%q, %q) to fail, got %q, %v", tc.base, tc.target, result, err) + } +} + +@(test) +test_split_path :: proc(t: ^testing.T) { + Test_Case :: struct { + path: string, + dir, filename: string, + } + + test_cases := [?]Test_Case { + { "", "", "" }, + { "/", "/", "" }, + { "/a", "/", "a" }, + { "readme.txt", "", "readme.txt" }, + { "/readme.txt", "/", "readme.txt" }, + { "/var/readme.txt", "/var", "readme.txt" }, + { "/home/foo/bar.tar.gz", "/home/foo", "bar.tar.gz" }, + } + + when 
ODIN_OS == .Windows { + for &tc in test_cases { + tc.path = posix_to_dos_path(tc.path) + tc.dir = posix_to_dos_path(tc.dir) + tc.filename = posix_to_dos_path(tc.filename) + } + } + + for tc in test_cases { + dir, filename := os.split_path(tc.path) + testing.expectf(t, dir == tc.dir && filename == tc.filename, "expected split_path(%q) -> %q, %q; got: %q, %q", tc.path, tc.dir, tc.filename, dir, filename) + } +} + +@(test) +test_join_path :: proc(t: ^testing.T) { + Test_Case :: struct { + elems: []string, + expected: string, + } + + test_cases := [?]Test_Case { + { {"" }, "" }, + { {"/" }, "/" }, + { {"home" }, "home" }, + { {"home", "" }, "home" }, + { {"/home", "" }, "/home" }, + { {"", "home" }, "home" }, + { {"", "/home" }, "/home" }, + { {"", "/home", "", "foo" }, "/home/foo" }, + { {"", "home", "", "", "foo", "" }, "home/foo" }, + } + + when ODIN_OS == .Windows { + for &tc in test_cases { + for &elem in tc.elems { + elem = posix_to_dos_path(elem) + } + tc.expected = posix_to_dos_path(tc.expected) + } + } + + for tc in test_cases { + result, err := os.join_path(tc.elems, context.temp_allocator) + testing.expectf(t, result == tc.expected && err == nil, "expected join_path(%v) -> %q; got: %q, %v", tc.elems, tc.expected, result, err) + } +} + +@(test) +test_split_filename :: proc(t: ^testing.T) { + Test_Case :: struct { + filename: string, + base, ext: string, + } + + test_cases := [?]Test_Case { + {"", "", ""}, + {"a", "a", ""}, + {".", ".", ""}, + {".a", ".a", ""}, + {".foo", ".foo", ""}, + {".foo.txt", ".foo", "txt"}, + {"a.b", "a", "b"}, + {"foo", "foo", ""}, + {"readme.txt", "readme", "txt"}, + {"pkg.tar.gz", "pkg.tar", "gz"}, + // Assert API ignores directory hierarchies: + {"dir/FILE.TXT", "dir/FILE", "TXT"}, + } + + for tc in test_cases { + base, ext := os.split_filename(tc.filename) + testing.expectf(t, base == tc.base && ext == tc.ext, "expected split_filename(%q) -> %q, %q; got: %q, %q", tc.filename, tc.base, tc.ext, base, ext) + } +} + +@(test) 
+test_split_filename_all :: proc(t: ^testing.T) { + Test_Case :: struct { + filename: string, + base, ext: string, + } + + test_cases := [?]Test_Case { + {"", "", ""}, + {"a", "a", ""}, + {".", ".", ""}, + {".a", ".a", ""}, + {".foo", ".foo", ""}, + {".foo.txt", ".foo", "txt"}, + {"a.b", "a", "b"}, + {"foo", "foo", ""}, + {"readme.txt", "readme", "txt"}, + {"pkg.tar.gz", "pkg", "tar.gz"}, + // Assert API ignores directory hierarchies: + {"dir/FILE.TXT", "dir/FILE", "TXT"}, + } + + for tc in test_cases { + base, ext := os.split_filename_all(tc.filename) + testing.expectf(t, base == tc.base && ext == tc.ext, "expected split_filename_all(%q) -> %q, %q; got: %q, %q", tc.filename, tc.base, tc.ext, base, ext) + } +} + +@(test) +test_join_filename :: proc(t: ^testing.T) { + Test_Case :: struct { + base, ext: string, + expected: string, + } + + test_cases := [?]Test_Case { + {"", "", ""}, + {"", "foo", "foo"}, + {"foo", "", "foo"}, + {"readme", "txt", "readme.txt"}, + {"pkg.tar", "gz", "pkg.tar.gz"}, + {"pkg", "tar.gz", "pkg.tar.gz"}, + // Assert API ignores directory hierarchies: + {"dir/FILE", "TXT", "dir/FILE.TXT"}, + } + + for tc in test_cases { + result, err := os.join_filename(tc.base, tc.ext, context.temp_allocator) + testing.expectf(t, result == tc.expected && err == nil, "expected join_filename(%q, %q) -> %q; got: %q, %v", tc.base, tc.ext, tc.expected, result, err) + } +} + +@(test) +test_split_path_list :: proc(t: ^testing.T) { + Test_Case :: struct { + path_list: string, + expected: []string, + } + + when ODIN_OS != .Windows { + test_cases := [?]Test_Case { + {``, {}}, + {`/bin:`, {`/bin`, ``}}, + {`/usr/local/bin`, {`/usr/local/bin`}}, + {`/usr/local/bin:/usr/bin`, {`/usr/local/bin`, `/usr/bin`}}, + {`"/extra bin":/bin`, {`/extra bin`, `/bin`}}, + {`"/extra:bin":/bin`, {`/extra:bin`, `/bin`}}, + } + } else { + test_cases := [?]Test_Case { + {``, {}}, + {`C:\bin;`, {`C:\bin`, ``}}, + {`C:\usr\local\bin`, {`C:\usr\local\bin`}}, + {`C:\usr\local\bin;C:\usr\bin`, 
{`C:\usr\local\bin`, `C:\usr\bin`}}, + {`"C:\extra bin";C:\bin`, {`C:\extra bin`, `C:\bin`}}, + {`"C:\extra;bin";C:\bin`, {`C:\extra;bin`, `C:\bin`}}, + } + } + + for tc in test_cases { + result, err := os.split_path_list(tc.path_list, context.temp_allocator) + if testing.expectf(t, len(result) == len(tc.expected), "expected split_path_list(%q) -> %v; got %v, %v", tc.path_list, tc.expected, result, err) { + ok := true + for entry, i in result { + if entry != tc.expected[i] { + ok = false + break + } + } + testing.expectf(t, ok, "expected split_path_list(%q) -> %v; got %v, %v", tc.path_list, tc.expected, result, err) + } + } } diff --git a/vendor/sdl3/SDL3.dll b/vendor/sdl3/SDL3.dll index 6c9d438f9..3bbfd4be8 100644 Binary files a/vendor/sdl3/SDL3.dll and b/vendor/sdl3/SDL3.dll differ diff --git a/vendor/sdl3/SDL3.lib b/vendor/sdl3/SDL3.lib index aec791196..f736458fb 100644 Binary files a/vendor/sdl3/SDL3.lib and b/vendor/sdl3/SDL3.lib differ diff --git a/vendor/sdl3/include/SDL.h b/vendor/sdl3/include/SDL.h index 861c404f5..9d2168877 100644 --- a/vendor/sdl3/include/SDL.h +++ b/vendor/sdl3/include/SDL.h @@ -20,7 +20,7 @@ */ /** - * Main include header for the SDL library, version 3.2.0 + * Main include header for the SDL library, version 3.2.10 * * It is almost always best to include just this one header instead of * picking out individual headers included here. 
There are exceptions to diff --git a/vendor/sdl3/include/SDL_assert.h b/vendor/sdl3/include/SDL_assert.h index 09b3b478f..6c90acc02 100644 --- a/vendor/sdl3/include/SDL_assert.h +++ b/vendor/sdl3/include/SDL_assert.h @@ -149,6 +149,8 @@ extern "C" { #define SDL_TriggerBreakpoint() __asm__ __volatile__ ( "bkpt #22\n\t" ) #elif defined(_WIN32) && ((defined(__GNUC__) || defined(__clang__)) && (defined(__arm64__) || defined(__aarch64__)) ) #define SDL_TriggerBreakpoint() __asm__ __volatile__ ( "brk #0xF000\n\t" ) +#elif defined(__GNUC__) || defined(__clang__) + #define SDL_TriggerBreakpoint() __builtin_trap() /* older gcc may not support SDL_HAS_BUILTIN(__builtin_trap) above */ #elif defined(__386__) && defined(__WATCOMC__) #define SDL_TriggerBreakpoint() { _asm { int 0x03 } } #elif defined(HAVE_SIGNAL_H) && !defined(__WATCOMC__) diff --git a/vendor/sdl3/include/SDL_atomic.h b/vendor/sdl3/include/SDL_atomic.h index 03e3fb134..78b5e0fa5 100644 --- a/vendor/sdl3/include/SDL_atomic.h +++ b/vendor/sdl3/include/SDL_atomic.h @@ -498,7 +498,7 @@ extern SDL_DECLSPEC int SDLCALL SDL_AddAtomicInt(SDL_AtomicInt *a, int v); * * ***Note: If you don't know what this macro is for, you shouldn't use it!*** * - * \param a a pointer to an SDL_AtomicInt to increment. + * \param a a pointer to an SDL_AtomicInt to decrement. * \returns true if the variable reached zero after decrementing, false * otherwise. * diff --git a/vendor/sdl3/include/SDL_audio.h b/vendor/sdl3/include/SDL_audio.h index 956938215..c6acf885f 100644 --- a/vendor/sdl3/include/SDL_audio.h +++ b/vendor/sdl3/include/SDL_audio.h @@ -781,7 +781,7 @@ extern SDL_DECLSPEC bool SDLCALL SDL_IsAudioDevicePlayback(SDL_AudioDeviceID dev * Physical devices can not be paused or unpaused, only logical devices * created through SDL_OpenAudioDevice() can be. * - * \param dev a device opened by SDL_OpenAudioDevice(). + * \param devid a device opened by SDL_OpenAudioDevice(). 
* \returns true on success or false on failure; call SDL_GetError() for more * information. * @@ -792,7 +792,7 @@ extern SDL_DECLSPEC bool SDLCALL SDL_IsAudioDevicePlayback(SDL_AudioDeviceID dev * \sa SDL_ResumeAudioDevice * \sa SDL_AudioDevicePaused */ -extern SDL_DECLSPEC bool SDLCALL SDL_PauseAudioDevice(SDL_AudioDeviceID dev); +extern SDL_DECLSPEC bool SDLCALL SDL_PauseAudioDevice(SDL_AudioDeviceID devid); /** * Use this function to unpause audio playback on a specified device. @@ -809,7 +809,7 @@ extern SDL_DECLSPEC bool SDLCALL SDL_PauseAudioDevice(SDL_AudioDeviceID dev); * Physical devices can not be paused or unpaused, only logical devices * created through SDL_OpenAudioDevice() can be. * - * \param dev a device opened by SDL_OpenAudioDevice(). + * \param devid a device opened by SDL_OpenAudioDevice(). * \returns true on success or false on failure; call SDL_GetError() for more * information. * @@ -820,7 +820,7 @@ extern SDL_DECLSPEC bool SDLCALL SDL_PauseAudioDevice(SDL_AudioDeviceID dev); * \sa SDL_AudioDevicePaused * \sa SDL_PauseAudioDevice */ -extern SDL_DECLSPEC bool SDLCALL SDL_ResumeAudioDevice(SDL_AudioDeviceID dev); +extern SDL_DECLSPEC bool SDLCALL SDL_ResumeAudioDevice(SDL_AudioDeviceID devid); /** * Use this function to query if an audio device is paused. @@ -832,7 +832,7 @@ extern SDL_DECLSPEC bool SDLCALL SDL_ResumeAudioDevice(SDL_AudioDeviceID dev); * created through SDL_OpenAudioDevice() can be. Physical and invalid device * IDs will report themselves as unpaused here. * - * \param dev a device opened by SDL_OpenAudioDevice(). + * \param devid a device opened by SDL_OpenAudioDevice(). * \returns true if device is valid and paused, false otherwise. * * \threadsafety It is safe to call this function from any thread. 
@@ -842,7 +842,7 @@ extern SDL_DECLSPEC bool SDLCALL SDL_ResumeAudioDevice(SDL_AudioDeviceID dev); * \sa SDL_PauseAudioDevice * \sa SDL_ResumeAudioDevice */ -extern SDL_DECLSPEC bool SDLCALL SDL_AudioDevicePaused(SDL_AudioDeviceID dev); +extern SDL_DECLSPEC bool SDLCALL SDL_AudioDevicePaused(SDL_AudioDeviceID devid); /** * Get the gain of an audio device. @@ -1583,6 +1583,9 @@ extern SDL_DECLSPEC bool SDLCALL SDL_PauseAudioStreamDevice(SDL_AudioStream *str * previously been paused. Once unpaused, any bound audio streams will begin * to progress again, and audio can be generated. * + * Remember, SDL_OpenAudioDeviceStream opens device in a paused state, so this + * function call is required for audio playback to begin on such device. + * * \param stream the audio stream associated with the audio device to resume. * \returns true on success or false on failure; call SDL_GetError() for more * information. @@ -1714,7 +1717,7 @@ typedef void (SDLCALL *SDL_AudioStreamCallback)(void *userdata, SDL_AudioStream * audio to the stream during this call; if needed, the request that triggered * this callback will obtain the new data immediately. * - * The callback's `approx_request` argument is roughly how many bytes of + * The callback's `additional_amount` argument is roughly how many bytes of * _unconverted_ data (in the stream's input format) is needed by the caller, * although this may overestimate a little for safety. This takes into account * how much is already in the stream and only asks for any extra necessary to @@ -1759,13 +1762,13 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetAudioStreamGetCallback(SDL_AudioStream * * The callback can (optionally) call SDL_GetAudioStreamData() to obtain audio * from the stream during this call. * - * The callback's `approx_request` argument is how many bytes of _converted_ - * data (in the stream's output format) was provided by the caller, although - * this may underestimate a little for safety. 
This value might be less than - * what is currently available in the stream, if data was already there, and - * might be less than the caller provided if the stream needs to keep a buffer - * to aid in resampling. Which means the callback may be provided with zero - * bytes, and a different amount on each call. + * The callback's `additional_amount` argument is how many bytes of + * _converted_ data (in the stream's output format) was provided by the + * caller, although this may underestimate a little for safety. This value + * might be less than what is currently available in the stream, if data was + * already there, and might be less than the caller provided if the stream + * needs to keep a buffer to aid in resampling. Which means the callback may + * be provided with zero bytes, and a different amount on each call. * * The callback may call SDL_GetAudioStreamAvailable to see the total amount * currently available to read from the stream, instead of the total provided diff --git a/vendor/sdl3/include/SDL_camera.h b/vendor/sdl3/include/SDL_camera.h index c4d05962b..5f3911fdf 100644 --- a/vendor/sdl3/include/SDL_camera.h +++ b/vendor/sdl3/include/SDL_camera.h @@ -239,7 +239,7 @@ extern SDL_DECLSPEC SDL_CameraID * SDLCALL SDL_GetCameras(int *count); * there _is_ a camera until the user has given you permission to check * through a scary warning popup. * - * \param devid the camera device instance ID to query. + * \param instance_id the camera device instance ID. * \param count a pointer filled in with the number of elements in the list, * may be NULL. 
* \returns a NULL terminated array of pointers to SDL_CameraSpec or NULL on @@ -254,7 +254,7 @@ extern SDL_DECLSPEC SDL_CameraID * SDLCALL SDL_GetCameras(int *count); * \sa SDL_GetCameras * \sa SDL_OpenCamera */ -extern SDL_DECLSPEC SDL_CameraSpec ** SDLCALL SDL_GetCameraSupportedFormats(SDL_CameraID devid, int *count); +extern SDL_DECLSPEC SDL_CameraSpec ** SDLCALL SDL_GetCameraSupportedFormats(SDL_CameraID instance_id, int *count); /** * Get the human-readable device name for a camera. diff --git a/vendor/sdl3/include/SDL_dialog.h b/vendor/sdl3/include/SDL_dialog.h index 460038ff2..ddb9e24d5 100644 --- a/vendor/sdl3/include/SDL_dialog.h +++ b/vendor/sdl3/include/SDL_dialog.h @@ -84,8 +84,8 @@ typedef struct SDL_DialogFileFilter * - A pointer to NULL, the user either didn't choose any file or canceled the * dialog. * - A pointer to non-`NULL`, the user chose one or more files. The argument - * is a null-terminated list of pointers to C strings, each containing a - * path. + * is a null-terminated array of pointers to UTF-8 encoded strings, each + * containing a path. * * The filelist argument should not be freed; it will automatically be freed * when the callback returns. 
diff --git a/vendor/sdl3/include/SDL_events.h b/vendor/sdl3/include/SDL_events.h index 1323e9f0b..56a2194b3 100644 --- a/vendor/sdl3/include/SDL_events.h +++ b/vendor/sdl3/include/SDL_events.h @@ -132,7 +132,7 @@ typedef enum SDL_EventType /* Window events */ /* 0x200 was SDL_WINDOWEVENT, reserve the number for sdl2-compat */ - /* 0x201 was SDL_EVENT_SYSWM, reserve the number for sdl2-compat */ + /* 0x201 was SDL_SYSWMEVENT, reserve the number for sdl2-compat */ SDL_EVENT_WINDOW_SHOWN = 0x202, /**< Window has been shown */ SDL_EVENT_WINDOW_HIDDEN, /**< Window has been hidden */ SDL_EVENT_WINDOW_EXPOSED, /**< Window has been exposed and should be redrawn, and can be redrawn directly from event watchers for this event */ @@ -1108,7 +1108,7 @@ typedef enum SDL_EventAction * \param numevents if action is SDL_ADDEVENT, the number of events to add * back to the event queue; if action is SDL_PEEKEVENT or * SDL_GETEVENT, the maximum number of events to retrieve. - * \param action action to take; see [[#action|Remarks]] for details. + * \param action action to take; see [Remarks](#remarks) for details. * \param minType minimum value of the event type to be considered; * SDL_EVENT_FIRST is a safe choice. * \param maxType maximum value of the event type to be considered; diff --git a/vendor/sdl3/include/SDL_gamepad.h b/vendor/sdl3/include/SDL_gamepad.h index 264f763b2..99f8b6593 100644 --- a/vendor/sdl3/include/SDL_gamepad.h +++ b/vendor/sdl3/include/SDL_gamepad.h @@ -29,7 +29,7 @@ * "joysticks" now are actually console-style gamepads. So SDL provides the * gamepad API on top of the lower-level joystick functionality. * - * The difference betweena joystick and a gamepad is that a gamepad tells you + * The difference between a joystick and a gamepad is that a gamepad tells you * _where_ a button or axis is on the device. 
You don't speak to gamepads in * terms of arbitrary numbers like "button 3" or "axis 2" but in standard * locations: the d-pad, the shoulder buttons, triggers, A/B/X/Y (or diff --git a/vendor/sdl3/include/SDL_gpu.h b/vendor/sdl3/include/SDL_gpu.h index fa870a579..9f516d73f 100644 --- a/vendor/sdl3/include/SDL_gpu.h +++ b/vendor/sdl3/include/SDL_gpu.h @@ -35,13 +35,14 @@ * can render offscreen entirely, perhaps for image processing, and not use a * window at all. * - * Next the app prepares static data (things that are created once and used + * Next, the app prepares static data (things that are created once and used * over and over). For example: * * - Shaders (programs that run on the GPU): use SDL_CreateGPUShader(). - * - Vertex buffers (arrays of geometry data) and other data rendering will - * need: use SDL_UploadToGPUBuffer(). - * - Textures (images): use SDL_UploadToGPUTexture(). + * - Vertex buffers (arrays of geometry data) and other rendering data: use + * SDL_CreateGPUBuffer() and SDL_UploadToGPUBuffer(). + * - Textures (images): use SDL_CreateGPUTexture() and + * SDL_UploadToGPUTexture(). * - Samplers (how textures should be read from): use SDL_CreateGPUSampler(). * - Render pipelines (precalculated rendering state): use * SDL_CreateGPUGraphicsPipeline() @@ -130,7 +131,8 @@ * It is optimal for apps to pre-compile the shader formats they might use, * but for ease of use SDL provides a separate project, * [SDL_shadercross](https://github.com/libsdl-org/SDL_shadercross) - * , for performing runtime shader cross-compilation. + * , for performing runtime shader cross-compilation. It also has a CLI + * interface for offline precompilation as well. * * This is an extremely quick overview that leaves out several important * details. Already, though, one can see that GPU programming can be quite @@ -888,6 +890,10 @@ typedef enum SDL_GPUCubeMapFace * Unlike textures, READ | WRITE can be used for simultaneous read-write * usage. 
The same data synchronization concerns as textures apply. * + * If you use a STORAGE flag, the data in the buffer must respect std140 + * layout conventions. In practical terms this means you must ensure that vec3 + * and vec4 fields are 16-byte aligned. + * * \since This datatype is available since SDL 3.2.0. * * \sa SDL_CreateGPUBuffer @@ -1361,6 +1367,7 @@ typedef struct SDL_GPUTextureLocation * * \sa SDL_UploadToGPUTexture * \sa SDL_DownloadFromGPUTexture + * \sa SDL_CreateGPUTexture */ typedef struct SDL_GPUTextureRegion { @@ -1489,9 +1496,16 @@ typedef struct SDL_GPUIndirectDispatchCommand /** * A structure specifying the parameters of a sampler. * + * Note that mip_lod_bias is a no-op for the Metal driver. For Metal, LOD bias + * must be applied via shader instead. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_CreateGPUSampler + * \sa SDL_GPUFilter + * \sa SDL_GPUSamplerMipmapMode + * \sa SDL_GPUSamplerAddressMode + * \sa SDL_GPUCompareOp */ typedef struct SDL_GPUSamplerCreateInfo { @@ -1530,14 +1544,14 @@ typedef struct SDL_GPUSamplerCreateInfo * \since This struct is available since SDL 3.2.0. * * \sa SDL_GPUVertexAttribute - * \sa SDL_GPUVertexInputState + * \sa SDL_GPUVertexInputRate */ typedef struct SDL_GPUVertexBufferDescription { Uint32 slot; /**< The binding slot of the vertex buffer. */ Uint32 pitch; /**< The byte pitch between consecutive elements of the vertex buffer. */ SDL_GPUVertexInputRate input_rate; /**< Whether attribute addressing is a function of the vertex index or instance index. */ - Uint32 instance_step_rate; /**< The number of instances to draw using the same per-instance data before advancing in the instance buffer by one element. Ignored unless input_rate is SDL_GPU_VERTEXINPUTRATE_INSTANCE */ + Uint32 instance_step_rate; /**< Reserved for future use. Must be set to 0. 
*/ } SDL_GPUVertexBufferDescription; /** @@ -1550,6 +1564,7 @@ typedef struct SDL_GPUVertexBufferDescription * * \sa SDL_GPUVertexBufferDescription * \sa SDL_GPUVertexInputState + * \sa SDL_GPUVertexElementFormat */ typedef struct SDL_GPUVertexAttribute { @@ -1706,10 +1721,13 @@ typedef struct SDL_GPUTransferBufferCreateInfo * A structure specifying the parameters of the graphics pipeline rasterizer * state. * - * NOTE: Some backend APIs (D3D11/12) will enable depth clamping even if - * enable_depth_clip is true. If you rely on this clamp+clip behavior, - * consider enabling depth clip and then manually clamping depth in your - * fragment shaders on Metal and Vulkan. + * Note that SDL_GPU_FILLMODE_LINE is not supported on many Android devices. + * For those devices, the fill mode will automatically fall back to FILL. + * + * Also note that the D3D12 driver will enable depth clamping even if + * enable_depth_clip is true. If you need this clamp+clip behavior, consider + * enabling depth clip and then manually clamping depth in your fragment + * shaders on Metal and Vulkan. * * \since This struct is available since SDL 3.2.0. * @@ -1740,8 +1758,8 @@ typedef struct SDL_GPURasterizerState typedef struct SDL_GPUMultisampleState { SDL_GPUSampleCount sample_count; /**< The number of samples to be used in rasterization. */ - Uint32 sample_mask; /**< Determines which samples get updated in the render targets. Treated as 0xFFFFFFFF if enable_mask is false. */ - bool enable_mask; /**< Enables sample masking. */ + Uint32 sample_mask; /**< Reserved for future use. Must be set to 0. */ + bool enable_mask; /**< Reserved for future use. Must be set to false. */ Uint8 padding1; Uint8 padding2; Uint8 padding3; @@ -1791,6 +1809,8 @@ typedef struct SDL_GPUColorTargetDescription * \since This struct is available since SDL 3.2.0. 
* * \sa SDL_GPUGraphicsPipelineCreateInfo + * \sa SDL_GPUColorTargetDescription + * \sa SDL_GPUTextureFormat */ typedef struct SDL_GPUGraphicsPipelineTargetInfo { @@ -1809,6 +1829,7 @@ typedef struct SDL_GPUGraphicsPipelineTargetInfo * \since This struct is available since SDL 3.2.0. * * \sa SDL_CreateGPUGraphicsPipeline + * \sa SDL_GPUShader * \sa SDL_GPUVertexInputState * \sa SDL_GPUPrimitiveType * \sa SDL_GPURasterizerState @@ -1836,6 +1857,7 @@ typedef struct SDL_GPUGraphicsPipelineCreateInfo * \since This struct is available since SDL 3.2.0. * * \sa SDL_CreateGPUComputePipeline + * \sa SDL_GPUShaderFormat */ typedef struct SDL_GPUComputePipelineCreateInfo { @@ -2104,7 +2126,7 @@ extern SDL_DECLSPEC bool SDLCALL SDL_GPUSupportsProperties( * \sa SDL_DestroyGPUDevice * \sa SDL_GPUSupportsShaderFormats */ -extern SDL_DECLSPEC SDL_GPUDevice *SDLCALL SDL_CreateGPUDevice( +extern SDL_DECLSPEC SDL_GPUDevice * SDLCALL SDL_CreateGPUDevice( SDL_GPUShaderFormat format_flags, bool debug_mode, const char *name); @@ -2152,7 +2174,7 @@ extern SDL_DECLSPEC SDL_GPUDevice *SDLCALL SDL_CreateGPUDevice( * \sa SDL_DestroyGPUDevice * \sa SDL_GPUSupportsProperties */ -extern SDL_DECLSPEC SDL_GPUDevice *SDLCALL SDL_CreateGPUDeviceWithProperties( +extern SDL_DECLSPEC SDL_GPUDevice * SDLCALL SDL_CreateGPUDeviceWithProperties( SDL_PropertiesID props); #define SDL_PROP_GPU_DEVICE_CREATE_DEBUGMODE_BOOLEAN "SDL.gpu.device.create.debugmode" @@ -2275,7 +2297,7 @@ extern SDL_DECLSPEC SDL_GPUShaderFormat SDLCALL SDL_GetGPUShaderFormats(SDL_GPUD * \sa SDL_BindGPUComputePipeline * \sa SDL_ReleaseGPUComputePipeline */ -extern SDL_DECLSPEC SDL_GPUComputePipeline *SDLCALL SDL_CreateGPUComputePipeline( +extern SDL_DECLSPEC SDL_GPUComputePipeline * SDLCALL SDL_CreateGPUComputePipeline( SDL_GPUDevice *device, const SDL_GPUComputePipelineCreateInfo *createinfo); @@ -2302,7 +2324,7 @@ extern SDL_DECLSPEC SDL_GPUComputePipeline *SDLCALL SDL_CreateGPUComputePipeline * \sa SDL_BindGPUGraphicsPipeline * \sa 
SDL_ReleaseGPUGraphicsPipeline */ -extern SDL_DECLSPEC SDL_GPUGraphicsPipeline *SDLCALL SDL_CreateGPUGraphicsPipeline( +extern SDL_DECLSPEC SDL_GPUGraphicsPipeline * SDLCALL SDL_CreateGPUGraphicsPipeline( SDL_GPUDevice *device, const SDL_GPUGraphicsPipelineCreateInfo *createinfo); @@ -2329,7 +2351,7 @@ extern SDL_DECLSPEC SDL_GPUGraphicsPipeline *SDLCALL SDL_CreateGPUGraphicsPipeli * \sa SDL_BindGPUFragmentSamplers * \sa SDL_ReleaseGPUSampler */ -extern SDL_DECLSPEC SDL_GPUSampler *SDLCALL SDL_CreateGPUSampler( +extern SDL_DECLSPEC SDL_GPUSampler * SDLCALL SDL_CreateGPUSampler( SDL_GPUDevice *device, const SDL_GPUSamplerCreateInfo *createinfo); @@ -2408,7 +2430,7 @@ extern SDL_DECLSPEC SDL_GPUSampler *SDLCALL SDL_CreateGPUSampler( * \sa SDL_CreateGPUGraphicsPipeline * \sa SDL_ReleaseGPUShader */ -extern SDL_DECLSPEC SDL_GPUShader *SDLCALL SDL_CreateGPUShader( +extern SDL_DECLSPEC SDL_GPUShader * SDLCALL SDL_CreateGPUShader( SDL_GPUDevice *device, const SDL_GPUShaderCreateInfo *createinfo); @@ -2469,7 +2491,7 @@ extern SDL_DECLSPEC SDL_GPUShader *SDLCALL SDL_CreateGPUShader( * \sa SDL_ReleaseGPUTexture * \sa SDL_GPUTextureSupportsFormat */ -extern SDL_DECLSPEC SDL_GPUTexture *SDLCALL SDL_CreateGPUTexture( +extern SDL_DECLSPEC SDL_GPUTexture * SDLCALL SDL_CreateGPUTexture( SDL_GPUDevice *device, const SDL_GPUTextureCreateInfo *createinfo); @@ -2490,6 +2512,10 @@ extern SDL_DECLSPEC SDL_GPUTexture *SDLCALL SDL_CreateGPUTexture( * Note that certain combinations of usage flags are invalid. For example, a * buffer cannot have both the VERTEX and INDEX flags. * + * If you use a STORAGE flag, the data in the buffer must respect std140 + * layout conventions. In practical terms this means you must ensure that vec3 + * and vec4 fields are 16-byte aligned. 
+ * * For better understanding of underlying concepts and memory management with * SDL GPU API, you may refer * [this blog post](https://moonside.games/posts/sdl-gpu-concepts-cycling/) @@ -2521,7 +2547,7 @@ extern SDL_DECLSPEC SDL_GPUTexture *SDLCALL SDL_CreateGPUTexture( * \sa SDL_DispatchGPUComputeIndirect * \sa SDL_ReleaseGPUBuffer */ -extern SDL_DECLSPEC SDL_GPUBuffer *SDLCALL SDL_CreateGPUBuffer( +extern SDL_DECLSPEC SDL_GPUBuffer * SDLCALL SDL_CreateGPUBuffer( SDL_GPUDevice *device, const SDL_GPUBufferCreateInfo *createinfo); @@ -2554,7 +2580,7 @@ extern SDL_DECLSPEC SDL_GPUBuffer *SDLCALL SDL_CreateGPUBuffer( * \sa SDL_DownloadFromGPUTexture * \sa SDL_ReleaseGPUTransferBuffer */ -extern SDL_DECLSPEC SDL_GPUTransferBuffer *SDLCALL SDL_CreateGPUTransferBuffer( +extern SDL_DECLSPEC SDL_GPUTransferBuffer * SDLCALL SDL_CreateGPUTransferBuffer( SDL_GPUDevice *device, const SDL_GPUTransferBufferCreateInfo *createinfo); @@ -2782,7 +2808,7 @@ extern SDL_DECLSPEC void SDLCALL SDL_ReleaseGPUGraphicsPipeline( * \sa SDL_SubmitGPUCommandBuffer * \sa SDL_SubmitGPUCommandBufferAndAcquireFence */ -extern SDL_DECLSPEC SDL_GPUCommandBuffer *SDLCALL SDL_AcquireGPUCommandBuffer( +extern SDL_DECLSPEC SDL_GPUCommandBuffer * SDLCALL SDL_AcquireGPUCommandBuffer( SDL_GPUDevice *device); /* Uniform Data */ @@ -2792,6 +2818,10 @@ extern SDL_DECLSPEC SDL_GPUCommandBuffer *SDLCALL SDL_AcquireGPUCommandBuffer( * * Subsequent draw calls will use this uniform data. * + * The data being pushed must respect std140 layout conventions. In practical + * terms this means you must ensure that vec3 and vec4 fields are 16-byte + * aligned. + * * \param command_buffer a command buffer. * \param slot_index the vertex uniform slot to push data to. * \param data client data to write. @@ -2810,6 +2840,10 @@ extern SDL_DECLSPEC void SDLCALL SDL_PushGPUVertexUniformData( * * Subsequent draw calls will use this uniform data. * + * The data being pushed must respect std140 layout conventions. 
In practical + * terms this means you must ensure that vec3 and vec4 fields are 16-byte + * aligned. + * * \param command_buffer a command buffer. * \param slot_index the fragment uniform slot to push data to. * \param data client data to write. @@ -2828,6 +2862,10 @@ extern SDL_DECLSPEC void SDLCALL SDL_PushGPUFragmentUniformData( * * Subsequent draw calls will use this uniform data. * + * The data being pushed must respect std140 layout conventions. In practical + * terms this means you must ensure that vec3 and vec4 fields are 16-byte + * aligned. + * * \param command_buffer a command buffer. * \param slot_index the uniform slot to push data to. * \param data client data to write. @@ -2868,7 +2906,7 @@ extern SDL_DECLSPEC void SDLCALL SDL_PushGPUComputeUniformData( * * \sa SDL_EndGPURenderPass */ -extern SDL_DECLSPEC SDL_GPURenderPass *SDLCALL SDL_BeginGPURenderPass( +extern SDL_DECLSPEC SDL_GPURenderPass * SDLCALL SDL_BeginGPURenderPass( SDL_GPUCommandBuffer *command_buffer, const SDL_GPUColorTargetInfo *color_target_infos, Uint32 num_color_targets, @@ -2978,6 +3016,9 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUIndexBuffer( * * The textures must have been created with SDL_GPU_TEXTUREUSAGE_SAMPLER. * + * Be sure your shader is set up according to the requirements documented in + * SDL_CreateGPUShader(). + * * \param render_pass a render pass handle. * \param first_slot the vertex sampler slot to begin binding from. * \param texture_sampler_bindings an array of texture-sampler binding @@ -2986,6 +3027,8 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUIndexBuffer( * array. * * \since This function is available since SDL 3.2.0. + * + * \sa SDL_CreateGPUShader */ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUVertexSamplers( SDL_GPURenderPass *render_pass, @@ -2999,12 +3042,17 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUVertexSamplers( * These textures must have been created with * SDL_GPU_TEXTUREUSAGE_GRAPHICS_STORAGE_READ. 
* + * Be sure your shader is set up according to the requirements documented in + * SDL_CreateGPUShader(). + * * \param render_pass a render pass handle. * \param first_slot the vertex storage texture slot to begin binding from. * \param storage_textures an array of storage textures. * \param num_bindings the number of storage texture to bind from the array. * * \since This function is available since SDL 3.2.0. + * + * \sa SDL_CreateGPUShader */ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUVertexStorageTextures( SDL_GPURenderPass *render_pass, @@ -3018,12 +3066,17 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUVertexStorageTextures( * These buffers must have been created with * SDL_GPU_BUFFERUSAGE_GRAPHICS_STORAGE_READ. * + * Be sure your shader is set up according to the requirements documented in + * SDL_CreateGPUShader(). + * * \param render_pass a render pass handle. * \param first_slot the vertex storage buffer slot to begin binding from. * \param storage_buffers an array of buffers. * \param num_bindings the number of buffers to bind from the array. * * \since This function is available since SDL 3.2.0. + * + * \sa SDL_CreateGPUShader */ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUVertexStorageBuffers( SDL_GPURenderPass *render_pass, @@ -3036,6 +3089,9 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUVertexStorageBuffers( * * The textures must have been created with SDL_GPU_TEXTUREUSAGE_SAMPLER. * + * Be sure your shader is set up according to the requirements documented in + * SDL_CreateGPUShader(). + * * \param render_pass a render pass handle. * \param first_slot the fragment sampler slot to begin binding from. * \param texture_sampler_bindings an array of texture-sampler binding @@ -3044,6 +3100,8 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUVertexStorageBuffers( * array. * * \since This function is available since SDL 3.2.0. 
+ * + * \sa SDL_CreateGPUShader */ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUFragmentSamplers( SDL_GPURenderPass *render_pass, @@ -3057,12 +3115,17 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUFragmentSamplers( * These textures must have been created with * SDL_GPU_TEXTUREUSAGE_GRAPHICS_STORAGE_READ. * + * Be sure your shader is set up according to the requirements documented in + * SDL_CreateGPUShader(). + * * \param render_pass a render pass handle. * \param first_slot the fragment storage texture slot to begin binding from. * \param storage_textures an array of storage textures. * \param num_bindings the number of storage textures to bind from the array. * * \since This function is available since SDL 3.2.0. + * + * \sa SDL_CreateGPUShader */ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUFragmentStorageTextures( SDL_GPURenderPass *render_pass, @@ -3076,12 +3139,17 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUFragmentStorageTextures( * These buffers must have been created with * SDL_GPU_BUFFERUSAGE_GRAPHICS_STORAGE_READ. * + * Be sure your shader is set up according to the requirements documented in + * SDL_CreateGPUShader(). + * * \param render_pass a render pass handle. * \param first_slot the fragment storage buffer slot to begin binding from. * \param storage_buffers an array of storage buffers. * \param num_bindings the number of storage buffers to bind from the array. * * \since This function is available since SDL 3.2.0. 
+ * + * \sa SDL_CreateGPUShader */ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUFragmentStorageBuffers( SDL_GPURenderPass *render_pass, @@ -3245,7 +3313,7 @@ extern SDL_DECLSPEC void SDLCALL SDL_EndGPURenderPass( * * \sa SDL_EndGPUComputePass */ -extern SDL_DECLSPEC SDL_GPUComputePass *SDLCALL SDL_BeginGPUComputePass( +extern SDL_DECLSPEC SDL_GPUComputePass * SDLCALL SDL_BeginGPUComputePass( SDL_GPUCommandBuffer *command_buffer, const SDL_GPUStorageTextureReadWriteBinding *storage_texture_bindings, Uint32 num_storage_texture_bindings, @@ -3269,6 +3337,9 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputePipeline( * * The textures must have been created with SDL_GPU_TEXTUREUSAGE_SAMPLER. * + * Be sure your shader is set up according to the requirements documented in + * SDL_CreateGPUShader(). + * * \param compute_pass a compute pass handle. * \param first_slot the compute sampler slot to begin binding from. * \param texture_sampler_bindings an array of texture-sampler binding @@ -3277,6 +3348,8 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputePipeline( * array. * * \since This function is available since SDL 3.2.0. + * + * \sa SDL_CreateGPUShader */ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputeSamplers( SDL_GPUComputePass *compute_pass, @@ -3290,12 +3363,17 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputeSamplers( * These textures must have been created with * SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_READ. * + * Be sure your shader is set up according to the requirements documented in + * SDL_CreateGPUShader(). + * * \param compute_pass a compute pass handle. * \param first_slot the compute storage texture slot to begin binding from. * \param storage_textures an array of storage textures. * \param num_bindings the number of storage textures to bind from the array. * * \since This function is available since SDL 3.2.0. 
+ * + * \sa SDL_CreateGPUShader */ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputeStorageTextures( SDL_GPUComputePass *compute_pass, @@ -3309,12 +3387,17 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputeStorageTextures( * These buffers must have been created with * SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_READ. * + * Be sure your shader is set up according to the requirements documented in + * SDL_CreateGPUShader(). + * * \param compute_pass a compute pass handle. * \param first_slot the compute storage buffer slot to begin binding from. * \param storage_buffers an array of storage buffer binding structs. * \param num_bindings the number of storage buffers to bind from the array. * * \since This function is available since SDL 3.2.0. + * + * \sa SDL_CreateGPUShader */ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputeStorageBuffers( SDL_GPUComputePass *compute_pass, @@ -3389,7 +3472,9 @@ extern SDL_DECLSPEC void SDLCALL SDL_EndGPUComputePass( /** * Maps a transfer buffer into application address space. * - * You must unmap the transfer buffer before encoding upload commands. + * You must unmap the transfer buffer before encoding upload commands. The + * memory is owned by the graphics driver - do NOT call SDL_free() on the + * returned pointer. * * \param device a GPU context. * \param transfer_buffer a transfer buffer. @@ -3399,7 +3484,7 @@ extern SDL_DECLSPEC void SDLCALL SDL_EndGPUComputePass( * * \since This function is available since SDL 3.2.0. */ -extern SDL_DECLSPEC void *SDLCALL SDL_MapGPUTransferBuffer( +extern SDL_DECLSPEC void * SDLCALL SDL_MapGPUTransferBuffer( SDL_GPUDevice *device, SDL_GPUTransferBuffer *transfer_buffer, bool cycle); @@ -3430,7 +3515,7 @@ extern SDL_DECLSPEC void SDLCALL SDL_UnmapGPUTransferBuffer( * * \since This function is available since SDL 3.2.0. 
*/ -extern SDL_DECLSPEC SDL_GPUCopyPass *SDLCALL SDL_BeginGPUCopyPass( +extern SDL_DECLSPEC SDL_GPUCopyPass * SDLCALL SDL_BeginGPUCopyPass( SDL_GPUCommandBuffer *command_buffer); /** @@ -3848,6 +3933,9 @@ extern SDL_DECLSPEC bool SDLCALL SDL_WaitForGPUSwapchain( * freed by the user. You MUST NOT call this function from any thread other * than the one that created the window. * + * The swapchain texture is write-only and cannot be used as a sampler or for + * another reading operation. + * * \param command_buffer a command buffer. * \param window a window that has been claimed. * \param swapchain_texture a pointer filled in with a swapchain texture @@ -3866,6 +3954,7 @@ extern SDL_DECLSPEC bool SDLCALL SDL_WaitForGPUSwapchain( * * \sa SDL_SubmitGPUCommandBuffer * \sa SDL_SubmitGPUCommandBufferAndAcquireFence + * \sa SDL_AcquireGPUSwapchainTexture */ extern SDL_DECLSPEC bool SDLCALL SDL_WaitAndAcquireGPUSwapchainTexture( SDL_GPUCommandBuffer *command_buffer, @@ -3922,7 +4011,7 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SubmitGPUCommandBuffer( * \sa SDL_SubmitGPUCommandBuffer * \sa SDL_ReleaseGPUFence */ -extern SDL_DECLSPEC SDL_GPUFence *SDLCALL SDL_SubmitGPUCommandBufferAndAcquireFence( +extern SDL_DECLSPEC SDL_GPUFence * SDLCALL SDL_SubmitGPUCommandBufferAndAcquireFence( SDL_GPUCommandBuffer *command_buffer); /** @@ -4004,6 +4093,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_QueryGPUFence( /** * Releases a fence obtained from SDL_SubmitGPUCommandBufferAndAcquireFence. * + * You must not reference the fence after calling this function. + * * \param device a GPU context. * \param fence a fence. * @@ -4054,7 +4145,7 @@ extern SDL_DECLSPEC bool SDLCALL SDL_GPUTextureSupportsFormat( * \param device a GPU context. * \param format the texture format to check. * \param sample_count the sample count to check. - * \returns a hardware-specific version of min(preferred, possible). + * \returns whether the sample count is supported for this texture format. 
* * \since This function is available since SDL 3.2.0. */ diff --git a/vendor/sdl3/include/SDL_guid.h b/vendor/sdl3/include/SDL_guid.h index e2f32ffc9..312c42c03 100644 --- a/vendor/sdl3/include/SDL_guid.h +++ b/vendor/sdl3/include/SDL_guid.h @@ -71,6 +71,8 @@ typedef struct SDL_GUID { * \param pszGUID buffer in which to write the ASCII string. * \param cbGUID the size of pszGUID, should be at least 33 bytes. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_StringToGUID @@ -87,6 +89,8 @@ extern SDL_DECLSPEC void SDLCALL SDL_GUIDToString(SDL_GUID guid, char *pszGUID, * \param pchGUID string containing an ASCII representation of a GUID. * \returns a SDL_GUID structure. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_GUIDToString diff --git a/vendor/sdl3/include/SDL_hints.h b/vendor/sdl3/include/SDL_hints.h index 8f2d074d1..9c8ad3f82 100644 --- a/vendor/sdl3/include/SDL_hints.h +++ b/vendor/sdl3/include/SDL_hints.h @@ -2191,6 +2191,28 @@ extern "C" { */ #define SDL_HINT_JOYSTICK_ZERO_CENTERED_DEVICES "SDL_JOYSTICK_ZERO_CENTERED_DEVICES" +/** + * A variable containing a list of devices and their desired number of haptic + * (force feedback) enabled axis. + * + * The format of the string is a comma separated list of USB VID/PID pairs in + * hexadecimal form plus the number of desired axes, e.g. + * + * `0xAAAA/0xBBBB/1,0xCCCC/0xDDDD/3` + * + * This hint supports a "wildcard" device that will set the number of haptic + * axes on all initialized haptic devices which were not defined explicitly in + * this hint. + * + * `0xFFFF/0xFFFF/1` + * + * This hint should be set before a controller is opened. The number of haptic + * axes won't exceed the number of real axes found on the device. + * + * \since This hint is available since SDL 3.2.5. 
+ */ +#define SDL_HINT_JOYSTICK_HAPTIC_AXES "SDL_JOYSTICK_HAPTIC_AXES" + /** * A variable that controls keycode representation in keyboard events. * @@ -2349,8 +2371,8 @@ extern "C" { #define SDL_HINT_MAC_OPENGL_ASYNC_DISPATCH "SDL_MAC_OPENGL_ASYNC_DISPATCH" /** - * A variable controlling whether the Option (⌥) key on macOS should be - * remapped to act as the Alt key. + * A variable controlling whether the Option key on macOS should be remapped + * to act as the Alt key. * * The variable can be set to the following values: * @@ -3585,6 +3607,22 @@ extern "C" { */ #define SDL_HINT_VIDEO_WIN_D3DCOMPILER "SDL_VIDEO_WIN_D3DCOMPILER" +/** + * A variable controlling whether SDL should call XSelectInput() to enable + * input events on X11 windows wrapped by SDL windows. + * + * The variable can be set to the following values: + * + * - "0": Don't call XSelectInput(), assuming the native window code has done + * it already. + * - "1": Call XSelectInput() to enable input events. (default) + * + * This hint should be set before creating a window. + * + * \since This hint is available since SDL 3.2.10. + */ +#define SDL_HINT_VIDEO_X11_EXTERNAL_WINDOW_INPUT "SDL_VIDEO_X11_EXTERNAL_WINDOW_INPUT" + /** * A variable controlling whether the X11 _NET_WM_BYPASS_COMPOSITOR hint * should be used. @@ -4360,7 +4398,7 @@ extern SDL_DECLSPEC void SDLCALL SDL_ResetHints(void); * \sa SDL_SetHint * \sa SDL_SetHintWithPriority */ -extern SDL_DECLSPEC const char *SDLCALL SDL_GetHint(const char *name); +extern SDL_DECLSPEC const char * SDLCALL SDL_GetHint(const char *name); /** * Get the boolean value of a hint variable. diff --git a/vendor/sdl3/include/SDL_log.h b/vendor/sdl3/include/SDL_log.h index a56476c6d..3fd7ec2ed 100644 --- a/vendor/sdl3/include/SDL_log.h +++ b/vendor/sdl3/include/SDL_log.h @@ -41,8 +41,8 @@ * "system", "audio", "video", "render", "input", "test", or `*` for any * unspecified category. 
 * - * The level can be a numeric level, one of "verbose", "debug", "info", - * "warn", "error", "critical", or "quiet" to disable that category. + * The level can be a numeric level, one of "trace", "verbose", "debug", + * "info", "warn", "error", "critical", or "quiet" to disable that category. * * You can omit the category if you want to set the logging level for all * categories. @@ -56,6 +56,15 @@ * - Windows: debug output stream * - Android: log output * - Others: standard error output (stderr) + * + * You don't need to have a newline (`\n`) on the end of messages, the + * functions will do that for you. For consistent behavior cross-platform, you + * shouldn't have any newlines in messages, such as to log multiple lines in + * one call; unusual platform-specific behavior can be observed in such usage. + * Do one log call per line instead, with no newlines in messages. + * + * Each log call is atomic, so you won't see log messages cut off one another + * when logging from multiple threads. */ #ifndef SDL_log_h_ diff --git a/vendor/sdl3/include/SDL_main.h b/vendor/sdl3/include/SDL_main.h index 2e7a2ebb6..905d78e96 100644 --- a/vendor/sdl3/include/SDL_main.h +++ b/vendor/sdl3/include/SDL_main.h @@ -28,6 +28,9 @@ * should look like this: * * ```c + * #include <SDL3/SDL.h> + * #include <SDL3/SDL_main.h> + * * int main(int argc, char *argv[]) * { * } @@ -38,9 +41,9 @@ * This is also where an app can be configured to use the main callbacks, via * the SDL_MAIN_USE_CALLBACKS macro. * - * This is a "single-header library," which is to say that including this - * header inserts code into your program, and you should only include it once - * in most cases. SDL.h does not include this header automatically. + * SDL_main.h is a "single-header library," which is to say that including + * this header inserts code into your program, and you should only include it + * once in most cases. SDL.h does not include this header automatically. 
* * For more information, see: * diff --git a/vendor/sdl3/include/SDL_mouse.h b/vendor/sdl3/include/SDL_mouse.h index 18856e20e..864135d2b 100644 --- a/vendor/sdl3/include/SDL_mouse.h +++ b/vendor/sdl3/include/SDL_mouse.h @@ -353,7 +353,7 @@ extern SDL_DECLSPEC SDL_MouseButtonFlags SDLCALL SDL_GetRelativeMouseState(float * * \sa SDL_WarpMouseGlobal */ -extern SDL_DECLSPEC void SDLCALL SDL_WarpMouseInWindow(SDL_Window * window, +extern SDL_DECLSPEC void SDLCALL SDL_WarpMouseInWindow(SDL_Window *window, float x, float y); /** @@ -514,8 +514,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_CaptureMouse(bool enabled); * \sa SDL_DestroyCursor * \sa SDL_SetCursor */ -extern SDL_DECLSPEC SDL_Cursor * SDLCALL SDL_CreateCursor(const Uint8 * data, - const Uint8 * mask, +extern SDL_DECLSPEC SDL_Cursor * SDLCALL SDL_CreateCursor(const Uint8 *data, + const Uint8 *mask, int w, int h, int hot_x, int hot_y); diff --git a/vendor/sdl3/include/SDL_pixels.h b/vendor/sdl3/include/SDL_pixels.h index b6f38acbe..4127ac06c 100644 --- a/vendor/sdl3/include/SDL_pixels.h +++ b/vendor/sdl3/include/SDL_pixels.h @@ -676,6 +676,9 @@ typedef enum SDL_PixelFormat SDL_PIXELFORMAT_EXTERNAL_OES = 0x2053454fu, /**< Android video texture format */ /* SDL_DEFINE_PIXELFOURCC('O', 'E', 'S', ' ') */ + SDL_PIXELFORMAT_MJPG = 0x47504a4du, /**< Motion JPEG */ + /* SDL_DEFINE_PIXELFOURCC('M', 'J', 'P', 'G') */ + /* Aliases for RGBA byte arrays of color data, for the current platform */ #if SDL_BYTEORDER == SDL_BIG_ENDIAN SDL_PIXELFORMAT_RGBA32 = SDL_PIXELFORMAT_RGBA8888, diff --git a/vendor/sdl3/include/SDL_platform_defines.h b/vendor/sdl3/include/SDL_platform_defines.h index 7e9a0a92a..6b240a8be 100644 --- a/vendor/sdl3/include/SDL_platform_defines.h +++ b/vendor/sdl3/include/SDL_platform_defines.h @@ -471,8 +471,6 @@ * \since This macro is available since SDL 3.2.0. 
*/ #define SDL_PLATFORM_3DS 1 - -#undef __3DS__ #endif #endif /* SDL_platform_defines_h_ */ diff --git a/vendor/sdl3/include/SDL_power.h b/vendor/sdl3/include/SDL_power.h index 4056ce3c8..694fb0924 100644 --- a/vendor/sdl3/include/SDL_power.h +++ b/vendor/sdl3/include/SDL_power.h @@ -79,6 +79,10 @@ typedef enum SDL_PowerState * It's possible a platform can only report battery percentage or time left * but not both. * + * On some platforms, retrieving power supply details might be expensive. If + * you want to display continuous status you could call this function every + * minute or so. + * * \param seconds a pointer filled in with the seconds of battery life left, * or NULL to ignore. This will be filled in with -1 if we * can't determine a value or there is no battery. diff --git a/vendor/sdl3/include/SDL_process.h b/vendor/sdl3/include/SDL_process.h index 2cc77395b..511b2f9c5 100644 --- a/vendor/sdl3/include/SDL_process.h +++ b/vendor/sdl3/include/SDL_process.h @@ -103,7 +103,7 @@ typedef struct SDL_Process SDL_Process; * \sa SDL_WaitProcess * \sa SDL_DestroyProcess */ -extern SDL_DECLSPEC SDL_Process *SDLCALL SDL_CreateProcess(const char * const *args, bool pipe_stdio); +extern SDL_DECLSPEC SDL_Process * SDLCALL SDL_CreateProcess(const char * const *args, bool pipe_stdio); /** * Description of where standard I/O should be directed when creating a @@ -173,13 +173,13 @@ typedef enum SDL_ProcessIO * standard input when `SDL_PROP_PROCESS_CREATE_STDIN_NUMBER` is set to * `SDL_PROCESS_STDIO_REDIRECT`. * - `SDL_PROP_PROCESS_CREATE_STDOUT_NUMBER`: an SDL_ProcessIO value - * describing where standard output for the process goes go, defaults to + * describing where standard output for the process goes to, defaults to * `SDL_PROCESS_STDIO_INHERITED`. * - `SDL_PROP_PROCESS_CREATE_STDOUT_POINTER`: an SDL_IOStream pointer used * for standard output when `SDL_PROP_PROCESS_CREATE_STDOUT_NUMBER` is set * to `SDL_PROCESS_STDIO_REDIRECT`. 
* - `SDL_PROP_PROCESS_CREATE_STDERR_NUMBER`: an SDL_ProcessIO value - * describing where standard error for the process goes go, defaults to + * describing where standard error for the process goes to, defaults to * `SDL_PROCESS_STDIO_INHERITED`. * - `SDL_PROP_PROCESS_CREATE_STDERR_POINTER`: an SDL_IOStream pointer used * for standard error when `SDL_PROP_PROCESS_CREATE_STDERR_NUMBER` is set to @@ -215,7 +215,7 @@ typedef enum SDL_ProcessIO * \sa SDL_WaitProcess * \sa SDL_DestroyProcess */ -extern SDL_DECLSPEC SDL_Process *SDLCALL SDL_CreateProcessWithProperties(SDL_PropertiesID props); +extern SDL_DECLSPEC SDL_Process * SDLCALL SDL_CreateProcessWithProperties(SDL_PropertiesID props); #define SDL_PROP_PROCESS_CREATE_ARGS_POINTER "SDL.process.create.args" #define SDL_PROP_PROCESS_CREATE_ENVIRONMENT_POINTER "SDL.process.create.environment" @@ -320,7 +320,7 @@ extern SDL_DECLSPEC void * SDLCALL SDL_ReadProcess(SDL_Process *process, size_t * \sa SDL_CreateProcessWithProperties * \sa SDL_GetProcessOutput */ -extern SDL_DECLSPEC SDL_IOStream *SDLCALL SDL_GetProcessInput(SDL_Process *process); +extern SDL_DECLSPEC SDL_IOStream * SDLCALL SDL_GetProcessInput(SDL_Process *process); /** * Get the SDL_IOStream associated with process standard output. @@ -344,7 +344,7 @@ extern SDL_DECLSPEC SDL_IOStream *SDLCALL SDL_GetProcessInput(SDL_Process *proce * \sa SDL_CreateProcessWithProperties * \sa SDL_GetProcessInput */ -extern SDL_DECLSPEC SDL_IOStream *SDLCALL SDL_GetProcessOutput(SDL_Process *process); +extern SDL_DECLSPEC SDL_IOStream * SDLCALL SDL_GetProcessOutput(SDL_Process *process); /** * Stop a process. 
diff --git a/vendor/sdl3/include/SDL_rect.h b/vendor/sdl3/include/SDL_rect.h index 8998de6f4..eb2d34a69 100644 --- a/vendor/sdl3/include/SDL_rect.h +++ b/vendor/sdl3/include/SDL_rect.h @@ -371,7 +371,7 @@ SDL_FORCE_INLINE bool SDL_RectEmptyFloat(const SDL_FRect *r) * * \sa SDL_RectsEqualFloat */ -SDL_FORCE_INLINE bool SDL_RectsEqualEpsilon(const SDL_FRect *a, const SDL_FRect *b, const float epsilon) +SDL_FORCE_INLINE bool SDL_RectsEqualEpsilon(const SDL_FRect *a, const SDL_FRect *b, float epsilon) { return (a && b && ((a == b) || ((SDL_fabsf(a->x - b->x) <= epsilon) && diff --git a/vendor/sdl3/include/SDL_render.h b/vendor/sdl3/include/SDL_render.h index 891e99452..3352545d4 100644 --- a/vendor/sdl3/include/SDL_render.h +++ b/vendor/sdl3/include/SDL_render.h @@ -490,6 +490,9 @@ extern SDL_DECLSPEC SDL_PropertiesID SDLCALL SDL_GetRendererProperties(SDL_Rende * This returns the true output size in pixels, ignoring any render targets or * logical size and presentation. * + * For the output size of the current rendering target, with logical size + * adjustments, use SDL_GetCurrentRenderOutputSize() instead. + * * \param renderer the rendering context. * \param w a pointer filled in with the width in pixels. * \param h a pointer filled in with the height in pixels. @@ -508,9 +511,10 @@ extern SDL_DECLSPEC bool SDLCALL SDL_GetRenderOutputSize(SDL_Renderer *renderer, * Get the current output size in pixels of a rendering context. * * If a rendering target is active, this will return the size of the rendering - * target in pixels, otherwise if a logical size is set, it will return the - * logical size, otherwise it will return the value of - * SDL_GetRenderOutputSize(). + * target in pixels, otherwise return the value of SDL_GetRenderOutputSize(). + * + * Rendering target or not, the output will be adjusted by the current logical + * presentation state, dictated by SDL_SetRenderLogicalPresentation(). * * \param renderer the rendering context. 
* \param w a pointer filled in with the current width. @@ -1318,6 +1322,11 @@ extern SDL_DECLSPEC void SDLCALL SDL_UnlockTexture(SDL_Texture *texture); * To stop rendering to a texture and render to the window again, call this * function with a NULL `texture`. * + * Viewport, cliprect, scale, and logical presentation are unique to each + * render target. Get and set functions for these states apply to the current + * render target set by this function, and those states persist on each target + * when the current render target changes. + * * \param renderer the rendering context. * \param texture the targeted texture, which must be created with the * `SDL_TEXTUREACCESS_TARGET` flag, or NULL to render to the @@ -1351,25 +1360,39 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetRenderTarget(SDL_Renderer *renderer, SDL extern SDL_DECLSPEC SDL_Texture * SDLCALL SDL_GetRenderTarget(SDL_Renderer *renderer); /** - * Set a device independent resolution and presentation mode for rendering. + * Set a device-independent resolution and presentation mode for rendering. * * This function sets the width and height of the logical rendering output. - * The renderer will act as if the window is always the requested dimensions, - * scaling to the actual window resolution as necessary. + * The renderer will act as if the current render target is always the + * requested dimensions, scaling to the actual resolution as necessary. * * This can be useful for games that expect a fixed size, but would like to * scale the output to whatever is available, regardless of how a user resizes * a window, or if the display is high DPI. * + * Logical presentation can be used with both render target textures and the + * renderer's window; the state is unique to each render target, and this + * function sets the state for the current render target. 
It might be useful + * to draw to a texture that matches the window dimensions with logical + * presentation enabled, and then draw that texture across the entire window + * with logical presentation disabled. Be careful not to render both with + * logical presentation enabled, however, as this could produce + * double-letterboxing, etc. + * * You can disable logical coordinates by setting the mode to * SDL_LOGICAL_PRESENTATION_DISABLED, and in that case you get the full pixel - * resolution of the output window; it is safe to toggle logical presentation + * resolution of the render target; it is safe to toggle logical presentation * during the rendering of a frame: perhaps most of the rendering is done to * specific dimensions but to make fonts look sharp, the app turns off logical - * presentation while drawing text. + * presentation while drawing text, for example. * - * Letterboxing will only happen if logical presentation is enabled during - * SDL_RenderPresent; be sure to reenable it first if you were using it. + * For the renderer's window, letterboxing is drawn into the framebuffer if + * logical presentation is enabled during SDL_RenderPresent; be sure to + * reenable it before presenting if you were toggling it, otherwise the + * letterbox areas might have artifacts from previous frames (or artifacts + * from external overlays, etc). Letterboxing is never drawn into texture + * render targets; be sure to call SDL_RenderClear() before drawing into the + * texture so the letterboxing areas are cleared, if appropriate. * * You can convert coordinates in an event into rendering coordinates using * SDL_ConvertEventToRenderCoordinates(). @@ -1397,6 +1420,9 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetRenderLogicalPresentation(SDL_Renderer * * This function gets the width and height of the logical rendering output, or * the output size in pixels if a logical resolution is not enabled. * + * Each render target has its own logical presentation state. 
This function + * gets the state for the current render target. + * * \param renderer the rendering context. * \param w an int to be filled with the width. * \param h an int to be filled with the height. @@ -1420,6 +1446,9 @@ extern SDL_DECLSPEC bool SDLCALL SDL_GetRenderLogicalPresentation(SDL_Renderer * * presentation is disabled, it will fill the rectangle with the output size, * in pixels. * + * Each render target has its own logical presentation state. This function + * gets the rectangle for the current render target. + * * \param renderer the rendering context. * \param rect a pointer filled in with the final presentation rectangle, may * be NULL. @@ -1536,6 +1565,9 @@ extern SDL_DECLSPEC bool SDLCALL SDL_ConvertEventToRenderCoordinates(SDL_Rendere * * The area's width and height must be >= 0. * + * Each render target has its own viewport. This function sets the viewport + * for the current render target. + * * \param renderer the rendering context. * \param rect the SDL_Rect structure representing the drawing area, or NULL * to set the viewport to the entire target. @@ -1554,6 +1586,9 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetRenderViewport(SDL_Renderer *renderer, c /** * Get the drawing area for the current target. * + * Each render target has its own viewport. This function gets the viewport + * for the current render target. + * * \param renderer the rendering context. * \param rect an SDL_Rect structure filled in with the current drawing area. * \returns true on success or false on failure; call SDL_GetError() for more @@ -1575,6 +1610,9 @@ extern SDL_DECLSPEC bool SDLCALL SDL_GetRenderViewport(SDL_Renderer *renderer, S * whether you should restore a specific rectangle or NULL. Note that the * viewport is always reset when changing rendering targets. * + * Each render target has its own viewport. This function checks the viewport + * for the current render target. + * * \param renderer the rendering context. 
* \returns true if the viewport was set to a specific rectangle, or false if * it was set to NULL (the entire target). @@ -1613,6 +1651,9 @@ extern SDL_DECLSPEC bool SDLCALL SDL_GetRenderSafeArea(SDL_Renderer *renderer, S /** * Set the clip rectangle for rendering on the specified target. * + * Each render target has its own clip rectangle. This function sets the + * cliprect for the current render target. + * * \param renderer the rendering context. * \param rect an SDL_Rect structure representing the clip area, relative to * the viewport, or NULL to disable clipping. @@ -1631,6 +1672,9 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetRenderClipRect(SDL_Renderer *renderer, c /** * Get the clip rectangle for the current target. * + * Each render target has its own clip rectangle. This function gets the + * cliprect for the current render target. + * * \param renderer the rendering context. * \param rect an SDL_Rect structure filled in with the current clipping area * or an empty rectangle if clipping is disabled. @@ -1647,7 +1691,10 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetRenderClipRect(SDL_Renderer *renderer, c extern SDL_DECLSPEC bool SDLCALL SDL_GetRenderClipRect(SDL_Renderer *renderer, SDL_Rect *rect); /** - * Get whether clipping is enabled on the given renderer. + * Get whether clipping is enabled on the given render target. + * + * Each render target has its own clip rectangle. This function checks the + * cliprect for the current render target. * * \param renderer the rendering context. * \returns true if clipping is enabled or false if not; call SDL_GetError() @@ -1673,6 +1720,9 @@ extern SDL_DECLSPEC bool SDLCALL SDL_RenderClipEnabled(SDL_Renderer *renderer); * will be handled using the appropriate quality hints. For best results use * integer scaling factors. * + * Each render target has its own scale. This function sets the scale for the + * current render target. + * * \param renderer the rendering context. * \param scaleX the horizontal scaling factor. 
* \param scaleY the vertical scaling factor. @@ -1690,6 +1740,9 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetRenderScale(SDL_Renderer *renderer, floa /** * Get the drawing scale for the current target. * + * Each render target has its own scale. This function gets the scale for the + * current render target. + * * \param renderer the rendering context. * \param scaleX a pointer filled in with the horizontal scaling factor. * \param scaleY a pointer filled in with the vertical scaling factor. @@ -2247,15 +2300,21 @@ extern SDL_DECLSPEC bool SDLCALL SDL_RenderGeometryRaw(SDL_Renderer *renderer, /** * Read pixels from the current rendering target. * - * The returned surface should be freed with SDL_DestroySurface() + * The returned surface contains pixels inside the desired area clipped to the + * current viewport, and should be freed with SDL_DestroySurface(). + * + * Note that this returns the actual pixels on the screen, so if you are using + * logical presentation you should use SDL_GetRenderLogicalPresentationRect() + * to get the area containing your content. * * **WARNING**: This is a very slow operation, and should not be used * frequently. If you're using this on the main rendering target, it should be * called after rendering and before SDL_RenderPresent(). * * \param renderer the rendering context. - * \param rect an SDL_Rect structure representing the area in pixels relative - * to the to current viewport, or NULL for the entire viewport. + * \param rect an SDL_Rect structure representing the area to read, which will + * be clipped to the current viewport, or NULL for the entire + * viewport. * \returns a new SDL_Surface on success or NULL on failure; call * SDL_GetError() for more information. 
* diff --git a/vendor/sdl3/include/SDL_revision.h b/vendor/sdl3/include/SDL_revision.h index 18f7c4d6c..f99e03a95 100644 --- a/vendor/sdl3/include/SDL_revision.h +++ b/vendor/sdl3/include/SDL_revision.h @@ -48,9 +48,9 @@ */ #define SDL_REVISION "Some arbitrary string decided at SDL build time" #elif defined(SDL_VENDOR_INFO) -#define SDL_REVISION "release-3.2.0-0-g535d80bad (" SDL_VENDOR_INFO ")" +#define SDL_REVISION "release-3.2.10-0-g877399b2b (" SDL_VENDOR_INFO ")" #else -#define SDL_REVISION "release-3.2.0-0-g535d80bad" +#define SDL_REVISION "release-3.2.10-0-g877399b2b" #endif #endif /* SDL_revision_h_ */ diff --git a/vendor/sdl3/include/SDL_stdinc.h b/vendor/sdl3/include/SDL_stdinc.h index 4e15a3c64..b2728da2f 100644 --- a/vendor/sdl3/include/SDL_stdinc.h +++ b/vendor/sdl3/include/SDL_stdinc.h @@ -1299,8 +1299,11 @@ extern "C" { * * If `size` is 0, it will be set to 1. * - * If you want to allocate memory aligned to a specific alignment, consider - * using SDL_aligned_alloc(). + * If the allocation is successful, the returned pointer is guaranteed to be + * aligned to either the *fundamental alignment* (`alignof(max_align_t)` in + * C11 and later) or `2 * sizeof(void *)`, whichever is smaller. Use + * SDL_aligned_alloc() if you need to allocate memory aligned to an alignment + * greater than this guarantee. * * \param size the size to allocate. * \returns a pointer to the allocated memory, or NULL if allocation failed. @@ -1323,6 +1326,10 @@ extern SDL_DECLSPEC SDL_MALLOC void * SDLCALL SDL_malloc(size_t size); * * If either of `nmemb` or `size` is 0, they will both be set to 1. * + * If the allocation is successful, the returned pointer is guaranteed to be + * aligned to either the *fundamental alignment* (`alignof(max_align_t)` in + * C11 and later) or `2 * sizeof(void *)`, whichever is smaller. + * * \param nmemb the number of elements in the array. * \param size the size of each element of the array. 
* \returns a pointer to the allocated array, or NULL if allocation failed. @@ -1357,6 +1364,11 @@ extern SDL_DECLSPEC SDL_MALLOC SDL_ALLOC_SIZE2(1, 2) void * SDLCALL SDL_calloc(s * - If it returns NULL (indicating failure), then `mem` will remain valid and * must still be freed with SDL_free(). * + * If the allocation is successfully resized, the returned pointer is + * guaranteed to be aligned to either the *fundamental alignment* + * (`alignof(max_align_t)` in C11 and later) or `2 * sizeof(void *)`, + * whichever is smaller. + * * \param mem a pointer to allocated memory to reallocate, or NULL. * \param size the new size of the memory. * \returns a pointer to the newly allocated memory, or NULL if allocation @@ -4243,14 +4255,14 @@ extern SDL_DECLSPEC int SDLCALL SDL_vasprintf(char **strp, SDL_PRINTF_FORMAT_STR /** * Seeds the pseudo-random number generator. * - * Reusing the seed number will cause SDL_rand_*() to repeat the same stream - * of 'random' numbers. + * Reusing the seed number will cause SDL_rand() to repeat the same stream of + * 'random' numbers. * * \param seed the value to use as a random number seed, or 0 to use * SDL_GetPerformanceCounter(). * * \threadsafety This should be called on the same thread that calls - * SDL_rand*() + * SDL_rand() * * \since This function is available since SDL 3.2.0. * @@ -4701,7 +4713,7 @@ extern SDL_DECLSPEC double SDLCALL SDL_atan2(double y, double x); * * \since This function is available since SDL 3.2.0. * - * \sa SDL_atan2f + * \sa SDL_atan2 * \sa SDL_atan * \sa SDL_tan */ @@ -4810,7 +4822,7 @@ extern SDL_DECLSPEC double SDLCALL SDL_copysign(double x, double y); * * \since This function is available since SDL 3.2.0. 
* - * \sa SDL_copysignf + * \sa SDL_copysign * \sa SDL_fabsf */ extern SDL_DECLSPEC float SDLCALL SDL_copysignf(float x, float y); @@ -4943,7 +4955,7 @@ extern SDL_DECLSPEC float SDLCALL SDL_expf(float x); * Range: `0 <= y <= INF` * * This function operates on double-precision floating point values, use - * SDL_copysignf for single-precision floats. + * SDL_fabsf for single-precision floats. * * \param x floating point value to use as the magnitude. * \returns the absolute value of `x`. @@ -4964,7 +4976,7 @@ extern SDL_DECLSPEC double SDLCALL SDL_fabs(double x); * Range: `0 <= y <= INF` * * This function operates on single-precision floating point values, use - * SDL_copysignf for double-precision floats. + * SDL_fabs for double-precision floats. * * \param x floating point value to use as the magnitude. * \returns the absolute value of `x`. @@ -5016,7 +5028,7 @@ extern SDL_DECLSPEC double SDLCALL SDL_floor(double x); * Range: `-INF <= y <= INF`, y integer * * This function operates on single-precision floating point values, use - * SDL_floorf for double-precision floats. + * SDL_floor for double-precision floats. * * \param x floating point value. * \returns the floor of `x`. @@ -5073,7 +5085,7 @@ extern SDL_DECLSPEC double SDLCALL SDL_trunc(double x); * Range: `-INF <= y <= INF`, y integer * * This function operates on single-precision floating point values, use - * SDL_truncf for double-precision floats. + * SDL_trunc for double-precision floats. * * \param x floating point value. * \returns `x` truncated to an integer. @@ -5131,7 +5143,7 @@ extern SDL_DECLSPEC double SDLCALL SDL_fmod(double x, double y); * Range: `-y <= z <= y` * * This function operates on single-precision floating point values, use - * SDL_fmod for single-precision floats. + * SDL_fmod for double-precision floats. * * \param x the numerator. * \param y the denominator. Must not be 0. @@ -5409,7 +5421,7 @@ extern SDL_DECLSPEC double SDLCALL SDL_pow(double x, double y); * instead. 
* * This function operates on single-precision floating point values, use - * SDL_powf for double-precision floats. + * SDL_pow for double-precision floats. * * This function may use a different approximation across different versions, * platforms and configurations. i.e, it can return a different value given @@ -5469,8 +5481,8 @@ extern SDL_DECLSPEC double SDLCALL SDL_round(double x); * * Range: `-INF <= y <= INF`, y integer * - * This function operates on double-precision floating point values, use - * SDL_roundf for single-precision floats. To get the result as an integer + * This function operates on single-precision floating point values, use + * SDL_round for double-precision floats. To get the result as an integer * type, use SDL_lroundf. * * \param x floating point value. @@ -5499,7 +5511,7 @@ extern SDL_DECLSPEC float SDLCALL SDL_roundf(float x); * Range: `MIN_LONG <= y <= MAX_LONG` * * This function operates on double-precision floating point values, use - * SDL_lround for single-precision floats. To get the result as a + * SDL_lroundf for single-precision floats. To get the result as a * floating-point type, use SDL_round. * * \param x floating point value. @@ -5528,8 +5540,8 @@ extern SDL_DECLSPEC long SDLCALL SDL_lround(double x); * Range: `MIN_LONG <= y <= MAX_LONG` * * This function operates on single-precision floating point values, use - * SDL_lroundf for double-precision floats. To get the result as a - * floating-point type, use SDL_roundf, + * SDL_lround for double-precision floats. To get the result as a + * floating-point type, use SDL_roundf. * * \param x floating point value. * \returns the nearest integer to `x`. @@ -5742,7 +5754,7 @@ extern SDL_DECLSPEC double SDLCALL SDL_tan(double x); * Range: `-INF <= y <= INF` * * This function operates on single-precision floating point values, use - * SDL_tanf for double-precision floats. + * SDL_tan for double-precision floats. 
* * This function may use a different approximation across different versions, * platforms and configurations. i.e, it can return a different value given @@ -5969,7 +5981,6 @@ char *strdup(const char *str); their prototype defined (clang-diagnostic-implicit-function-declaration) */ #include #include -#include #define SDL_malloc malloc #define SDL_calloc calloc diff --git a/vendor/sdl3/include/SDL_storage.h b/vendor/sdl3/include/SDL_storage.h index 8b89ace18..6837ebaac 100644 --- a/vendor/sdl3/include/SDL_storage.h +++ b/vendor/sdl3/include/SDL_storage.h @@ -450,7 +450,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_CloseStorage(SDL_Storage *storage); * * This function should be called in regular intervals until it returns true - * however, it is not recommended to spinwait on this call, as the backend may - * depend on a synchronous message loop. + * depend on a synchronous message loop. You might instead poll this in your + * game's main loop while processing events and drawing a loading screen. * * \param storage a storage container to query. * \returns true if the container is ready, false otherwise. @@ -636,10 +637,10 @@ extern SDL_DECLSPEC Uint64 SDLCALL SDL_GetStorageSpaceRemaining(SDL_Storage *sto * Enumerate a directory tree, filtered by pattern, and return a list. * * Files are filtered out if they don't match the string in `pattern`, which - * may contain wildcard characters '*' (match everything) and '?' (match one + * may contain wildcard characters `*` (match everything) and `?` (match one * character). If pattern is NULL, no filtering is done and all results are * returned. Subdirectories are permitted, and are specified with a path - * separator of '/'. Wildcard characters '*' and '?' never match a path + * separator of '/'. Wildcard characters `*` and `?` never match a path * separator. 
* * `flags` may be set to SDL_GLOB_CASEINSENSITIVE to make the pattern matching diff --git a/vendor/sdl3/include/SDL_surface.h b/vendor/sdl3/include/SDL_surface.h index 0752f5307..7bff7cfb2 100644 --- a/vendor/sdl3/include/SDL_surface.h +++ b/vendor/sdl3/include/SDL_surface.h @@ -73,7 +73,7 @@ typedef Uint32 SDL_SurfaceFlags; * * \since This macro is available since SDL 3.2.0. */ -#define SDL_MUSTLOCK(S) ((((S)->flags & SDL_SURFACE_LOCK_NEEDED)) == SDL_SURFACE_LOCK_NEEDED) +#define SDL_MUSTLOCK(S) (((S)->flags & SDL_SURFACE_LOCK_NEEDED) == SDL_SURFACE_LOCK_NEEDED) /** * The scaling mode. @@ -82,6 +82,7 @@ typedef Uint32 SDL_SurfaceFlags; */ typedef enum SDL_ScaleMode { + SDL_SCALEMODE_INVALID = -1, SDL_SCALEMODE_NEAREST, /**< nearest pixel sampling */ SDL_SCALEMODE_LINEAR /**< linear filtering */ } SDL_ScaleMode; @@ -120,6 +121,9 @@ typedef enum SDL_FlipMode * format with a pitch of 32 would consist of 32x32 bytes of Y plane followed * by 32x16 bytes of UV plane. * + * When a surface holds MJPG format data, pixels points at the compressed JPEG + * image and pitch is the length of that data. + * * \since This struct is available since SDL 3.2.0. * * \sa SDL_CreateSurface @@ -153,6 +157,8 @@ typedef struct SDL_Surface SDL_Surface; * \returns the new SDL_Surface structure that is created or NULL on failure; * call SDL_GetError() for more information. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_CreateSurfaceFrom @@ -181,6 +187,8 @@ extern SDL_DECLSPEC SDL_Surface * SDLCALL SDL_CreateSurface(int width, int heigh * \returns the new SDL_Surface structure that is created or NULL on failure; * call SDL_GetError() for more information. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. 
* * \sa SDL_CreateSurface @@ -195,6 +203,8 @@ extern SDL_DECLSPEC SDL_Surface * SDLCALL SDL_CreateSurfaceFrom(int width, int h * * \param surface the SDL_Surface to free. * + * \threadsafety No other thread should be using the surface when it is freed. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_CreateSurface @@ -221,11 +231,17 @@ extern SDL_DECLSPEC void SDLCALL SDL_DestroySurface(SDL_Surface *surface); * the same tone mapping that Chrome uses for HDR content, the form "*=N", * where N is a floating point scale factor applied in linear space, and * "none", which disables tone mapping. This defaults to "chrome". + * - `SDL_PROP_SURFACE_HOTSPOT_X_NUMBER`: the hotspot pixel offset from the + * left edge of the image, if this surface is being used as a cursor. + * - `SDL_PROP_SURFACE_HOTSPOT_Y_NUMBER`: the hotspot pixel offset from the + * top edge of the image, if this surface is being used as a cursor. * * \param surface the SDL_Surface structure to query. * \returns a valid property ID on success or 0 on failure; call * SDL_GetError() for more information. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. */ extern SDL_DECLSPEC SDL_PropertiesID SDLCALL SDL_GetSurfaceProperties(SDL_Surface *surface); @@ -233,6 +249,8 @@ extern SDL_DECLSPEC SDL_PropertiesID SDLCALL SDL_GetSurfaceProperties(SDL_Surfac #define SDL_PROP_SURFACE_SDR_WHITE_POINT_FLOAT "SDL.surface.SDR_white_point" #define SDL_PROP_SURFACE_HDR_HEADROOM_FLOAT "SDL.surface.HDR_headroom" #define SDL_PROP_SURFACE_TONEMAP_OPERATOR_STRING "SDL.surface.tonemap" +#define SDL_PROP_SURFACE_HOTSPOT_X_NUMBER "SDL.surface.hotspot.x" +#define SDL_PROP_SURFACE_HOTSPOT_Y_NUMBER "SDL.surface.hotspot.y" /** * Set the colorspace used by a surface. 
@@ -246,6 +264,8 @@ extern SDL_DECLSPEC SDL_PropertiesID SDLCALL SDL_GetSurfaceProperties(SDL_Surfac * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_GetSurfaceColorspace @@ -263,6 +283,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetSurfaceColorspace(SDL_Surface *surface, * \returns the colorspace used by the surface, or SDL_COLORSPACE_UNKNOWN if * the surface is NULL. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_SetSurfaceColorspace @@ -291,6 +313,8 @@ extern SDL_DECLSPEC SDL_Colorspace SDLCALL SDL_GetSurfaceColorspace(SDL_Surface * the surface didn't have an index format); call SDL_GetError() for * more information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_SetPaletteColors @@ -307,6 +331,8 @@ extern SDL_DECLSPEC SDL_Palette * SDLCALL SDL_CreateSurfacePalette(SDL_Surface * * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_CreatePalette @@ -321,6 +347,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetSurfacePalette(SDL_Surface *surface, SDL * \returns a pointer to the palette used by the surface, or NULL if there is * no palette used. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_SetSurfacePalette @@ -344,6 +372,8 @@ extern SDL_DECLSPEC SDL_Palette * SDLCALL SDL_GetSurfacePalette(SDL_Surface *sur * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. 
* * \sa SDL_RemoveSurfaceAlternateImages @@ -358,6 +388,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_AddSurfaceAlternateImage(SDL_Surface *surfa * \param surface the SDL_Surface structure to query. * \returns true if alternate versions are available or false otherwise. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_AddSurfaceAlternateImage @@ -383,6 +415,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SurfaceHasAlternateImages(SDL_Surface *surf * failure; call SDL_GetError() for more information. This should be * freed with SDL_free() when it is no longer needed. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_AddSurfaceAlternateImage @@ -399,6 +433,8 @@ extern SDL_DECLSPEC SDL_Surface ** SDLCALL SDL_GetSurfaceImages(SDL_Surface *sur * * \param surface the SDL_Surface structure to update. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_AddSurfaceAlternateImage @@ -423,6 +459,10 @@ extern SDL_DECLSPEC void SDLCALL SDL_RemoveSurfaceAlternateImages(SDL_Surface *s * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. The locking referred to by + * this function is making the pixels available for direct + * access, not thread-safe locking. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_MUSTLOCK @@ -435,6 +475,10 @@ extern SDL_DECLSPEC bool SDLCALL SDL_LockSurface(SDL_Surface *surface); * * \param surface the SDL_Surface structure to be unlocked. * + * \threadsafety This function is not thread safe. The locking referred to by + * this function is making the pixels available for direct + * access, not thread-safe locking. + * * \since This function is available since SDL 3.2.0. 
* * \sa SDL_LockSurface @@ -453,6 +497,8 @@ extern SDL_DECLSPEC void SDLCALL SDL_UnlockSurface(SDL_Surface *surface); * \returns a pointer to a new SDL_Surface structure or NULL on failure; call * SDL_GetError() for more information. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_DestroySurface @@ -471,6 +517,8 @@ extern SDL_DECLSPEC SDL_Surface * SDLCALL SDL_LoadBMP_IO(SDL_IOStream *src, bool * \returns a pointer to a new SDL_Surface structure or NULL on failure; call * SDL_GetError() for more information. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_DestroySurface @@ -495,6 +543,8 @@ extern SDL_DECLSPEC SDL_Surface * SDLCALL SDL_LoadBMP(const char *file); * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_LoadBMP_IO @@ -516,6 +566,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SaveBMP_IO(SDL_Surface *surface, SDL_IOStre * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_LoadBMP @@ -534,6 +586,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SaveBMP(SDL_Surface *surface, const char *f * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_BlitSurface @@ -550,6 +604,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetSurfaceRLE(SDL_Surface *surface, bool en * \param surface the SDL_Surface structure to query. * \returns true if the surface is RLE enabled, false otherwise. 
* + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_SetSurfaceRLE @@ -572,6 +628,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SurfaceHasRLE(SDL_Surface *surface); * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_GetSurfaceColorKey @@ -588,6 +646,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetSurfaceColorKey(SDL_Surface *surface, bo * \param surface the SDL_Surface structure to query. * \returns true if the surface has a color key, false otherwise. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_SetSurfaceColorKey @@ -608,6 +668,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SurfaceHasColorKey(SDL_Surface *surface); * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_SetSurfaceColorKey @@ -631,6 +693,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_GetSurfaceColorKey(SDL_Surface *surface, Ui * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_GetSurfaceColorMod @@ -649,6 +713,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetSurfaceColorMod(SDL_Surface *surface, Ui * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. 
* * \sa SDL_GetSurfaceAlphaMod @@ -669,6 +735,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_GetSurfaceColorMod(SDL_Surface *surface, Ui * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_GetSurfaceAlphaMod @@ -684,6 +752,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetSurfaceAlphaMod(SDL_Surface *surface, Ui * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_GetSurfaceColorMod @@ -703,6 +773,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_GetSurfaceAlphaMod(SDL_Surface *surface, Ui * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_GetSurfaceBlendMode @@ -717,6 +789,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetSurfaceBlendMode(SDL_Surface *surface, S * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_SetSurfaceBlendMode @@ -738,6 +812,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_GetSurfaceBlendMode(SDL_Surface *surface, S * \returns true if the rectangle intersects the surface, otherwise false and * blits will be completely clipped. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_GetSurfaceClipRect @@ -757,6 +833,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetSurfaceClipRect(SDL_Surface *surface, co * \returns true on success or false on failure; call SDL_GetError() for more * information. 
* + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_SetSurfaceClipRect @@ -771,6 +849,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_GetSurfaceClipRect(SDL_Surface *surface, SD * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. */ extern SDL_DECLSPEC bool SDLCALL SDL_FlipSurface(SDL_Surface *surface, SDL_FlipMode flip); @@ -787,6 +867,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_FlipSurface(SDL_Surface *surface, SDL_FlipM * \returns a copy of the surface or NULL on failure; call SDL_GetError() for * more information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_DestroySurface @@ -806,6 +888,8 @@ extern SDL_DECLSPEC SDL_Surface * SDLCALL SDL_DuplicateSurface(SDL_Surface *surf * \returns a copy of the surface or NULL on failure; call SDL_GetError() for * more information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_DestroySurface @@ -831,6 +915,8 @@ extern SDL_DECLSPEC SDL_Surface * SDLCALL SDL_ScaleSurface(SDL_Surface *surface, * \returns the new SDL_Surface structure that is created or NULL on failure; * call SDL_GetError() for more information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_ConvertSurfaceAndColorspace @@ -857,6 +943,8 @@ extern SDL_DECLSPEC SDL_Surface * SDLCALL SDL_ConvertSurface(SDL_Surface *surfac * \returns the new SDL_Surface structure that is created or NULL on failure; * call SDL_GetError() for more information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. 
* * \sa SDL_ConvertSurface @@ -878,6 +966,10 @@ extern SDL_DECLSPEC SDL_Surface * SDLCALL SDL_ConvertSurfaceAndColorspace(SDL_Su * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety The same destination pixels should not be used from two + * threads at once. It is safe to use the same source pixels + * from multiple threads. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_ConvertPixelsAndColorspace @@ -907,6 +999,10 @@ extern SDL_DECLSPEC bool SDLCALL SDL_ConvertPixels(int width, int height, SDL_Pi * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety The same destination pixels should not be used from two + * threads at once. It is safe to use the same source pixels + * from multiple threads. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_ConvertPixels @@ -931,6 +1027,10 @@ extern SDL_DECLSPEC bool SDLCALL SDL_ConvertPixelsAndColorspace(int width, int h * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety The same destination pixels should not be used from two + * threads at once. It is safe to use the same source pixels + * from multiple threads. + * * \since This function is available since SDL 3.2.0. */ extern SDL_DECLSPEC bool SDLCALL SDL_PremultiplyAlpha(int width, int height, SDL_PixelFormat src_format, const void *src, int src_pitch, SDL_PixelFormat dst_format, void *dst, int dst_pitch, bool linear); @@ -946,6 +1046,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_PremultiplyAlpha(int width, int height, SDL * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. 
*/ extern SDL_DECLSPEC bool SDLCALL SDL_PremultiplySurfaceAlpha(SDL_Surface *surface, bool linear); @@ -966,6 +1068,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_PremultiplySurfaceAlpha(SDL_Surface *surfac * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. */ extern SDL_DECLSPEC bool SDLCALL SDL_ClearSurface(SDL_Surface *surface, float r, float g, float b, float a); @@ -989,6 +1093,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_ClearSurface(SDL_Surface *surface, float r, * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_FillSurfaceRects @@ -1014,6 +1120,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_FillSurfaceRect(SDL_Surface *dst, const SDL * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_FillSurfaceRect @@ -1087,9 +1195,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_FillSurfaceRects(SDL_Surface *dst, const SD * \returns true on success or false on failure; call SDL_GetError() for more * information. * - * \threadsafety The same destination surface should not be used from two - * threads at once. It is safe to use the same source surface - * from multiple threads. + * \threadsafety Only one thread should be using the `src` and `dst` surfaces + * at any given time. * * \since This function is available since SDL 3.2.0. * @@ -1112,9 +1219,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_BlitSurface(SDL_Surface *src, const SDL_Rec * \returns true on success or false on failure; call SDL_GetError() for more * information. * - * \threadsafety The same destination surface should not be used from two - * threads at once. 
It is safe to use the same source surface - * from multiple threads. + * \threadsafety Only one thread should be using the `src` and `dst` surfaces + * at any given time. * * \since This function is available since SDL 3.2.0. * @@ -1137,9 +1243,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_BlitSurfaceUnchecked(SDL_Surface *src, cons * \returns true on success or false on failure; call SDL_GetError() for more * information. * - * \threadsafety The same destination surface should not be used from two - * threads at once. It is safe to use the same source surface - * from multiple threads. + * \threadsafety Only one thread should be using the `src` and `dst` surfaces + * at any given time. * * \since This function is available since SDL 3.2.0. * @@ -1163,9 +1268,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_BlitSurfaceScaled(SDL_Surface *src, const S * \returns true on success or false on failure; call SDL_GetError() for more * information. * - * \threadsafety The same destination surface should not be used from two - * threads at once. It is safe to use the same source surface - * from multiple threads. + * \threadsafety Only one thread should be using the `src` and `dst` surfaces + * at any given time. * * \since This function is available since SDL 3.2.0. * @@ -1173,6 +1277,28 @@ extern SDL_DECLSPEC bool SDLCALL SDL_BlitSurfaceScaled(SDL_Surface *src, const S */ extern SDL_DECLSPEC bool SDLCALL SDL_BlitSurfaceUncheckedScaled(SDL_Surface *src, const SDL_Rect *srcrect, SDL_Surface *dst, const SDL_Rect *dstrect, SDL_ScaleMode scaleMode); +/** + * Perform a stretched pixel copy from one surface to another. + * + * \param src the SDL_Surface structure to be copied from. + * \param srcrect the SDL_Rect structure representing the rectangle to be + * copied, may not be NULL. + * \param dst the SDL_Surface structure that is the blit target. + * \param dstrect the SDL_Rect structure representing the target rectangle in + * the destination surface, may not be NULL. 
+ * \param scaleMode the SDL_ScaleMode to be used. + * \returns true on success or false on failure; call SDL_GetError() for more + * information. + * + * \threadsafety Only one thread should be using the `src` and `dst` surfaces + * at any given time. + * + * \since This function is available since SDL 3.4.0. + * + * \sa SDL_BlitSurfaceScaled + */ +extern SDL_DECLSPEC bool SDLCALL SDL_StretchSurface(SDL_Surface *src, const SDL_Rect *srcrect, SDL_Surface *dst, const SDL_Rect *dstrect, SDL_ScaleMode scaleMode); + /** * Perform a tiled blit to a destination surface, which may be of a different * format. @@ -1189,9 +1315,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_BlitSurfaceUncheckedScaled(SDL_Surface *src * \returns true on success or false on failure; call SDL_GetError() for more * information. * - * \threadsafety The same destination surface should not be used from two - * threads at once. It is safe to use the same source surface - * from multiple threads. + * \threadsafety Only one thread should be using the `src` and `dst` surfaces + * at any given time. * * \since This function is available since SDL 3.2.0. * @@ -1219,9 +1344,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_BlitSurfaceTiled(SDL_Surface *src, const SD * \returns true on success or false on failure; call SDL_GetError() for more * information. * - * \threadsafety The same destination surface should not be used from two - * threads at once. It is safe to use the same source surface - * from multiple threads. + * \threadsafety Only one thread should be using the `src` and `dst` surfaces + * at any given time. * * \since This function is available since SDL 3.2.0. * @@ -1256,9 +1380,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_BlitSurfaceTiledWithScale(SDL_Surface *src, * \returns true on success or false on failure; call SDL_GetError() for more * information. * - * \threadsafety The same destination surface should not be used from two - * threads at once. 
It is safe to use the same source surface - * from multiple threads. + * \threadsafety Only one thread should be using the `src` and `dst` surfaces + * at any given time. * * \since This function is available since SDL 3.2.0. * @@ -1290,6 +1413,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_BlitSurface9Grid(SDL_Surface *src, const SD * \param b the blue component of the pixel in the range 0-255. * \returns a pixel value. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_MapSurfaceRGBA @@ -1321,6 +1446,8 @@ extern SDL_DECLSPEC Uint32 SDLCALL SDL_MapSurfaceRGB(SDL_Surface *surface, Uint8 * \param a the alpha component of the pixel in the range 0-255. * \returns a pixel value. * + * \threadsafety It is safe to call this function from any thread. + * * \since This function is available since SDL 3.2.0. * * \sa SDL_MapSurfaceRGB @@ -1350,6 +1477,8 @@ extern SDL_DECLSPEC Uint32 SDLCALL SDL_MapSurfaceRGBA(SDL_Surface *surface, Uint * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. */ extern SDL_DECLSPEC bool SDLCALL SDL_ReadSurfacePixel(SDL_Surface *surface, int x, int y, Uint8 *r, Uint8 *g, Uint8 *b, Uint8 *a); @@ -1374,6 +1503,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_ReadSurfacePixel(SDL_Surface *surface, int * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. */ extern SDL_DECLSPEC bool SDLCALL SDL_ReadSurfacePixelFloat(SDL_Surface *surface, int x, int y, float *r, float *g, float *b, float *a); @@ -1397,6 +1528,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_ReadSurfacePixelFloat(SDL_Surface *surface, * \returns true on success or false on failure; call SDL_GetError() for more * information. 
* + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. */ extern SDL_DECLSPEC bool SDLCALL SDL_WriteSurfacePixel(SDL_Surface *surface, int x, int y, Uint8 r, Uint8 g, Uint8 b, Uint8 a); @@ -1417,6 +1550,8 @@ extern SDL_DECLSPEC bool SDLCALL SDL_WriteSurfacePixel(SDL_Surface *surface, int * \returns true on success or false on failure; call SDL_GetError() for more * information. * + * \threadsafety This function is not thread safe. + * * \since This function is available since SDL 3.2.0. */ extern SDL_DECLSPEC bool SDLCALL SDL_WriteSurfacePixelFloat(SDL_Surface *surface, int x, int y, float r, float g, float b, float a); diff --git a/vendor/sdl3/include/SDL_test_common.h b/vendor/sdl3/include/SDL_test_common.h index 3ab1ad01c..91efe8ac3 100644 --- a/vendor/sdl3/include/SDL_test_common.h +++ b/vendor/sdl3/include/SDL_test_common.h @@ -177,7 +177,7 @@ extern "C" { * * \returns a newly allocated common state object. */ -SDLTest_CommonState *SDLCALL SDLTest_CommonCreateState(char **argv, SDL_InitFlags flags); +SDLTest_CommonState * SDLCALL SDLTest_CommonCreateState(char **argv, SDL_InitFlags flags); /** * Free the common state object. diff --git a/vendor/sdl3/include/SDL_thread.h b/vendor/sdl3/include/SDL_thread.h index 277535fcb..e981b5429 100644 --- a/vendor/sdl3/include/SDL_thread.h +++ b/vendor/sdl3/include/SDL_thread.h @@ -139,7 +139,7 @@ typedef enum SDL_ThreadState * * \since This datatype is available since SDL 3.2.0. 
*/ -typedef int (SDLCALL * SDL_ThreadFunction) (void *data); +typedef int (SDLCALL *SDL_ThreadFunction) (void *data); #ifdef SDL_WIKI_DOCUMENTATION_SECTION diff --git a/vendor/sdl3/include/SDL_tray.h b/vendor/sdl3/include/SDL_tray.h index 0b05db25b..1780b0ba5 100644 --- a/vendor/sdl3/include/SDL_tray.h +++ b/vendor/sdl3/include/SDL_tray.h @@ -118,7 +118,7 @@ typedef void (SDLCALL *SDL_TrayCallback)(void *userdata, SDL_TrayEntry *entry); * \sa SDL_GetTrayMenu * \sa SDL_DestroyTray */ -extern SDL_DECLSPEC SDL_Tray *SDLCALL SDL_CreateTray(SDL_Surface *icon, const char *tooltip); +extern SDL_DECLSPEC SDL_Tray * SDLCALL SDL_CreateTray(SDL_Surface *icon, const char *tooltip); /** * Updates the system tray icon's icon. @@ -172,7 +172,7 @@ extern SDL_DECLSPEC void SDLCALL SDL_SetTrayTooltip(SDL_Tray *tray, const char * * \sa SDL_GetTrayMenu * \sa SDL_GetTrayMenuParentTray */ -extern SDL_DECLSPEC SDL_TrayMenu *SDLCALL SDL_CreateTrayMenu(SDL_Tray *tray); +extern SDL_DECLSPEC SDL_TrayMenu * SDLCALL SDL_CreateTrayMenu(SDL_Tray *tray); /** * Create a submenu for a system tray entry. @@ -196,7 +196,7 @@ extern SDL_DECLSPEC SDL_TrayMenu *SDLCALL SDL_CreateTrayMenu(SDL_Tray *tray); * \sa SDL_GetTraySubmenu * \sa SDL_GetTrayMenuParentEntry */ -extern SDL_DECLSPEC SDL_TrayMenu *SDLCALL SDL_CreateTraySubmenu(SDL_TrayEntry *entry); +extern SDL_DECLSPEC SDL_TrayMenu * SDLCALL SDL_CreateTraySubmenu(SDL_TrayEntry *entry); /** * Gets a previously created tray menu. @@ -220,7 +220,7 @@ extern SDL_DECLSPEC SDL_TrayMenu *SDLCALL SDL_CreateTraySubmenu(SDL_TrayEntry *e * \sa SDL_CreateTray * \sa SDL_CreateTrayMenu */ -extern SDL_DECLSPEC SDL_TrayMenu *SDLCALL SDL_GetTrayMenu(SDL_Tray *tray); +extern SDL_DECLSPEC SDL_TrayMenu * SDLCALL SDL_GetTrayMenu(SDL_Tray *tray); /** * Gets a previously created tray entry submenu. 
@@ -244,14 +244,14 @@ extern SDL_DECLSPEC SDL_TrayMenu *SDLCALL SDL_GetTrayMenu(SDL_Tray *tray); * \sa SDL_InsertTrayEntryAt * \sa SDL_CreateTraySubmenu */ -extern SDL_DECLSPEC SDL_TrayMenu *SDLCALL SDL_GetTraySubmenu(SDL_TrayEntry *entry); +extern SDL_DECLSPEC SDL_TrayMenu * SDLCALL SDL_GetTraySubmenu(SDL_TrayEntry *entry); /** * Returns a list of entries in the menu, in order. * * \param menu The menu to get entries from. - * \param size An optional pointer to obtain the number of entries in the - * menu. + * \param count An optional pointer to obtain the number of entries in the + * menu. * \returns a NULL-terminated list of entries within the given menu. The * pointer becomes invalid when any function that inserts or deletes * entries in the menu is called. @@ -264,7 +264,7 @@ extern SDL_DECLSPEC SDL_TrayMenu *SDLCALL SDL_GetTraySubmenu(SDL_TrayEntry *entr * \sa SDL_RemoveTrayEntry * \sa SDL_InsertTrayEntryAt */ -extern SDL_DECLSPEC const SDL_TrayEntry **SDLCALL SDL_GetTrayEntries(SDL_TrayMenu *menu, int *size); +extern SDL_DECLSPEC const SDL_TrayEntry ** SDLCALL SDL_GetTrayEntries(SDL_TrayMenu *menu, int *count); /** * Removes a tray entry. @@ -307,7 +307,7 @@ extern SDL_DECLSPEC void SDLCALL SDL_RemoveTrayEntry(SDL_TrayEntry *entry); * \sa SDL_RemoveTrayEntry * \sa SDL_GetTrayEntryParent */ -extern SDL_DECLSPEC SDL_TrayEntry *SDLCALL SDL_InsertTrayEntryAt(SDL_TrayMenu *menu, int pos, const char *label, SDL_TrayEntryFlags flags); +extern SDL_DECLSPEC SDL_TrayEntry * SDLCALL SDL_InsertTrayEntryAt(SDL_TrayMenu *menu, int pos, const char *label, SDL_TrayEntryFlags flags); /** * Sets the label of an entry. 
@@ -348,7 +348,7 @@ extern SDL_DECLSPEC void SDLCALL SDL_SetTrayEntryLabel(SDL_TrayEntry *entry, con * \sa SDL_InsertTrayEntryAt * \sa SDL_SetTrayEntryLabel */ -extern SDL_DECLSPEC const char *SDLCALL SDL_GetTrayEntryLabel(SDL_TrayEntry *entry); +extern SDL_DECLSPEC const char * SDLCALL SDL_GetTrayEntryLabel(SDL_TrayEntry *entry); /** * Sets whether or not an entry is checked. @@ -481,7 +481,7 @@ extern SDL_DECLSPEC void SDLCALL SDL_DestroyTray(SDL_Tray *tray); * * \sa SDL_InsertTrayEntryAt */ -extern SDL_DECLSPEC SDL_TrayMenu *SDLCALL SDL_GetTrayEntryParent(SDL_TrayEntry *entry); +extern SDL_DECLSPEC SDL_TrayMenu * SDLCALL SDL_GetTrayEntryParent(SDL_TrayEntry *entry); /** * Gets the entry for which the menu is a submenu, if the current menu is a @@ -501,7 +501,7 @@ extern SDL_DECLSPEC SDL_TrayMenu *SDLCALL SDL_GetTrayEntryParent(SDL_TrayEntry * * \sa SDL_CreateTraySubmenu * \sa SDL_GetTrayMenuParentTray */ -extern SDL_DECLSPEC SDL_TrayEntry *SDLCALL SDL_GetTrayMenuParentEntry(SDL_TrayMenu *menu); +extern SDL_DECLSPEC SDL_TrayEntry * SDLCALL SDL_GetTrayMenuParentEntry(SDL_TrayMenu *menu); /** * Gets the tray for which this menu is the first-level menu, if the current @@ -521,7 +521,7 @@ extern SDL_DECLSPEC SDL_TrayEntry *SDLCALL SDL_GetTrayMenuParentEntry(SDL_TrayMe * \sa SDL_CreateTrayMenu * \sa SDL_GetTrayMenuParentEntry */ -extern SDL_DECLSPEC SDL_Tray *SDLCALL SDL_GetTrayMenuParentTray(SDL_TrayMenu *menu); +extern SDL_DECLSPEC SDL_Tray * SDLCALL SDL_GetTrayMenuParentTray(SDL_TrayMenu *menu); /** * Update the trays. diff --git a/vendor/sdl3/include/SDL_version.h b/vendor/sdl3/include/SDL_version.h index 55014e3c1..a3b6ae82d 100644 --- a/vendor/sdl3/include/SDL_version.h +++ b/vendor/sdl3/include/SDL_version.h @@ -62,7 +62,7 @@ extern "C" { * * \since This macro is available since SDL 3.2.0. */ -#define SDL_MICRO_VERSION 0 +#define SDL_MICRO_VERSION 10 /** * This macro turns the version numbers into a numeric value. 
diff --git a/vendor/sdl3/include/SDL_vulkan.h b/vendor/sdl3/include/SDL_vulkan.h index 5a487561a..710afbe63 100644 --- a/vendor/sdl3/include/SDL_vulkan.h +++ b/vendor/sdl3/include/SDL_vulkan.h @@ -226,7 +226,7 @@ extern SDL_DECLSPEC char const * const * SDLCALL SDL_Vulkan_GetInstanceExtension extern SDL_DECLSPEC bool SDLCALL SDL_Vulkan_CreateSurface(SDL_Window *window, VkInstance instance, const struct VkAllocationCallbacks *allocator, - VkSurfaceKHR* surface); + VkSurfaceKHR *surface); /** * Destroy the Vulkan rendering surface of a window. diff --git a/vendor/sdl3/sdl3_audio.odin b/vendor/sdl3/sdl3_audio.odin index 43a8a34d2..045f4780e 100644 --- a/vendor/sdl3/sdl3_audio.odin +++ b/vendor/sdl3/sdl3_audio.odin @@ -83,9 +83,9 @@ foreign lib { OpenAudioDevice :: proc(devid: AudioDeviceID, spec: ^AudioSpec) -> AudioDeviceID --- IsAudioDevicePhysical :: proc(devid: AudioDeviceID) -> bool --- IsAudioDevicePlayback :: proc(devid: AudioDeviceID) -> bool --- - PauseAudioDevice :: proc(dev: AudioDeviceID) -> bool --- - ResumeAudioDevice :: proc(dev: AudioDeviceID) -> bool --- - AudioDevicePaused :: proc(dev: AudioDeviceID) -> bool --- + PauseAudioDevice :: proc(devid: AudioDeviceID) -> bool --- + ResumeAudioDevice :: proc(devid: AudioDeviceID) -> bool --- + AudioDevicePaused :: proc(devid: AudioDeviceID) -> bool --- GetAudioDeviceGain :: proc(devid: AudioDeviceID) -> f32 --- SetAudioDeviceGain :: proc(devid: AudioDeviceID, gain: f32) -> bool --- CloseAudioDevice :: proc(devid: AudioDeviceID) --- diff --git a/vendor/sdl3/sdl3_camera.odin b/vendor/sdl3/sdl3_camera.odin index d86d98746..7d46bd2bd 100644 --- a/vendor/sdl3/sdl3_camera.odin +++ b/vendor/sdl3/sdl3_camera.odin @@ -27,7 +27,7 @@ foreign lib { GetCameraDriver :: proc(index: c.int) -> cstring --- GetCurrentCameraDriver :: proc() -> cstring --- GetCameras :: proc(count: ^c.int) -> [^]CameraID --- - GetCameraSupportedFormats :: proc(devid: CameraID, count: ^c.int) -> [^]^CameraSpec --- + GetCameraSupportedFormats :: 
proc(instance_id: CameraID, count: ^c.int) -> [^]^CameraSpec --- GetCameraName :: proc(instance_id: CameraID) -> cstring --- GetCameraPosition :: proc(instance_id: CameraID) -> CameraPosition --- OpenCamera :: proc(instance_id: CameraID, spec: ^CameraSpec) -> ^Camera --- diff --git a/vendor/sdl3/sdl3_gpu.odin b/vendor/sdl3/sdl3_gpu.odin index da561b62b..ec414f98e 100644 --- a/vendor/sdl3/sdl3_gpu.odin +++ b/vendor/sdl3/sdl3_gpu.odin @@ -516,7 +516,7 @@ GPUVertexBufferDescription :: struct { slot: Uint32, /**< The binding slot of the vertex buffer. */ pitch: Uint32, /**< The byte pitch between consecutive elements of the vertex buffer. */ input_rate: GPUVertexInputRate, /**< Whether attribute addressing is a function of the vertex index or instance index. */ - instance_step_rate: Uint32, /**< The number of instances to draw using the same per-instance data before advancing in the instance buffer by one element. Ignored unless input_rate is GPU_VERTEXINPUTRATE_INSTANCE */ + instance_step_rate: Uint32, /**< Reserved for future use. Must be set to 0. */ } GPUVertexAttribute :: struct { @@ -611,8 +611,8 @@ GPURasterizerState :: struct { GPUMultisampleState :: struct { sample_count: GPUSampleCount, /**< The number of samples to be used in rasterization. */ - sample_mask: Uint32, /**< Determines which samples get updated in the render targets. Treated as 0xFFFFFFFF if enable_mask is false. */ - enable_mask: bool, /**< Enables sample masking. */ + sample_mask: Uint32, /**< Reserved for future use. Must be set to 0. */ + enable_mask: bool, /**< Reserved for future use. Must be set to false. 
*/ _: Uint8, _: Uint8, _: Uint8, diff --git a/vendor/sdl3/sdl3_hints.odin b/vendor/sdl3/sdl3_hints.odin index a7398a124..987010c57 100644 --- a/vendor/sdl3/sdl3_hints.odin +++ b/vendor/sdl3/sdl3_hints.odin @@ -123,6 +123,7 @@ HINT_JOYSTICK_WGI :: "SDL_JOYSTICK_WGI" HINT_JOYSTICK_WHEEL_DEVICES :: "SDL_JOYSTICK_WHEEL_DEVICES" HINT_JOYSTICK_WHEEL_DEVICES_EXCLUDED :: "SDL_JOYSTICK_WHEEL_DEVICES_EXCLUDED" HINT_JOYSTICK_ZERO_CENTERED_DEVICES :: "SDL_JOYSTICK_ZERO_CENTERED_DEVICES" +HINT_JOYSTICK_HAPTIC_AXES :: "SDL_JOYSTICK_HAPTIC_AXES" HINT_KEYCODE_OPTIONS :: "SDL_KEYCODE_OPTIONS" HINT_KMSDRM_DEVICE_INDEX :: "SDL_KMSDRM_DEVICE_INDEX" HINT_KMSDRM_REQUIRE_DRM_MASTER :: "SDL_KMSDRM_REQUIRE_DRM_MASTER" @@ -197,6 +198,7 @@ HINT_VIDEO_WAYLAND_MODE_SCALING :: "SDL_VIDEO_WAYLAND_MODE_SCALING" HINT_VIDEO_WAYLAND_PREFER_LIBDECOR :: "SDL_VIDEO_WAYLAND_PREFER_LIBDECOR" HINT_VIDEO_WAYLAND_SCALE_TO_DISPLAY :: "SDL_VIDEO_WAYLAND_SCALE_TO_DISPLAY" HINT_VIDEO_WIN_D3DCOMPILER :: "SDL_VIDEO_WIN_D3DCOMPILER" +HINT_VIDEO_X11_EXTERNAL_WINDOW_INPUT :: "SDL_VIDEO_X11_EXTERNAL_WINDOW_INPUT" HINT_VIDEO_X11_NET_WM_BYPASS_COMPOSITOR :: "SDL_VIDEO_X11_NET_WM_BYPASS_COMPOSITOR" HINT_VIDEO_X11_NET_WM_PING :: "SDL_VIDEO_X11_NET_WM_PING" HINT_VIDEO_X11_NODIRECTCOLOR :: "SDL_VIDEO_X11_NODIRECTCOLOR" diff --git a/vendor/sdl3/sdl3_pixels.odin b/vendor/sdl3/sdl3_pixels.odin index a9dec79fb..a636d13ff 100644 --- a/vendor/sdl3/sdl3_pixels.odin +++ b/vendor/sdl3/sdl3_pixels.odin @@ -277,6 +277,8 @@ PixelFormat :: enum c.int { /* SDL_DEFINE_PIXELFOURCC('P', '0', '1', '0'), */ EXTERNAL_OES = 0x2053454f, /**< Android video texture format */ /* SDL_DEFINE_PIXELFOURCC('O', 'E', 'S', ' ') */ + MJPG = 0x47504a4d, /**< Motion JPEG */ + /* SDL_DEFINE_PIXELFOURCC('M', 'J', 'P', 'G') */ /* Aliases for RGBA byte arrays of color data, for the current platform */ RGBA32 = RGBA8888 when BYTEORDER == BIG_ENDIAN else ABGR8888, diff --git a/vendor/sdl3/sdl3_surface.odin b/vendor/sdl3/sdl3_surface.odin index 
7abbd4ab5..526940147 100644 --- a/vendor/sdl3/sdl3_surface.odin +++ b/vendor/sdl3/sdl3_surface.odin @@ -22,6 +22,7 @@ MUSTLOCK :: proc "c" (S: ^Surface) -> bool { } ScaleMode :: enum c.int { + INVALID = -1, NEAREST, /**< nearest pixel sampling */ LINEAR, /**< linear filtering */ } @@ -45,6 +46,11 @@ Surface :: struct { reserved: rawptr, /**< Reserved for internal use */ } +PROP_SURFACE_SDR_WHITE_POINT_FLOAT :: "SDL.surface.SDR_white_point" +PROP_SURFACE_HDR_HEADROOM_FLOAT :: "SDL.surface.HDR_headroom" +PROP_SURFACE_TONEMAP_OPERATOR_STRING :: "SDL.surface.tonemap" +PROP_SURFACE_HOTSPOT_X_NUMBER :: "SDL.surface.hotspot.x" +PROP_SURFACE_HOTSPOT_Y_NUMBER :: "SDL.surface.hotspot.y" @(default_calling_convention="c", link_prefix="SDL_") foreign lib { @@ -96,6 +102,7 @@ foreign lib { BlitSurfaceUnchecked :: proc(src: ^Surface, srcrect: Maybe(^Rect), dst: ^Surface, dstrect: Maybe(^Rect)) -> bool --- BlitSurfaceScaled :: proc(src: ^Surface, srcrect: Maybe(^Rect), dst: ^Surface, dstrect: Maybe(^Rect), scaleMode: ScaleMode) -> bool --- BlitSurfaceUncheckedScaled :: proc(src: ^Surface, srcrect: Maybe(^Rect), dst: ^Surface, dstrect: Maybe(^Rect), scaleMode: ScaleMode) -> bool --- + StretchSurface :: proc(src: ^Surface, srcrect: Maybe(^Rect), dst: ^Surface, dstrect: Maybe(^Rect), scaleMode: ScaleMode) -> bool --- BlitSurfaceTiled :: proc(src: ^Surface, srcrect: Maybe(^Rect), dst: ^Surface, dstrect: Maybe(^Rect)) -> bool --- BlitSurfaceTiledWithScale :: proc(src: ^Surface, srcrect: Maybe(^Rect), scale: f32, scaleMode: ScaleMode, dst: ^Surface, dstrect: Maybe(^Rect)) -> bool --- BlitSurface9Grid :: proc(src: ^Surface, srcrect: Maybe(^Rect), left_width, right_width, top_height, bottom_height: c.int, scale: f32, scaleMode: ScaleMode, dst: ^Surface, dstrect: Maybe(^Rect)) -> bool --- diff --git a/vendor/sdl3/sdl3_version.odin b/vendor/sdl3/sdl3_version.odin index 9a1f21add..9143a977b 100644 --- a/vendor/sdl3/sdl3_version.odin +++ b/vendor/sdl3/sdl3_version.odin @@ -4,7 +4,7 @@ import 
"core:c" MAJOR_VERSION :: 3 MINOR_VERSION :: 2 -MICRO_VERSION :: 2 +MICRO_VERSION :: 10 @(require_results) VERSIONNUM :: #force_inline proc "c" (major, minor, patch: c.int) -> c.int { return (major * 1000000) + (minor * 1000) + patch } @(require_results) VERSIONNUM_MAJOR :: #force_inline proc "c" (version: c.int) -> c.int { return version / 1000000 } diff --git a/vendor/wgpu/wgpu.js b/vendor/wgpu/wgpu.js index 055f7abab..f3d29eafd 100644 --- a/vendor/wgpu/wgpu.js +++ b/vendor/wgpu/wgpu.js @@ -528,7 +528,7 @@ class WebGPUInterface { return undefined; } - const off = this.struct(ptr); + const off = this.struct(start); return { view: this.textureViews.get(this.mem.loadPtr(off(4))), @@ -2588,7 +2588,12 @@ class WebGPUInterface { } dynamicOffsetCount = this.unwrapBigInt(dynamicOffsetCount); - const dynamicOffsets = this.array(dynamicOffsetCount, dynamicOffsetsPtr, this.mem.loadU32, 4); + const dynamicOffsets = this.array( + dynamicOffsetCount, + dynamicOffsetsPtr, + (ptr) => this.mem.loadU32(ptr), + 4 + ); renderBundleEncoder.setBindGroup(groupIndex, group, dynamicOffsets); }, @@ -2780,7 +2785,12 @@ class WebGPUInterface { } dynamicOffsetCount = this.unwrapBigInt(dynamicOffsetCount); - const dynamicOffsets = this.array(dynamicOffsetCount, dynamicOffsetsPtr, this.mem.loadU32, 4); + const dynamicOffsets = this.array( + dynamicOffsetCount, + dynamicOffsetsPtr, + (ptr) => this.mem.loadU32(ptr), + 4 + ); renderPassEncoder.setBindGroup(groupIndex, group, dynamicOffsets); }, @@ -3087,7 +3097,7 @@ class WebGPUInterface { * @param {number} surfaceCapabilitiesPtr */ wgpuSurfaceCapabilitiesFreeMembers: (surfaceCapabilitiesPtr) => { - const off = this.struct(capabilitiesPtr); + const off = this.struct(surfaceCapabilitiesPtr); off(4); // nextInChain off(8); // usages off(this.mem.intSize); // formatCount diff --git a/vendor/x11/xlib/xlib_procs.odin b/vendor/x11/xlib/xlib_procs.odin index 2d35ab179..2a8d6832b 100644 --- a/vendor/x11/xlib/xlib_procs.odin +++ 
b/vendor/x11/xlib/xlib_procs.odin @@ -190,8 +190,8 @@ foreign xlib { display: ^Display, window: Window, mask: WindowChangesMask, - values: XWindowChanges, - ) --- + changes: ^XWindowChanges, + ) -> i32 --- MoveWindow :: proc( display: ^Display, window: Window, @@ -217,6 +217,11 @@ foreign xlib { window: Window, width: u32, ) --- + SetWindowBorder :: proc( + display: ^Display, + window: Window, + pixel: uint, + ) --- // Window: changing stacking order RaiseWindow :: proc(display: ^Display, window: Window) --- LowerWindow :: proc(display: ^Display, window: Window) --- @@ -250,11 +255,11 @@ foreign xlib { display: ^Display, window: Window, cursor: Cursor, - ) --- + ) -> i32 --- UndefineCursor :: proc( display: ^Display, window: Window, - ) --- + ) -> i32 --- // Windows: querying information QueryTree :: proc( display: ^Display, @@ -268,7 +273,7 @@ foreign xlib { display: ^Display, window: Window, attr: ^XWindowAttributes, - ) --- + ) -> i32 --- GetGeometry :: proc( display: ^Display, drawable: Drawable, @@ -352,19 +357,19 @@ foreign xlib { mode: i32, data: rawptr, count: i32, - ) --- + ) -> i32 --- RotateWindowProperties :: proc( display: ^Display, window: Window, props: [^]Atom, nprops: i32, npos: i32, - ) --- + ) -> i32 --- DeleteProperty :: proc( display: ^Display, window: Window, prop: Atom, - ) --- + ) -> i32 --- // Selections SetSelectionOwner :: proc( display: ^Display, @@ -1007,8 +1012,8 @@ foreign xlib { DisableAccessControl :: proc(display: ^Display) --- // Events SelectInput :: proc(display: ^Display, window: Window, mask: EventMask) --- - Flush :: proc(display: ^Display) --- - Sync :: proc(display: ^Display) --- + Flush :: proc(display: ^Display) -> i32 --- + Sync :: proc(display: ^Display, discard: bool) -> i32 --- EventsQueued :: proc(display: ^Display, mode: EventQueueMode) -> i32 --- Pending :: proc(display: ^Display) -> i32 --- NextEvent :: proc(display: ^Display, event: ^XEvent) ---