From d72db2698bdb7b976bfdee075088d6ec697dafd9 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Sat, 8 Apr 2023 09:57:47 +0900 Subject: [PATCH 1/3] core/crypto/_fiat: Hedge against LLVM cleverness Recent LLVM is getting smart to the point where the optimizer can change a traditional constant-time conditional swap into a pointer swap. Ensure that this does not happen by force-disabling optimization. Additionally, disable inlining the relevant routines such that manual inspection in optimized builds is still reasonably easy to do. --- core/crypto/_fiat/fiat.odin | 6 ++++-- core/crypto/_fiat/field_curve25519/field51.odin | 6 ++++-- core/crypto/_fiat/field_poly1305/field4344.odin | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/core/crypto/_fiat/fiat.odin b/core/crypto/_fiat/fiat.odin index ae9727149..f0551722f 100644 --- a/core/crypto/_fiat/fiat.odin +++ b/core/crypto/_fiat/fiat.odin @@ -9,14 +9,16 @@ package fiat u1 :: distinct u8 i1 :: distinct i8 -cmovznz_u64 :: #force_inline proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) { +@(optimization_mode="none") +cmovznz_u64 :: proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) { x1 := (u64(arg1) * 0xffffffffffffffff) x2 := ((x1 & arg3) | ((~x1) & arg2)) out1 = x2 return } -cmovznz_u32 :: #force_inline proc "contextless" (arg1: u1, arg2, arg3: u32) -> (out1: u32) { +@(optimization_mode="none") +cmovznz_u32 :: proc "contextless" (arg1: u1, arg2, arg3: u32) -> (out1: u32) { x1 := (u32(arg1) * 0xffffffff) x2 := ((x1 & arg3) | ((~x1) & arg2)) out1 = x2 diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin index e4ca98b57..0be94eb51 100644 --- a/core/crypto/_fiat/field_curve25519/field51.odin +++ b/core/crypto/_fiat/field_curve25519/field51.odin @@ -305,7 +305,8 @@ fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Ele out1[4] = x5 } -fe_cond_assign :: proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: int) { +@(optimization_mode="none") +fe_cond_assign :: #force_no_inline proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: int) { x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0]) x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1]) x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2]) @@ -596,7 +597,8 @@ fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) { out1[4] = x5 } -fe_cond_swap :: proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) { +@(optimization_mode="none") +fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) { mask := -u64(arg1) x := (out1[0] ~ out2[0]) & mask x1, y1 := out1[0] ~ x, out2[0] ~ x diff --git a/core/crypto/_fiat/field_poly1305/field4344.odin b/core/crypto/_fiat/field_poly1305/field4344.odin index ba9bc2694..8e8a7cc78 100644 --- a/core/crypto/_fiat/field_poly1305/field4344.odin +++ b/core/crypto/_fiat/field_poly1305/field4344.odin @@ -201,7 +201,8 @@ fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Ele out1[2] = x3 } -fe_cond_assign :: proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: bool) { +@(optimization_mode="none") +fe_cond_assign :: #force_no_inline proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: bool) { x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0]) x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1]) x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2]) @@ -342,7 +343,8 @@ fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) { out1[2] = x3 } -fe_cond_swap :: proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: bool) { +@(optimization_mode="none") +fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: bool) { mask := -u64(arg1) x := (out1[0] ~ out2[0]) & mask x1, y1 := out1[0] ~ x, out2[0] ~ x From b8c2b0105b42f81f44ad0553a0d28f477cdefcb7 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Sat, 8 Apr 2023 10:11:04 +0900 Subject: [PATCH 2/3] core/crypto: Disable optimization for the ct byte compare Hedge against the possibility of a compiler getting clever enough to optimize this pattern as well. --- core/crypto/crypto.odin | 1 + 1 file changed, 1 insertion(+) diff --git a/core/crypto/crypto.odin b/core/crypto/crypto.odin index 35e88c5ed..6cdcacb9c 100644 --- a/core/crypto/crypto.odin +++ b/core/crypto/crypto.odin @@ -26,6 +26,7 @@ compare_constant_time :: proc "contextless" (a, b: []byte) -> int { // // The execution time of this routine is constant regardless of the // contents of the memory being compared. +@(optimization_mode="none") compare_byte_ptrs_constant_time :: proc "contextless" (a, b: ^byte, n: int) -> int { x := mem.slice_ptr(a, n) y := mem.slice_ptr(b, n) From 7fc208154389f1923ebc10906e84b23c1af2b3c5 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Sat, 8 Apr 2023 10:15:00 +0900 Subject: [PATCH 3/3] core/crypto: Add private attributes for internals These constants and internal routines are not intended for use outside the actual implementations themselves. --- core/crypto/chacha20/chacha20.odin | 10 ++++++++++ core/crypto/chacha20poly1305/chacha20poly1305.odin | 5 +++++ core/crypto/poly1305/poly1305.odin | 2 ++ core/crypto/x25519/x25519.odin | 3 +++ tests/core/crypto/test_core_crypto_modern.odin | 8 +++++++- 5 files changed, 27 insertions(+), 1 deletion(-) diff --git a/core/crypto/chacha20/chacha20.odin b/core/crypto/chacha20/chacha20.odin index 229949c22..b29dc1228 100644 --- a/core/crypto/chacha20/chacha20.odin +++ b/core/crypto/chacha20/chacha20.odin @@ -8,15 +8,23 @@ KEY_SIZE :: 32 NONCE_SIZE :: 12 XNONCE_SIZE :: 24 +@(private) _MAX_CTR_IETF :: 0xffffffff +@(private) _BLOCK_SIZE :: 64 +@(private) _STATE_SIZE_U32 :: 16 +@(private) _ROUNDS :: 20 +@(private) _SIGMA_0 : u32 : 0x61707865 +@(private) _SIGMA_1 : u32 : 0x3320646e +@(private) _SIGMA_2 : u32 : 0x79622d32 +@(private) _SIGMA_3 : u32 : 0x6b206574 Context :: struct { @@ -179,6 +187,7 @@ reset :: proc (ctx: ^Context) { ctx._is_initialized = false } +@(private) _do_blocks :: proc (ctx: ^Context, dst, src: []byte, nr_blocks: int) { // Enforce the maximum consumed keystream per nonce. // @@ -441,6 +450,7 @@ _do_blocks :: proc (ctx: ^Context, dst, src: []byte, nr_blocks: int) { } } +@(private) _hchacha20 :: proc (dst, key, nonce: []byte) { x0, x1, x2, x3 := _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3 x4 := util.U32_LE(key[0:4]) diff --git a/core/crypto/chacha20poly1305/chacha20poly1305.odin b/core/crypto/chacha20poly1305/chacha20poly1305.odin index 67d89df56..ae395f9e0 100644 --- a/core/crypto/chacha20poly1305/chacha20poly1305.odin +++ b/core/crypto/chacha20poly1305/chacha20poly1305.odin @@ -10,8 +10,10 @@ KEY_SIZE :: chacha20.KEY_SIZE NONCE_SIZE :: chacha20.NONCE_SIZE TAG_SIZE :: poly1305.TAG_SIZE +@(private) _P_MAX :: 64 * 0xffffffff // 64 * (2^32-1) +@(private) _validate_common_slice_sizes :: proc (tag, key, nonce, aad, text: []byte) { if len(tag) != TAG_SIZE { panic("crypto/chacha20poly1305: invalid destination tag size") @@ -37,7 +39,10 @@ _validate_common_slice_sizes :: proc (tag, key, nonce, aad, text: []byte) { } } +@(private) _PAD: [16]byte + +@(private) _update_mac_pad16 :: #force_inline proc (ctx: ^poly1305.Context, x_len: int) { if pad_len := 16 - (x_len & (16-1)); pad_len != 16 { poly1305.update(ctx, _PAD[:pad_len]) diff --git a/core/crypto/poly1305/poly1305.odin b/core/crypto/poly1305/poly1305.odin index 8986be879..ab320c80c 100644 --- a/core/crypto/poly1305/poly1305.odin +++ b/core/crypto/poly1305/poly1305.odin @@ -8,6 +8,7 @@ import "core:mem" KEY_SIZE :: 32 TAG_SIZE :: 16 +@(private) _BLOCK_SIZE :: 16 sum :: proc (dst, msg, key: []byte) { @@ -141,6 +142,7 @@ reset :: proc (ctx: ^Context) { ctx._is_initialized = false } +@(private) _blocks :: proc (ctx: ^Context, msg: []byte, final := false) { n: field.Tight_Field_Element = --- final_byte := byte(!final) diff --git a/core/crypto/x25519/x25519.odin b/core/crypto/x25519/x25519.odin index dfc8daa47..fc446d25c 100644 --- a/core/crypto/x25519/x25519.odin +++ b/core/crypto/x25519/x25519.odin @@ -6,8 +6,10 @@ import "core:mem" SCALAR_SIZE :: 32 POINT_SIZE :: 32 +@(private) _BASE_POINT: [32]byte = {9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +@(private) _scalar_bit :: #force_inline proc "contextless" (s: ^[32]byte, i: int) -> u8 { if i < 0 { return 0 @@ -15,6 +17,7 @@ _scalar_bit :: #force_inline proc "contextless" (s: ^[32]byte, i: int) -> u8 { return (s[i>>3] >> uint(i&7)) & 1 } +@(private) _scalarmult :: proc (out, scalar, point: ^[32]byte) { // Montgomery pseduo-multiplication taken from Monocypher. diff --git a/tests/core/crypto/test_core_crypto_modern.odin b/tests/core/crypto/test_core_crypto_modern.odin index 6d1ae0047..1d76b715b 100644 --- a/tests/core/crypto/test_core_crypto_modern.odin +++ b/tests/core/crypto/test_core_crypto_modern.odin @@ -269,6 +269,12 @@ TestECDH :: struct { test_x25519 :: proc(t: ^testing.T) { log(t, "Testing X25519") + // Local copy of this so that the base point doesn't need to be exported. + _BASE_POINT: [32]byte = { + 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + } + test_vectors := [?]TestECDH { // Test vectors from RFC 7748 TestECDH{ @@ -295,7 +301,7 @@ test_x25519 :: proc(t: ^testing.T) { // Abuse the test vectors to sanity-check the scalar-basepoint multiply. p1, p2: [x25519.POINT_SIZE]byte x25519.scalarmult_basepoint(p1[:], scalar[:]) - x25519.scalarmult(p2[:], scalar[:], x25519._BASE_POINT[:]) + x25519.scalarmult(p2[:], scalar[:], _BASE_POINT[:]) p1_str, p2_str := hex_string(p1[:]), hex_string(p2[:]) expect(t, p1_str == p2_str, fmt.tprintf("Expected %s for %s * basepoint, but got %s instead", p2_str, v.scalar, p1_str)) }