diff --git a/core/crypto/_fiat/field_poly1305/field.odin b/core/crypto/_fiat/field_poly1305/field.odin index ca458e079..a103f6fc7 100644 --- a/core/crypto/_fiat/field_poly1305/field.odin +++ b/core/crypto/_fiat/field_poly1305/field.odin @@ -1,6 +1,6 @@ package field_poly1305 -import "core:crypto/util" +import "core:encoding/endian" import "core:mem" fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element { @@ -11,7 +11,7 @@ fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) return transmute(^Tight_Field_Element)(arg1) } -fe_from_bytes :: #force_inline proc (out1: ^Tight_Field_Element, arg1: []byte, arg2: byte, sanitize: bool = true) { +fe_from_bytes :: #force_inline proc (out1: ^Tight_Field_Element, arg1: []byte, arg2: byte) { // fiat-crypto's deserialization routine effectively processes a // single byte at a time, and wants 256-bits of input for a value // that will be 128-bits or 129-bits. @@ -22,42 +22,29 @@ fe_from_bytes :: #force_inline proc (out1: ^Tight_Field_Element, arg1: []byte, a assert(len(arg1) == 16) - when ODIN_ARCH == .i386 || ODIN_ARCH == .amd64 { - // While it may be unwise to do deserialization here on our - // own when fiat-crypto provides equivalent functionality, - // doing it this way provides a little under 3x performance - // improvement when optimization is enabled. - src_p := transmute(^[2]u64)(&arg1[0]) - lo := src_p[0] - hi := src_p[1] + // While it may be unwise to do deserialization here on our + // own when fiat-crypto provides equivalent functionality, + // doing it this way provides a little under 3x performance + // improvement when optimization is enabled. + lo := endian.unchecked_get_u64le(arg1[0:]) + hi := endian.unchecked_get_u64le(arg1[8:]) - // This is inspired by poly1305-donna, though adjustments were - // made since a Tight_Field_Element's limbs are 44-bits, 43-bits, - // and 43-bits wide. - // - // Note: This could be transplated into fe_from_u64s, but that - // code is called once per MAC, and is non-criticial path. - hibit := u64(arg2) << 41 // arg2 << 128 - out1[0] = lo & 0xfffffffffff - out1[1] = ((lo >> 44) | (hi << 20)) & 0x7ffffffffff - out1[2] = ((hi >> 23) & 0x7ffffffffff) | hibit - } else { - tmp: [32]byte - copy_slice(tmp[0:16], arg1[:]) - tmp[16] = arg2 - - _fe_from_bytes(out1, &tmp) - if sanitize { - // This is used to deserialize `s` which is confidential. - mem.zero_explicit(&tmp, size_of(tmp)) - } - } + // This is inspired by poly1305-donna, though adjustments were + // made since a Tight_Field_Element's limbs are 44-bits, 43-bits, + // and 43-bits wide. + // + // Note: This could be transplated into fe_from_u64s, but that + // code is called once per MAC, and is non-criticial path. + hibit := u64(arg2) << 41 // arg2 << 128 + out1[0] = lo & 0xfffffffffff + out1[1] = ((lo >> 44) | (hi << 20)) & 0x7ffffffffff + out1[2] = ((hi >> 23) & 0x7ffffffffff) | hibit } fe_from_u64s :: proc "contextless" (out1: ^Tight_Field_Element, lo, hi: u64) { tmp: [32]byte - util.PUT_U64_LE(tmp[0:8], lo) - util.PUT_U64_LE(tmp[8:16], hi) + endian.unchecked_put_u64le(tmp[0:], lo) + endian.unchecked_put_u64le(tmp[8:], hi) _fe_from_bytes(out1, &tmp) diff --git a/core/crypto/poly1305/poly1305.odin b/core/crypto/poly1305/poly1305.odin index ab320c80c..cf60f7166 100644 --- a/core/crypto/poly1305/poly1305.odin +++ b/core/crypto/poly1305/poly1305.odin @@ -1,8 +1,8 @@ package poly1305 import "core:crypto" -import "core:crypto/util" import field "core:crypto/_fiat/field_poly1305" +import "core:encoding/endian" import "core:mem" KEY_SIZE :: 32 @@ -52,8 +52,8 @@ init :: proc (ctx: ^Context, key: []byte) { // r = le_bytes_to_num(key[0..15]) // r = clamp(r) (r &= 0xffffffc0ffffffc0ffffffc0fffffff) - tmp_lo := util.U64_LE(key[0:8]) & 0x0ffffffc0fffffff - tmp_hi := util.U64_LE(key[8:16]) & 0xffffffc0ffffffc + tmp_lo := endian.unchecked_get_u64le(key[0:]) & 0x0ffffffc0fffffff + tmp_hi := endian.unchecked_get_u64le(key[8:]) & 0xffffffc0ffffffc field.fe_from_u64s(&ctx._r, tmp_lo, tmp_hi) // s = le_bytes_to_num(key[16..31]) @@ -151,7 +151,7 @@ _blocks :: proc (ctx: ^Context, msg: []byte, final := false) { data_len := len(data) for data_len >= _BLOCK_SIZE { // n = le_bytes_to_num(msg[((i-1)*16)..*i*16] | [0x01]) - field.fe_from_bytes(&n, data[:_BLOCK_SIZE], final_byte, false) + field.fe_from_bytes(&n, data[:_BLOCK_SIZE], final_byte) // a += n field.fe_add(field.fe_relax_cast(&ctx._a), &ctx._a, &n) // _a unreduced