mirror of
https://github.com/Ed94/Odin.git
synced 2026-06-19 04:12:22 -07:00
Merge pull request #4124 from Yawning/feature/crypto
core/crypto: More improvements
This commit is contained in:
@@ -25,4 +25,5 @@ GHASH_BLOCK_SIZE :: 16
|
||||
GHASH_TAG_SIZE :: 16
|
||||
|
||||
// RCON is the AES keyschedule round constants.
|
||||
@(rodata)
|
||||
RCON := [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36}
|
||||
|
||||
@@ -22,8 +22,6 @@
|
||||
|
||||
package aes_ct64
|
||||
|
||||
import "base:intrinsics"
|
||||
|
||||
// Bitsliced AES for 64-bit general purpose (integer) registers. Each
|
||||
// invocation will process up to 4 blocks at a time. This implementation
|
||||
// is derived from the BearSSL ct64 code, and distributed under a 1-clause
|
||||
@@ -212,11 +210,8 @@ orthogonalize :: proc "contextless" (q: ^[8]u64) {
|
||||
}
|
||||
|
||||
@(require_results)
|
||||
interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check {
|
||||
if len(w) < 4 {
|
||||
intrinsics.trap()
|
||||
}
|
||||
x0, x1, x2, x3 := u64(w[0]), u64(w[1]), u64(w[2]), u64(w[3])
|
||||
interleave_in :: proc "contextless" (w0, w1, w2, w3: u32) -> (q0, q1: u64) #no_bounds_check {
|
||||
x0, x1, x2, x3 := u64(w0), u64(w1), u64(w2), u64(w3)
|
||||
x0 |= (x0 << 16)
|
||||
x1 |= (x1 << 16)
|
||||
x2 |= (x2 << 16)
|
||||
|
||||
@@ -22,12 +22,8 @@
|
||||
|
||||
package aes_ct64
|
||||
|
||||
import "base:intrinsics"
|
||||
|
||||
add_round_key :: proc "contextless" (q: ^[8]u64, sk: []u64) #no_bounds_check {
|
||||
if len(sk) < 8 {
|
||||
intrinsics.trap()
|
||||
}
|
||||
ensure_contextless(len(sk) >= 8, "aes/ct64: invalid round key size")
|
||||
|
||||
q[0] ~= sk[0]
|
||||
q[1] ~= sk[1]
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
|
||||
package aes_ct64
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_aes"
|
||||
import "core:encoding/endian"
|
||||
import "core:mem"
|
||||
@@ -42,7 +41,7 @@ sub_word :: proc "contextless" (x: u32) -> u32 {
|
||||
}
|
||||
|
||||
@(private, require_results)
|
||||
keysched :: proc(comp_skey: []u64, key: []byte) -> int {
|
||||
keysched :: proc "contextless" (comp_skey: []u64, key: []byte) -> int {
|
||||
num_rounds, key_len := 0, len(key)
|
||||
switch key_len {
|
||||
case _aes.KEY_SIZE_128:
|
||||
@@ -52,7 +51,7 @@ keysched :: proc(comp_skey: []u64, key: []byte) -> int {
|
||||
case _aes.KEY_SIZE_256:
|
||||
num_rounds = _aes.ROUNDS_256
|
||||
case:
|
||||
panic("crypto/aes: invalid AES key size")
|
||||
panic_contextless("crypto/aes: invalid AES key size")
|
||||
}
|
||||
|
||||
skey: [60]u32 = ---
|
||||
@@ -78,7 +77,7 @@ keysched :: proc(comp_skey: []u64, key: []byte) -> int {
|
||||
|
||||
q: [8]u64 = ---
|
||||
for i, j := 0, 0; i < nkf; i, j = i + 4, j + 2 {
|
||||
q[0], q[4] = interleave_in(skey[i:])
|
||||
q[0], q[4] = interleave_in(skey[i], skey[i+1], skey[i+2], skey[i+3])
|
||||
q[1] = q[0]
|
||||
q[2] = q[0]
|
||||
q[3] = q[0]
|
||||
@@ -123,57 +122,3 @@ skey_expand :: proc "contextless" (skey, comp_skey: []u64, num_rounds: int) {
|
||||
skey[v + 3] = (x3 << 4) - x3
|
||||
}
|
||||
}
|
||||
|
||||
orthogonalize_roundkey :: proc "contextless" (qq: []u64, key: []byte) {
|
||||
if len(qq) < 8 || len(key) != 16 {
|
||||
intrinsics.trap()
|
||||
}
|
||||
|
||||
skey: [4]u32 = ---
|
||||
skey[0] = endian.unchecked_get_u32le(key[0:])
|
||||
skey[1] = endian.unchecked_get_u32le(key[4:])
|
||||
skey[2] = endian.unchecked_get_u32le(key[8:])
|
||||
skey[3] = endian.unchecked_get_u32le(key[12:])
|
||||
|
||||
q: [8]u64 = ---
|
||||
q[0], q[4] = interleave_in(skey[:])
|
||||
q[1] = q[0]
|
||||
q[2] = q[0]
|
||||
q[3] = q[0]
|
||||
q[5] = q[4]
|
||||
q[6] = q[4]
|
||||
q[7] = q[4]
|
||||
orthogonalize(&q)
|
||||
|
||||
comp_skey: [2]u64 = ---
|
||||
comp_skey[0] =
|
||||
(q[0] & 0x1111111111111111) |
|
||||
(q[1] & 0x2222222222222222) |
|
||||
(q[2] & 0x4444444444444444) |
|
||||
(q[3] & 0x8888888888888888)
|
||||
comp_skey[1] =
|
||||
(q[4] & 0x1111111111111111) |
|
||||
(q[5] & 0x2222222222222222) |
|
||||
(q[6] & 0x4444444444444444) |
|
||||
(q[7] & 0x8888888888888888)
|
||||
|
||||
for x, u in comp_skey {
|
||||
x0 := x
|
||||
x1, x2, x3 := x0, x0, x0
|
||||
x0 &= 0x1111111111111111
|
||||
x1 &= 0x2222222222222222
|
||||
x2 &= 0x4444444444444444
|
||||
x3 &= 0x8888888888888888
|
||||
x1 >>= 1
|
||||
x2 >>= 2
|
||||
x3 >>= 3
|
||||
qq[u * 4 + 0] = (x0 << 4) - x0
|
||||
qq[u * 4 + 1] = (x1 << 4) - x1
|
||||
qq[u * 4 + 2] = (x2 << 4) - x2
|
||||
qq[u * 4 + 3] = (x3 << 4) - x3
|
||||
}
|
||||
|
||||
mem.zero_explicit(&skey, size_of(skey))
|
||||
mem.zero_explicit(&q, size_of(q))
|
||||
mem.zero_explicit(&comp_skey, size_of(comp_skey))
|
||||
}
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
|
||||
package aes_ct64
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_aes"
|
||||
import "core:encoding/endian"
|
||||
|
||||
@@ -64,9 +63,8 @@ rev64 :: proc "contextless" (x: u64) -> u64 {
|
||||
// Note: `dst` is both an input and an output, to support easy implementation
|
||||
// of GCM.
|
||||
ghash :: proc "contextless" (dst, key, data: []byte) {
|
||||
if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
|
||||
intrinsics.trap()
|
||||
}
|
||||
ensure_contextless(len(dst) == _aes.GHASH_BLOCK_SIZE)
|
||||
ensure_contextless(len(key) == _aes.GHASH_BLOCK_SIZE)
|
||||
|
||||
buf := data
|
||||
l := len(buf)
|
||||
|
||||
@@ -1,60 +1,61 @@
|
||||
package aes_ct64
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_aes"
|
||||
import "core:encoding/endian"
|
||||
|
||||
load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) {
|
||||
if len(src) != _aes.BLOCK_SIZE {
|
||||
intrinsics.trap()
|
||||
}
|
||||
|
||||
w: [4]u32 = ---
|
||||
w[0] = endian.unchecked_get_u32le(src[0:])
|
||||
w[1] = endian.unchecked_get_u32le(src[4:])
|
||||
w[2] = endian.unchecked_get_u32le(src[8:])
|
||||
w[3] = endian.unchecked_get_u32le(src[12:])
|
||||
q[0], q[4] = interleave_in(w[:])
|
||||
orthogonalize(q)
|
||||
@(require_results)
|
||||
load_interleaved :: proc "contextless" (src: []byte) -> (u64, u64) #no_bounds_check {
|
||||
w0 := endian.unchecked_get_u32le(src[0:])
|
||||
w1 := endian.unchecked_get_u32le(src[4:])
|
||||
w2 := endian.unchecked_get_u32le(src[8:])
|
||||
w3 := endian.unchecked_get_u32le(src[12:])
|
||||
return interleave_in(w0, w1, w2, w3)
|
||||
}
|
||||
|
||||
store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) {
|
||||
if len(dst) != _aes.BLOCK_SIZE {
|
||||
intrinsics.trap()
|
||||
}
|
||||
|
||||
orthogonalize(q)
|
||||
w0, w1, w2, w3 := interleave_out(q[0], q[4])
|
||||
store_interleaved :: proc "contextless" (dst: []byte, a0, a1: u64) #no_bounds_check {
|
||||
w0, w1, w2, w3 := interleave_out(a0, a1)
|
||||
endian.unchecked_put_u32le(dst[0:], w0)
|
||||
endian.unchecked_put_u32le(dst[4:], w1)
|
||||
endian.unchecked_put_u32le(dst[8:], w2)
|
||||
endian.unchecked_put_u32le(dst[12:], w3)
|
||||
}
|
||||
|
||||
// xor_interleaved returns the lane-wise XOR of the two-word values
// (a0, a1) and (b0, b1).
@(require_results)
xor_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) {
	lo := a0 ~ b0
	hi := a1 ~ b1
	return lo, hi
}
|
||||
|
||||
// and_interleaved returns the lane-wise AND of the two-word values
// (a0, a1) and (b0, b1).
@(require_results)
and_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) {
	lo := a0 & b0
	hi := a1 & b1
	return lo, hi
}
|
||||
|
||||
// load_blockx1 loads a single block from `src` into slots 0 and 4 of
// the state `q`, then runs `orthogonalize` over the whole state.
//
// `src` must be exactly `_aes.BLOCK_SIZE` bytes long; any other length
// fails the `ensure_contextless` check.
load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) {
	ensure_contextless(len(src) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")

	lo, hi := #force_inline load_interleaved(src)
	q[0] = lo
	q[4] = hi

	orthogonalize(q)
}
|
||||
|
||||
// store_blockx1 runs `orthogonalize` over the state `q` (modifying it in
// place) and then writes the block held in slots 0 and 4 to `dst`.
//
// `dst` must be exactly `_aes.BLOCK_SIZE` bytes long; any other length
// fails the `ensure_contextless` check.
store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) {
	ensure_contextless(len(dst) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")

	orthogonalize(q)

	lo, hi := q[0], q[4]
	#force_inline store_interleaved(dst, lo, hi)
}
|
||||
|
||||
// load_blocks loads between 1 and STRIDE blocks from `src` into the
// state `q`, then runs `orthogonalize` over the whole state.
//
// Block `i` is placed in slots `i` and `i + 4` of `q`.
//
// Inputs:
// - q:   the 8-word state to fill.
// - src: the blocks to load; there must be at least one and at most
//        STRIDE of them, and each must be exactly `_aes.BLOCK_SIZE`
//        bytes long.
//
// Any violation of the size requirements fails an `ensure_contextless`
// check.
load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) {
	// An empty batch is rejected as well as an oversized one.  The
	// condition `len(src) == 0 || len(src) <= STRIDE` would accept
	// an empty batch, which the previous `n > STRIDE || n == 0` trap
	// did not.
	ensure_contextless(len(src) > 0 && len(src) <= STRIDE, "aes/ct64: invalid block(s) size")

	for s, i in src {
		ensure_contextless(len(s) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")
		q[i], q[i + 4] = #force_inline load_interleaved(s)
	}
	orthogonalize(q)
}
|
||||
|
||||
store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) {
|
||||
if n := len(dst); n > STRIDE || n == 0 {
|
||||
intrinsics.trap()
|
||||
}
|
||||
ensure_contextless(len(dst) == 0 || len(dst) <= STRIDE, "aes/ct64: invalid block(s) size")
|
||||
|
||||
orthogonalize(q)
|
||||
for d, i in dst {
|
||||
@@ -62,14 +63,7 @@ store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) {
|
||||
if d == nil {
|
||||
break
|
||||
}
|
||||
if len(d) != _aes.BLOCK_SIZE {
|
||||
intrinsics.trap()
|
||||
}
|
||||
|
||||
w0, w1, w2, w3 := interleave_out(q[i], q[i + 4])
|
||||
endian.unchecked_put_u32le(d[0:], w0)
|
||||
endian.unchecked_put_u32le(d[4:], w1)
|
||||
endian.unchecked_put_u32le(d[8:], w2)
|
||||
endian.unchecked_put_u32le(d[12:], w3)
|
||||
ensure_contextless(len(d) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")
|
||||
#force_inline store_interleaved(d, q[i], q[i + 4])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,7 +52,7 @@ GHASH_STRIDE_BYTES_HW :: GHASH_STRIDE_HW * _aes.GHASH_BLOCK_SIZE
|
||||
// that it is right-shifted by 1 bit. The left-shift is relatively
|
||||
// inexpensive, and it can be mutualised.
|
||||
//
|
||||
// Since SSE2 opcodes do not have facilities for shitfting full 128-bit
|
||||
// Since SSE2 opcodes do not have facilities for shifting full 128-bit
|
||||
// values with bit precision, we have to break down values into 64-bit
|
||||
// chunks. We number chunks from 0 to 3 in left to right order.
|
||||
|
||||
@@ -155,7 +155,7 @@ square_f128 :: #force_inline proc "contextless" (kw: x86.__m128i) -> (x86.__m128
|
||||
@(enable_target_feature = "sse2,ssse3,pclmul")
|
||||
ghash :: proc "contextless" (dst, key, data: []byte) #no_bounds_check {
|
||||
if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
|
||||
intrinsics.trap()
|
||||
panic_contextless("aes/ghash: invalid dst or key size")
|
||||
}
|
||||
|
||||
// Note: BearSSL opts to copy the remainder into a zero-filled
|
||||
|
||||
@@ -18,6 +18,8 @@ BLAKE2S_SIZE :: 32
|
||||
BLAKE2B_BLOCK_SIZE :: 128
|
||||
BLAKE2B_SIZE :: 64
|
||||
|
||||
MAX_SIZE :: 255
|
||||
|
||||
Blake2s_Context :: struct {
|
||||
h: [8]u32,
|
||||
t: [2]u32,
|
||||
@@ -68,13 +70,13 @@ Blake2_Tree :: struct {
|
||||
is_last_node: bool,
|
||||
}
|
||||
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
BLAKE2S_IV := [8]u32 {
|
||||
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
|
||||
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
|
||||
}
|
||||
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
BLAKE2B_IV := [8]u64 {
|
||||
0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
|
||||
0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
|
||||
@@ -82,16 +84,13 @@ BLAKE2B_IV := [8]u64 {
|
||||
0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
|
||||
}
|
||||
|
||||
init :: proc(ctx: ^$T, cfg: ^Blake2_Config) {
|
||||
init :: proc "contextless" (ctx: ^$T, cfg: ^Blake2_Config) {
|
||||
when T == Blake2s_Context {
|
||||
max_size :: BLAKE2S_SIZE
|
||||
} else when T == Blake2b_Context {
|
||||
max_size :: BLAKE2B_SIZE
|
||||
}
|
||||
|
||||
if cfg.size > max_size {
|
||||
panic("blake2: requested output size exceeeds algorithm max")
|
||||
}
|
||||
ensure_contextless(cfg.size <= max_size, "blake2: requested output size exceeeds algorithm max")
|
||||
|
||||
// To save having to allocate a scratch buffer, use the internal
|
||||
// data buffer (`ctx.x`), as it is exactly the correct size.
|
||||
@@ -167,8 +166,8 @@ init :: proc(ctx: ^$T, cfg: ^Blake2_Config) {
|
||||
ctx.is_initialized = true
|
||||
}
|
||||
|
||||
update :: proc(ctx: ^$T, p: []byte) {
|
||||
assert(ctx.is_initialized)
|
||||
update :: proc "contextless" (ctx: ^$T, p: []byte) {
|
||||
ensure_contextless(ctx.is_initialized)
|
||||
|
||||
p := p
|
||||
when T == Blake2s_Context {
|
||||
@@ -195,8 +194,8 @@ update :: proc(ctx: ^$T, p: []byte) {
|
||||
ctx.nx += copy(ctx.x[ctx.nx:], p)
|
||||
}
|
||||
|
||||
final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
|
||||
assert(ctx.is_initialized)
|
||||
final :: proc "contextless" (ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
|
||||
ensure_contextless(ctx.is_initialized)
|
||||
|
||||
ctx := ctx
|
||||
if finalize_clone {
|
||||
@@ -206,24 +205,19 @@ final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
|
||||
}
|
||||
defer(reset(ctx))
|
||||
|
||||
ensure_contextless(len(hash) >= int(ctx.size), "crypto/blake2: invalid destination digest size")
|
||||
when T == Blake2s_Context {
|
||||
if len(hash) < int(ctx.size) {
|
||||
panic("crypto/blake2s: invalid destination digest size")
|
||||
}
|
||||
blake2s_final(ctx, hash)
|
||||
} else when T == Blake2b_Context {
|
||||
if len(hash) < int(ctx.size) {
|
||||
panic("crypto/blake2b: invalid destination digest size")
|
||||
}
|
||||
blake2b_final(ctx, hash)
|
||||
}
|
||||
}
|
||||
|
||||
clone :: proc(ctx, other: ^$T) {
|
||||
clone :: proc "contextless" (ctx, other: ^$T) {
|
||||
ctx^ = other^
|
||||
}
|
||||
|
||||
reset :: proc(ctx: ^$T) {
|
||||
reset :: proc "contextless" (ctx: ^$T) {
|
||||
if !ctx.is_initialized {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
package _chacha20
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:encoding/endian"
|
||||
import "core:math/bits"
|
||||
import "core:mem"
|
||||
@@ -46,9 +45,8 @@ Context :: struct {
|
||||
// derivation is expected to be handled by the caller, so that the
|
||||
// HChaCha call can be suitably accelerated.
|
||||
init :: proc "contextless" (ctx: ^Context, key, iv: []byte, is_xchacha: bool) {
|
||||
if len(key) != KEY_SIZE || len(iv) != IV_SIZE {
|
||||
intrinsics.trap()
|
||||
}
|
||||
ensure_contextless(len(key) == KEY_SIZE, "chacha20: invalid key size")
|
||||
ensure_contextless(len(iv) == IV_SIZE, "chacha20: invalid key size")
|
||||
|
||||
k, n := key, iv
|
||||
|
||||
@@ -76,12 +74,10 @@ init :: proc "contextless" (ctx: ^Context, key, iv: []byte, is_xchacha: bool) {
|
||||
|
||||
// seek seeks the (X)ChaCha20 stream counter to the specified block.
|
||||
seek :: proc(ctx: ^Context, block_nr: u64) {
|
||||
assert(ctx._is_initialized)
|
||||
ensure(ctx._is_initialized)
|
||||
|
||||
if ctx._is_ietf_flavor {
|
||||
if block_nr > MAX_CTR_IETF {
|
||||
panic("crypto/chacha20: attempted to seek past maximum counter")
|
||||
}
|
||||
ensure(block_nr <= MAX_CTR_IETF, "crypto/chacha20: attempted to seek past maximum counter")
|
||||
} else {
|
||||
ctx._s[13] = u32(block_nr >> 32)
|
||||
}
|
||||
@@ -102,7 +98,7 @@ check_counter_limit :: proc(ctx: ^Context, nr_blocks: int) {
|
||||
// Enforce the maximum consumed keystream per IV.
|
||||
//
|
||||
// While all modern "standard" definitions of ChaCha20 use
|
||||
// the IETF 32-bit counter, for XChaCha20 most common
|
||||
// the IETF 32-bit counter, for XChaCha20 historical
|
||||
// implementations allow for a 64-bit counter.
|
||||
//
|
||||
// Honestly, the answer here is "use a MRAE primitive", but
|
||||
@@ -110,14 +106,14 @@ check_counter_limit :: proc(ctx: ^Context, nr_blocks: int) {
|
||||
|
||||
ERR_CTR_EXHAUSTED :: "crypto/chacha20: maximum (X)ChaCha20 keystream per IV reached"
|
||||
|
||||
ctr_ok: bool
|
||||
if ctx._is_ietf_flavor {
|
||||
if u64(ctx._s[12]) + u64(nr_blocks) > MAX_CTR_IETF {
|
||||
panic(ERR_CTR_EXHAUSTED)
|
||||
}
|
||||
ctr_ok = u64(ctx._s[12]) + u64(nr_blocks) <= MAX_CTR_IETF
|
||||
} else {
|
||||
ctr := (u64(ctx._s[13]) << 32) | u64(ctx._s[12])
|
||||
if _, carry := bits.add_u64(ctr, u64(nr_blocks), 0); carry != 0 {
|
||||
panic(ERR_CTR_EXHAUSTED)
|
||||
}
|
||||
_, carry := bits.add_u64(ctr, u64(nr_blocks), 0)
|
||||
ctr_ok = carry == 0
|
||||
}
|
||||
|
||||
ensure(ctr_ok, "crypto/chacha20: maximum (X)ChaCha20 keystream per IV reached")
|
||||
}
|
||||
|
||||
@@ -29,11 +29,24 @@ when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 {
|
||||
// explicitly using simd.u8x16 shuffles.
|
||||
@(private = "file")
|
||||
TARGET_SIMD_FEATURES :: "sse2,ssse3"
|
||||
} else when ODIN_ARCH == .riscv64 {
|
||||
@(private = "file")
|
||||
TARGET_SIMD_FEATURES :: "v"
|
||||
} else {
|
||||
@(private = "file")
|
||||
TARGET_SIMD_FEATURES :: ""
|
||||
}
|
||||
|
||||
// Some targets lack runtime feature detection, and will flat out refuse
|
||||
// to load binaries that have unknown instructions. This is distinct from
|
||||
// `simd.IS_EMULATED` as actually good designs support runtime feature
|
||||
// detection and that constant establishes a baseline.
|
||||
//
|
||||
// See:
|
||||
// - https://github.com/WebAssembly/design/issues/1161
|
||||
@(private = "file")
|
||||
TARGET_IS_DESIGNED_BY_IDIOTS :: (ODIN_ARCH == .wasm64p32 || ODIN_ARCH == .wasm32) && !intrinsics.has_target_feature("simd128")
|
||||
|
||||
@(private = "file")
|
||||
_ROT_7L: simd.u32x4 : {7, 7, 7, 7}
|
||||
@(private = "file")
|
||||
@@ -205,11 +218,13 @@ _store_simd128 :: #force_inline proc "contextless" (
|
||||
// is_performant returns true iff the target and current host both support
|
||||
// "enough" 128-bit SIMD to make this implementation performant.
|
||||
is_performant :: proc "contextless" () -> bool {
|
||||
when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 || ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
|
||||
when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 || ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 || ODIN_ARCH == .riscv64 {
|
||||
when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 {
|
||||
req_features :: info.CPU_Features{.asimd}
|
||||
} else when ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
|
||||
req_features :: info.CPU_Features{.sse2, .ssse3}
|
||||
} else when ODIN_ARCH == .riscv64 {
|
||||
req_features :: info.CPU_Features{.V}
|
||||
}
|
||||
|
||||
features, ok := info.cpu_features.?
|
||||
@@ -245,8 +260,17 @@ stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int)
|
||||
|
||||
// 8 blocks at a time.
|
||||
//
|
||||
// Note: This is only worth it on Aarch64.
|
||||
when ODIN_ARCH == .arm64 {
|
||||
// Note:
|
||||
// This uses a ton of registers so it is only worth it on targets
|
||||
// that have something like 32 128-bit registers. This is currently
|
||||
// all ARMv8 targets, and RISC-V Zvl128b (`V` application profile)
|
||||
// targets.
|
||||
//
|
||||
// While our current definition of `.arm32` is 32-bit ARMv8, this
|
||||
// may change in the future (ARMv7 is still relevant), and things
|
||||
// like Cortex-A8/A9 does "pretend" 128-bit SIMD 64-bits at a time
|
||||
// thus needs benchmarking.
|
||||
when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 {
|
||||
for ; n >= 8; n = n - 8 {
|
||||
v0, v1, v2, v3 := s0, s1, s2, s3
|
||||
|
||||
@@ -354,9 +378,11 @@ stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int)
|
||||
|
||||
// 4 blocks at a time.
|
||||
//
|
||||
// Note: The i386 target lacks the required number of registers
|
||||
// for this to be performant, so it is skipped.
|
||||
when ODIN_ARCH != .i386 {
|
||||
// Note: This is skipped on several targets for various reasons.
|
||||
// - i386 lacks the required number of registers
|
||||
// - Generating code when runtime "hardware" SIMD support is impossible
|
||||
// to detect is pointless, since this will be emulated using GP regs.
|
||||
when ODIN_ARCH != .i386 && !TARGET_IS_DESIGNED_BY_IDIOTS {
|
||||
for ; n >= 4; n = n - 4 {
|
||||
v0, v1, v2, v3 := s0, s1, s2, s3
|
||||
|
||||
|
||||
@@ -13,5 +13,5 @@ stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int)
|
||||
}
|
||||
|
||||
hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
|
||||
intrinsics.trap()
|
||||
panic_contextless("crypto/chacha20: simd256 implementation unsupported")
|
||||
}
|
||||
@@ -11,7 +11,6 @@ See:
|
||||
- https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html
|
||||
*/
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto"
|
||||
import field "core:crypto/_fiat/field_curve25519"
|
||||
import "core:mem"
|
||||
@@ -32,6 +31,7 @@ import "core:mem"
|
||||
// - The group element decoding routine takes the opinionated stance of
|
||||
// rejecting non-canonical encodings.
|
||||
|
||||
@(rodata)
|
||||
FE_D := field.Tight_Field_Element {
|
||||
929955233495203,
|
||||
466365720129213,
|
||||
@@ -39,7 +39,7 @@ FE_D := field.Tight_Field_Element {
|
||||
2033849074728123,
|
||||
1442794654840575,
|
||||
}
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
FE_A := field.Tight_Field_Element {
|
||||
2251799813685228,
|
||||
2251799813685247,
|
||||
@@ -47,7 +47,7 @@ FE_A := field.Tight_Field_Element {
|
||||
2251799813685247,
|
||||
2251799813685247,
|
||||
}
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
FE_D2 := field.Tight_Field_Element {
|
||||
1859910466990425,
|
||||
932731440258426,
|
||||
@@ -55,7 +55,7 @@ FE_D2 := field.Tight_Field_Element {
|
||||
1815898335770999,
|
||||
633789495995903,
|
||||
}
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
GE_BASEPOINT := Group_Element {
|
||||
field.Tight_Field_Element {
|
||||
1738742601995546,
|
||||
@@ -80,6 +80,7 @@ GE_BASEPOINT := Group_Element {
|
||||
1821297809914039,
|
||||
},
|
||||
}
|
||||
@(rodata)
|
||||
GE_IDENTITY := Group_Element {
|
||||
field.Tight_Field_Element{0, 0, 0, 0, 0},
|
||||
field.Tight_Field_Element{1, 0, 0, 0, 0},
|
||||
@@ -107,9 +108,7 @@ ge_set :: proc "contextless" (ge, a: ^Group_Element) {
|
||||
|
||||
@(require_results)
|
||||
ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
|
||||
if len(b) != 32 {
|
||||
intrinsics.trap()
|
||||
}
|
||||
ensure_contextless(len(b) == 32, "edwards25519: invalid group element size")
|
||||
b_ := (^[32]byte)(raw_data(b))
|
||||
|
||||
// Do the work in a scratch element, so that ge is unchanged on
|
||||
@@ -166,9 +165,7 @@ ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
|
||||
}
|
||||
|
||||
ge_bytes :: proc "contextless" (ge: ^Group_Element, dst: []byte) {
|
||||
if len(dst) != 32 {
|
||||
intrinsics.trap()
|
||||
}
|
||||
ensure_contextless(len(dst) == 32, "edwards25519: invalid group element size")
|
||||
dst_ := (^[32]byte)(raw_data(dst))
|
||||
|
||||
// Convert the element to affine (x, y) representation.
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
package _edwards25519
|
||||
|
||||
import "base:intrinsics"
|
||||
import field "core:crypto/_fiat/field_scalar25519"
|
||||
import "core:mem"
|
||||
|
||||
@@ -8,7 +7,7 @@ Scalar :: field.Montgomery_Domain_Field_Element
|
||||
|
||||
// WARNING: This is non-canonical and only to be used when checking if
|
||||
// a group element is on the prime-order subgroup.
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
SC_ELL := field.Non_Montgomery_Domain_Field_Element {
|
||||
field.ELL[0],
|
||||
field.ELL[1],
|
||||
@@ -25,17 +24,13 @@ sc_set_u64 :: proc "contextless" (sc: ^Scalar, i: u64) {
|
||||
|
||||
@(require_results)
|
||||
sc_set_bytes :: proc "contextless" (sc: ^Scalar, b: []byte) -> bool {
|
||||
if len(b) != 32 {
|
||||
intrinsics.trap()
|
||||
}
|
||||
ensure_contextless(len(b) == 32, "edwards25519: invalid scalar size")
|
||||
b_ := (^[32]byte)(raw_data(b))
|
||||
return field.fe_from_bytes(sc, b_)
|
||||
}
|
||||
|
||||
sc_set_bytes_rfc8032 :: proc "contextless" (sc: ^Scalar, b: []byte) {
|
||||
if len(b) != 32 {
|
||||
intrinsics.trap()
|
||||
}
|
||||
ensure_contextless(len(b) == 32, "edwards25519: invalid scalar size")
|
||||
b_ := (^[32]byte)(raw_data(b))
|
||||
field.fe_from_bytes_rfc8032(sc, b_)
|
||||
}
|
||||
|
||||
@@ -42,9 +42,12 @@ import "core:math/bits"
|
||||
Loose_Field_Element :: distinct [5]u64
|
||||
Tight_Field_Element :: distinct [5]u64
|
||||
|
||||
@(rodata)
|
||||
FE_ZERO := Tight_Field_Element{0, 0, 0, 0, 0}
|
||||
@(rodata)
|
||||
FE_ONE := Tight_Field_Element{1, 0, 0, 0, 0}
|
||||
|
||||
@(rodata)
|
||||
FE_SQRT_M1 := Tight_Field_Element {
|
||||
1718705420411056,
|
||||
234908883556509,
|
||||
|
||||
@@ -0,0 +1,235 @@
|
||||
package field_curve448
|
||||
|
||||
import "core:mem"
|
||||
|
||||
// fe_relax_cast reinterprets a pointer to a tight field element as a
// pointer to a loose field element.  No data is copied or modified; the
// returned pointer aliases `arg1`.
fe_relax_cast :: #force_inline proc "contextless" (
	arg1: ^Tight_Field_Element,
) -> ^Loose_Field_Element {
	return cast(^Loose_Field_Element)arg1
}
|
||||
|
||||
// fe_tighten_cast reinterprets a pointer to a loose field element as a
// pointer to a tight field element.  No data is copied, modified or
// reduced; the returned pointer aliases `arg1`.
fe_tighten_cast :: #force_inline proc "contextless" (
	arg1: ^Loose_Field_Element,
) -> ^Tight_Field_Element {
	return cast(^Tight_Field_Element)arg1
}
|
||||
|
||||
// fe_clear zeroes the field element that `arg1` points to, using
// `mem.zero_explicit`.  `arg1` may be a pointer to either a tight or a
// loose field element.
fe_clear :: proc "contextless" (
	arg1: $T,
) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
	nbytes := size_of(arg1^)
	mem.zero_explicit(arg1, nbytes)
}
|
||||
|
||||
// fe_clear_vec calls `fe_clear` on every pointer in `arg1`, in order.
// `arg1` is a slice of pointers to either tight or loose field elements.
fe_clear_vec :: proc "contextless" (
	arg1: $T,
) where T == []^Tight_Field_Element || T == []^Loose_Field_Element {
	for idx in 0 ..< len(arg1) {
		fe_clear(arg1[idx])
	}
}
|
||||
|
||||
// fe_carry_mul_small multiplies `arg1` by the small constant `arg2` and
// stores the result in `out1`.
//
// The constant is widened into a temporary field element whose lowest
// limb is `arg2` and whose remaining limbs are zero, and the product is
// computed with `fe_carry_mul`.
fe_carry_mul_small :: proc "contextless" (
	out1: ^Tight_Field_Element,
	arg1: ^Loose_Field_Element,
	arg2: u64,
) {
	multiplier: Loose_Field_Element // All limbs start out as zero.
	multiplier[0] = arg2

	fe_carry_mul(out1, arg1, &multiplier)
}
|
||||
|
||||
// fe_carry_pow2k squares `arg1` a total of `arg2` times and stores the
// result in `out1`, i.e. it computes `arg1^(2^arg2)` for `arg2 >= 1`.
//
// Special case: when `arg2` is 0, `out1` is set to one.
// NOTE(review): mathematically `arg1^(2^0)` is `arg1` itself rather
// than 1; the behaviour is kept as-is since this case is not expected
// to happen - confirm that no caller passes 0.
fe_carry_pow2k :: proc "contextless" (
	out1: ^Tight_Field_Element,
	arg1: ^Loose_Field_Element,
	arg2: uint,
) {
	if arg2 == 0 {
		fe_one(out1)
		return
	}

	// The first squaring reads from `arg1`, every later one squares
	// `out1` in place.
	fe_carry_square(out1, arg1)
	for remaining := arg2 - 1; remaining > 0; remaining -= 1 {
		fe_carry_square(out1, fe_relax_cast(out1))
	}
}
|
||||
|
||||
// fe_carry_inv raises `arg1` to the fixed exponent given in the final
// step comment below, using only squarings and multiplications, and
// stores the result in `out1`.
//
// NOTE(review): the exponent appears to be p - 2 for the curve448 prime
// p = 2^448 - 2^224 - 1, which would make this a Fermat inversion -
// confirm against the field definition.
//
// The three temporaries are wiped with `fe_clear_vec` before returning.
fe_carry_inv :: proc "contextless" (
|
||||
out1: ^Tight_Field_Element,
|
||||
arg1: ^Loose_Field_Element,
|
||||
) {
|
||||
// Inversion computation is derived from the addition chain:
|
||||
//
|
||||
// _10 = 2*1
|
||||
// _11 = 1 + _10
|
||||
// _110 = 2*_11
|
||||
// _111 = 1 + _110
|
||||
// _111000 = _111 << 3
|
||||
// _111111 = _111 + _111000
|
||||
// x12 = _111111 << 6 + _111111
|
||||
// x24 = x12 << 12 + x12
|
||||
// i34 = x24 << 6
|
||||
// x30 = _111111 + i34
|
||||
// x48 = i34 << 18 + x24
|
||||
// x96 = x48 << 48 + x48
|
||||
// x192 = x96 << 96 + x96
|
||||
// x222 = x192 << 30 + x30
|
||||
// x223 = 2*x222 + 1
|
||||
// return (x223 << 223 + x222) << 2 + 1
|
||||
//
|
||||
// Operations: 447 squares 13 multiplies
|
||||
//
|
||||
// Generated by github.com/mmcloughlin/addchain v0.4.0.
|
||||
|
||||
// Scratch elements; left uninitialized as each is written before it
// is first read.
t0, t1, t2: Tight_Field_Element = ---, ---, ---
|
||||
|
||||
// Step 1: t0 = x^0x2
|
||||
fe_carry_square(&t0, arg1)
|
||||
|
||||
// Step 2: t0 = x^0x3
|
||||
fe_carry_mul(&t0, arg1, fe_relax_cast(&t0))
|
||||
|
||||
// Step 3: t0 = x^0x6
|
||||
fe_carry_square(&t0, fe_relax_cast(&t0))
|
||||
|
||||
// Step 4: t0 = x^0x7
|
||||
fe_carry_mul(&t0, arg1, fe_relax_cast(&t0))
|
||||
|
||||
// Step 7: t1 = x^0x38
|
||||
fe_carry_pow2k(&t1, fe_relax_cast(&t0), 3)
|
||||
|
||||
// Step 8: t0 = x^0x3f
|
||||
fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
|
||||
|
||||
// Step 14: t1 = x^0xfc0
|
||||
fe_carry_pow2k(&t1, fe_relax_cast(&t0), 6)
|
||||
|
||||
// Step 15: t1 = x^0xfff
|
||||
fe_carry_mul(&t1, fe_relax_cast(&t0), fe_relax_cast(&t1))
|
||||
|
||||
// Step 27: t2 = x^0xfff000
|
||||
fe_carry_pow2k(&t2, fe_relax_cast(&t1), 12)
|
||||
|
||||
// Step 28: t1 = x^0xffffff
|
||||
fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
|
||||
|
||||
// Step 34: t2 = x^0x3fffffc0
|
||||
fe_carry_pow2k(&t2, fe_relax_cast(&t1), 6)
|
||||
|
||||
// Step 35: t0 = x^0x3fffffff
|
||||
fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t2))
|
||||
|
||||
// Step 53: t2 = x^0xffffff000000
|
||||
fe_carry_pow2k(&t2, fe_relax_cast(&t2), 18)
|
||||
|
||||
// Step 54: t1 = x^0xffffffffffff
|
||||
fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
|
||||
|
||||
// Step 102: t2 = x^0xffffffffffff000000000000
|
||||
fe_carry_pow2k(&t2, fe_relax_cast(&t1), 48)
|
||||
|
||||
// Step 103: t1 = x^0xffffffffffffffffffffffff
|
||||
fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
|
||||
|
||||
// Step 199: t2 = x^0xffffffffffffffffffffffff000000000000000000000000
|
||||
fe_carry_pow2k(&t2, fe_relax_cast(&t1), 96)
|
||||
|
||||
// Step 200: t1 = x^0xffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
|
||||
|
||||
// Step 230: t1 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffc0000000
|
||||
fe_carry_pow2k(&t1, fe_relax_cast(&t1), 30)
|
||||
|
||||
// Step 231: t0 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
|
||||
|
||||
// Step 232: t1 = x^0x7ffffffffffffffffffffffffffffffffffffffffffffffffffffffe
|
||||
fe_carry_square(&t1, fe_relax_cast(&t0))
|
||||
|
||||
// Step 233: t1 = x^0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
fe_carry_mul(&t1, arg1, fe_relax_cast(&t1))
|
||||
|
||||
// Step 456: t1 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000000000000000000000000000000000000000000000000000
|
||||
fe_carry_pow2k(&t1, fe_relax_cast(&t1), 223)
|
||||
|
||||
// Step 457: t0 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
||||
fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
|
||||
|
||||
// Step 459: t0 = x^0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffc
|
||||
fe_carry_pow2k(&t0, fe_relax_cast(&t0), 2)
|
||||
|
||||
// Step 460: z = x^0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd
|
||||
fe_carry_mul(out1, arg1, fe_relax_cast(&t0))
|
||||
|
||||
// Wipe the intermediate powers of the input before returning.
fe_clear_vec([]^Tight_Field_Element{&t0, &t1, &t2})
|
||||
}
|
||||
|
||||
// fe_zero sets all 8 limbs of `out1` to 0.
fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
	for limb in 0 ..< 8 {
		out1[limb] = 0
	}
}
|
||||
|
||||
// fe_one sets `out1` to the value one: limb 0 becomes 1 and limbs 1
// through 7 become 0.
fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
	out1[0] = 1
	for limb in 1 ..< 8 {
		out1[limb] = 0
	}
}
|
||||
|
||||
// fe_set copies the 8 limbs of `arg1` into `out1`.
//
// All limbs are read from `arg1` before any limb of `out1` is written,
// so the result is the same when the two pointers refer to the same
// element.
fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
	limbs: [8]u64 = ---
	for i in 0 ..< 8 {
		limbs[i] = arg1[i]
	}
	for i in 0 ..< 8 {
		out1[i] = limbs[i]
	}
}
|
||||
|
||||
// fe_cond_swap exchanges the contents of `out1` and `out2` when `arg1`
// is 1, and leaves both unchanged when `arg1` is 0, without branching
// on `arg1`.
//
// For each of the 8 limbs the XOR difference of the two inputs is
// masked and XORed back into both sides: an all-zero mask keeps the
// limbs as they are, an all-one mask swaps them.
//
// NOTE(review): `arg1` is assumed to be exactly 0 or 1; any other value
// produces a partial mask - confirm against callers.
// NOTE(review): optimization is disabled and inlining forbidden,
// presumably so the masked swap is not turned into a data-dependent
// branch - confirm.
@(optimization_mode = "none")
|
||||
fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
|
||||
// 0 -> 0x0000000000000000, 1 -> 0xffffffffffffffff.
mask := (u64(arg1) * 0xffffffffffffffff)
|
||||
x := (out1[0] ~ out2[0]) & mask
|
||||
x1, y1 := out1[0] ~ x, out2[0] ~ x
|
||||
x = (out1[1] ~ out2[1]) & mask
|
||||
x2, y2 := out1[1] ~ x, out2[1] ~ x
|
||||
x = (out1[2] ~ out2[2]) & mask
|
||||
x3, y3 := out1[2] ~ x, out2[2] ~ x
|
||||
x = (out1[3] ~ out2[3]) & mask
|
||||
x4, y4 := out1[3] ~ x, out2[3] ~ x
|
||||
x = (out1[4] ~ out2[4]) & mask
|
||||
x5, y5 := out1[4] ~ x, out2[4] ~ x
|
||||
x = (out1[5] ~ out2[5]) & mask
|
||||
x6, y6 := out1[5] ~ x, out2[5] ~ x
|
||||
x = (out1[6] ~ out2[6]) & mask
|
||||
x7, y7 := out1[6] ~ x, out2[6] ~ x
|
||||
x = (out1[7] ~ out2[7]) & mask
|
||||
x8, y8 := out1[7] ~ x, out2[7] ~ x
|
||||
// Write back only after every limb has been computed.
out1[0], out2[0] = x1, y1
|
||||
out1[1], out2[1] = x2, y2
|
||||
out1[2], out2[2] = x3, y3
|
||||
out1[3], out2[3] = x4, y4
|
||||
out1[4], out2[4] = x5, y5
|
||||
out1[5], out2[5] = x6, y6
|
||||
out1[6], out2[6] = x7, y7
|
||||
out1[7], out2[7] = x8, y8
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,5 @@
|
||||
package field_poly1305
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:encoding/endian"
|
||||
import "core:mem"
|
||||
|
||||
@@ -29,9 +28,7 @@ fe_from_bytes :: #force_inline proc "contextless" (
|
||||
// makes implementing the actual MAC block processing considerably
|
||||
// neater.
|
||||
|
||||
if len(arg1) != 16 {
|
||||
intrinsics.trap()
|
||||
}
|
||||
ensure_contextless(len(arg1) == 16, "poly1305: invalid field element size")
|
||||
|
||||
// While it may be unwise to do deserialization here on our
|
||||
// own when fiat-crypto provides equivalent functionality,
|
||||
|
||||
@@ -1,18 +1,17 @@
|
||||
package field_scalar25519
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:encoding/endian"
|
||||
import "core:math/bits"
|
||||
import "core:mem"
|
||||
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
_TWO_168 := Montgomery_Domain_Field_Element {
|
||||
0x5b8ab432eac74798,
|
||||
0x38afddd6de59d5d7,
|
||||
0xa2c131b399411b7c,
|
||||
0x6329a7ed9ce5a30,
|
||||
}
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
_TWO_336 := Montgomery_Domain_Field_Element {
|
||||
0xbd3d108e2b35ecc5,
|
||||
0x5c3a3718bdf9c90b,
|
||||
@@ -95,9 +94,8 @@ fe_from_bytes_wide :: proc "contextless" (
|
||||
@(private)
|
||||
_fe_from_bytes_short :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element, arg1: []byte) {
|
||||
// INVARIANT: len(arg1) < 32.
|
||||
if len(arg1) >= 32 {
|
||||
intrinsics.trap()
|
||||
}
|
||||
ensure_contextless(len(arg1) < 32, "edwards25519: oversized short scalar")
|
||||
|
||||
tmp: [32]byte
|
||||
copy(tmp[:], arg1)
|
||||
|
||||
@@ -106,9 +104,7 @@ _fe_from_bytes_short :: proc "contextless" (out1: ^Montgomery_Domain_Field_Eleme
|
||||
}
|
||||
|
||||
fe_to_bytes :: proc "contextless" (out1: []byte, arg1: ^Montgomery_Domain_Field_Element) {
|
||||
if len(out1) != 32 {
|
||||
intrinsics.trap()
|
||||
}
|
||||
ensure_contextless(len(out1) == 32, "edwards25519: oversized scalar output buffer")
|
||||
|
||||
tmp: Non_Montgomery_Domain_Field_Element
|
||||
fe_from_montgomery(&tmp, arg1)
|
||||
|
||||
+18
-21
@@ -44,7 +44,7 @@ Context :: struct {
|
||||
is_finalized: bool, // For SHAKE (unlimited squeeze is allowed)
|
||||
}
|
||||
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
keccakf_rndc := [?]u64 {
|
||||
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
|
||||
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
|
||||
@@ -56,13 +56,13 @@ keccakf_rndc := [?]u64 {
|
||||
0x8000000000008080, 0x0000000080000001, 0x8000000080008008,
|
||||
}
|
||||
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
keccakf_rotc := [?]int {
|
||||
1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
|
||||
27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
|
||||
}
|
||||
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
keccakf_piln := [?]i32 {
|
||||
10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
|
||||
15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
|
||||
@@ -122,7 +122,7 @@ keccakf :: proc "contextless" (st: ^[25]u64) {
|
||||
}
|
||||
}
|
||||
|
||||
init :: proc(ctx: ^Context) {
|
||||
init :: proc "contextless" (ctx: ^Context) {
|
||||
for i := 0; i < 25; i += 1 {
|
||||
ctx.st.q[i] = 0
|
||||
}
|
||||
@@ -133,9 +133,9 @@ init :: proc(ctx: ^Context) {
|
||||
ctx.is_finalized = false
|
||||
}
|
||||
|
||||
update :: proc(ctx: ^Context, data: []byte) {
|
||||
assert(ctx.is_initialized)
|
||||
assert(!ctx.is_finalized)
|
||||
update :: proc "contextless" (ctx: ^Context, data: []byte) {
|
||||
ensure_contextless(ctx.is_initialized)
|
||||
ensure_contextless(!ctx.is_finalized)
|
||||
|
||||
j := ctx.pt
|
||||
for i := 0; i < len(data); i += 1 {
|
||||
@@ -149,12 +149,9 @@ update :: proc(ctx: ^Context, data: []byte) {
|
||||
ctx.pt = j
|
||||
}
|
||||
|
||||
final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
|
||||
assert(ctx.is_initialized)
|
||||
|
||||
if len(hash) < ctx.mdlen {
|
||||
panic("crypto/sha3: invalid destination digest size")
|
||||
}
|
||||
final :: proc "contextless" (ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
|
||||
ensure_contextless(ctx.is_initialized)
|
||||
ensure_contextless(len(hash) >= ctx.mdlen, "crypto/sha3: invalid destination digest size")
|
||||
|
||||
ctx := ctx
|
||||
if finalize_clone {
|
||||
@@ -173,11 +170,11 @@ final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
|
||||
}
|
||||
}
|
||||
|
||||
clone :: proc(ctx, other: ^Context) {
|
||||
clone :: proc "contextless" (ctx, other: ^Context) {
|
||||
ctx^ = other^
|
||||
}
|
||||
|
||||
reset :: proc(ctx: ^Context) {
|
||||
reset :: proc "contextless" (ctx: ^Context) {
|
||||
if !ctx.is_initialized {
|
||||
return
|
||||
}
|
||||
@@ -185,9 +182,9 @@ reset :: proc(ctx: ^Context) {
|
||||
mem.zero_explicit(ctx, size_of(ctx^))
|
||||
}
|
||||
|
||||
shake_xof :: proc(ctx: ^Context) {
|
||||
assert(ctx.is_initialized)
|
||||
assert(!ctx.is_finalized)
|
||||
shake_xof :: proc "contextless" (ctx: ^Context) {
|
||||
ensure_contextless(ctx.is_initialized)
|
||||
ensure_contextless(!ctx.is_finalized)
|
||||
|
||||
ctx.st.b[ctx.pt] ~= ctx.dsbyte
|
||||
ctx.st.b[ctx.rsiz - 1] ~= 0x80
|
||||
@@ -197,9 +194,9 @@ shake_xof :: proc(ctx: ^Context) {
|
||||
ctx.is_finalized = true // No more absorb, unlimited squeeze.
|
||||
}
|
||||
|
||||
shake_out :: proc(ctx: ^Context, hash: []byte) {
|
||||
assert(ctx.is_initialized)
|
||||
assert(ctx.is_finalized)
|
||||
shake_out :: proc "contextless" (ctx: ^Context, hash: []byte) {
|
||||
ensure_contextless(ctx.is_initialized)
|
||||
ensure_contextless(ctx.is_finalized)
|
||||
|
||||
j := ctx.pt
|
||||
for i := 0; i < len(hash); i += 1 {
|
||||
|
||||
@@ -3,7 +3,7 @@ package _sha3
|
||||
import "core:encoding/endian"
|
||||
import "core:math/bits"
|
||||
|
||||
init_cshake :: proc(ctx: ^Context, n, s: []byte, sec_strength: int) {
|
||||
init_cshake :: proc "contextless" (ctx: ^Context, n, s: []byte, sec_strength: int) {
|
||||
ctx.mdlen = sec_strength / 8
|
||||
|
||||
// No domain separator is equivalent to vanilla SHAKE.
|
||||
@@ -18,7 +18,7 @@ init_cshake :: proc(ctx: ^Context, n, s: []byte, sec_strength: int) {
|
||||
bytepad(ctx, [][]byte{n, s}, rate_cshake(sec_strength))
|
||||
}
|
||||
|
||||
final_cshake :: proc(ctx: ^Context, dst: []byte, finalize_clone: bool = false) {
|
||||
final_cshake :: proc "contextless" (ctx: ^Context, dst: []byte, finalize_clone: bool = false) {
|
||||
ctx := ctx
|
||||
if finalize_clone {
|
||||
tmp_ctx: Context
|
||||
@@ -32,7 +32,7 @@ final_cshake :: proc(ctx: ^Context, dst: []byte, finalize_clone: bool = false) {
|
||||
shake_out(ctx, dst)
|
||||
}
|
||||
|
||||
rate_cshake :: #force_inline proc(sec_strength: int) -> int {
|
||||
rate_cshake :: #force_inline proc "contextless" (sec_strength: int) -> int {
|
||||
switch sec_strength {
|
||||
case 128:
|
||||
return RATE_128
|
||||
@@ -40,7 +40,7 @@ rate_cshake :: #force_inline proc(sec_strength: int) -> int {
|
||||
return RATE_256
|
||||
}
|
||||
|
||||
panic("crypto/sha3: invalid security strength")
|
||||
panic_contextless("crypto/sha3: invalid security strength")
|
||||
}
|
||||
|
||||
// right_encode and left_encode are defined to support 0 <= x < 2^2040
|
||||
@@ -52,10 +52,10 @@ rate_cshake :: #force_inline proc(sec_strength: int) -> int {
|
||||
//
|
||||
// Thus we support 0 <= x < 2^128.
|
||||
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
_PAD: [RATE_128]byte // Biggest possible value of w per spec.
|
||||
|
||||
bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) {
|
||||
bytepad :: proc "contextless" (ctx: ^Context, x_strings: [][]byte, w: int) {
|
||||
// 1. z = left_encode(w) || X.
|
||||
z_hi: u64
|
||||
z_lo := left_right_encode(ctx, 0, u64(w), true)
|
||||
@@ -70,9 +70,7 @@ bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) {
|
||||
|
||||
// This isn't actually possible, at least with the currently
|
||||
// defined SP 800-185 routines.
|
||||
if carry != 0 {
|
||||
panic("crypto/sha3: bytepad input length overflow")
|
||||
}
|
||||
ensure_contextless(carry == 0, "crypto/sha3: bytepad input length overflow")
|
||||
}
|
||||
|
||||
// We skip this step as we are doing a byte-oriented implementation
|
||||
@@ -95,7 +93,7 @@ bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) {
|
||||
}
|
||||
}
|
||||
|
||||
encode_string :: #force_inline proc(ctx: ^Context, s: []byte) -> (u64, u64) {
|
||||
encode_string :: #force_inline proc "contextless" (ctx: ^Context, s: []byte) -> (u64, u64) {
|
||||
l := encode_byte_len(ctx, len(s), true) // left_encode
|
||||
update(ctx, s)
|
||||
|
||||
@@ -104,13 +102,13 @@ encode_string :: #force_inline proc(ctx: ^Context, s: []byte) -> (u64, u64) {
|
||||
return hi, lo
|
||||
}
|
||||
|
||||
encode_byte_len :: #force_inline proc(ctx: ^Context, l: int, is_left: bool) -> u64 {
|
||||
encode_byte_len :: #force_inline proc "contextless" (ctx: ^Context, l: int, is_left: bool) -> u64 {
|
||||
hi, lo := bits.mul_u64(u64(l), 8)
|
||||
return left_right_encode(ctx, hi, lo, is_left)
|
||||
}
|
||||
|
||||
@(private)
|
||||
left_right_encode :: proc(ctx: ^Context, hi, lo: u64, is_left: bool) -> u64 {
|
||||
left_right_encode :: proc "contextless" (ctx: ^Context, hi, lo: u64, is_left: bool) -> u64 {
|
||||
HI_OFFSET :: 1
|
||||
LO_OFFSET :: HI_OFFSET + 8
|
||||
RIGHT_OFFSET :: LO_OFFSET + 8
|
||||
|
||||
@@ -16,7 +16,7 @@ seal_oneshot :: proc(algo: Algorithm, dst, tag, key, iv, aad, plaintext: []byte,
|
||||
// returning true iff the authentication was successful. If authentication
|
||||
// fails, the destination buffer will be zeroed.
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
// dst and ciphertext MUST alias exactly or not at all.
|
||||
@(require_results)
|
||||
open_oneshot :: proc(algo: Algorithm, dst, key, iv, aad, ciphertext, tag: []byte, impl: Implementation = nil) -> bool {
|
||||
ctx: Context
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
package aead
|
||||
|
||||
import "core:crypto/aegis"
|
||||
import "core:crypto/aes"
|
||||
import "core:crypto/chacha20"
|
||||
import "core:crypto/chacha20poly1305"
|
||||
import "core:crypto/deoxysii"
|
||||
import "core:reflect"
|
||||
|
||||
// Implementation is an AEAD implementation. Most callers will not need
|
||||
@@ -15,7 +17,7 @@ Implementation :: union {
|
||||
|
||||
// MAX_TAG_SIZE is the maximum size tag that can be returned by any of the
|
||||
// Algorithms supported via this package.
|
||||
MAX_TAG_SIZE :: 16
|
||||
MAX_TAG_SIZE :: 32
|
||||
|
||||
// Algorithm is the algorithm identifier associated with a given Context.
|
||||
Algorithm :: enum {
|
||||
@@ -25,9 +27,14 @@ Algorithm :: enum {
|
||||
AES_GCM_256,
|
||||
CHACHA20POLY1305,
|
||||
XCHACHA20POLY1305,
|
||||
AEGIS_128L,
|
||||
AEGIS_128L_256, // AEGIS-128L (256-bit tag)
|
||||
AEGIS_256,
|
||||
AEGIS_256_256, // AEGIS-256 (256-bit tag)
|
||||
DEOXYS_II_256,
|
||||
}
|
||||
|
||||
// ALGORITM_NAMES is the Agorithm to algorithm name string.
|
||||
// ALGORITHM_NAMES is the Algorithm to algorithm name string.
|
||||
ALGORITHM_NAMES := [Algorithm]string {
|
||||
.Invalid = "Invalid",
|
||||
.AES_GCM_128 = "AES-GCM-128",
|
||||
@@ -35,6 +42,11 @@ ALGORITHM_NAMES := [Algorithm]string {
|
||||
.AES_GCM_256 = "AES-GCM-256",
|
||||
.CHACHA20POLY1305 = "chacha20poly1305",
|
||||
.XCHACHA20POLY1305 = "xchacha20poly1305",
|
||||
.AEGIS_128L = "AEGIS-128L",
|
||||
.AEGIS_128L_256 = "AEGIS-128L-256",
|
||||
.AEGIS_256 = "AEGIS-256",
|
||||
.AEGIS_256_256 = "AEGIS-256-256",
|
||||
.DEOXYS_II_256 = "Deoxys-II-256",
|
||||
}
|
||||
|
||||
// TAG_SIZES is the Algorithm to tag size in bytes.
|
||||
@@ -45,6 +57,11 @@ TAG_SIZES := [Algorithm]int {
|
||||
.AES_GCM_256 = aes.GCM_TAG_SIZE,
|
||||
.CHACHA20POLY1305 = chacha20poly1305.TAG_SIZE,
|
||||
.XCHACHA20POLY1305 = chacha20poly1305.TAG_SIZE,
|
||||
.AEGIS_128L = aegis.TAG_SIZE_128,
|
||||
.AEGIS_128L_256 = aegis.TAG_SIZE_256,
|
||||
.AEGIS_256 = aegis.TAG_SIZE_128,
|
||||
.AEGIS_256_256 = aegis.TAG_SIZE_256,
|
||||
.DEOXYS_II_256 = deoxysii.TAG_SIZE,
|
||||
}
|
||||
|
||||
// KEY_SIZES is the Algorithm to key size in bytes.
|
||||
@@ -55,6 +72,11 @@ KEY_SIZES := [Algorithm]int {
|
||||
.AES_GCM_256 = aes.KEY_SIZE_256,
|
||||
.CHACHA20POLY1305 = chacha20poly1305.KEY_SIZE,
|
||||
.XCHACHA20POLY1305 = chacha20poly1305.KEY_SIZE,
|
||||
.AEGIS_128L = aegis.KEY_SIZE_128L,
|
||||
.AEGIS_128L_256 = aegis.KEY_SIZE_128L,
|
||||
.AEGIS_256 = aegis.KEY_SIZE_256,
|
||||
.AEGIS_256_256 = aegis.KEY_SIZE_256,
|
||||
.DEOXYS_II_256 = deoxysii.KEY_SIZE,
|
||||
}
|
||||
|
||||
// IV_SIZES is the Algorithm to initialization vector size in bytes.
|
||||
@@ -67,6 +89,11 @@ IV_SIZES := [Algorithm]int {
|
||||
.AES_GCM_256 = aes.GCM_IV_SIZE,
|
||||
.CHACHA20POLY1305 = chacha20poly1305.IV_SIZE,
|
||||
.XCHACHA20POLY1305 = chacha20poly1305.XIV_SIZE,
|
||||
.AEGIS_128L = aegis.IV_SIZE_128L,
|
||||
.AEGIS_128L_256 = aegis.IV_SIZE_128L,
|
||||
.AEGIS_256 = aegis.IV_SIZE_256,
|
||||
.AEGIS_256_256 = aegis.IV_SIZE_256,
|
||||
.DEOXYS_II_256 = deoxysii.IV_SIZE,
|
||||
}
|
||||
|
||||
// Context is a concrete instantiation of a specific AEAD algorithm.
|
||||
@@ -75,6 +102,8 @@ Context :: struct {
|
||||
_impl: union {
|
||||
aes.Context_GCM,
|
||||
chacha20poly1305.Context,
|
||||
aegis.Context,
|
||||
deoxysii.Context,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -86,6 +115,11 @@ _IMPL_IDS := [Algorithm]typeid {
|
||||
.AES_GCM_256 = typeid_of(aes.Context_GCM),
|
||||
.CHACHA20POLY1305 = typeid_of(chacha20poly1305.Context),
|
||||
.XCHACHA20POLY1305 = typeid_of(chacha20poly1305.Context),
|
||||
.AEGIS_128L = typeid_of(aegis.Context),
|
||||
.AEGIS_128L_256 = typeid_of(aegis.Context),
|
||||
.AEGIS_256 = typeid_of(aegis.Context),
|
||||
.AEGIS_256_256 = typeid_of(aegis.Context),
|
||||
.DEOXYS_II_256 = typeid_of(deoxysii.Context),
|
||||
}
|
||||
|
||||
// init initializes a Context with a specific AEAD Algorithm.
|
||||
@@ -94,9 +128,7 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat
|
||||
reset(ctx)
|
||||
}
|
||||
|
||||
if len(key) != KEY_SIZES[algorithm] {
|
||||
panic("crypto/aead: invalid key size")
|
||||
}
|
||||
ensure(len(key) == KEY_SIZES[algorithm], "crypto/aead: invalid key size")
|
||||
|
||||
// Directly specialize the union by setting the type ID (save a copy).
|
||||
reflect.set_union_variant_typeid(
|
||||
@@ -113,6 +145,12 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat
|
||||
case .XCHACHA20POLY1305:
|
||||
impl_ := impl != nil ? impl.(chacha20.Implementation) : chacha20.DEFAULT_IMPLEMENTATION
|
||||
chacha20poly1305.init_xchacha(&ctx._impl.(chacha20poly1305.Context), key, impl_)
|
||||
case .AEGIS_128L, .AEGIS_128L_256, .AEGIS_256, .AEGIS_256_256:
|
||||
impl_ := impl != nil ? impl.(aes.Implementation) : aes.DEFAULT_IMPLEMENTATION
|
||||
aegis.init(&ctx._impl.(aegis.Context), key, impl_)
|
||||
case .DEOXYS_II_256:
|
||||
impl_ := impl != nil ? impl.(aes.Implementation) : aes.DEFAULT_IMPLEMENTATION
|
||||
deoxysii.init(&ctx._impl.(deoxysii.Context), key, impl_)
|
||||
case .Invalid:
|
||||
panic("crypto/aead: uninitialized algorithm")
|
||||
case:
|
||||
@@ -127,11 +165,17 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
|
||||
ensure(len(tag) == TAG_SIZES[ctx._algo], "crypto/aead: invalid tag size")
|
||||
|
||||
switch &impl in ctx._impl {
|
||||
case aes.Context_GCM:
|
||||
aes.seal_gcm(&impl, dst, tag, iv, aad, plaintext)
|
||||
case chacha20poly1305.Context:
|
||||
chacha20poly1305.seal(&impl, dst, tag, iv, aad, plaintext)
|
||||
case aegis.Context:
|
||||
aegis.seal(&impl, dst, tag, iv, aad, plaintext)
|
||||
case deoxysii.Context:
|
||||
deoxysii.seal(&impl, dst, tag, iv, aad, plaintext)
|
||||
case:
|
||||
panic("crypto/aead: uninitialized algorithm")
|
||||
}
|
||||
@@ -145,11 +189,17 @@ seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
|
||||
// dst and ciphertext MUST alias exactly or not at all.
|
||||
@(require_results)
|
||||
open_ctx :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
ensure(len(tag) == TAG_SIZES[ctx._algo], "crypto/aead: invalid tag size")
|
||||
|
||||
switch &impl in ctx._impl {
|
||||
case aes.Context_GCM:
|
||||
return aes.open_gcm(&impl, dst, iv, aad, ciphertext, tag)
|
||||
case chacha20poly1305.Context:
|
||||
return chacha20poly1305.open(&impl, dst, iv, aad, ciphertext, tag)
|
||||
case aegis.Context:
|
||||
return aegis.open(&impl, dst, iv, aad, ciphertext, tag)
|
||||
case deoxysii.Context:
|
||||
return deoxysii.open(&impl, dst, iv, aad, ciphertext, tag)
|
||||
case:
|
||||
panic("crypto/aead: uninitialized algorithm")
|
||||
}
|
||||
@@ -163,6 +213,10 @@ reset :: proc(ctx: ^Context) {
|
||||
aes.reset_gcm(&impl)
|
||||
case chacha20poly1305.Context:
|
||||
chacha20poly1305.reset(&impl)
|
||||
case aegis.Context:
|
||||
aegis.reset(&impl)
|
||||
case deoxysii.Context:
|
||||
deoxysii.reset(&impl)
|
||||
case:
|
||||
// Calling reset repeatedly is fine.
|
||||
}
|
||||
|
||||
@@ -0,0 +1,213 @@
|
||||
/*
|
||||
package aegis implements the AEGIS-128L and AEGIS-256 Authenticated
|
||||
Encryption with Additional Data algorithms.
|
||||
|
||||
See:
|
||||
- [[ https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-12.txt ]]
|
||||
*/
|
||||
package aegis
|
||||
|
||||
import "core:bytes"
|
||||
import "core:crypto"
|
||||
import "core:crypto/aes"
|
||||
import "core:mem"
|
||||
|
||||
// KEY_SIZE_128L is the AEGIS-128L key size in bytes.
|
||||
KEY_SIZE_128L :: 16
|
||||
// KEY_SIZE_256 is the AEGIS-256 key size in bytes.
|
||||
KEY_SIZE_256 :: 32
|
||||
// IV_SIZE_128L is the AEGIS-128L IV size in bytes.
|
||||
IV_SIZE_128L :: 16
|
||||
// IV_SIZE_256 is the AEGIS-256 IV size in bytes.
|
||||
IV_SIZE_256 :: 32
|
||||
// TAG_SIZE_128 is the AEGIS-128L or AEGIS-256 128-bit tag size in bytes.
|
||||
TAG_SIZE_128 :: 16
|
||||
// TAG_SIZE_256 is the AEGIS-128L or AEGIS-256 256-bit tag size in bytes.
|
||||
TAG_SIZE_256 :: 32
|
||||
|
||||
@(private)
|
||||
_RATE_128L :: 32
|
||||
@(private)
|
||||
_RATE_256 :: 16
|
||||
@(private)
|
||||
_RATE_MAX :: _RATE_128L
|
||||
|
||||
@(private, rodata)
|
||||
_C0 := [16]byte{
|
||||
0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
|
||||
0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62,
|
||||
}
|
||||
|
||||
@(private, rodata)
|
||||
_C1 := [16]byte {
|
||||
0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
|
||||
0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd,
|
||||
}
|
||||
|
||||
// Context is a keyed AEGIS-128L or AEGIS-256 instance.
|
||||
Context :: struct {
|
||||
// Key storage, sized for the larger (AEGIS-256) key. `init` copies the
// caller's key to the front of this buffer.
_key: [KEY_SIZE_256]byte,
|
||||
// Length in bytes of the key passed to `init` (KEY_SIZE_128L or
// KEY_SIZE_256); selects the variant in the IV check and in `init_sw`.
_key_len: int,
|
||||
// Implementation selected by `init`; `seal`/`open` switch on this.
_impl: aes.Implementation,
|
||||
// Set to true by `init` and to false by `reset`; checked by `seal`/`open`.
_is_initialized: bool,
|
||||
}
|
||||
|
||||
// _validate_common_slice_sizes checks the slice lengths shared by `seal`
// and `open`:
//   - panics unless len(tag) is TAG_SIZE_128 or TAG_SIZE_256;
//   - fails an `ensure` unless len(iv) matches the IV size that goes with
//     ctx._key_len.
// The `aad` and `text` parameters are accepted but not inspected, for the
// reason given in the comment at the end of the body.
@(private)
|
||||
_validate_common_slice_sizes :: proc (ctx: ^Context, tag, iv, aad, text: []byte) {
|
||||
switch len(tag) {
|
||||
case TAG_SIZE_128, TAG_SIZE_256:
|
||||
case:
|
||||
panic("crypto/aegis: invalid tag size")
|
||||
}
|
||||
|
||||
// iv_ok stays false if ctx._key_len matches neither key size, so the
// `ensure` below also rejects a context with an unexpected key length.
iv_ok: bool
|
||||
switch ctx._key_len {
|
||||
case KEY_SIZE_128L:
|
||||
iv_ok = len(iv) == IV_SIZE_128L
|
||||
case KEY_SIZE_256:
|
||||
iv_ok = len(iv) == IV_SIZE_256
|
||||
}
|
||||
ensure(iv_ok,"crypto/aegis: invalid IV size")
|
||||
|
||||
#assert(size_of(int) == 8 || size_of(int) <= 4)
|
||||
// As A_MAX and P_MAX are both defined to be 2^61 - 1 bytes, and
|
||||
// the maximum length of a slice is bound by `size_of(int)`, where
|
||||
// `int` is register sized, there is no need to check AAD/text
|
||||
// lengths.
|
||||
}
|
||||
|
||||
// init initializes a Context with the provided key, for AEGIS-128L or AEGIS-256.
|
||||
init :: proc(ctx: ^Context, key: []byte, impl := aes.DEFAULT_IMPLEMENTATION) {
|
||||
// Only the two AEGIS key sizes are accepted; anything else panics.
switch len(key) {
|
||||
case KEY_SIZE_128L, KEY_SIZE_256:
|
||||
case:
|
||||
panic("crypto/aegis: invalid key size")
|
||||
}
|
||||
|
||||
// The key length is recorded because `_key` is sized for the larger
// key, so the buffer alone does not say which variant is in use.
copy(ctx._key[:], key)
|
||||
ctx._key_len = len(key)
|
||||
ctx._impl = impl
|
||||
// Fall back to the portable implementation when hardware support was
// requested but `is_hardware_accelerated()` reports false.
if ctx._impl == .Hardware && !is_hardware_accelerated() {
|
||||
ctx._impl = .Portable
|
||||
}
|
||||
ctx._is_initialized = true
|
||||
}
|
||||
|
||||
// seal encrypts the plaintext and authenticates the aad and ciphertext,
|
||||
// with the provided Context and iv, stores the output in dst and tag.
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
|
||||
ensure(ctx._is_initialized)
|
||||
|
||||
// Argument validation: tag/IV sizes, then dst must be exactly as long as
// the plaintext and must not partially overlap it.
_validate_common_slice_sizes(ctx, tag, iv, aad, plaintext)
|
||||
ensure(len(dst) == len(plaintext), "crypto/aegis: invalid destination ciphertext size")
|
||||
ensure(!bytes.alias_inexactly(dst, plaintext), "crypto/aegis: dst and plaintext alias inexactly")
|
||||
|
||||
// Both branches run the same sequence (init, absorb AAD, encrypt,
// finalize) against the implementation-specific state type.
switch ctx._impl {
|
||||
case .Hardware:
|
||||
st: State_HW
|
||||
// The per-message state is reset on every exit path.
defer reset_state_hw(&st)
|
||||
|
||||
init_hw(ctx, &st, iv)
|
||||
|
||||
aad_len, pt_len := len(aad), len(plaintext)
|
||||
if aad_len > 0 {
|
||||
absorb_hw(&st, aad)
|
||||
}
|
||||
|
||||
if pt_len > 0 {
|
||||
enc_hw(&st, dst, plaintext)
|
||||
}
|
||||
|
||||
// The AAD and plaintext lengths are passed to finalization along
// with the tag buffer.
finalize_hw(&st, tag, aad_len, pt_len)
|
||||
case .Portable:
|
||||
st: State_SW
|
||||
// The per-message state is reset on every exit path.
defer reset_state_sw(&st)
|
||||
|
||||
init_sw(ctx, &st, iv)
|
||||
|
||||
aad_len, pt_len := len(aad), len(plaintext)
|
||||
if aad_len > 0 {
|
||||
absorb_sw(&st, aad)
|
||||
}
|
||||
|
||||
if pt_len > 0 {
|
||||
enc_sw(&st, dst, plaintext)
|
||||
}
|
||||
|
||||
finalize_sw(&st, tag, aad_len, pt_len)
|
||||
case:
|
||||
panic("core/crypto/aegis: not implemented")
|
||||
}
|
||||
}
|
||||
|
||||
// open authenticates the aad and ciphertext, and decrypts the ciphertext,
|
||||
// with the provided Context, iv, and tag, and stores the output in dst,
|
||||
// returning true iff the authentication was successful. If authentication
|
||||
// fails, the destination buffer will be zeroed.
|
||||
//
|
||||
// dst and ciphertext MUST alias exactly or not at all.
|
||||
@(require_results)
|
||||
open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
ensure(ctx._is_initialized)
|
||||
|
||||
// Argument validation: tag/IV sizes, then dst must be exactly as long as
// the ciphertext and must not partially overlap it.
_validate_common_slice_sizes(ctx, tag, iv, aad, ciphertext)
|
||||
ensure(len(dst) == len(ciphertext), "crypto/aegis: invalid destination plaintext size")
|
||||
ensure(!bytes.alias_inexactly(dst, ciphertext), "crypto/aegis: dst and ciphertext alias inexactly")
|
||||
|
||||
// Scratch space for the recomputed tag, sized for the larger tag and
// sliced down to the length of the caller-supplied tag.
tmp: [TAG_SIZE_256]byte
|
||||
derived_tag := tmp[:len(tag)]
|
||||
aad_len, ct_len := len(aad), len(ciphertext)
|
||||
|
||||
// Both branches run the same sequence (init, absorb AAD, decrypt,
// finalize into derived_tag) against the implementation-specific state.
// Note that dst is written before the tag is checked below.
switch ctx._impl {
|
||||
case .Hardware:
|
||||
st: State_HW
|
||||
defer reset_state_hw(&st)
|
||||
|
||||
init_hw(ctx, &st, iv)
|
||||
|
||||
if aad_len > 0 {
|
||||
absorb_hw(&st, aad)
|
||||
}
|
||||
|
||||
if ct_len > 0 {
|
||||
dec_hw(&st, dst, ciphertext)
|
||||
}
|
||||
|
||||
finalize_hw(&st, derived_tag, aad_len, ct_len)
|
||||
case .Portable:
|
||||
st: State_SW
|
||||
defer reset_state_sw(&st)
|
||||
|
||||
init_sw(ctx, &st, iv)
|
||||
|
||||
if aad_len > 0 {
|
||||
absorb_sw(&st, aad)
|
||||
}
|
||||
|
||||
if ct_len > 0 {
|
||||
dec_sw(&st, dst, ciphertext)
|
||||
}
|
||||
|
||||
finalize_sw(&st, derived_tag, aad_len, ct_len)
|
||||
case:
|
||||
panic("core/crypto/aegis: not implemented")
|
||||
}
|
||||
|
||||
// On a tag mismatch, wipe both the recomputed tag and the plaintext that
// was already written to dst before reporting failure.
if crypto.compare_constant_time(tag, derived_tag) != 1 {
|
||||
mem.zero_explicit(raw_data(derived_tag), len(derived_tag))
|
||||
mem.zero_explicit(raw_data(dst), ct_len)
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// reset sanitizes the Context. The Context must be
|
||||
// re-initialized to be used again.
|
||||
reset :: proc "contextless" (ctx: ^Context) {
|
||||
// Wipe the whole key buffer, regardless of the stored key length.
mem.zero_explicit(&ctx._key, len(ctx._key))
|
||||
ctx._key_len = 0
|
||||
// `_impl` is left as-is; clearing the flag makes `seal`/`open` refuse
// the context until `init` is called again.
ctx._is_initialized = false
|
||||
}
|
||||
@@ -0,0 +1,452 @@
|
||||
package aegis
|
||||
|
||||
import aes "core:crypto/_aes/ct64"
|
||||
import "core:encoding/endian"
|
||||
import "core:mem"
|
||||
|
||||
// This uses the bitsliced 64-bit general purpose register SWAR AES
|
||||
// round function. The intermediate state is stored in interleaved
|
||||
// but NOT orthogonalized form, as leaving things in the orthogonalized
|
||||
// format would overly complicate the update implementation.
|
||||
//
|
||||
// Note/perf: Per Frank Denis and a review of the specification, it is
|
||||
// possible to gain slightly more performance by leaving the state in
|
||||
// orthogonalized form while doing initialization, finalization, and
|
||||
// absorbing AAD. This implementation opts out of those optimizations
|
||||
// for the sake of simplicity.
|
||||
//
|
||||
// The update function leverages the parallelism (4xblocks) at once.
|
||||
|
||||
// State_SW is the per-message state of the portable implementation.
//
// Each state word sN is held as a pair of u64 values (sN_0, sN_1), the
// form produced by `aes.load_interleaved`. `init_sw` and `update_sw_256`
// only touch s0..s5 in the 256-bit-key case; all of s0..s7 are used in
// the 128L case.
@(private)
|
||||
State_SW :: struct {
|
||||
s0_0, s0_1: u64,
|
||||
s1_0, s1_1: u64,
|
||||
s2_0, s2_1: u64,
|
||||
s3_0, s3_1: u64,
|
||||
s4_0, s4_1: u64,
|
||||
s5_0, s5_1: u64,
|
||||
s6_0, s6_1: u64,
|
||||
s7_0, s7_1: u64,
|
||||
// Scratch buffers for the update functions: q_b carries the AES round
// inputs/outputs, q_k the values passed to `aes.add_round_key`.
q_k, q_b: [8]u64,
|
||||
// Set by `init_sw` to _RATE_128L or _RATE_256; selects the variant in
// `absorb_sw`.
rate: int,
|
||||
}
|
||||
|
||||
// init_sw loads the key from ctx and the caller's iv into st, sets
// st.rate, and runs the initial update rounds for the variant selected
// by ctx._key_len.
//
// If ctx._key_len matches neither case, st is left untouched.
// NOTE(review): iv is presumably already length-checked by the caller
// (see `_validate_common_slice_sizes`) -- it is not re-checked here.
@(private)
|
||||
init_sw :: proc "contextless" (ctx: ^Context, st: ^State_SW, iv: []byte) {
|
||||
switch ctx._key_len {
|
||||
case KEY_SIZE_128L:
|
||||
key_0, key_1 := aes.load_interleaved(ctx._key[:16])
|
||||
iv_0, iv_1 := aes.load_interleaved(iv)
|
||||
|
||||
// Initial state: s0 = s4 = key ^ iv, s1 = s3 = C1, s2 = C0,
// s5 = s7 = key ^ C0, s6 = key ^ C1.
st.s0_0, st.s0_1 = aes.xor_interleaved(key_0, key_1, iv_0, iv_1)
|
||||
st.s1_0, st.s1_1 = aes.load_interleaved(_C1[:])
|
||||
st.s2_0, st.s2_1 = aes.load_interleaved(_C0[:])
|
||||
st.s3_0, st.s3_1 = st.s1_0, st.s1_1
|
||||
st.s4_0, st.s4_1 = st.s0_0, st.s0_1
|
||||
st.s5_0, st.s5_1 = aes.xor_interleaved(key_0, key_1, st.s2_0, st.s2_1)
|
||||
st.s6_0, st.s6_1 = aes.xor_interleaved(key_0, key_1, st.s1_0, st.s1_1)
|
||||
st.s7_0, st.s7_1 = st.s5_0, st.s5_1
|
||||
st.rate = _RATE_128L
|
||||
|
||||
// Ten updates, each with (iv, key) as the two message words.
for _ in 0 ..< 10 {
|
||||
update_sw_128l(st, iv_0, iv_1, key_0, key_1)
|
||||
}
|
||||
case KEY_SIZE_256:
|
||||
// The 32-byte key and iv are each split into two 16-byte halves.
k0_0, k0_1 := aes.load_interleaved(ctx._key[:16])
|
||||
k1_0, k1_1 := aes.load_interleaved(ctx._key[16:])
|
||||
n0_0, n0_1 := aes.load_interleaved(iv[:16])
|
||||
n1_0, n1_1 := aes.load_interleaved(iv[16:])
|
||||
|
||||
// Initial state: s0 = k0 ^ n0, s1 = k1 ^ n1, s2 = C1, s3 = C0,
// s4 = k0 ^ C0, s5 = k1 ^ C1.
st.s0_0, st.s0_1 = aes.xor_interleaved(k0_0, k0_1, n0_0, n0_1)
|
||||
st.s1_0, st.s1_1 = aes.xor_interleaved(k1_0, k1_1, n1_0, n1_1)
|
||||
st.s2_0, st.s2_1 = aes.load_interleaved(_C1[:])
|
||||
st.s3_0, st.s3_1 = aes.load_interleaved(_C0[:])
|
||||
st.s4_0, st.s4_1 = aes.xor_interleaved(k0_0, k0_1, st.s3_0, st.s3_1)
|
||||
st.s5_0, st.s5_1 = aes.xor_interleaved(k1_0, k1_1, st.s2_0, st.s2_1)
|
||||
st.rate = _RATE_256
|
||||
|
||||
// u0/u1 capture k0 ^ n0 and k1 ^ n1 before the updates overwrite
// s0/s1. Four iterations of (k0, k1, u0, u1) follow, 16 updates total.
u0_0, u0_1, u1_0, u1_1 := st.s0_0, st.s0_1, st.s1_0, st.s1_1
|
||||
for _ in 0 ..< 4 {
|
||||
update_sw_256(st, k0_0, k0_1)
|
||||
update_sw_256(st, k1_0, k1_1)
|
||||
update_sw_256(st, u0_0, u0_1)
|
||||
update_sw_256(st, u1_0, u1_1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// update_sw_128l advances the eight-word state by one step with message
// words m0 = (m0_0, m0_1) and m1 = (m1_0, m1_1).
//
// The work is done as two batches of four AES rounds. In each batch q_b
// slot i holds the round input and q_k slot i the value passed to
// `aes.add_round_key`, with a word's two u64 halves stored at indices i
// and i+4. Written per slot, the step is:
//   s0' = R(s7) ^ s0 ^ m0     s4' = R(s3) ^ s4 ^ m1
//   s1' = R(s0) ^ s1          s5' = R(s4) ^ s5
//   s2' = R(s1) ^ s2          s6' = R(s5) ^ s6
//   s3' = R(s2) ^ s3          s7' = R(s6) ^ s7
// where R is sub_bytes + shift_rows + mix_columns and every right-hand
// side uses the values from before the call.
// NOTE(review): the per-slot reading assumes the ct64 helpers treat the
// four slots independently -- confirm against core:crypto/_aes/ct64.
@(private = "file")
|
||||
update_sw_128l :: proc "contextless" (st: ^State_SW, m0_0, m0_1, m1_0, m1_1: u64) {
|
||||
// Batch 1 keys: s0 ^ m0, s1, s2, s3.
st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s0_0, st.s0_1, m0_0, m0_1)
|
||||
st.q_k[1], st.q_k[5] = st.s1_0, st.s1_1
|
||||
st.q_k[2], st.q_k[6] = st.s2_0, st.s2_1
|
||||
st.q_k[3], st.q_k[7] = st.s3_0, st.s3_1
|
||||
aes.orthogonalize(&st.q_k)
|
||||
|
||||
// Batch 1 inputs: s7, s0, s1, s2.
st.q_b[0], st.q_b[4] = st.s7_0, st.s7_1
|
||||
st.q_b[1], st.q_b[5] = st.s0_0, st.s0_1
|
||||
st.q_b[2], st.q_b[6] = st.s1_0, st.s1_1
|
||||
st.q_b[3], st.q_b[7] = st.s2_0, st.s2_1
|
||||
aes.orthogonalize(&st.q_b)
|
||||
|
||||
aes.sub_bytes(&st.q_b)
|
||||
aes.shift_rows(&st.q_b)
|
||||
aes.mix_columns(&st.q_b)
|
||||
aes.add_round_key(&st.q_b, st.q_k[:])
|
||||
aes.orthogonalize(&st.q_b)
|
||||
|
||||
st.s0_0, st.s0_1 = st.q_b[0], st.q_b[4]
|
||||
st.s1_0, st.s1_1 = st.q_b[1], st.q_b[5]
|
||||
st.s2_0, st.s2_1 = st.q_b[2], st.q_b[6]
|
||||
// The new s3 is parked in locals: batch 2 still needs the old st.s3
// as a round input.
s3_0, s3_1 := st.q_b[3], st.q_b[7]
|
||||
|
||||
// Batch 2 keys: s4 ^ m1, s5, s6, s7.
st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s4_0, st.s4_1, m1_0, m1_1)
|
||||
st.q_k[1], st.q_k[5] = st.s5_0, st.s5_1
|
||||
st.q_k[2], st.q_k[6] = st.s6_0, st.s6_1
|
||||
st.q_k[3], st.q_k[7] = st.s7_0, st.s7_1
|
||||
aes.orthogonalize(&st.q_k)
|
||||
|
||||
// Batch 2 inputs: old s3, s4, s5, s6.
st.q_b[0], st.q_b[4] = st.s3_0, st.s3_1
|
||||
st.q_b[1], st.q_b[5] = st.s4_0, st.s4_1
|
||||
st.q_b[2], st.q_b[6] = st.s5_0, st.s5_1
|
||||
st.q_b[3], st.q_b[7] = st.s6_0, st.s6_1
|
||||
aes.orthogonalize(&st.q_b)
|
||||
|
||||
aes.sub_bytes(&st.q_b)
|
||||
aes.shift_rows(&st.q_b)
|
||||
aes.mix_columns(&st.q_b)
|
||||
aes.add_round_key(&st.q_b, st.q_k[:])
|
||||
aes.orthogonalize(&st.q_b)
|
||||
|
||||
// Commit the parked s3 and the batch 2 results.
st.s3_0, st.s3_1 = s3_0, s3_1
|
||||
st.s4_0, st.s4_1 = st.q_b[0], st.q_b[4]
|
||||
st.s5_0, st.s5_1 = st.q_b[1], st.q_b[5]
|
||||
st.s6_0, st.s6_1 = st.q_b[2], st.q_b[6]
|
||||
st.s7_0, st.s7_1 = st.q_b[3], st.q_b[7]
|
||||
}
|
||||
|
||||
// update_sw_256 advances the six-word state by one step with message
// word m = (m_0, m_1).
//
// As in update_sw_128l, q_b slot i holds an AES round input and q_k
// slot i the value passed to `aes.add_round_key`, with halves at indices
// i and i+4. Written per slot, the step is:
//   s0' = R(s5) ^ s0 ^ m      s3' = R(s2) ^ s3
//   s1' = R(s0) ^ s1          s4' = R(s3) ^ s4
//   s2' = R(s1) ^ s2          s5' = R(s4) ^ s5
// where R is sub_bytes + shift_rows + mix_columns and every right-hand
// side uses the values from before the call.
// NOTE(review): the per-slot reading assumes the ct64 helpers treat the
// four slots independently -- confirm against core:crypto/_aes/ct64.
@(private = "file")
|
||||
update_sw_256 :: proc "contextless" (st: ^State_SW, m_0, m_1: u64) {
|
||||
// Batch 1 keys: s0 ^ m, s1, s2, s3.
st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s0_0, st.s0_1, m_0, m_1)
|
||||
st.q_k[1], st.q_k[5] = st.s1_0, st.s1_1
|
||||
st.q_k[2], st.q_k[6] = st.s2_0, st.s2_1
|
||||
st.q_k[3], st.q_k[7] = st.s3_0, st.s3_1
|
||||
aes.orthogonalize(&st.q_k)
|
||||
|
||||
// Batch 1 inputs: s5, s0, s1, s2.
st.q_b[0], st.q_b[4] = st.s5_0, st.s5_1
|
||||
st.q_b[1], st.q_b[5] = st.s0_0, st.s0_1
|
||||
st.q_b[2], st.q_b[6] = st.s1_0, st.s1_1
|
||||
st.q_b[3], st.q_b[7] = st.s2_0, st.s2_1
|
||||
aes.orthogonalize(&st.q_b)
|
||||
|
||||
aes.sub_bytes(&st.q_b)
|
||||
aes.shift_rows(&st.q_b)
|
||||
aes.mix_columns(&st.q_b)
|
||||
aes.add_round_key(&st.q_b, st.q_k[:])
|
||||
aes.orthogonalize(&st.q_b)
|
||||
|
||||
st.s0_0, st.s0_1 = st.q_b[0], st.q_b[4]
|
||||
st.s1_0, st.s1_1 = st.q_b[1], st.q_b[5]
|
||||
st.s2_0, st.s2_1 = st.q_b[2], st.q_b[6]
|
||||
// The new s3 is parked in locals: batch 2 still needs the old st.s3
// as a round input.
s3_0, s3_1 := st.q_b[3], st.q_b[7]
|
||||
|
||||
// Batch 2 only fills slots 0 and 1 (keys s4, s5; inputs old s3, s4).
// Slots 2 and 3 of q_k/q_b keep whatever the first batch left there;
// they go through the round too, but are never read back.
st.q_k[0], st.q_k[4] = st.s4_0, st.s4_1
|
||||
st.q_k[1], st.q_k[5] = st.s5_0, st.s5_1
|
||||
aes.orthogonalize(&st.q_k)
|
||||
|
||||
st.q_b[0], st.q_b[4] = st.s3_0, st.s3_1
|
||||
st.q_b[1], st.q_b[5] = st.s4_0, st.s4_1
|
||||
aes.orthogonalize(&st.q_b)
|
||||
|
||||
aes.sub_bytes(&st.q_b)
|
||||
aes.shift_rows(&st.q_b)
|
||||
aes.mix_columns(&st.q_b)
|
||||
aes.add_round_key(&st.q_b, st.q_k[:])
|
||||
aes.orthogonalize(&st.q_b)
|
||||
|
||||
// Commit the parked s3 and the batch 2 results.
st.s3_0, st.s3_1 = s3_0, s3_1
|
||||
st.s4_0, st.s4_1 = st.q_b[0], st.q_b[4]
|
||||
st.s5_0, st.s5_1 = st.q_b[1], st.q_b[5]
|
||||
}
|
||||
|
||||
// absorb_sw_128l feeds one 32-byte block from the front of ai into the
// state: bytes [0, 16) become the first message word and the bytes from
// offset 16 the second.
//
// Bounds checks are disabled and no length is verified here; the caller
// (`absorb_sw`) only passes slices holding at least 32 bytes.
// NOTE(review): ai[16:] may be longer than 16 bytes; this relies on
// `aes.load_interleaved` reading only the first 16 -- confirm.
@(private = "file")
|
||||
absorb_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, ai: []byte) #no_bounds_check {
|
||||
t0_0, t0_1 := aes.load_interleaved(ai[:16])
|
||||
t1_0, t1_1 := aes.load_interleaved(ai[16:])
|
||||
update_sw_128l(st, t0_0, t0_1, t1_0, t1_1)
|
||||
}
|
||||
|
||||
// absorb_sw_256 loads one message word from the front of ai and feeds it
// into the state via update_sw_256.
//
// NOTE(review): callers pass slices longer than 16 bytes (the remaining
// AAD, or a 32-byte padding buffer); this relies on
// `aes.load_interleaved` reading only the first 16 bytes -- confirm.
@(private = "file")
|
||||
absorb_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, ai: []byte) {
|
||||
m_0, m_1 := aes.load_interleaved(ai)
|
||||
update_sw_256(st, m_0, m_1)
|
||||
}
|
||||
|
||||
// absorb_sw feeds aad into the state in blocks of st.rate bytes, then
// zero-pads and absorbs any final partial block.
//
// If st.rate is neither _RATE_128L nor _RATE_256, no full blocks are
// consumed and the padding step's inner switch does nothing.
@(private)
|
||||
absorb_sw :: proc "contextless" (st: ^State_SW, aad: []byte) #no_bounds_check {
|
||||
// ai is the not-yet-absorbed tail of aad, l its length.
ai, l := aad, len(aad)
|
||||
|
||||
switch st.rate {
|
||||
case _RATE_128L:
|
||||
for l >= _RATE_128L {
|
||||
absorb_sw_128l(st, ai)
|
||||
ai = ai[_RATE_128L:]
|
||||
l -= _RATE_128L
|
||||
}
|
||||
case _RATE_256:
|
||||
for l >= _RATE_256 {
|
||||
absorb_sw_256(st, ai)
|
||||
|
||||
ai = ai[_RATE_256:]
|
||||
l -= _RATE_256
|
||||
}
|
||||
}
|
||||
|
||||
// Pad out the remainder with `0`s till it is rate sized.
|
||||
if l > 0 {
|
||||
// tmp is zero-initialized and sized for the larger rate; the same
// buffer is handed to either variant.
tmp: [_RATE_MAX]byte // AAD is not confidential.
|
||||
copy(tmp[:], ai)
|
||||
switch st.rate {
|
||||
case _RATE_128L:
|
||||
absorb_sw_128l(st, tmp[:])
|
||||
case _RATE_256:
|
||||
absorb_sw_256(st, tmp[:])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// z_sw_128l derives the two AEGIS-128L keystream words from the current
// state without modifying it.  Results are returned as two interleaved
// (u64, u64) pairs:
//
//   first  = s6 ^ (s1 ^ (s2 & s3))
//   second = s2 ^ (s5 ^ (s6 & s7))
@(private = "file", require_results)
z_sw_128l :: proc "contextless" (st: ^State_SW) -> (u64, u64, u64, u64) {
	// The two words are independent of each other, so the order in which
	// they are computed does not matter.
	hi_0, hi_1 := aes.and_interleaved(st.s6_0, st.s6_1, st.s7_0, st.s7_1)
	hi_0, hi_1 = aes.xor_interleaved(st.s5_0, st.s5_1, hi_0, hi_1)
	hi_0, hi_1 = aes.xor_interleaved(st.s2_0, st.s2_1, hi_0, hi_1)

	lo_0, lo_1 := aes.and_interleaved(st.s2_0, st.s2_1, st.s3_0, st.s3_1)
	lo_0, lo_1 = aes.xor_interleaved(st.s1_0, st.s1_1, lo_0, lo_1)
	lo_0, lo_1 = aes.xor_interleaved(st.s6_0, st.s6_1, lo_0, lo_1)

	return lo_0, lo_1, hi_0, hi_1
}
|
||||
|
||||
// z_sw_256 derives the AEGIS-256 keystream word from the current state
// without modifying it:
//
//   z = s1 ^ (s4 ^ (s5 ^ (s2 & s3)))
@(private = "file", require_results)
z_sw_256 :: proc "contextless" (st: ^State_SW) -> (u64, u64) {
	acc_0, acc_1 := aes.and_interleaved(st.s2_0, st.s2_1, st.s3_0, st.s3_1)
	acc_0, acc_1 = aes.xor_interleaved(st.s5_0, st.s5_1, acc_0, acc_1)
	acc_0, acc_1 = aes.xor_interleaved(st.s4_0, st.s4_1, acc_0, acc_1)
	acc_0, acc_1 = aes.xor_interleaved(st.s1_0, st.s1_1, acc_0, acc_1)
	return acc_0, acc_1
}
|
||||
|
||||
// enc_sw_128l encrypts one AEGIS-128L block from `xi` into `ci`.
//
// The keystream is taken from the state as it was on entry; the state is
// then updated with the plaintext block.  Both halves of `xi` are loaded
// before anything is written to `ci`.
@(private = "file")
enc_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, ci, xi: []byte) #no_bounds_check {
	// Keystream must be derived before the state is updated.
	ks0_0, ks0_1, ks1_0, ks1_1 := z_sw_128l(st)

	p0_0, p0_1 := aes.load_interleaved(xi[:16])
	p1_0, p1_1 := aes.load_interleaved(xi[16:])

	c0_0, c0_1 := aes.xor_interleaved(p0_0, p0_1, ks0_0, ks0_1)
	c1_0, c1_1 := aes.xor_interleaved(p1_0, p1_1, ks1_0, ks1_1)

	update_sw_128l(st, p0_0, p0_1, p1_0, p1_1)

	aes.store_interleaved(ci[:16], c0_0, c0_1)
	aes.store_interleaved(ci[16:], c1_0, c1_1)
}
|
||||
|
||||
// enc_sw_256 encrypts one AEGIS-256 block from `xi` into `ci`.
//
// The keystream is taken from the state as it was on entry; the state is
// then updated with the plaintext block.
@(private = "file")
enc_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, ci, xi: []byte) #no_bounds_check {
	// Keystream must be derived before the state is updated.
	ks_0, ks_1 := z_sw_256(st)

	p_0, p_1 := aes.load_interleaved(xi)
	c_0, c_1 := aes.xor_interleaved(p_0, p_1, ks_0, ks_1)

	update_sw_256(st, p_0, p_1)

	aes.store_interleaved(ci, c_0, c_1)
}
|
||||
|
||||
// enc_sw encrypts `src` into `dst` using the software state.
//
// Whole rate-sized blocks are processed directly.  A trailing partial
// block is zero-padded in a scratch buffer, encrypted in place there, and
// only the first len(remainder) bytes of the result are copied to `dst`.
// The block size is selected by `st.rate`.
@(private)
enc_sw :: proc "contextless" (st: ^State_SW, dst, src: []byte) #no_bounds_check {
	out, rem := dst, src

	// Encrypt every complete block.
	switch st.rate {
	case _RATE_128L:
		for len(rem) >= _RATE_128L {
			enc_sw_128l(st, out, rem)
			out, rem = out[_RATE_128L:], rem[_RATE_128L:]
		}
	case _RATE_256:
		for len(rem) >= _RATE_256 {
			enc_sw_256(st, out, rem)
			out, rem = out[_RATE_256:], rem[_RATE_256:]
		}
	}

	tail_len := len(rem)
	if tail_len == 0 {
		return
	}

	// Pad out the remainder with `0`s till it is rate sized.
	buf: [_RATE_MAX]byte // Ciphertext is not confidential.
	copy(buf[:], rem)
	switch st.rate {
	case _RATE_128L:
		enc_sw_128l(st, buf[:], buf[:])
	case _RATE_256:
		enc_sw_256(st, buf[:], buf[:])
	}
	copy(out, buf[:tail_len])
}
|
||||
|
||||
// dec_sw_128l decrypts one AEGIS-128L block from `ci` into `xi`.
//
// The keystream is taken from the state as it was on entry; the state is
// then updated with the recovered plaintext, which is finally written to
// `xi`.
@(private = "file")
dec_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, xi, ci: []byte) #no_bounds_check {
	// Keystream must be derived before the state is updated.
	ks0_0, ks0_1, ks1_0, ks1_1 := z_sw_128l(st)

	c0_0, c0_1 := aes.load_interleaved(ci[:16])
	p0_0, p0_1 := aes.xor_interleaved(c0_0, c0_1, ks0_0, ks0_1)

	c1_0, c1_1 := aes.load_interleaved(ci[16:])
	p1_0, p1_1 := aes.xor_interleaved(c1_0, c1_1, ks1_0, ks1_1)

	// The state absorbs the plaintext, not the ciphertext.
	update_sw_128l(st, p0_0, p0_1, p1_0, p1_1)

	aes.store_interleaved(xi[:16], p0_0, p0_1)
	aes.store_interleaved(xi[16:], p1_0, p1_1)
}
|
||||
|
||||
// dec_sw_256 decrypts one AEGIS-256 block from `ci` into `xi`.
//
// The keystream is taken from the state as it was on entry; the state is
// then updated with the recovered plaintext, which is finally written to
// `xi`.
@(private = "file")
dec_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, xi, ci: []byte) #no_bounds_check {
	// Keystream must be derived before the state is updated.
	ks_0, ks_1 := z_sw_256(st)

	c_0, c_1 := aes.load_interleaved(ci)
	p_0, p_1 := aes.xor_interleaved(c_0, c_1, ks_0, ks_1)

	// The state absorbs the plaintext, not the ciphertext.
	update_sw_256(st, p_0, p_1)
	aes.store_interleaved(xi, p_0, p_1)
}
|
||||
|
||||
// dec_partial_sw_128l decrypts a final, shorter-than-rate AEGIS-128L
// ciphertext fragment `cn` into `xn`.
//
// The fragment is zero-padded into a scratch buffer and XORed with the
// keystream; the leading bytes are copied out to `xn`.  Everything past
// len(xn) is then cleared again so that the state is updated with the
// zero-padded plaintext.  The scratch buffer is wiped on return.
@(private = "file")
dec_partial_sw_128l :: proc "contextless" (st: ^State_SW, xn, cn: []byte) #no_bounds_check {
	buf: [_RATE_128L]byte
	defer mem.zero_explicit(&buf, size_of(buf))

	ks0_0, ks0_1, ks1_0, ks1_1 := z_sw_128l(st)
	copy(buf[:], cn)

	// Decrypt the zero-padded block inside the scratch buffer.
	c0_0, c0_1 := aes.load_interleaved(buf[:16])
	c1_0, c1_1 := aes.load_interleaved(buf[16:])
	p0_0, p0_1 := aes.xor_interleaved(c0_0, c0_1, ks0_0, ks0_1)
	p1_0, p1_1 := aes.xor_interleaved(c1_0, c1_1, ks1_0, ks1_1)
	aes.store_interleaved(buf[:16], p0_0, p0_1)
	aes.store_interleaved(buf[16:], p1_0, p1_1)
	copy(xn, buf[:])

	// Bytes past the plaintext hold raw keystream at this point; clear
	// them before the block is fed back into the state.
	for i in len(xn) ..< _RATE_128L {
		buf[i] = 0
	}
	p0_0, p0_1 = aes.load_interleaved(buf[:16])
	p1_0, p1_1 = aes.load_interleaved(buf[16:])
	update_sw_128l(st, p0_0, p0_1, p1_0, p1_1)
}
|
||||
|
||||
// dec_partial_sw_256 decrypts a final, shorter-than-rate AEGIS-256
// ciphertext fragment `cn` into `xn`.
//
// The fragment is zero-padded into a scratch buffer and XORed with the
// keystream; the leading bytes are copied out to `xn`.  Everything past
// len(xn) is then cleared again so that the state is updated with the
// zero-padded plaintext.  The scratch buffer is wiped on return.
@(private = "file")
dec_partial_sw_256 :: proc "contextless" (st: ^State_SW, xn, cn: []byte) #no_bounds_check {
	buf: [_RATE_256]byte
	defer mem.zero_explicit(&buf, size_of(buf))

	ks_0, ks_1 := z_sw_256(st)
	copy(buf[:], cn)

	// Decrypt the zero-padded block inside the scratch buffer.
	c_0, c_1 := aes.load_interleaved(buf[:])
	p_0, p_1 := aes.xor_interleaved(c_0, c_1, ks_0, ks_1)
	aes.store_interleaved(buf[:], p_0, p_1)
	copy(xn, buf[:])

	// Bytes past the plaintext hold raw keystream at this point; clear
	// them before the block is fed back into the state.
	for i in len(xn) ..< _RATE_256 {
		buf[i] = 0
	}
	p_0, p_1 = aes.load_interleaved(buf[:])
	update_sw_256(st, p_0, p_1)
}
|
||||
|
||||
// dec_sw decrypts `src` into `dst` using the software state.
//
// Whole rate-sized blocks are processed directly; a trailing partial
// block is handed to the matching dec_partial_sw_* routine.  The block
// size is selected by `st.rate`.
@(private)
dec_sw :: proc "contextless" (st: ^State_SW, dst, src: []byte) #no_bounds_check {
	out, rem := dst, src

	// Decrypt every complete block.
	switch st.rate {
	case _RATE_128L:
		for len(rem) >= _RATE_128L {
			dec_sw_128l(st, out, rem)
			out, rem = out[_RATE_128L:], rem[_RATE_128L:]
		}
	case _RATE_256:
		for len(rem) >= _RATE_256 {
			dec_sw_256(st, out, rem)
			out, rem = out[_RATE_256:], rem[_RATE_256:]
		}
	}

	if len(rem) == 0 {
		return
	}

	// Process the remainder.
	switch st.rate {
	case _RATE_128L:
		dec_partial_sw_128l(st, out, rem)
	case _RATE_256:
		dec_partial_sw_256(st, out, rem)
	}
}
|
||||
|
||||
// finalize_sw runs the finalization rounds and writes the authentication
// tag to `tag`.
//
// Inputs:
// - ad_len, msg_len: lengths in bytes; they are converted to bit counts
//   and encoded as two little-endian u64 values.
// - tag: destination; only lengths TAG_SIZE_128 and TAG_SIZE_256 produce
//   any output.
//
// The length block is XORed with s2 (128L) or s3 (256), the state is
// updated 7 times with it, and the tag is folded out of the state words.
@(private)
finalize_sw :: proc "contextless" (st: ^State_SW, tag: []byte, ad_len, msg_len: int) {
	len_blk: [16]byte
	endian.unchecked_put_u64le(len_blk[0:], u64(ad_len) * 8)
	endian.unchecked_put_u64le(len_blk[8:], u64(msg_len) * 8)

	u_0, u_1 := aes.load_interleaved(len_blk[:])

	// Left uninitialized on purpose; both known rates assign all four.
	lo_0, lo_1, hi_0, hi_1: u64 = ---, ---, ---, ---
	switch st.rate {
	case _RATE_128L:
		u_0, u_1 = aes.xor_interleaved(st.s2_0, st.s2_1, u_0, u_1)
		for _ in 0 ..< 7 {
			update_sw_128l(st, u_0, u_1, u_0, u_1)
		}

		// lo = s0 ^ s1 ^ s2 ^ s3
		lo_0, lo_1 = aes.xor_interleaved(st.s0_0, st.s0_1, st.s1_0, st.s1_1)
		lo_0, lo_1 = aes.xor_interleaved(lo_0, lo_1, st.s2_0, st.s2_1)
		lo_0, lo_1 = aes.xor_interleaved(lo_0, lo_1, st.s3_0, st.s3_1)

		// hi = s4 ^ s5 ^ s6, plus s7 only for the 256-bit tag.
		hi_0, hi_1 = aes.xor_interleaved(st.s4_0, st.s4_1, st.s5_0, st.s5_1)
		hi_0, hi_1 = aes.xor_interleaved(hi_0, hi_1, st.s6_0, st.s6_1)
		if len(tag) == TAG_SIZE_256 {
			hi_0, hi_1 = aes.xor_interleaved(hi_0, hi_1, st.s7_0, st.s7_1)
		}
	case _RATE_256:
		u_0, u_1 = aes.xor_interleaved(st.s3_0, st.s3_1, u_0, u_1)
		for _ in 0 ..< 7 {
			update_sw_256(st, u_0, u_1)
		}

		// lo = s0 ^ s1 ^ s2
		lo_0, lo_1 = aes.xor_interleaved(st.s0_0, st.s0_1, st.s1_0, st.s1_1)
		lo_0, lo_1 = aes.xor_interleaved(lo_0, lo_1, st.s2_0, st.s2_1)

		// hi = s3 ^ s4 ^ s5
		hi_0, hi_1 = aes.xor_interleaved(st.s3_0, st.s3_1, st.s4_0, st.s4_1)
		hi_0, hi_1 = aes.xor_interleaved(hi_0, hi_1, st.s5_0, st.s5_1)
	}

	switch len(tag) {
	case TAG_SIZE_128:
		// The short tag is the XOR of both halves.
		lo_0, lo_1 = aes.xor_interleaved(lo_0, lo_1, hi_0, hi_1)
		aes.store_interleaved(tag, lo_0, lo_1)
	case TAG_SIZE_256:
		aes.store_interleaved(tag[:16], lo_0, lo_1)
		aes.store_interleaved(tag[16:], hi_0, hi_1)
	}
}
|
||||
|
||||
// reset_state_sw wipes the entire software state with
// mem.zero_explicit.
@(private)
reset_state_sw :: proc "contextless" (st: ^State_SW) {
	state_size :: size_of(State_SW)
	mem.zero_explicit(st, state_size)
}
|
||||
@@ -0,0 +1,44 @@
|
||||
#+build !amd64
|
||||
package aegis
|
||||
|
||||
@(private = "file")
|
||||
ERR_HW_NOT_SUPPORTED :: "crypto/aegis: hardware implementation unsupported"
|
||||
|
||||
// State_HW is an empty placeholder on targets without a hardware
// implementation; it exists so the *_hw procedure signatures in this
// file have a type to refer to.
@(private)
State_HW :: struct {}
|
||||
|
||||
// is_hardware_accelerated returns true iff hardware accelerated AEGIS
// is supported.  This build has no hardware implementation, so the
// answer is always false.
is_hardware_accelerated :: proc "contextless" () -> bool {
	return false
}
|
||||
|
||||
// init_hw is a stub for targets without a hardware implementation.
// It never returns: it panics with ERR_HW_NOT_SUPPORTED.
@(private)
init_hw :: proc "contextless" (ctx: ^Context, st: ^State_HW, iv: []byte) {
	panic_contextless(ERR_HW_NOT_SUPPORTED)
}
|
||||
|
||||
// absorb_hw is a stub for targets without a hardware implementation.
// It never returns: it panics with ERR_HW_NOT_SUPPORTED.
@(private)
absorb_hw :: proc "contextless" (st: ^State_HW, aad: []byte) {
	panic_contextless(ERR_HW_NOT_SUPPORTED)
}
|
||||
|
||||
// enc_hw is a stub for targets without a hardware implementation.
// It never returns: it panics with ERR_HW_NOT_SUPPORTED.
@(private)
enc_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) {
	panic_contextless(ERR_HW_NOT_SUPPORTED)
}
|
||||
|
||||
// dec_hw is a stub for targets without a hardware implementation.
// It never returns: it panics with ERR_HW_NOT_SUPPORTED.
@(private)
dec_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) {
	panic_contextless(ERR_HW_NOT_SUPPORTED)
}
|
||||
|
||||
// finalize_hw is a stub for targets without a hardware implementation.
// It never returns: it panics with ERR_HW_NOT_SUPPORTED.
@(private)
finalize_hw :: proc "contextless" (st: ^State_HW, tag: []byte, ad_len, msg_len: int) {
	panic_contextless(ERR_HW_NOT_SUPPORTED)
}
|
||||
|
||||
// reset_state_hw is a stub for targets without a hardware
// implementation.  It never returns: it panics with
// ERR_HW_NOT_SUPPORTED.
@(private)
reset_state_hw :: proc "contextless" (st: ^State_HW) {
	panic_contextless(ERR_HW_NOT_SUPPORTED)
}
|
||||
@@ -0,0 +1,389 @@
|
||||
#+build amd64
|
||||
package aegis
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/aes"
|
||||
import "core:encoding/endian"
|
||||
import "core:mem"
|
||||
import "core:simd/x86"
|
||||
|
||||
// State_HW is the AEGIS state for the AES-NI implementation.
//
// s0..s7 are the 128-bit state words; the AEGIS-256 routines in this
// file only read and write s0..s5.  `rate` is set by init_hw to
// _RATE_128L or _RATE_256 and selects which routines operate on the
// state.
@(private)
State_HW :: struct {
	s0, s1, s2, s3, s4, s5, s6, s7: x86.__m128i,
	rate:                           int,
}
|
||||
|
||||
// is_hardware_accelerated returns true iff hardware accelerated AEGIS
// is supported.  The answer is whatever the AES package reports for its
// own hardware support.
is_hardware_accelerated :: proc "contextless" () -> bool {
	aes_hw_available := aes.is_hardware_accelerated()
	return aes_hw_available
}
|
||||
|
||||
// init_hw loads the key from `ctx` and the nonce from `iv` into the
// hardware state and runs the initialization rounds.
//
// The variant is selected by `ctx._key_len`:
// - KEY_SIZE_128L: one 16-byte key and one 16-byte nonce; 10 update
//   rounds with (nonce, key).
// - KEY_SIZE_256: two 16-byte key halves and two 16-byte nonce halves;
//   4 iterations of updates with k0, k1, k0^n0, k1^n1.
//
// Any other key length leaves `st` untouched.
@(private, enable_target_feature = "sse2,aes")
init_hw :: proc "contextless" (ctx: ^Context, st: ^State_HW, iv: []byte) {
	switch ctx._key_len {
	case KEY_SIZE_128L:
		k := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[0]))
		n := intrinsics.unaligned_load((^x86.__m128i)(raw_data(iv)))
		c0 := intrinsics.unaligned_load((^x86.__m128i)(&_C0[0]))
		c1 := intrinsics.unaligned_load((^x86.__m128i)(&_C1[0]))

		k_xor_n := x86._mm_xor_si128(k, n)
		k_xor_c0 := x86._mm_xor_si128(k, c0)
		k_xor_c1 := x86._mm_xor_si128(k, c1)

		st.s0 = k_xor_n
		st.s1 = c1
		st.s2 = c0
		st.s3 = c1
		st.s4 = k_xor_n
		st.s5 = k_xor_c0
		st.s6 = k_xor_c1
		st.s7 = k_xor_c0
		st.rate = _RATE_128L

		for _ in 0 ..< 10 {
			update_hw_128l(st, n, k)
		}
	case KEY_SIZE_256:
		k0 := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[0]))
		k1 := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[16]))
		n0 := intrinsics.unaligned_load((^x86.__m128i)(&iv[0]))
		n1 := intrinsics.unaligned_load((^x86.__m128i)(&iv[16]))
		c0 := intrinsics.unaligned_load((^x86.__m128i)(&_C0[0]))
		c1 := intrinsics.unaligned_load((^x86.__m128i)(&_C1[0]))

		// k0 ^ n0 and k1 ^ n1 are both the initial s0/s1 and the
		// messages fed to the later update rounds.
		k0_xor_n0 := x86._mm_xor_si128(k0, n0)
		k1_xor_n1 := x86._mm_xor_si128(k1, n1)

		st.s0 = k0_xor_n0
		st.s1 = k1_xor_n1
		st.s2 = c1
		st.s3 = c0
		st.s4 = x86._mm_xor_si128(k0, c0)
		st.s5 = x86._mm_xor_si128(k1, c1)
		st.rate = _RATE_256

		for _ in 0 ..< 4 {
			update_hw_256(st, k0)
			update_hw_256(st, k1)
			update_hw_256(st, k0_xor_n0)
			update_hw_256(st, k1_xor_n1)
		}
	}
}
|
||||
|
||||
// update_hw_128l performs one AEGIS-128L state update with the message
// words `m0` and `m1`:
//
//   s0' = AESENC(s7, s0 ^ m0)     s4' = AESENC(s3, s4 ^ m1)
//   si' = AESENC(s(i-1), si)      for every other i
@(private = "file", enable_target_feature = "sse2,aes")
update_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, m0, m1: x86.__m128i) {
	// Every new word depends only on its own old value and the old value
	// of the word below it, so walking from s7 down to s1 allows the
	// state to be rewritten in place.  Only the old s7 has to be kept
	// around, for s0.
	old_s7 := st.s7

	st.s7 = x86._mm_aesenc_si128(st.s6, st.s7)
	st.s6 = x86._mm_aesenc_si128(st.s5, st.s6)
	st.s5 = x86._mm_aesenc_si128(st.s4, st.s5)
	st.s4 = x86._mm_aesenc_si128(st.s3, x86._mm_xor_si128(st.s4, m1))
	st.s3 = x86._mm_aesenc_si128(st.s2, st.s3)
	st.s2 = x86._mm_aesenc_si128(st.s1, st.s2)
	st.s1 = x86._mm_aesenc_si128(st.s0, st.s1)
	st.s0 = x86._mm_aesenc_si128(old_s7, x86._mm_xor_si128(st.s0, m0))
}
|
||||
|
||||
// update_hw_256 performs one AEGIS-256 state update with the message
// word `m`:
//
//   s0' = AESENC(s5, s0 ^ m)
//   si' = AESENC(s(i-1), si)      for i in 1..5
@(private = "file", enable_target_feature = "sse2,aes")
update_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, m: x86.__m128i) {
	// Every new word depends only on its own old value and the old value
	// of the word below it, so walking from s5 down to s1 allows the
	// state to be rewritten in place.  Only the old s5 has to be kept
	// around, for s0.
	old_s5 := st.s5

	st.s5 = x86._mm_aesenc_si128(st.s4, st.s5)
	st.s4 = x86._mm_aesenc_si128(st.s3, st.s4)
	st.s3 = x86._mm_aesenc_si128(st.s2, st.s3)
	st.s2 = x86._mm_aesenc_si128(st.s1, st.s2)
	st.s1 = x86._mm_aesenc_si128(st.s0, st.s1)
	st.s0 = x86._mm_aesenc_si128(old_s5, x86._mm_xor_si128(st.s0, m))
}
|
||||
|
||||
// absorb_hw_128l feeds one AEGIS-128L input block into the state: the
// 16 bytes at offset 0 and the 16 bytes at offset 16 of `ai` are loaded
// (unaligned) and passed to update_hw_128l.
@(private = "file", enable_target_feature = "sse2,aes")
absorb_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, ai: []byte) {
	lo_ptr := (^x86.__m128i)(&ai[0])
	hi_ptr := (^x86.__m128i)(&ai[16])
	update_hw_128l(
		st,
		intrinsics.unaligned_load(lo_ptr),
		intrinsics.unaligned_load(hi_ptr),
	)
}
|
||||
|
||||
// absorb_hw_256 feeds one AEGIS-256 input block into the state: the
// 16 bytes at the front of `ai` are loaded (unaligned) and passed to
// update_hw_256.
@(private = "file", enable_target_feature = "sse2,aes")
absorb_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, ai: []byte) {
	blk_ptr := (^x86.__m128i)(&ai[0])
	update_hw_256(st, intrinsics.unaligned_load(blk_ptr))
}
|
||||
|
||||
// absorb_hw absorbs the associated data `aad` into the hardware state.
//
// Whole rate-sized blocks are absorbed directly from `aad`; a trailing
// partial block is copied into a zeroed scratch buffer so that it is
// absorbed as a full, zero-padded block.  The block size is selected by
// `st.rate`.
@(private, enable_target_feature = "sse2,aes")
absorb_hw :: proc "contextless" (st: ^State_HW, aad: []byte) #no_bounds_check {
	rem := aad

	// Consume every complete block.
	switch st.rate {
	case _RATE_128L:
		for len(rem) >= _RATE_128L {
			absorb_hw_128l(st, rem)
			rem = rem[_RATE_128L:]
		}
	case _RATE_256:
		for len(rem) >= _RATE_256 {
			absorb_hw_256(st, rem)
			rem = rem[_RATE_256:]
		}
	}

	if len(rem) == 0 {
		return
	}

	// Pad out the remainder with `0`s till it is rate sized.
	padded: [_RATE_MAX]byte // AAD is not confidential.
	copy(padded[:], rem)
	switch st.rate {
	case _RATE_128L:
		absorb_hw_128l(st, padded[:])
	case _RATE_256:
		absorb_hw_256(st, padded[:])
	}
}
|
||||
|
||||
// z_hw_128l derives the two AEGIS-128L keystream words from the current
// state without modifying it:
//
//   z0 = s6 ^ (s1 ^ (s2 & s3))
//   z1 = s2 ^ (s5 ^ (s6 & s7))
@(private = "file", enable_target_feature = "sse2", require_results)
z_hw_128l :: #force_inline proc "contextless" (st: ^State_HW) -> (x86.__m128i, x86.__m128i) {
	and_23 := x86._mm_and_si128(st.s2, st.s3)
	and_67 := x86._mm_and_si128(st.s6, st.s7)

	inner0 := x86._mm_xor_si128(st.s1, and_23)
	inner1 := x86._mm_xor_si128(st.s5, and_67)

	return x86._mm_xor_si128(st.s6, inner0), x86._mm_xor_si128(st.s2, inner1)
}
|
||||
|
||||
// z_hw_256 derives the AEGIS-256 keystream word from the current state
// without modifying it:
//
//   z = s1 ^ (s4 ^ (s5 ^ (s2 & s3)))
@(private = "file", enable_target_feature = "sse2", require_results)
z_hw_256 :: #force_inline proc "contextless" (st: ^State_HW) -> x86.__m128i {
	acc := x86._mm_and_si128(st.s2, st.s3)
	acc = x86._mm_xor_si128(st.s5, acc)
	acc = x86._mm_xor_si128(st.s4, acc)
	acc = x86._mm_xor_si128(st.s1, acc)
	return acc
}
|
||||
|
||||
// enc_hw_128l encrypts one AEGIS-128L block (two 16-byte words) from
// `xi` into `ci`.
//
// The keystream is taken from the state as it was on entry; the state is
// then updated with the plaintext.  Both words of `xi` are loaded before
// anything is written to `ci`.
@(private = "file", enable_target_feature = "sse2,aes")
enc_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, ci, xi: []byte) #no_bounds_check {
	// Keystream must be derived before the state is updated.
	ks0, ks1 := z_hw_128l(st)

	p0 := intrinsics.unaligned_load((^x86.__m128i)(&xi[0]))
	p1 := intrinsics.unaligned_load((^x86.__m128i)(&xi[16]))

	c0 := x86._mm_xor_si128(p0, ks0)
	c1 := x86._mm_xor_si128(p1, ks1)

	update_hw_128l(st, p0, p1)

	intrinsics.unaligned_store((^x86.__m128i)(&ci[0]), c0)
	intrinsics.unaligned_store((^x86.__m128i)(&ci[16]), c1)
}
|
||||
|
||||
// enc_hw_256 encrypts one 16-byte AEGIS-256 block from `xi` into `ci`.
//
// The keystream is taken from the state as it was on entry; the state is
// then updated with the plaintext.
@(private = "file", enable_target_feature = "sse2,aes")
enc_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, ci, xi: []byte) #no_bounds_check {
	// Keystream must be derived before the state is updated.
	ks := z_hw_256(st)

	pt := intrinsics.unaligned_load((^x86.__m128i)(raw_data(xi)))
	ct := x86._mm_xor_si128(pt, ks)

	update_hw_256(st, pt)

	intrinsics.unaligned_store((^x86.__m128i)(raw_data(ci)), ct)
}
|
||||
|
||||
// enc_hw encrypts `src` into `dst` using the hardware state.
//
// Whole rate-sized blocks are processed directly.  A trailing partial
// block is zero-padded in a scratch buffer, encrypted in place there, and
// only the first len(remainder) bytes of the result are copied to `dst`.
// The block size is selected by `st.rate`.
@(private, enable_target_feature = "sse2,aes")
enc_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) #no_bounds_check {
	out, rem := dst, src

	// Encrypt every complete block.
	switch st.rate {
	case _RATE_128L:
		for len(rem) >= _RATE_128L {
			enc_hw_128l(st, out, rem)
			out, rem = out[_RATE_128L:], rem[_RATE_128L:]
		}
	case _RATE_256:
		for len(rem) >= _RATE_256 {
			enc_hw_256(st, out, rem)
			out, rem = out[_RATE_256:], rem[_RATE_256:]
		}
	}

	tail_len := len(rem)
	if tail_len == 0 {
		return
	}

	// Pad out the remainder with `0`s till it is rate sized.
	buf: [_RATE_MAX]byte // Ciphertext is not confidential.
	copy(buf[:], rem)
	switch st.rate {
	case _RATE_128L:
		enc_hw_128l(st, buf[:], buf[:])
	case _RATE_256:
		enc_hw_256(st, buf[:], buf[:])
	}
	copy(out, buf[:tail_len])
}
|
||||
|
||||
// dec_hw_128l decrypts one AEGIS-128L block (two 16-byte words) from
// `ci` into `xi`.
//
// The keystream is taken from the state as it was on entry; the state is
// then updated with the recovered plaintext, which is finally written to
// `xi`.
@(private = "file", enable_target_feature = "sse2,aes")
dec_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, xi, ci: []byte) #no_bounds_check {
	// Keystream must be derived before the state is updated.
	ks0, ks1 := z_hw_128l(st)

	c0 := intrinsics.unaligned_load((^x86.__m128i)(&ci[0]))
	c1 := intrinsics.unaligned_load((^x86.__m128i)(&ci[16]))

	p0 := x86._mm_xor_si128(c0, ks0)
	p1 := x86._mm_xor_si128(c1, ks1)

	// The state absorbs the plaintext, not the ciphertext.
	update_hw_128l(st, p0, p1)

	intrinsics.unaligned_store((^x86.__m128i)(&xi[0]), p0)
	intrinsics.unaligned_store((^x86.__m128i)(&xi[16]), p1)
}
|
||||
|
||||
// dec_hw_256 decrypts one 16-byte AEGIS-256 block from `ci` into `xi`.
//
// The keystream is taken from the state as it was on entry; the state is
// then updated with the recovered plaintext, which is finally written to
// `xi`.
@(private = "file", enable_target_feature = "sse2,aes")
dec_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, xi, ci: []byte) #no_bounds_check {
	// Keystream must be derived before the state is updated.
	ks := z_hw_256(st)

	ct := intrinsics.unaligned_load((^x86.__m128i)(raw_data(ci)))
	pt := x86._mm_xor_si128(ct, ks)

	// The state absorbs the plaintext, not the ciphertext.
	update_hw_256(st, pt)
	intrinsics.unaligned_store((^x86.__m128i)(raw_data(xi)), pt)
}
|
||||
|
||||
// dec_partial_hw_128l decrypts a final, shorter-than-rate AEGIS-128L
// ciphertext fragment `cn` into `xn`.
//
// The fragment is zero-padded into a scratch buffer and XORed with the
// keystream; the leading bytes are copied out to `xn`.  Everything past
// len(xn) is then cleared again so that the state is updated with the
// zero-padded plaintext.  The scratch buffer is wiped on return.
@(private = "file", enable_target_feature = "sse2,aes")
dec_partial_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, xn, cn: []byte) #no_bounds_check {
	buf: [_RATE_128L]byte
	defer mem.zero_explicit(&buf, size_of(buf))

	lo_ptr := (^x86.__m128i)(&buf[0])
	hi_ptr := (^x86.__m128i)(&buf[16])

	ks0, ks1 := z_hw_128l(st)
	copy(buf[:], cn)

	// Decrypt the zero-padded block inside the scratch buffer.
	c0 := intrinsics.unaligned_load(lo_ptr)
	c1 := intrinsics.unaligned_load(hi_ptr)
	intrinsics.unaligned_store(lo_ptr, x86._mm_xor_si128(c0, ks0))
	intrinsics.unaligned_store(hi_ptr, x86._mm_xor_si128(c1, ks1))
	copy(xn, buf[:])

	// Bytes past the plaintext hold raw keystream at this point; clear
	// them before the block is fed back into the state.
	for i in len(xn) ..< _RATE_128L {
		buf[i] = 0
	}
	v0 := intrinsics.unaligned_load(lo_ptr)
	v1 := intrinsics.unaligned_load(hi_ptr)
	update_hw_128l(st, v0, v1)
}
|
||||
|
||||
// dec_partial_hw_256 decrypts a final, shorter-than-rate AEGIS-256
// ciphertext fragment `cn` into `xn`.
//
// The fragment is zero-padded into a scratch buffer and XORed with the
// keystream; the leading bytes are copied out to `xn`.  Everything past
// len(xn) is then cleared again so that the state is updated with the
// zero-padded plaintext.  The scratch buffer is wiped on return.
@(private = "file", enable_target_feature = "sse2,aes")
dec_partial_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, xn, cn: []byte) #no_bounds_check {
	buf: [_RATE_256]byte
	defer mem.zero_explicit(&buf, size_of(buf))

	blk_ptr := (^x86.__m128i)(&buf[0])

	ks := z_hw_256(st)
	copy(buf[:], cn)

	// Decrypt the zero-padded block inside the scratch buffer.
	ct := intrinsics.unaligned_load(blk_ptr)
	intrinsics.unaligned_store(blk_ptr, x86._mm_xor_si128(ct, ks))
	copy(xn, buf[:])

	// Bytes past the plaintext hold raw keystream at this point; clear
	// them before the block is fed back into the state.
	for i in len(xn) ..< _RATE_256 {
		buf[i] = 0
	}
	pt := intrinsics.unaligned_load(blk_ptr)
	update_hw_256(st, pt)
}
|
||||
|
||||
// dec_hw decrypts `src` into `dst` using the hardware state.
//
// Whole rate-sized blocks are processed directly; a trailing partial
// block is handed to the matching dec_partial_hw_* routine.  The block
// size is selected by `st.rate`.
@(private, enable_target_feature = "sse2,aes")
dec_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) #no_bounds_check {
	out, rem := dst, src

	// Decrypt every complete block.
	switch st.rate {
	case _RATE_128L:
		for len(rem) >= _RATE_128L {
			dec_hw_128l(st, out, rem)
			out, rem = out[_RATE_128L:], rem[_RATE_128L:]
		}
	case _RATE_256:
		for len(rem) >= _RATE_256 {
			dec_hw_256(st, out, rem)
			out, rem = out[_RATE_256:], rem[_RATE_256:]
		}
	}

	if len(rem) == 0 {
		return
	}

	// Process the remainder.
	switch st.rate {
	case _RATE_128L:
		dec_partial_hw_128l(st, out, rem)
	case _RATE_256:
		dec_partial_hw_256(st, out, rem)
	}
}
|
||||
|
||||
// finalize_hw runs the finalization rounds and writes the authentication
// tag to `tag`.
//
// Inputs:
// - ad_len, msg_len: lengths in bytes; they are converted to bit counts
//   and encoded as two little-endian u64 values.
// - tag: destination; only lengths TAG_SIZE_128 and TAG_SIZE_256 produce
//   any output.
//
// The length block is XORed with s2 (128L) or s3 (256), the state is
// updated 7 times with it, and the tag is folded out of the state words.
@(private, enable_target_feature = "sse2,aes")
finalize_hw :: proc "contextless" (st: ^State_HW, tag: []byte, ad_len, msg_len: int) {
	len_blk: [16]byte
	endian.unchecked_put_u64le(len_blk[0:], u64(ad_len) * 8)
	endian.unchecked_put_u64le(len_blk[8:], u64(msg_len) * 8)

	u := intrinsics.unaligned_load((^x86.__m128i)(&len_blk[0]))

	// Left uninitialized on purpose; both known rates assign both.
	lo, hi: x86.__m128i = ---, ---
	switch st.rate {
	case _RATE_128L:
		u = x86._mm_xor_si128(st.s2, u)
		for _ in 0 ..< 7 {
			update_hw_128l(st, u, u)
		}

		// lo = s0 ^ s1 ^ s2 ^ s3
		lo = x86._mm_xor_si128(st.s0, st.s1)
		lo = x86._mm_xor_si128(lo, st.s2)
		lo = x86._mm_xor_si128(lo, st.s3)

		// hi = s4 ^ s5 ^ s6, plus s7 only for the 256-bit tag.
		hi = x86._mm_xor_si128(st.s4, st.s5)
		hi = x86._mm_xor_si128(hi, st.s6)
		if len(tag) == TAG_SIZE_256 {
			hi = x86._mm_xor_si128(hi, st.s7)
		}
	case _RATE_256:
		u = x86._mm_xor_si128(st.s3, u)
		for _ in 0 ..< 7 {
			update_hw_256(st, u)
		}

		// lo = s0 ^ s1 ^ s2
		lo = x86._mm_xor_si128(st.s0, st.s1)
		lo = x86._mm_xor_si128(lo, st.s2)

		// hi = s3 ^ s4 ^ s5
		hi = x86._mm_xor_si128(st.s3, st.s4)
		hi = x86._mm_xor_si128(hi, st.s5)
	}

	switch len(tag) {
	case TAG_SIZE_128:
		// The short tag is the XOR of both halves.
		lo = x86._mm_xor_si128(lo, hi)
		intrinsics.unaligned_store((^x86.__m128i)(&tag[0]), lo)
	case TAG_SIZE_256:
		intrinsics.unaligned_store((^x86.__m128i)(&tag[0]), lo)
		intrinsics.unaligned_store((^x86.__m128i)(&tag[16]), hi)
	}
}
|
||||
|
||||
// reset_state_hw wipes the entire hardware state with
// mem.zero_explicit.
@(private)
reset_state_hw :: proc "contextless" (st: ^State_HW) {
	state_size :: size_of(State_HW)
	mem.zero_explicit(st, state_size)
}
|
||||
@@ -21,9 +21,7 @@ Context_CTR :: struct {
|
||||
|
||||
// init_ctr initializes a Context_CTR with the provided key and IV.
|
||||
init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := DEFAULT_IMPLEMENTATION) {
|
||||
if len(iv) != CTR_IV_SIZE {
|
||||
panic("crypto/aes: invalid CTR IV size")
|
||||
}
|
||||
ensure(len(iv) == CTR_IV_SIZE, "crypto/aes: invalid CTR IV size")
|
||||
|
||||
init_impl(&ctx._impl, key, impl)
|
||||
ctx._off = BLOCK_SIZE
|
||||
@@ -36,16 +34,14 @@ init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := DEFAULT_IMPLEMENTAT
|
||||
// keystream, and writes the resulting output to dst. dst and src MUST
|
||||
// alias exactly or not at all.
|
||||
xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
ensure(ctx._is_initialized)
|
||||
|
||||
src, dst := src, dst
|
||||
if dst_len := len(dst); dst_len < len(src) {
|
||||
src = src[:dst_len]
|
||||
}
|
||||
|
||||
if bytes.alias_inexactly(dst, src) {
|
||||
panic("crypto/aes: dst and src alias inexactly")
|
||||
}
|
||||
ensure(!bytes.alias_inexactly(dst, src), "crypto/aes: dst and src alias inexactly")
|
||||
|
||||
#no_bounds_check for remaining := len(src); remaining > 0; {
|
||||
// Process multiple blocks at once
|
||||
@@ -82,7 +78,7 @@ xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) {
|
||||
|
||||
// keystream_bytes_ctr fills dst with the raw AES-CTR keystream output.
|
||||
keystream_bytes_ctr :: proc(ctx: ^Context_CTR, dst: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
ensure(ctx._is_initialized)
|
||||
|
||||
dst := dst
|
||||
#no_bounds_check for remaining := len(dst); remaining > 0; {
|
||||
|
||||
@@ -19,11 +19,9 @@ init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := DEFAULT_IMPLEMENTATION)
|
||||
|
||||
// encrypt_ecb encrypts the BLOCK_SIZE buffer src, and writes the result to dst.
|
||||
encrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE {
|
||||
panic("crypto/aes: invalid buffer size(s)")
|
||||
}
|
||||
ensure(ctx._is_initialized)
|
||||
ensure(len(dst) == BLOCK_SIZE, "crypto/aes: invalid dst size")
|
||||
ensure(len(src) == BLOCK_SIZE, "crypto/aes: invalid src size")
|
||||
|
||||
switch &impl in ctx._impl {
|
||||
case ct64.Context:
|
||||
@@ -35,11 +33,9 @@ encrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
|
||||
|
||||
// decrypt_ecb decrypts the BLOCK_SIZE buffer src, and writes the result to dst.
|
||||
decrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE {
|
||||
panic("crypto/aes: invalid buffer size(s)")
|
||||
}
|
||||
ensure(ctx._is_initialized)
|
||||
ensure(len(dst) == BLOCK_SIZE, "crypto/aes: invalid dst size")
|
||||
ensure(len(src) == BLOCK_SIZE, "crypto/aes: invalid src size")
|
||||
|
||||
switch &impl in ctx._impl {
|
||||
case ct64.Context:
|
||||
|
||||
@@ -36,15 +36,11 @@ init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := DEFAULT_IMPLEMENTATION)
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, iv, aad, plaintext: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
ensure(ctx._is_initialized)
|
||||
|
||||
gcm_validate_common_slice_sizes(tag, iv, aad, plaintext)
|
||||
if len(dst) != len(plaintext) {
|
||||
panic("crypto/aes: invalid destination ciphertext size")
|
||||
}
|
||||
if bytes.alias_inexactly(dst, plaintext) {
|
||||
panic("crypto/aes: dst and plaintext alias inexactly")
|
||||
}
|
||||
ensure(len(dst) == len(plaintext), "crypto/aes: invalid destination ciphertext size")
|
||||
ensure(!bytes.alias_inexactly(dst, plaintext), "crypto/aes: dst and plaintext alias inexactly")
|
||||
|
||||
if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
|
||||
gcm_seal_hw(&impl, dst, tag, iv, aad, plaintext)
|
||||
@@ -76,15 +72,11 @@ seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, iv, aad, plaintext: []byte) {
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
@(require_results)
|
||||
open_gcm :: proc(ctx: ^Context_GCM, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
assert(ctx._is_initialized)
|
||||
ensure(ctx._is_initialized)
|
||||
|
||||
gcm_validate_common_slice_sizes(tag, iv, aad, ciphertext)
|
||||
if len(dst) != len(ciphertext) {
|
||||
panic("crypto/aes: invalid destination plaintext size")
|
||||
}
|
||||
if bytes.alias_inexactly(dst, ciphertext) {
|
||||
panic("crypto/aes: dst and ciphertext alias inexactly")
|
||||
}
|
||||
ensure(len(dst) == len(ciphertext), "crypto/aes: invalid destination plaintext size")
|
||||
ensure(!bytes.alias_inexactly(dst, ciphertext), "crypto/aes: dst and ciphertext alias inexactly")
|
||||
|
||||
if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
|
||||
return gcm_open_hw(&impl, dst, iv, aad, ciphertext, tag)
|
||||
@@ -122,21 +114,13 @@ reset_gcm :: proc "contextless" (ctx: ^Context_GCM) {
|
||||
|
||||
@(private = "file")
|
||||
gcm_validate_common_slice_sizes :: proc(tag, iv, aad, text: []byte) {
|
||||
if len(tag) != GCM_TAG_SIZE {
|
||||
panic("crypto/aes: invalid GCM tag size")
|
||||
}
|
||||
ensure(len(tag) == GCM_TAG_SIZE, "crypto/aes: invalid GCM tag size")
|
||||
|
||||
// The specification supports IVs in the range [1, 2^64) bits.
|
||||
if l := len(iv); l == 0 || u64(l) >= GCM_IV_SIZE_MAX {
|
||||
panic("crypto/aes: invalid GCM IV size")
|
||||
}
|
||||
// An empty IV is invalid: the lower bound of the supported range is 1.
ensure(len(iv) > 0 && u64(len(iv)) <= GCM_IV_SIZE_MAX, "crypto/aes: invalid GCM IV size")
|
||||
|
||||
if aad_len := u64(len(aad)); aad_len > GCM_A_MAX {
|
||||
panic("crypto/aes: oversized GCM aad")
|
||||
}
|
||||
if text_len := u64(len(text)); text_len > GCM_P_MAX {
|
||||
panic("crypto/aes: oversized GCM src data")
|
||||
}
|
||||
ensure(u64(len(aad)) <= GCM_A_MAX, "crypto/aes: oversized GCM aad")
|
||||
ensure(u64(len(text)) <= GCM_P_MAX, "crypto/aes: oversized GCM data")
|
||||
}
|
||||
|
||||
@(private = "file")
|
||||
|
||||
@@ -235,7 +235,7 @@ gctr_hw :: proc(
|
||||
// BUG: Sticking this in gctr_hw (like the other implementations) crashes
|
||||
// the compiler.
|
||||
//
|
||||
// src/check_expr.cpp(7892): Assertion Failure: `c->curr_proc_decl->entity`
|
||||
// src/check_expr.cpp(8104): Assertion Failure: `c->curr_proc_decl->entity`
|
||||
@(private = "file", enable_target_feature = "sse4.1")
|
||||
hw_inc_ctr32 :: #force_inline proc "contextless" (src: ^x86.__m128i, ctr: u32) -> (x86.__m128i, u32) {
|
||||
ret := x86._mm_insert_epi32(src^, i32(intrinsics.byte_swap(ctr)), 3)
|
||||
|
||||
@@ -18,7 +18,7 @@ package blake2b
|
||||
import "../_blake2"
|
||||
|
||||
// DIGEST_SIZE is the BLAKE2b digest size in bytes.
|
||||
DIGEST_SIZE :: 64
|
||||
DIGEST_SIZE :: _blake2.BLAKE2B_SIZE
|
||||
|
||||
// BLOCK_SIZE is the BLAKE2b block size in bytes.
|
||||
BLOCK_SIZE :: _blake2.BLAKE2B_BLOCK_SIZE
|
||||
@@ -27,9 +27,11 @@ BLOCK_SIZE :: _blake2.BLAKE2B_BLOCK_SIZE
|
||||
Context :: _blake2.Blake2b_Context
|
||||
|
||||
// init initializes a Context for BLAKE2b with the requested digest size (DIGEST_SIZE by default).
|
||||
init :: proc(ctx: ^Context) {
|
||||
init :: proc(ctx: ^Context, digest_size := DIGEST_SIZE) {
|
||||
ensure(digest_size <= _blake2.MAX_SIZE, "crypto/blake2b: invalid digest size")
|
||||
|
||||
cfg: _blake2.Blake2_Config
|
||||
cfg.size = _blake2.BLAKE2B_SIZE
|
||||
cfg.size = u8(digest_size)
|
||||
_blake2.init(ctx, &cfg)
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ package blake2s
|
||||
import "../_blake2"
|
||||
|
||||
// DIGEST_SIZE is the BLAKE2s digest size in bytes.
|
||||
DIGEST_SIZE :: 32
|
||||
DIGEST_SIZE :: _blake2.BLAKE2S_SIZE
|
||||
|
||||
// BLOCK_SIZE is the BLAKE2s block size in bytes.
|
||||
BLOCK_SIZE :: _blake2.BLAKE2S_BLOCK_SIZE
|
||||
@@ -27,9 +27,11 @@ BLOCK_SIZE :: _blake2.BLAKE2S_BLOCK_SIZE
|
||||
Context :: _blake2.Blake2s_Context
|
||||
|
||||
// init initializes a Context for BLAKE2s with the requested digest size (DIGEST_SIZE by default).
|
||||
init :: proc(ctx: ^Context) {
|
||||
init :: proc(ctx: ^Context, digest_size := DIGEST_SIZE) {
|
||||
ensure(digest_size <= _blake2.MAX_SIZE, "crypto/blake2s: invalid digest size")
|
||||
|
||||
cfg: _blake2.Blake2_Config
|
||||
cfg.size = _blake2.BLAKE2S_SIZE
|
||||
cfg.size = u8(digest_size)
|
||||
_blake2.init(ctx, &cfg)
|
||||
}
|
||||
|
||||
|
||||
@@ -27,12 +27,8 @@ Context :: struct {
|
||||
// init initializes a Context for ChaCha20 or XChaCha20 with the provided
|
||||
// key and iv.
|
||||
init :: proc(ctx: ^Context, key, iv: []byte, impl := DEFAULT_IMPLEMENTATION) {
|
||||
if len(key) != KEY_SIZE {
|
||||
panic("crypto/chacha20: invalid (X)ChaCha20 key size")
|
||||
}
|
||||
if l := len(iv); l != IV_SIZE && l != XIV_SIZE {
|
||||
panic("crypto/chacha20: invalid (X)ChaCha20 IV size")
|
||||
}
|
||||
ensure(len(key) == KEY_SIZE, "crypto/chacha20: invalid (X)ChaCha20 key size")
|
||||
ensure(len(iv) == IV_SIZE || len(iv) == XIV_SIZE, "crypto/chacha20: invalid (X)ChaCha20 IV size")
|
||||
|
||||
k, n := key, iv
|
||||
|
||||
@@ -67,16 +63,14 @@ seek :: proc(ctx: ^Context, block_nr: u64) {
|
||||
// keystream, and writes the resulting output to dst. Dst and src MUST
|
||||
// alias exactly or not at all.
|
||||
xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
|
||||
assert(ctx._state._is_initialized)
|
||||
ensure(ctx._state._is_initialized)
|
||||
|
||||
src, dst := src, dst
|
||||
if dst_len := len(dst); dst_len < len(src) {
|
||||
src = src[:dst_len]
|
||||
}
|
||||
|
||||
if bytes.alias_inexactly(dst, src) {
|
||||
panic("crypto/chacha20: dst and src alias inexactly")
|
||||
}
|
||||
ensure(!bytes.alias_inexactly(dst, src), "crypto/chacha20: dst and src alias inexactly")
|
||||
|
||||
st := &ctx._state
|
||||
#no_bounds_check for remaining := len(src); remaining > 0; {
|
||||
@@ -114,7 +108,7 @@ xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
|
||||
|
||||
// keystream_bytes fills dst with the raw (X)ChaCha20 keystream output.
|
||||
keystream_bytes :: proc(ctx: ^Context, dst: []byte) {
|
||||
assert(ctx._state._is_initialized)
|
||||
ensure(ctx._state._is_initialized)
|
||||
|
||||
dst, st := dst, &ctx._state
|
||||
#no_bounds_check for remaining := len(dst); remaining > 0; {
|
||||
|
||||
@@ -29,13 +29,9 @@ _P_MAX :: 64 * 0xffffffff // 64 * (2^32-1)
|
||||
|
||||
@(private)
|
||||
_validate_common_slice_sizes :: proc (tag, iv, aad, text: []byte, is_xchacha: bool) {
|
||||
if len(tag) != TAG_SIZE {
|
||||
panic("crypto/chacha20poly1305: invalid destination tag size")
|
||||
}
|
||||
expected_iv_len := is_xchacha ? XIV_SIZE : IV_SIZE
|
||||
if len(iv) != expected_iv_len {
|
||||
panic("crypto/chacha20poly1305: invalid IV size")
|
||||
}
|
||||
ensure(len(tag) == TAG_SIZE, "crypto/chacha20poly1305: invalid destination tag size")
|
||||
ensure(len(iv) == expected_iv_len, "crypto/chacha20poly1305: invalid IV size")
|
||||
|
||||
#assert(size_of(int) == 8 || size_of(int) <= 4)
|
||||
when size_of(int) == 8 {
|
||||
@@ -45,13 +41,11 @@ _validate_common_slice_sizes :: proc (tag, iv, aad, text: []byte, is_xchacha: bo
|
||||
// A_MAX is limited by size_of(int), so there is no need to
|
||||
// enforce it. P_MAX only needs to be checked on 64-bit targets,
|
||||
// for reasons that should be obvious.
|
||||
if text_len := len(text); text_len > _P_MAX {
|
||||
panic("crypto/chacha20poly1305: oversized src data")
|
||||
}
|
||||
ensure(len(text) <= _P_MAX, "crypto/chacha20poly1305: oversized src data")
|
||||
}
|
||||
}
|
||||
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
_PAD: [16]byte
|
||||
|
||||
@(private)
|
||||
@@ -71,9 +65,7 @@ Context :: struct {
|
||||
|
||||
// init initializes a Context with the provided key, for AEAD_CHACHA20_POLY1305.
|
||||
init :: proc(ctx: ^Context, key: []byte, impl := chacha20.DEFAULT_IMPLEMENTATION) {
|
||||
if len(key) != KEY_SIZE {
|
||||
panic("crypto/chacha20poly1305: invalid key size")
|
||||
}
|
||||
ensure(len(key) == KEY_SIZE, "crypto/chacha20poly1305: invalid key size")
|
||||
|
||||
copy(ctx._key[:], key)
|
||||
ctx._impl = impl
|
||||
@@ -96,11 +88,11 @@ init_xchacha :: proc(ctx: ^Context, key: []byte, impl := chacha20.DEFAULT_IMPLEM
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
|
||||
ensure(ctx._is_initialized)
|
||||
|
||||
ciphertext := dst
|
||||
_validate_common_slice_sizes(tag, iv, aad, plaintext, ctx._is_xchacha)
|
||||
if len(ciphertext) != len(plaintext) {
|
||||
panic("crypto/chacha20poly1305: invalid destination ciphertext size")
|
||||
}
|
||||
ensure(len(ciphertext) == len(plaintext), "crypto/chacha20poly1305: invalid destination ciphertext size")
|
||||
|
||||
stream_ctx: chacha20.Context = ---
|
||||
chacha20.init(&stream_ctx, ctx._key[:],iv, ctx._impl)
|
||||
@@ -151,11 +143,11 @@ seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
@(require_results)
|
||||
open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
ensure(ctx._is_initialized)
|
||||
|
||||
plaintext := dst
|
||||
_validate_common_slice_sizes(tag, iv, aad, ciphertext, ctx._is_xchacha)
|
||||
if len(ciphertext) != len(plaintext) {
|
||||
panic("crypto/chacha20poly1305: invalid destination plaintext size")
|
||||
}
|
||||
ensure(len(ciphertext) == len(plaintext), "crypto/chacha20poly1305: invalid destination plaintext size")
|
||||
|
||||
// Note: Unlike encrypt, this can fail early, so use defer for
|
||||
// sanitization rather than assuming control flow reaches certain
|
||||
|
||||
@@ -0,0 +1,280 @@
|
||||
/*
|
||||
package deoxysii implements the Deoxys-II-256 Authenticated Encryption
|
||||
with Additional Data algorithm.
|
||||
|
||||
- [[ https://sites.google.com/view/deoxyscipher ]]
|
||||
- [[ https://thomaspeyrin.github.io/web/assets/docs/papers/Jean-etal-JoC2021.pdf ]]
|
||||
*/
|
||||
package deoxysii
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:bytes"
|
||||
import "core:crypto/aes"
|
||||
import "core:mem"
|
||||
import "core:simd"
|
||||
|
||||
// KEY_SIZE is the Deoxys-II-256 key size in bytes.
|
||||
KEY_SIZE :: 32
|
||||
// IV_SIZE is the Deoxys-II-256 IV size in bytes.
|
||||
IV_SIZE :: 15 // 120-bits
|
||||
// TAG_SIZE is the Deoxys-II-256 tag size in bytes.
|
||||
TAG_SIZE :: 16
|
||||
|
||||
@(private)
|
||||
PREFIX_AD_BLOCK :: 0b0010
|
||||
@(private)
|
||||
PREFIX_AD_FINAL :: 0b0110
|
||||
@(private)
|
||||
PREFIX_MSG_BLOCK :: 0b0000
|
||||
@(private)
|
||||
PREFIX_MSG_FINAL :: 0b0100
|
||||
@(private)
|
||||
PREFIX_TAG :: 0b0001
|
||||
@(private)
|
||||
PREFIX_SHIFT :: 4
|
||||
|
||||
@(private)
|
||||
BC_ROUNDS :: 16
|
||||
@(private)
|
||||
BLOCK_SIZE :: aes.BLOCK_SIZE
|
||||
|
||||
@(private = "file")
|
||||
_LFSR2_MASK :: simd.u8x16{
|
||||
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
|
||||
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
|
||||
}
|
||||
@(private = "file")
|
||||
_LFSR3_MASK :: simd.u8x16{
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
}
|
||||
@(private = "file")
|
||||
_LFSR_SH1 :: _LFSR2_MASK
|
||||
@(private = "file")
|
||||
_LFSR_SH5 :: simd.u8x16{
|
||||
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
|
||||
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
|
||||
}
|
||||
@(private = "file")
|
||||
_LFSR_SH7 :: simd.u8x16{
|
||||
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
|
||||
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
|
||||
}
|
||||
@(private = "file", rodata)
|
||||
_RCONS := []byte {
|
||||
0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a,
|
||||
0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39,
|
||||
0x72,
|
||||
}
|
||||
|
||||
// Context is a keyed Deoxys-II-256 instance.
|
||||
Context :: struct {
|
||||
_subkeys: [BC_ROUNDS+1][16]byte,
|
||||
_impl: aes.Implementation,
|
||||
_is_initialized: bool,
|
||||
}
|
||||
|
||||
@(private)
|
||||
_validate_common_slice_sizes :: proc (ctx: ^Context, tag, iv, aad, text: []byte) {
|
||||
ensure(len(tag) == TAG_SIZE, "crypto/deoxysii: invalid tag size")
|
||||
ensure(len(iv) == IV_SIZE, "crypto/deoxysii: invalid IV size")
|
||||
|
||||
#assert(size_of(int) == 8 || size_of(int) <= 4)
|
||||
// For the nonce-misuse resistant mode, the total size of the
|
||||
// associated data and the total size of the message do not exceed
|
||||
// `16 * 2^max_l * 2^max_m bytes`, thus 2^128 bytes for all variants
|
||||
// of Deoxys-II. Moreover, the maximum number of messages that can
|
||||
// be handled for a same key is 2^max_m, that is 2^64 for all variants
|
||||
// of Deoxys.
|
||||
}
|
||||
|
||||
// init initializes a Context with the provided key.
|
||||
init :: proc(ctx: ^Context, key: []byte, impl := aes.DEFAULT_IMPLEMENTATION) {
|
||||
ensure(len(key) == KEY_SIZE, "crypto/deoxysii: invalid key size")
|
||||
|
||||
ctx._impl = impl
|
||||
if ctx._impl == .Hardware && !is_hardware_accelerated() {
|
||||
ctx._impl = .Portable
|
||||
}
|
||||
|
||||
derive_ks(ctx, key)
|
||||
|
||||
ctx._is_initialized = true
|
||||
}
|
||||
|
||||
// seal encrypts the plaintext and authenticates the aad and ciphertext,
|
||||
// with the provided Context and iv, stores the output in dst and tag.
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
|
||||
ensure(ctx._is_initialized)
|
||||
|
||||
_validate_common_slice_sizes(ctx, tag, iv, aad, plaintext)
|
||||
ensure(len(dst) == len(plaintext), "crypto/deoxysii: invalid destination ciphertext size")
|
||||
ensure(!bytes.alias_inexactly(dst, plaintext), "crypto/deoxysii: dst and plaintext alias inexactly")
|
||||
|
||||
switch ctx._impl {
|
||||
case .Hardware:
|
||||
e_hw(ctx, dst, tag, iv, aad, plaintext)
|
||||
case .Portable:
|
||||
e_ref(ctx, dst, tag, iv, aad, plaintext)
|
||||
}
|
||||
}
|
||||
|
||||
// open authenticates the aad and ciphertext, and decrypts the ciphertext,
|
||||
// with the provided Context, iv, and tag, and stores the output in dst,
|
||||
// returning true iff the authentication was successful. If authentication
|
||||
// fails, the destination buffer will be zeroed.
|
||||
//
|
||||
// dst and ciphertext MUST alias exactly or not at all.
|
||||
@(require_results)
|
||||
open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
ensure(ctx._is_initialized)
|
||||
|
||||
_validate_common_slice_sizes(ctx, tag, iv, aad, ciphertext)
|
||||
ensure(len(dst) == len(ciphertext), "crypto/deoxysii: invalid destination plaintext size")
|
||||
ensure(!bytes.alias_inexactly(dst, ciphertext), "crypto/deoxysii: dst and ciphertext alias inexactly")
|
||||
|
||||
ok: bool
|
||||
switch ctx._impl {
|
||||
case .Hardware:
|
||||
ok = d_hw(ctx, dst, iv, aad, ciphertext, tag)
|
||||
case .Portable:
|
||||
ok = d_ref(ctx, dst, iv, aad, ciphertext, tag)
|
||||
}
|
||||
if !ok {
|
||||
mem.zero_explicit(raw_data(dst), len(ciphertext))
|
||||
}
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
// reset sanitizes the Context by wiping the precomputed round subkeys and
// marking the Context as uninitialized.  The Context must be re-initialized
// to be used again.
reset :: proc "contextless" (ctx: ^Context) {
	// zero_explicit takes a length in bytes.  `len(ctx._subkeys)` is only
	// the number of subkeys (BC_ROUNDS+1), so using it here would leave
	// most of the key schedule in memory; wipe the whole array instead.
	mem.zero_explicit(&ctx._subkeys, size_of(ctx._subkeys))
	ctx._is_initialized = false
}
|
||||
|
||||
@(private = "file")
derive_ks :: proc "contextless" (ctx: ^Context, key: []byte) {
	// Precompute the key-dependent (tweak-independent) component of
	// each subtweakkey and store it in ctx._subkeys.
	//
	// The Deoxys-BC-384 key schedule is:
	//
	//   STK_i = TK1_i ^ TK2_i ^ TK3_i ^ RC_i
	//
	//   TK1_i = h(TK1_(i-1))
	//   TK2_i = h(LFSR2(TK2_(i-1)))
	//   TK3_i = h(LFSR3(TK3_(i-1)))
	//
	// where:
	//
	//   KT = K || T
	//   W3 = KT[:16]
	//   W2 = KT[16:32]
	//   W1 = KT[32:]
	//
	//   TK1_0 = W1
	//   TK2_0 = W2
	//   TK3_0 = W3
	//
	// `K` is fixed per Context, so everything except the TK1 (tweak)
	// contribution can be computed once up front:
	//
	//   subkey_i = TK3_i ^ TK2_i ^ RC_i
	//
	// When the block cipher is actually invoked, each round's
	// subtweakkey is then derived as:
	//
	//   TK1_0 = T (Tweak)
	//   STK_0 = subkey_0 ^ TK1_0
	//   STK_i = subkey_i (precomputed) ^ H(TK1_(i-1))
	//
	// SIMD is used here and for the subtweakkey derivation as `H()`
	// is typically a single vector instruction.

	tk2 := intrinsics.unaligned_load((^simd.u8x16)(raw_data(key[16:])))
	tk3 := intrinsics.unaligned_load((^simd.u8x16)(raw_data(key)))

	// subkey_0 .. subkey_16.  Round 0 uses TK2_0/TK3_0 as-is, every
	// later round first advances both via LFSR2/LFSR3 followed by h.
	for round in 0 ..= BC_ROUNDS {
		if round > 0 {
			tk2 = h(lfsr2(tk2))
			tk3 = h(lfsr3(tk3))
		}

		tk3_rc := simd.bit_xor(tk3, rcon(round))
		subkey := simd.bit_xor(tk2, tk3_rc)
		intrinsics.unaligned_store((^simd.u8x16)(&ctx._subkeys[round]), subkey)
	}
}
|
||||
|
||||
@(private = "file")
lfsr2 :: #force_inline proc "contextless" (tk: simd.u8x16) -> simd.u8x16 {
	// LFSR2 is an application of the following LFSR to each byte of input.
	// (x7||x6||x5||x4||x3||x2||x1||x0) -> (x6||x5||x4||x3||x2||x1||x0||x7 ^ x5)
	shifted := simd.shl(tk, _LFSR_SH1)

	// After the right shifts, bit 0 of each byte holds x7 and x5
	// respectively; the mask keeps only that bit of their XOR.
	x7 := simd.shr(tk, _LFSR_SH7)
	x5 := simd.shr(tk, _LFSR_SH5)
	feedback := simd.bit_and(simd.bit_xor(x7, x5), _LFSR2_MASK)

	return simd.bit_or(shifted, feedback)
}
|
||||
|
||||
@(private = "file")
lfsr3 :: #force_inline proc "contextless" (tk: simd.u8x16) -> simd.u8x16 {
	// LFSR3 is an application of the following LFSR to each byte of input.
	// (x7||x6||x5||x4||x3||x2||x1||x0) -> (x0 ^ x6||x7||x6||x5||x4||x3||x2||x1)
	shifted := simd.shr(tk, _LFSR_SH1)

	// After the left shifts, bit 7 of each byte holds x0 and x6
	// respectively; the mask keeps only that bit of their XOR.
	x0 := simd.shl(tk, _LFSR_SH7)
	x6 := simd.shl(tk, _LFSR_SH1)
	feedback := simd.bit_and(simd.bit_xor(x0, x6), _LFSR3_MASK)

	return simd.bit_or(shifted, feedback)
}
|
||||
|
||||
@(private)
|
||||
h :: #force_inline proc "contextless" (tk: simd.u8x16) -> simd.u8x16 {
|
||||
return simd.swizzle(
|
||||
tk,
|
||||
0x01, 0x06, 0x0b, 0x0c, 0x05, 0x0a, 0x0f, 0x00,
|
||||
0x09, 0x0e, 0x03, 0x04, 0x0d, 0x02, 0x07, 0x08,
|
||||
)
|
||||
}
|
||||
|
||||
@(private = "file")
|
||||
rcon :: #force_inline proc "contextless" (rd: int) -> simd.u8x16 #no_bounds_check {
|
||||
rc := _RCONS[rd]
|
||||
return simd.u8x16{
|
||||
1, 2, 4, 8,
|
||||
rc, rc, rc, rc,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,399 @@
|
||||
package deoxysii
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto"
|
||||
import aes "core:crypto/_aes/ct64"
|
||||
import "core:encoding/endian"
|
||||
import "core:mem"
|
||||
import "core:simd"
|
||||
|
||||
// This uses the bitsliced 64-bit general purpose register SWAR AES
|
||||
// round function. The encryption pass skips orthogonalizing the
|
||||
// AES round function input as it is always going to be the leading 0
|
||||
// padded IV, and doing a 64-byte copy is faster.
|
||||
|
||||
@(private = "file")
|
||||
TWEAK_SIZE :: 16
|
||||
|
||||
@(private = "file")
|
||||
State_SW :: struct {
|
||||
ctx: ^Context,
|
||||
q_stk, q_b: [8]u64,
|
||||
}
|
||||
|
||||
@(private = "file")
auth_tweak :: #force_inline proc "contextless" (
	dst: ^[TWEAK_SIZE]byte,
	prefix: byte,
	block_nr: int,
) {
	// Bytes 0..7: `prefix << PREFIX_SHIFT` stored little-endian, so the
	// low byte of the shifted value lands in dst[0].
	head := u64(prefix) << PREFIX_SHIFT
	endian.unchecked_put_u64le(dst[0:], head)

	// Bytes 8..15: the block number, big-endian.
	endian.unchecked_put_u64be(dst[8:], u64(block_nr))
}
|
||||
|
||||
@(private = "file")
enc_tweak :: #force_inline proc "contextless" (
	dst: ^[TWEAK_SIZE]byte,
	tag: ^[TAG_SIZE]byte,
	block_nr: int,
) {
	// Start from the tag with the top bit of the first byte forced on.
	copy(dst[:], tag[:])
	dst[0] |= 0x80

	// XOR the big-endian block number into the trailing 8 bytes.
	ctr_be: [8]byte
	endian.unchecked_put_u64be(ctr_be[:], u64(block_nr))
	for b, i in ctr_be {
		dst[8+i] ~= b
	}
}
|
||||
|
||||
@(private = "file")
enc_plaintext :: #force_inline proc "contextless" (
	dst: ^[8]u64,
	iv: []byte,
) {
	// Build the block `0x00 || iv`.
	block: [BLOCK_SIZE]byte = ---
	block[0] = 0
	copy(block[1:], iv[:])

	// Replicate the same block into all 4 lanes, then orthogonalize
	// once so callers can reuse the result for every batch.
	lo, hi := aes.load_interleaved(block[:])
	for lane in 0 ..< 4 {
		dst[lane] = lo
		dst[lane+4] = hi
	}
	aes.orthogonalize(dst)
}
|
||||
|
||||
@(private = "file")
|
||||
bc_x4 :: proc "contextless" (
|
||||
ctx: ^Context,
|
||||
dst: []byte,
|
||||
tweaks: ^[4][TWEAK_SIZE]byte,
|
||||
q_stk: ^[8]u64,
|
||||
q_b: ^[8]u64, // Orthogonalized
|
||||
n: int,
|
||||
) {
|
||||
tk1s: [4]simd.u8x16
|
||||
for j in 0 ..< n {
|
||||
tk1s[j] = intrinsics.unaligned_load((^simd.u8x16)(&tweaks[j]))
|
||||
}
|
||||
|
||||
// Deoxys-BC-384
|
||||
for i in 0 ..= BC_ROUNDS {
|
||||
// Derive the round's subtweakkey
|
||||
sk := intrinsics.unaligned_load((^simd.u8x16)(&ctx._subkeys[i]))
|
||||
for j in 0 ..< n {
|
||||
if i != 0 {
|
||||
tk1s[j] = h(tk1s[j])
|
||||
}
|
||||
intrinsics.unaligned_store(
|
||||
(^simd.u8x16)(raw_data(dst)),
|
||||
simd.bit_xor(sk, tk1s[j]),
|
||||
)
|
||||
q_stk[j], q_stk[j+4] = aes.load_interleaved(dst[:])
|
||||
}
|
||||
aes.orthogonalize(q_stk)
|
||||
|
||||
if i != 0 {
|
||||
aes.sub_bytes(q_b)
|
||||
aes.shift_rows(q_b)
|
||||
aes.mix_columns(q_b)
|
||||
}
|
||||
aes.add_round_key(q_b, q_stk[:])
|
||||
}
|
||||
|
||||
aes.orthogonalize(q_b)
|
||||
for i in 0 ..< n {
|
||||
aes.store_interleaved(dst[i*BLOCK_SIZE:], q_b[i], q_b[i+4])
|
||||
}
|
||||
}
|
||||
|
||||
@(private = "file", require_results)
|
||||
bc_absorb :: proc "contextless" (
|
||||
st: ^State_SW,
|
||||
dst: []byte,
|
||||
src: []byte,
|
||||
tweak_prefix: byte,
|
||||
stk_block_nr: int,
|
||||
) -> int {
|
||||
tweaks: [4][TWEAK_SIZE]byte = ---
|
||||
tmp: [BLOCK_SIZE*4]byte = ---
|
||||
|
||||
src, stk_block_nr := src, stk_block_nr
|
||||
dst_ := intrinsics.unaligned_load((^simd.u8x16)(raw_data(dst)))
|
||||
|
||||
nr_blocks := len(src) / BLOCK_SIZE
|
||||
for nr_blocks > 0 {
|
||||
// Derive the tweak(s), orthogonalize the plaintext
|
||||
n := min(nr_blocks, 4)
|
||||
for i in 0 ..< n {
|
||||
auth_tweak(&tweaks[i], tweak_prefix, stk_block_nr + i)
|
||||
st.q_b[i], st.q_b[i + 4] = aes.load_interleaved(src)
|
||||
src = src[BLOCK_SIZE:]
|
||||
}
|
||||
aes.orthogonalize(&st.q_b)
|
||||
|
||||
// Deoxys-BC-384
|
||||
bc_x4(st.ctx, tmp[:], &tweaks, &st.q_stk, &st.q_b, n)
|
||||
|
||||
// XOR in the existing Auth/tag
|
||||
for i in 0 ..< n {
|
||||
dst_ = simd.bit_xor(
|
||||
dst_,
|
||||
intrinsics.unaligned_load((^simd.u8x16)(raw_data(tmp[i*BLOCK_SIZE:]))),
|
||||
)
|
||||
}
|
||||
|
||||
stk_block_nr += n
|
||||
nr_blocks -= n
|
||||
}
|
||||
|
||||
intrinsics.unaligned_store((^simd.u8x16)(raw_data(dst)), dst_)
|
||||
|
||||
mem.zero_explicit(&tweaks, size_of(tweaks))
|
||||
mem.zero_explicit(&tmp, size_of(tmp))
|
||||
|
||||
return stk_block_nr
|
||||
}
|
||||
|
||||
@(private = "file")
|
||||
bc_final :: proc "contextless" (
|
||||
st: ^State_SW,
|
||||
dst: []byte,
|
||||
iv: []byte,
|
||||
) {
|
||||
tweaks: [4][TWEAK_SIZE]byte = ---
|
||||
|
||||
tweaks[0][0] = PREFIX_TAG << PREFIX_SHIFT
|
||||
copy(tweaks[0][1:], iv)
|
||||
|
||||
st.q_b[0], st.q_b[4] = aes.load_interleaved(dst)
|
||||
aes.orthogonalize(&st.q_b)
|
||||
|
||||
bc_x4(st.ctx, dst, &tweaks, &st.q_stk, &st.q_b, 1)
|
||||
}
|
||||
|
||||
@(private = "file", require_results)
|
||||
bc_encrypt :: proc "contextless" (
|
||||
st: ^State_SW,
|
||||
dst: []byte,
|
||||
src: []byte,
|
||||
q_n: ^[8]u64, // Orthogonalized
|
||||
tweak_tag: ^[TAG_SIZE]byte,
|
||||
stk_block_nr: int,
|
||||
) -> int {
|
||||
tweaks: [4][TWEAK_SIZE]byte = ---
|
||||
tmp: [BLOCK_SIZE*4]byte = ---
|
||||
|
||||
dst, src, stk_block_nr := dst, src, stk_block_nr
|
||||
|
||||
nr_blocks := len(src) / BLOCK_SIZE
|
||||
for nr_blocks > 0 {
|
||||
// Derive the tweak(s)
|
||||
n := min(nr_blocks, 4)
|
||||
for i in 0 ..< n {
|
||||
enc_tweak(&tweaks[i], tweak_tag, stk_block_nr + i)
|
||||
}
|
||||
st.q_b = q_n^ // The plaintext is always `0^8 || N`
|
||||
|
||||
// Deoxys-BC-384
|
||||
bc_x4(st.ctx, tmp[:], &tweaks, &st.q_stk, &st.q_b, n)
|
||||
|
||||
// XOR the ciphertext
|
||||
for i in 0 ..< n {
|
||||
intrinsics.unaligned_store(
|
||||
(^simd.u8x16)(raw_data(dst[i*BLOCK_SIZE:])),
|
||||
simd.bit_xor(
|
||||
intrinsics.unaligned_load((^simd.u8x16)(raw_data(src[i*BLOCK_SIZE:]))),
|
||||
intrinsics.unaligned_load((^simd.u8x16)(raw_data(tmp[i*BLOCK_SIZE:]))),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
dst, src = dst[n*BLOCK_SIZE:], src[n*BLOCK_SIZE:]
|
||||
stk_block_nr += n
|
||||
nr_blocks -= n
|
||||
}
|
||||
|
||||
mem.zero_explicit(&tweaks, size_of(tweaks))
|
||||
mem.zero_explicit(&tmp, size_of(tmp))
|
||||
|
||||
return stk_block_nr
|
||||
}
|
||||
|
||||
// e_ref is the portable (bitsliced) Deoxys-II seal implementation.  It
// authenticates aad and plaintext, writes the ciphertext to dst and the
// authentication tag to tag.  Slice sizes are expected to have been
// validated by the caller.
@(private)
e_ref :: proc "contextless" (ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) #no_bounds_check {
	st: State_SW = ---
	st.ctx = ctx

	// Algorithm 3
	//
	// Associated data
	// A_1 || ... || A_la || A_∗ <- A where each |A_i| = n and |A_∗| < n
	// Auth <- 0^n
	// for i = 0 to la − 1 do
	//   Auth <- Auth ^ EK(0010 || i, A_i+1)
	// end
	// if A_∗ != nil then
	//   Auth <- Auth ^ EK(0110 || la, pad10∗(A_∗))
	// end
	auth: [TAG_SIZE]byte
	aad := aad
	n := bc_absorb(&st, auth[:], aad, PREFIX_AD_BLOCK, 0)
	aad = aad[n*BLOCK_SIZE:]
	if l := len(aad); l > 0 {
		a_star: [BLOCK_SIZE]byte

		copy(a_star[:], aad)
		a_star[l] = 0x80

		_ = bc_absorb(&st, auth[:], a_star[:], PREFIX_AD_FINAL, n)
	}

	// Message authentication and tag generation
	// M_1 || ... || M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n
	// tag <- Auth
	// for j = 0 to l − 1 do
	//   tag <- tag ^ EK(0000 || j, M_j+1)
	// end
	// if M_∗ != nil then
	//   tag <- tag ^ EK(0100 || l, pad10∗(M_∗))
	// end
	// tag <- EK(0001 || 0^4 || N, tag)
	m := plaintext
	n = bc_absorb(&st, auth[:], m, PREFIX_MSG_BLOCK, 0)
	m = m[n*BLOCK_SIZE:]
	if l := len(m); l > 0 {
		m_star: [BLOCK_SIZE]byte

		copy(m_star[:], m)
		m_star[l] = 0x80

		_ = bc_absorb(&st, auth[:], m_star[:], PREFIX_MSG_FINAL, n)

		// m_star holds the trailing plaintext bytes; wipe it, as is
		// done for the equivalent temporaries in the encryption pass
		// below and in d_ref.
		mem.zero_explicit(&m_star, size_of(m_star))
	}
	bc_final(&st, auth[:], iv)

	// Message encryption
	// for j = 0 to l − 1 do
	//   C_j <- M_j ^ EK(1 || tag ^ j, 0^8 || N)
	// end
	// if M_∗ != nil then
	//   C_∗ <- M_* ^ EK(1 || tag ^ l, 0^8 || N)
	// end
	//
	// return (C_1 || ... || C_l || C_∗, tag)
	q_iv: [8]u64 = ---
	enc_plaintext(&q_iv, iv)

	m = plaintext
	n = bc_encrypt(&st, dst, m, &q_iv, &auth, 0)
	m = m[n*BLOCK_SIZE:]
	if l := len(m); l > 0 {
		// Process the partial final block via a full-size temporary,
		// then copy only the bytes that fit back into dst.
		m_star: [BLOCK_SIZE]byte

		copy(m_star[:], m)
		_ = bc_encrypt(&st, m_star[:], m_star[:], &q_iv, &auth, n)

		copy(dst[n*BLOCK_SIZE:], m_star[:])

		mem.zero_explicit(&m_star, size_of(m_star))
	}

	copy(tag, auth[:])

	mem.zero_explicit(&st.q_stk, size_of(st.q_stk))
	mem.zero_explicit(&st.q_b, size_of(st.q_b))
}
|
||||
|
||||
@(private, require_results)
|
||||
d_ref :: proc "contextless" (ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
st: State_SW = ---
|
||||
st.ctx = ctx
|
||||
|
||||
// Algorithm 4
|
||||
//
|
||||
// Message decryption
|
||||
// C_1 || ... || C_l || C_∗ <- C where each |C_j| = n and |C_∗| < n
|
||||
// for j = 0 to l − 1 do
|
||||
// M_j <- C_j ^ EK(1 || tag ^ j, 0^8 || N)
|
||||
// end
|
||||
// if C_∗ != nil then
|
||||
// M_∗ <- C_∗ ^ EK(1 || tag ^ l, 0^8 || N)
|
||||
// end
|
||||
q_iv: [8]u64 = ---
|
||||
enc_plaintext(&q_iv, iv)
|
||||
|
||||
auth: [TAG_SIZE]byte
|
||||
copy(auth[:], tag)
|
||||
|
||||
m := ciphertext
|
||||
n := bc_encrypt(&st, dst, m, &q_iv, &auth, 0)
|
||||
m = m[n*BLOCK_SIZE:]
|
||||
if l := len(m); l > 0 {
|
||||
m_star: [BLOCK_SIZE]byte
|
||||
|
||||
copy(m_star[:], m)
|
||||
_ = bc_encrypt(&st, m_star[:], m_star[:], &q_iv, &auth, n)
|
||||
|
||||
copy(dst[n*BLOCK_SIZE:], m_star[:])
|
||||
|
||||
mem.zero_explicit(&m_star, size_of(m_star))
|
||||
}
|
||||
|
||||
// Associated data
|
||||
// A_1 || ... || A_la || A_∗ <- A where each |A_i| = n and |A_∗| < n
|
||||
// Auth <- 0
|
||||
// for i = 0 to la − 1 do
|
||||
// Auth <- Auth ^ EK(0010 || i, A_i+1)
|
||||
// end
|
||||
// if A_∗ != nil then
|
||||
// Auth <- Auth ^ EK(0110 || la, pad10∗(A_∗))
|
||||
// end
|
||||
auth = 0
|
||||
aad := aad
|
||||
n = bc_absorb(&st, auth[:], aad, PREFIX_AD_BLOCK, 0)
|
||||
aad = aad[n*BLOCK_SIZE:]
|
||||
if l := len(aad); l > 0 {
|
||||
a_star: [BLOCK_SIZE]byte
|
||||
|
||||
copy(a_star[:], aad)
|
||||
a_star[l] = 0x80
|
||||
|
||||
_ = bc_absorb(&st, auth[:], a_star[:], PREFIX_AD_FINAL, n)
|
||||
}
|
||||
|
||||
// Message authentication and tag generation
|
||||
// M_1 || ... || M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n
|
||||
// tag0 <- Auth
|
||||
// for j = 0 to l − 1 do
|
||||
// tag0 <- tag0 ^ EK(0000 || j, M_j+1)
|
||||
// end
|
||||
// if M_∗ != nil then
|
||||
// tag0 <- tag0 ^ EK(0100 || l, pad10∗(M_∗))
|
||||
// end
|
||||
// tag0 <- EK(0001 || 0^4 || N, tag0)
|
||||
m = dst[:len(ciphertext)]
|
||||
n = bc_absorb(&st, auth[:], m, PREFIX_MSG_BLOCK, 0)
|
||||
m = m[n*BLOCK_SIZE:]
|
||||
if l := len(m); l > 0 {
|
||||
m_star: [BLOCK_SIZE]byte
|
||||
|
||||
copy(m_star[:], m)
|
||||
m_star[l] = 0x80
|
||||
|
||||
_ = bc_absorb(&st, auth[:], m_star[:], PREFIX_MSG_FINAL, n)
|
||||
|
||||
mem.zero_explicit(&m_star, size_of(m_star))
|
||||
}
|
||||
bc_final(&st, auth[:], iv)
|
||||
|
||||
// Tag verification
|
||||
// if tag0 = tag then return (M_1 || ... || M_l || M_∗)
|
||||
// else return false
|
||||
ok := crypto.compare_constant_time(auth[:], tag) == 1
|
||||
|
||||
mem.zero_explicit(&auth, size_of(auth))
|
||||
mem.zero_explicit(&st.q_stk, size_of(st.q_stk))
|
||||
mem.zero_explicit(&st.q_b, size_of(st.q_b))
|
||||
|
||||
return ok
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
#+build !amd64
|
||||
package deoxysii
|
||||
|
||||
@(private = "file")
|
||||
ERR_HW_NOT_SUPPORTED :: "crypto/deoxysii: hardware implementation unsupported"
|
||||
|
||||
// is_hardware_accelerated returns true iff hardware accelerated Deoxys-II
|
||||
// is supported.
|
||||
is_hardware_accelerated :: proc "contextless" () -> bool {
|
||||
return false
|
||||
}
|
||||
|
||||
@(private)
|
||||
e_hw :: proc "contextless" (ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) #no_bounds_check {
|
||||
panic_contextless(ERR_HW_NOT_SUPPORTED)
|
||||
}
|
||||
|
||||
@(private, require_results)
|
||||
d_hw :: proc "contextless" (ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
panic_contextless(ERR_HW_NOT_SUPPORTED)
|
||||
}
|
||||
@@ -0,0 +1,434 @@
|
||||
#+build amd64
|
||||
package deoxysii
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto"
|
||||
import "core:crypto/aes"
|
||||
import "core:mem"
|
||||
import "core:simd"
|
||||
import "core:simd/x86"
|
||||
|
||||
// This processes a maximum of 4 blocks at a time, as that is suitable
|
||||
// for most current hardware that doesn't say "Xeon".
|
||||
|
||||
@(private = "file")
|
||||
_BIT_ENC :: x86.__m128i{0x80, 0}
|
||||
@(private = "file")
|
||||
_PREFIX_AD_BLOCK :: x86.__m128i{PREFIX_AD_BLOCK << PREFIX_SHIFT, 0}
|
||||
@(private = "file")
|
||||
_PREFIX_AD_FINAL :: x86.__m128i{PREFIX_AD_FINAL << PREFIX_SHIFT, 0}
|
||||
@(private = "file")
|
||||
_PREFIX_MSG_BLOCK :: x86.__m128i{PREFIX_MSG_BLOCK << PREFIX_SHIFT, 0}
|
||||
@(private = "file")
|
||||
_PREFIX_MSG_FINAL :: x86.__m128i{PREFIX_MSG_FINAL << PREFIX_SHIFT, 0}
|
||||
|
||||
// is_hardware_accelerated returns true iff hardware accelerated Deoxys-II
|
||||
// is supported.
|
||||
is_hardware_accelerated :: proc "contextless" () -> bool {
|
||||
return aes.is_hardware_accelerated()
|
||||
}
|
||||
|
||||
@(private = "file", enable_target_feature = "sse4.1", require_results)
auth_tweak :: #force_inline proc "contextless" (
	prefix: x86.__m128i,
	block_nr: int,
) -> x86.__m128i {
	// Byte-swapped block number, inserted into 64-bit element 1 of the
	// prefix vector.
	counter_be := i64(intrinsics.byte_swap(u64(block_nr)))
	return x86._mm_insert_epi64(prefix, counter_be, 1)
}
|
||||
|
||||
@(private = "file", enable_target_feature = "sse2", require_results)
enc_tweak :: #force_inline proc "contextless" (
	tag: x86.__m128i,
	block_nr: int,
) -> x86.__m128i {
	// Tag with the _BIT_ENC bit OR-ed in.
	tagged := x86._mm_or_si128(tag, _BIT_ENC)

	// Byte-swapped block number in 64-bit element 1, zero in element 0.
	counter := x86.__m128i{0, i64(intrinsics.byte_swap(u64(block_nr)))}

	return x86._mm_xor_si128(tagged, counter)
}
|
||||
|
||||
@(private = "file", enable_target_feature = "ssse3", require_results)
|
||||
h_ :: #force_inline proc "contextless" (tk1: x86.__m128i) -> x86.__m128i {
|
||||
return transmute(x86.__m128i)h(transmute(simd.u8x16)tk1)
|
||||
}
|
||||
|
||||
@(private = "file", enable_target_feature = "sse2,ssse3,aes", require_results)
|
||||
bc_x4 :: #force_inline proc "contextless" (
|
||||
ctx: ^Context,
|
||||
s_0, s_1, s_2, s_3: x86.__m128i,
|
||||
tweak_0, tweak_1, tweak_2, tweak_3: x86.__m128i,
|
||||
) -> (x86.__m128i, x86.__m128i, x86.__m128i, x86.__m128i) #no_bounds_check {
|
||||
s_0, s_1, s_2, s_3 := s_0, s_1, s_2, s_3
|
||||
tk1_0, tk1_1, tk1_2, tk1_3 := tweak_0, tweak_1, tweak_2, tweak_3
|
||||
|
||||
sk := intrinsics.unaligned_load((^x86.__m128i)(&ctx._subkeys[0]))
|
||||
stk_0 := x86._mm_xor_si128(tk1_0, sk)
|
||||
stk_1 := x86._mm_xor_si128(tk1_1, sk)
|
||||
stk_2 := x86._mm_xor_si128(tk1_2, sk)
|
||||
stk_3 := x86._mm_xor_si128(tk1_3, sk)
|
||||
|
||||
s_0 = x86._mm_xor_si128(s_0, stk_0)
|
||||
s_1 = x86._mm_xor_si128(s_1, stk_1)
|
||||
s_2 = x86._mm_xor_si128(s_2, stk_2)
|
||||
s_3 = x86._mm_xor_si128(s_3, stk_3)
|
||||
|
||||
for i in 1 ..= BC_ROUNDS {
|
||||
sk = intrinsics.unaligned_load((^x86.__m128i)(&ctx._subkeys[i]))
|
||||
|
||||
tk1_0 = h_(tk1_0)
|
||||
tk1_1 = h_(tk1_1)
|
||||
tk1_2 = h_(tk1_2)
|
||||
tk1_3 = h_(tk1_3)
|
||||
|
||||
stk_0 = x86._mm_xor_si128(tk1_0, sk)
|
||||
stk_1 = x86._mm_xor_si128(tk1_1, sk)
|
||||
stk_2 = x86._mm_xor_si128(tk1_2, sk)
|
||||
stk_3 = x86._mm_xor_si128(tk1_3, sk)
|
||||
|
||||
s_0 = x86._mm_aesenc_si128(s_0, stk_0)
|
||||
s_1 = x86._mm_aesenc_si128(s_1, stk_1)
|
||||
s_2 = x86._mm_aesenc_si128(s_2, stk_2)
|
||||
s_3 = x86._mm_aesenc_si128(s_3, stk_3)
|
||||
}
|
||||
|
||||
return s_0, s_1, s_2, s_3
|
||||
}
|
||||
|
||||
@(private = "file", enable_target_feature = "sse2,ssse3,aes", require_results)
|
||||
bc_x1 :: #force_inline proc "contextless" (
|
||||
ctx: ^Context,
|
||||
s: x86.__m128i,
|
||||
tweak: x86.__m128i,
|
||||
) -> x86.__m128i #no_bounds_check {
|
||||
s, tk1 := s, tweak
|
||||
|
||||
sk := intrinsics.unaligned_load((^x86.__m128i)(&ctx._subkeys[0]))
|
||||
stk := x86._mm_xor_si128(tk1, sk)
|
||||
|
||||
s = x86._mm_xor_si128(s, stk)
|
||||
|
||||
for i in 1 ..= BC_ROUNDS {
|
||||
sk = intrinsics.unaligned_load((^x86.__m128i)(&ctx._subkeys[i]))
|
||||
|
||||
tk1 = h_(tk1)
|
||||
|
||||
stk = x86._mm_xor_si128(tk1, sk)
|
||||
|
||||
s = x86._mm_aesenc_si128(s, stk)
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
@(private = "file", enable_target_feature = "sse2,ssse3,sse4.1,aes", require_results)
|
||||
bc_absorb :: proc "contextless" (
|
||||
ctx: ^Context,
|
||||
tag: x86.__m128i,
|
||||
src: []byte,
|
||||
tweak_prefix: x86.__m128i,
|
||||
stk_block_nr: int,
|
||||
) -> (x86.__m128i, int) #no_bounds_check {
|
||||
src, stk_block_nr, tag := src, stk_block_nr, tag
|
||||
|
||||
nr_blocks := len(src) / BLOCK_SIZE
|
||||
for nr_blocks >= 4 {
|
||||
d_0, d_1, d_2, d_3 := bc_x4(
|
||||
ctx,
|
||||
intrinsics.unaligned_load((^x86.__m128i)(raw_data(src))),
|
||||
intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[BLOCK_SIZE:]))),
|
||||
intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[2*BLOCK_SIZE:]))),
|
||||
intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[3*BLOCK_SIZE:]))),
|
||||
auth_tweak(tweak_prefix, stk_block_nr),
|
||||
auth_tweak(tweak_prefix, stk_block_nr + 1),
|
||||
auth_tweak(tweak_prefix, stk_block_nr + 2),
|
||||
auth_tweak(tweak_prefix, stk_block_nr + 3),
|
||||
)
|
||||
|
||||
tag = x86._mm_xor_si128(tag, d_0)
|
||||
tag = x86._mm_xor_si128(tag, d_1)
|
||||
tag = x86._mm_xor_si128(tag, d_2)
|
||||
tag = x86._mm_xor_si128(tag, d_3)
|
||||
|
||||
src = src[4*BLOCK_SIZE:]
|
||||
stk_block_nr += 4
|
||||
nr_blocks -= 4
|
||||
}
|
||||
|
||||
for nr_blocks > 0 {
|
||||
d := bc_x1(
|
||||
ctx,
|
||||
intrinsics.unaligned_load((^x86.__m128i)(raw_data(src))),
|
||||
auth_tweak(tweak_prefix, stk_block_nr),
|
||||
)
|
||||
|
||||
tag = x86._mm_xor_si128(tag, d)
|
||||
|
||||
src = src[BLOCK_SIZE:]
|
||||
stk_block_nr += 1
|
||||
nr_blocks -= 1
|
||||
}
|
||||
|
||||
return tag, stk_block_nr
|
||||
}
|
||||
|
||||
// bc_final produces the final tag block: it encrypts the accumulated
// tag with bc_x1 under a tweak whose first byte is
// PREFIX_TAG << PREFIX_SHIFT and whose following bytes are copied
// from iv (any bytes not covered by iv stay zero).
@(private = "file", enable_target_feature = "sse2,ssse3,aes", require_results)
bc_final :: proc "contextless" (
	ctx: ^Context,
	tag: x86.__m128i,
	iv: []byte,
) -> x86.__m128i {
	tweak_bytes: [BLOCK_SIZE]byte
	tweak_bytes[0] = PREFIX_TAG << PREFIX_SHIFT
	copy(tweak_bytes[1:], iv)

	return bc_x1(ctx, tag, intrinsics.unaligned_load((^x86.__m128i)(&tweak_bytes)))
}
|
||||
|
||||
// bc_encrypt XORs every whole BLOCK_SIZE block of src with a keystream
// block and writes the result to the matching position of dst.  The
// keystream block for block number j is the block cipher applied to iv
// under enc_tweak(tweak_tag, j), with j starting at stk_block_nr.  Any
// trailing partial block of src is ignored.
//
// Each block is loaded from src, XORed and stored to dst before the
// next block is touched, so dst and src may be the same buffer.
//
// Returns the block number following the last block processed.
@(private = "file", enable_target_feature = "sse2,ssse3,aes", require_results)
bc_encrypt :: proc "contextless" (
	ctx: ^Context,
	dst: []byte,
	src: []byte,
	iv: x86.__m128i,
	tweak_tag: x86.__m128i,
	stk_block_nr: int,
) -> int {
	out, in_, block_nr := dst, src, stk_block_nr

	remaining := len(in_) / BLOCK_SIZE

	// 4 blocks at a time.
	for remaining >= 4 {
		t_0 := enc_tweak(tweak_tag, block_nr)
		t_1 := enc_tweak(tweak_tag, block_nr + 1)
		t_2 := enc_tweak(tweak_tag, block_nr + 2)
		t_3 := enc_tweak(tweak_tag, block_nr + 3)

		ks_0, ks_1, ks_2, ks_3 := bc_x4(ctx, iv, iv, iv, iv, t_0, t_1, t_2, t_3)

		intrinsics.unaligned_store(
			(^x86.__m128i)(raw_data(out)),
			x86._mm_xor_si128(ks_0, intrinsics.unaligned_load((^x86.__m128i)(raw_data(in_)))),
		)
		intrinsics.unaligned_store(
			(^x86.__m128i)(raw_data(out[BLOCK_SIZE:])),
			x86._mm_xor_si128(ks_1, intrinsics.unaligned_load((^x86.__m128i)(raw_data(in_[BLOCK_SIZE:])))),
		)
		intrinsics.unaligned_store(
			(^x86.__m128i)(raw_data(out[2*BLOCK_SIZE:])),
			x86._mm_xor_si128(ks_2, intrinsics.unaligned_load((^x86.__m128i)(raw_data(in_[2*BLOCK_SIZE:])))),
		)
		intrinsics.unaligned_store(
			(^x86.__m128i)(raw_data(out[3*BLOCK_SIZE:])),
			x86._mm_xor_si128(ks_3, intrinsics.unaligned_load((^x86.__m128i)(raw_data(in_[3*BLOCK_SIZE:])))),
		)

		in_, out = in_[4*BLOCK_SIZE:], out[4*BLOCK_SIZE:]
		block_nr += 4
		remaining -= 4
	}

	// Remaining whole blocks, one at a time.
	for remaining > 0 {
		ks := bc_x1(ctx, iv, enc_tweak(tweak_tag, block_nr))

		intrinsics.unaligned_store(
			(^x86.__m128i)(raw_data(out)),
			x86._mm_xor_si128(ks, intrinsics.unaligned_load((^x86.__m128i)(raw_data(in_)))),
		)

		in_, out = in_[BLOCK_SIZE:], out[BLOCK_SIZE:]
		block_nr += 1
		remaining -= 1
	}

	return block_nr
}
|
||||
|
||||
// e_hw is the hardware-accelerated encryption path (Algorithm 3 in the
// comments below).  It authenticates aad and plaintext, writes the
// ciphertext to dst and the BLOCK_SIZE-byte tag to tag.
//
// No lengths are validated here and the procedure is #no_bounds_check;
// presumably the caller has already checked dst, tag and iv sizes.
@(private)
e_hw :: proc "contextless" (ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) #no_bounds_check {
	// iv_ is the block that gets encrypted to produce keystream:
	// a zero byte followed by the nonce (0^8 || N).
	tmp: [BLOCK_SIZE]byte
	copy(tmp[1:], iv)
	iv_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(&tmp)))

	// Algorithm 3
	//
	// Associated data
	//   A_1 || ... || A_la || A_∗ <- A where each |A_i| = n and |A_∗| < n
	//   Auth <- 0^n
	//   for i = 0 to la − 1 do
	//     Auth <- Auth ^ EK(0010 || i, A_i+1)
	//   end
	//   if A_∗ != nil then
	//     Auth <- Auth ^ EK(0110 || la, pad10∗(A_∗))
	//   end
	auth: x86.__m128i
	n: int

	aad := aad
	auth, n = bc_absorb(ctx, auth, aad, _PREFIX_AD_BLOCK, 0)
	aad = aad[n*BLOCK_SIZE:]
	if l := len(aad); l > 0 {
		a_star: [BLOCK_SIZE]byte

		copy(a_star[:], aad)
		a_star[l] = 0x80 // pad10*

		auth, _ = bc_absorb(ctx, auth, a_star[:], _PREFIX_AD_FINAL, n)
	}

	// Message authentication and tag generation
	//   M_1 || ... || M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n
	//   tag <- Auth
	//   for j = 0 to l − 1 do
	//     tag <- tag ^ EK(0000 || j, M_j+1)
	//   end
	//   if M_∗ != nil then
	//     tag <- tag ^ EK(0100 || l, pad10∗(M_∗))
	//   end
	//   tag <- EK(0001 || 0^4 || N, tag)
	m := plaintext
	auth, n = bc_absorb(ctx, auth, m, _PREFIX_MSG_BLOCK, 0)
	m = m[n*BLOCK_SIZE:]
	if l := len(m); l > 0 {
		m_star: [BLOCK_SIZE]byte

		copy(m_star[:], m)
		m_star[l] = 0x80 // pad10*

		auth, _ = bc_absorb(ctx, auth, m_star[:], _PREFIX_MSG_FINAL, n)

		// m_star holds a copy of the plaintext tail; do not leave
		// it on the stack.
		mem.zero_explicit(&m_star, size_of(m_star))
	}
	auth = bc_final(ctx, auth, iv)

	// Message encryption
	//   for j = 0 to l − 1 do
	//     C_j <- M_j ^ EK(1 || tag ^ j, 0^8 || N)
	//   end
	//   if M_∗ != nil then
	//     C_∗ <- M_* ^ EK(1 || tag ^ l, 0^8 || N)
	//   end
	//
	//   return (C_1 || ... || C_l || C_∗, tag)
	m = plaintext
	n = bc_encrypt(ctx, dst, m, iv_, auth, 0)
	m = m[n*BLOCK_SIZE:]
	if l := len(m); l > 0 {
		m_star: [BLOCK_SIZE]byte

		copy(m_star[:], m)
		_ = bc_encrypt(ctx, m_star[:], m_star[:], iv_, auth, n)

		// Only the first l bytes are ciphertext; the rest of m_star
		// is raw keystream and must neither be written to dst nor
		// be left behind on the stack.
		copy(dst[n*BLOCK_SIZE:], m_star[:l])

		mem.zero_explicit(&m_star, size_of(m_star))
	}

	intrinsics.unaligned_store((^x86.__m128i)(raw_data(tag)), auth)
}
|
||||
|
||||
// d_hw is the hardware-accelerated decryption path (Algorithm 4 in the
// comments below).  It decrypts ciphertext into dst using the received
// tag, recomputes the tag over aad and the decrypted message, and
// returns true iff the recomputed tag matches tag (compared in
// constant time).
//
// dst is written before the tag is verified, so on a false return it
// contains unauthenticated data; presumably the caller discards it.
@(private, require_results)
d_hw :: proc "contextless" (ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
	// iv_ is the block that gets encrypted to produce keystream:
	// a zero byte followed by the nonce (0^8 || N).
	tmp: [BLOCK_SIZE]byte
	copy(tmp[1:], iv)
	iv_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(&tmp)))

	// Algorithm 4
	//
	// Message decryption
	//   C_1 || ... || C_l || C_∗ <- C where each |C_j| = n and |C_∗| < n
	//   for j = 0 to l − 1 do
	//     M_j <- C_j ^ EK(1 || tag ^ j, 0^8 || N)
	//   end
	//   if C_∗ != nil then
	//     M_∗ <- C_∗ ^ EK(1 || tag ^ l, 0^8 || N)
	//   end
	auth := intrinsics.unaligned_load((^x86.__m128i)(raw_data(tag)))

	m := ciphertext
	n := bc_encrypt(ctx, dst, m, iv_, auth, 0)
	m = m[n*BLOCK_SIZE:]
	if l := len(m); l > 0 {
		m_star: [BLOCK_SIZE]byte

		copy(m_star[:], m)
		_ = bc_encrypt(ctx, m_star[:], m_star[:], iv_, auth, n)

		// Only the first l bytes are message; the rest of m_star is
		// raw keystream and must not be written to dst.
		copy(dst[n*BLOCK_SIZE:], m_star[:l])

		mem.zero_explicit(&m_star, size_of(m_star))
	}

	// Associated data
	//   A_1 || ... || A_la || A_∗ <- A where each |A_i| = n and |A_∗| < n
	//   Auth <- 0
	//   for i = 0 to la − 1 do
	//     Auth <- Auth ^ EK(0010 || i, A_i+1)
	//   end
	//   if A_∗ != nil then
	//     Auth <- Auth ^ EK(0110 || la, pad10∗(A_∗))
	//   end
	auth = x86.__m128i{0, 0}
	aad := aad
	auth, n = bc_absorb(ctx, auth, aad, _PREFIX_AD_BLOCK, 0)
	aad = aad[BLOCK_SIZE*n:]
	if l := len(aad); l > 0 {
		a_star: [BLOCK_SIZE]byte

		copy(a_star[:], aad)
		a_star[l] = 0x80 // pad10*

		auth, _ = bc_absorb(ctx, auth, a_star[:], _PREFIX_AD_FINAL, n)
	}

	// Message authentication and tag generation
	//   M_1 || ... || M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n
	//   tag0 <- Auth
	//   for j = 0 to l − 1 do
	//     tag0 <- tag0 ^ EK(0000 || j, M_j+1)
	//   end
	//   if M_∗ != nil then
	//     tag0 <- tag0 ^ EK(0100 || l, pad10∗(M_∗))
	//   end
	//   tag0 <- EK(0001 || 0^4 || N, tag0)
	m = dst[:len(ciphertext)]
	auth, n = bc_absorb(ctx, auth, m, _PREFIX_MSG_BLOCK, 0)
	m = m[n*BLOCK_SIZE:]
	if l := len(m); l > 0 {
		m_star: [BLOCK_SIZE]byte

		copy(m_star[:], m)
		m_star[l] = 0x80 // pad10*

		auth, _ = bc_absorb(ctx, auth, m_star[:], _PREFIX_MSG_FINAL, n)

		// m_star holds a copy of the decrypted message tail; wipe it
		// like the scratch block used for decryption above.
		mem.zero_explicit(&m_star, size_of(m_star))
	}
	auth = bc_final(ctx, auth, iv)

	// Tag verification
	//   if tag0 = tag then return (M_1 || ... || M_l || M_∗)
	//   else return false
	intrinsics.unaligned_store((^x86.__m128i)(raw_data(&tmp)), auth)
	ok := crypto.compare_constant_time(tmp[:], tag) == 1

	// tmp now holds the expected tag; wipe it before returning.
	mem.zero_explicit(&tmp, size_of(tmp))

	return ok
}
|
||||
@@ -81,12 +81,8 @@ private_key_set_bytes :: proc(priv_key: ^Private_Key, b: []byte) -> bool {
|
||||
|
||||
// private_key_bytes sets dst to byte-encoding of priv_key.
|
||||
private_key_bytes :: proc(priv_key: ^Private_Key, dst: []byte) {
	// Preconditions: the key must have been initialized, and dst must
	// be exactly PRIVATE_KEY_SIZE bytes long.
	ensure(priv_key._is_initialized, "crypto/ed25519: uninitialized private key")
	ensure(len(dst) == PRIVATE_KEY_SIZE, "crypto/ed25519: invalid destination size")

	copy(dst, priv_key._b[:])
}
|
||||
@@ -98,12 +94,8 @@ private_key_clear :: proc "contextless" (priv_key: ^Private_Key) {
|
||||
|
||||
// sign writes the signature by priv_key over msg to sig.
|
||||
sign :: proc(priv_key: ^Private_Key, msg, sig: []byte) {
|
||||
if !priv_key._is_initialized {
|
||||
panic("crypto/ed25519: uninitialized private key")
|
||||
}
|
||||
if len(sig) != SIGNATURE_SIZE {
|
||||
panic("crypto/ed25519: invalid destination size")
|
||||
}
|
||||
ensure(priv_key._is_initialized, "crypto/ed25519: uninitialized private key")
|
||||
ensure(len(sig) == SIGNATURE_SIZE, "crypto/ed25519: invalid destination size")
|
||||
|
||||
// 1. Compute the hash of the private key d, H(d) = (h_0, h_1, ..., h_2b-1)
|
||||
// using SHA-512 for Ed25519. H(d) may be precomputed.
|
||||
@@ -178,9 +170,7 @@ public_key_set_bytes :: proc "contextless" (pub_key: ^Public_Key, b: []byte) ->
|
||||
|
||||
// public_key_set_priv sets pub_key to the public component of priv_key.
|
||||
public_key_set_priv :: proc(pub_key: ^Public_Key, priv_key: ^Private_Key) {
|
||||
if !priv_key._is_initialized {
|
||||
panic("crypto/ed25519: uninitialized public key")
|
||||
}
|
||||
ensure(priv_key._is_initialized, "crypto/ed25519: uninitialized public key")
|
||||
|
||||
src := &priv_key._pub_key
|
||||
copy(pub_key._b[:], src._b[:])
|
||||
@@ -191,21 +181,15 @@ public_key_set_priv :: proc(pub_key: ^Public_Key, priv_key: ^Private_Key) {
|
||||
|
||||
// public_key_bytes sets dst to byte-encoding of pub_key.
|
||||
public_key_bytes :: proc(pub_key: ^Public_Key, dst: []byte) {
	// Preconditions: the key must have been initialized, and dst must
	// be exactly PUBLIC_KEY_SIZE bytes long.
	ensure(pub_key._is_initialized, "crypto/ed25519: uninitialized public key")
	ensure(len(dst) == PUBLIC_KEY_SIZE, "crypto/ed25519: invalid destination size")

	copy(dst, pub_key._b[:])
}
|
||||
|
||||
// public_key_equal returns true iff pub_key is equal to other.
|
||||
public_key_equal :: proc(pub_key, other: ^Public_Key) -> bool {
	// Both keys must have been initialized.
	ensure(pub_key._is_initialized && other._is_initialized, "crypto/ed25519: uninitialized public key")

	// The encoded keys are compared in constant time.
	return crypto.compare_constant_time(pub_key._b[:], other._b[:]) == 1
}
|
||||
|
||||
@@ -56,7 +56,7 @@ init :: proc(ctx: ^Context, algorithm: hash.Algorithm, key: []byte) {
|
||||
|
||||
// update adds more data to the Context.
|
||||
update :: proc(ctx: ^Context, data: []byte) {
	// The Context must have been initialized.
	ensure(ctx._is_initialized)

	// Feed data to the inner hash instance.
	hash.update(&ctx._i_hash, data)
}
|
||||
@@ -64,13 +64,10 @@ update :: proc(ctx: ^Context, data: []byte) {
|
||||
// final finalizes the Context, writes the tag to dst, and calls
|
||||
// reset on the Context.
|
||||
final :: proc(ctx: ^Context, dst: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
defer (reset(ctx))
|
||||
|
||||
if len(dst) != ctx._tag_sz {
|
||||
panic("crypto/hmac: invalid destination tag size")
|
||||
}
|
||||
ensure(ctx._is_initialized)
|
||||
ensure(len(dst) == ctx._tag_sz, "crypto/hmac: invalid destination tag size")
|
||||
|
||||
hash.final(&ctx._i_hash, dst) // H((k ^ ipad) || text)
|
||||
|
||||
@@ -105,14 +102,14 @@ reset :: proc(ctx: ^Context) {
|
||||
|
||||
// algorithm returns the Algorithm used by a Context instance.
|
||||
algorithm :: proc(ctx: ^Context) -> hash.Algorithm {
	// The Context must have been initialized.
	ensure(ctx._is_initialized)

	// Reported by the inner hash instance.
	return hash.algorithm(&ctx._i_hash)
}
|
||||
|
||||
// tag_size returns the tag size of a Context instance in bytes.
|
||||
tag_size :: proc(ctx: ^Context) -> int {
	// The Context must have been initialized.
	ensure(ctx._is_initialized)

	return ctx._tag_sz
}
|
||||
|
||||
@@ -36,6 +36,7 @@ sum :: proc(sec_strength: int, dst, msg, key, domain_sep: []byte) {
|
||||
// tag is valid.
|
||||
verify :: proc(sec_strength: int, tag, msg, key, domain_sep: []byte, allocator := context.temp_allocator) -> bool {
|
||||
derived_tag := make([]byte, len(tag), allocator)
|
||||
defer(delete(derived_tag))
|
||||
|
||||
sum(sec_strength, derived_tag, msg, key, domain_sep)
|
||||
|
||||
@@ -59,8 +60,6 @@ init_256 :: proc(ctx: ^Context, key, domain_sep: []byte) {
|
||||
|
||||
// update adds more data to the Context.
|
||||
update :: proc(ctx: ^Context, data: []byte) {
|
||||
assert(ctx.is_initialized)
|
||||
|
||||
shake.write((^shake.Context)(ctx), data)
|
||||
}
|
||||
|
||||
@@ -68,12 +67,9 @@ update :: proc(ctx: ^Context, data: []byte) {
|
||||
// on the Context. This routine will panic if the dst length is less than
|
||||
// MIN_TAG_SIZE.
|
||||
final :: proc(ctx: ^Context, dst: []byte) {
|
||||
assert(ctx.is_initialized)
|
||||
defer reset(ctx)
|
||||
|
||||
if len(dst) < MIN_TAG_SIZE {
|
||||
panic("crypto/kmac: invalid KMAC tag_size, too short")
|
||||
}
|
||||
ensure(len(dst) >= MIN_TAG_SIZE, "crypto/kmac: invalid KMAC tag_size, too short")
|
||||
|
||||
_sha3.final_cshake((^_sha3.Context)(ctx), dst)
|
||||
}
|
||||
@@ -103,14 +99,12 @@ _init_kmac :: proc(ctx: ^Context, key, s: []byte, sec_strength: int) {
|
||||
reset(ctx)
|
||||
}
|
||||
|
||||
if len(key) < sec_strength / 8 {
|
||||
panic("crypto/kmac: invalid KMAC key, too short")
|
||||
}
|
||||
ensure(len(key) >= sec_strength / 8, "crypto/kmac: invalid KMAC key, too short")
|
||||
|
||||
ctx_ := (^_sha3.Context)(ctx)
|
||||
_sha3.init_cshake(ctx_, N_KMAC, s, sec_strength)
|
||||
_sha3.bytepad(ctx_, [][]byte{key}, _sha3.rate_cshake(sec_strength))
|
||||
}
|
||||
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
N_KMAC := []byte{'K', 'M', 'A', 'C'}
|
||||
|
||||
@@ -40,37 +40,37 @@ BLOCK_SIZE_512 :: _sha3.RATE_512
|
||||
Context :: distinct _sha3.Context
|
||||
|
||||
// init_224 initializes a Context for Keccak-224.
|
||||
init_224 :: proc "contextless" (ctx: ^Context) {
	// Select the 224-bit digest length, then run the shared Keccak
	// initialization.
	ctx.mdlen = DIGEST_SIZE_224
	_init(ctx)
}
|
||||
|
||||
// init_256 initializes a Context for Keccak-256.
|
||||
init_256 :: proc "contextless" (ctx: ^Context) {
	// Select the 256-bit digest length, then run the shared Keccak
	// initialization.
	ctx.mdlen = DIGEST_SIZE_256
	_init(ctx)
}
|
||||
|
||||
// init_384 initializes a Context for Keccak-384.
|
||||
init_384 :: proc "contextless" (ctx: ^Context) {
	// Select the 384-bit digest length, then run the shared Keccak
	// initialization.
	ctx.mdlen = DIGEST_SIZE_384
	_init(ctx)
}
|
||||
|
||||
// init_512 initializes a Context for Keccak-512.
|
||||
init_512 :: proc "contextless" (ctx: ^Context) {
	// Select the 512-bit digest length, then run the shared Keccak
	// initialization.
	ctx.mdlen = DIGEST_SIZE_512
	_init(ctx)
}
|
||||
|
||||
// _init is the shared tail of the init_* procedures: it sets the
// Keccak domain separation byte and initializes the underlying
// _sha3 Context.  ctx.mdlen is set by the caller beforehand.
@(private)
_init :: proc "contextless" (ctx: ^Context) {
	ctx.dsbyte = _sha3.DS_KECCAK
	_sha3.init((^_sha3.Context)(ctx))
}
|
||||
|
||||
// update adds more data to the Context.
|
||||
update :: proc "contextless" (ctx: ^Context, data: []byte) {
	// Context is a distinct _sha3.Context, so forward directly.
	_sha3.update((^_sha3.Context)(ctx), data)
}
|
||||
|
||||
@@ -79,17 +79,17 @@ update :: proc(ctx: ^Context, data: []byte) {
|
||||
//
|
||||
// Iff finalize_clone is set, final will work on a copy of the Context,
|
||||
// which is useful for calculating rolling digests.
|
||||
final :: proc "contextless" (ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
	// Context is a distinct _sha3.Context, so forward directly.
	_sha3.final((^_sha3.Context)(ctx), hash, finalize_clone)
}
|
||||
|
||||
// clone clones the Context other into ctx.
|
||||
clone :: proc "contextless" (ctx, other: ^Context) {
	// Context is a distinct _sha3.Context, so forward directly.
	_sha3.clone((^_sha3.Context)(ctx), (^_sha3.Context)(other))
}
|
||||
|
||||
// reset sanitizes the Context. The Context must be re-initialized to
|
||||
// be used again.
|
||||
reset :: proc "contextless" (ctx: ^Context) {
	// Context is a distinct _sha3.Context, so forward directly.
	_sha3.reset((^_sha3.Context)(ctx))
}
|
||||
|
||||
@@ -53,7 +53,7 @@ init :: proc(ctx: ^Context) {
|
||||
|
||||
// update adds more data to the Context.
|
||||
update :: proc(ctx: ^Context, data: []byte) {
|
||||
assert(ctx.is_initialized)
|
||||
ensure(ctx.is_initialized)
|
||||
|
||||
for i := 0; i < len(data); i += 1 {
|
||||
ctx.data[ctx.datalen] = data[i]
|
||||
@@ -72,11 +72,8 @@ update :: proc(ctx: ^Context, data: []byte) {
|
||||
// Iff finalize_clone is set, final will work on a copy of the Context,
|
||||
// which is useful for calculating rolling digests.
|
||||
final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
|
||||
assert(ctx.is_initialized)
|
||||
|
||||
if len(hash) < DIGEST_SIZE {
|
||||
panic("crypto/md5: invalid destination digest size")
|
||||
}
|
||||
ensure(ctx.is_initialized)
|
||||
ensure(len(hash) >= DIGEST_SIZE, "crypto/md5: invalid destination digest size")
|
||||
|
||||
ctx := ctx
|
||||
if finalize_clone {
|
||||
|
||||
@@ -60,7 +60,7 @@ init :: proc(ctx: ^Context) {
|
||||
|
||||
// update adds more data to the Context.
|
||||
update :: proc(ctx: ^Context, data: []byte) {
|
||||
assert(ctx.is_initialized)
|
||||
ensure(ctx.is_initialized)
|
||||
|
||||
for i := 0; i < len(data); i += 1 {
|
||||
ctx.data[ctx.datalen] = data[i]
|
||||
@@ -79,11 +79,8 @@ update :: proc(ctx: ^Context, data: []byte) {
|
||||
// Iff finalize_clone is set, final will work on a copy of the Context,
|
||||
// which is useful for calculating rolling digests.
|
||||
final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
|
||||
assert(ctx.is_initialized)
|
||||
|
||||
if len(hash) < DIGEST_SIZE {
|
||||
panic("crypto/sha1: invalid destination digest size")
|
||||
}
|
||||
ensure(ctx.is_initialized)
|
||||
ensure(len(hash) >= DIGEST_SIZE, "crypto/sha1: invalid destination digest size")
|
||||
|
||||
ctx := ctx
|
||||
if finalize_clone {
|
||||
|
||||
@@ -60,9 +60,7 @@ Context :: struct {
|
||||
// init initializes a Context with the specified key. The key SHOULD be
|
||||
// unique and MUST be unpredictable for each invocation.
|
||||
init :: proc(ctx: ^Context, key: []byte) {
|
||||
if len(key) != KEY_SIZE {
|
||||
panic("crypto/poly1305: invalid key size")
|
||||
}
|
||||
ensure(len(key) == KEY_SIZE, "crypto/poly1305: invalid key size")
|
||||
|
||||
// r = le_bytes_to_num(key[0..15])
|
||||
// r = clamp(r) (r &= 0xffffffc0ffffffc0ffffffc0fffffff)
|
||||
@@ -85,7 +83,7 @@ init :: proc(ctx: ^Context, key: []byte) {
|
||||
|
||||
// update adds more data to the Context.
|
||||
update :: proc(ctx: ^Context, data: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
ensure(ctx._is_initialized)
|
||||
|
||||
msg := data
|
||||
msg_len := len(data)
|
||||
@@ -124,12 +122,10 @@ update :: proc(ctx: ^Context, data: []byte) {
|
||||
// final finalizes the Context, writes the tag to dst, and calls
|
||||
// reset on the Context.
|
||||
final :: proc(ctx: ^Context, dst: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
defer reset(ctx)
|
||||
|
||||
if len(dst) != TAG_SIZE {
|
||||
panic("poly1305: invalid destination tag size")
|
||||
}
|
||||
ensure(ctx._is_initialized)
|
||||
ensure(len(dst) == TAG_SIZE, "poly1305: invalid destination tag size")
|
||||
|
||||
// Process remaining block
|
||||
if ctx._leftover > 0 {
|
||||
|
||||
@@ -16,7 +16,7 @@ ELEMENT_SIZE :: 32
|
||||
// group element.
|
||||
WIDE_ELEMENT_SIZE :: 64
|
||||
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
FE_NEG_ONE := field.Tight_Field_Element {
|
||||
2251799813685228,
|
||||
2251799813685247,
|
||||
@@ -24,7 +24,7 @@ FE_NEG_ONE := field.Tight_Field_Element {
|
||||
2251799813685247,
|
||||
2251799813685247,
|
||||
}
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
FE_INVSQRT_A_MINUS_D := field.Tight_Field_Element {
|
||||
278908739862762,
|
||||
821645201101625,
|
||||
@@ -32,7 +32,7 @@ FE_INVSQRT_A_MINUS_D := field.Tight_Field_Element {
|
||||
1777959178193151,
|
||||
2118520810568447,
|
||||
}
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
FE_ONE_MINUS_D_SQ := field.Tight_Field_Element {
|
||||
1136626929484150,
|
||||
1998550399581263,
|
||||
@@ -40,7 +40,7 @@ FE_ONE_MINUS_D_SQ := field.Tight_Field_Element {
|
||||
118527312129759,
|
||||
45110755273534,
|
||||
}
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
FE_D_MINUS_ONE_SQUARED := field.Tight_Field_Element {
|
||||
1507062230895904,
|
||||
1572317787530805,
|
||||
@@ -48,7 +48,7 @@ FE_D_MINUS_ONE_SQUARED := field.Tight_Field_Element {
|
||||
317374165784489,
|
||||
1572899562415810,
|
||||
}
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
FE_SQRT_AD_MINUS_ONE := field.Tight_Field_Element {
|
||||
2241493124984347,
|
||||
425987919032274,
|
||||
@@ -76,7 +76,7 @@ ge_clear :: proc "contextless" (ge: ^Group_Element) {
|
||||
|
||||
// ge_set sets `ge = a`.
|
||||
ge_set :: proc(ge, a: ^Group_Element) {
|
||||
_ge_assert_initialized([]^Group_Element{a})
|
||||
_ge_ensure_initialized([]^Group_Element{a})
|
||||
|
||||
grp.ge_set(&ge._p, &a._p)
|
||||
ge._is_initialized = true
|
||||
@@ -199,9 +199,7 @@ ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
|
||||
// ge_set_wide_bytes sets ge to the result of deriving a ristretto255
|
||||
// group element, from a wide (512-bit) byte string.
|
||||
ge_set_wide_bytes :: proc(ge: ^Group_Element, b: []byte) {
|
||||
if len(b) != WIDE_ELEMENT_SIZE {
|
||||
panic("crypto/ristretto255: invalid wide input size")
|
||||
}
|
||||
ensure(len(b) == WIDE_ELEMENT_SIZE, "crypto/ristretto255: invalid wide input size")
|
||||
|
||||
// The element derivation function on an input string b proceeds as
|
||||
// follows:
|
||||
@@ -222,10 +220,8 @@ ge_set_wide_bytes :: proc(ge: ^Group_Element, b: []byte) {
|
||||
|
||||
// ge_bytes sets dst to the canonical encoding of ge.
|
||||
ge_bytes :: proc(ge: ^Group_Element, dst: []byte) {
|
||||
_ge_assert_initialized([]^Group_Element{ge})
|
||||
if len(dst) != ELEMENT_SIZE {
|
||||
panic("crypto/ristretto255: invalid destination size")
|
||||
}
|
||||
_ge_ensure_initialized([]^Group_Element{ge})
|
||||
ensure(len(dst) == ELEMENT_SIZE, "crypto/ristretto255: invalid destination size")
|
||||
|
||||
x0, y0, z0, t0 := &ge._p.x, &ge._p.y, &ge._p.z, &ge._p.t
|
||||
|
||||
@@ -306,7 +302,7 @@ ge_bytes :: proc(ge: ^Group_Element, dst: []byte) {
|
||||
|
||||
// ge_add sets `ge = a + b`.
|
||||
ge_add :: proc(ge, a, b: ^Group_Element) {
|
||||
_ge_assert_initialized([]^Group_Element{a, b})
|
||||
_ge_ensure_initialized([]^Group_Element{a, b})
|
||||
|
||||
grp.ge_add(&ge._p, &a._p, &b._p)
|
||||
ge._is_initialized = true
|
||||
@@ -314,7 +310,7 @@ ge_add :: proc(ge, a, b: ^Group_Element) {
|
||||
|
||||
// ge_double sets `ge = a + a`.
|
||||
ge_double :: proc(ge, a: ^Group_Element) {
|
||||
_ge_assert_initialized([]^Group_Element{a})
|
||||
_ge_ensure_initialized([]^Group_Element{a})
|
||||
|
||||
grp.ge_double(&ge._p, &a._p)
|
||||
ge._is_initialized = true
|
||||
@@ -322,7 +318,7 @@ ge_double :: proc(ge, a: ^Group_Element) {
|
||||
|
||||
// ge_negate sets `ge = -a`.
|
||||
ge_negate :: proc(ge, a: ^Group_Element) {
|
||||
_ge_assert_initialized([]^Group_Element{a})
|
||||
_ge_ensure_initialized([]^Group_Element{a})
|
||||
|
||||
grp.ge_negate(&ge._p, &a._p)
|
||||
ge._is_initialized = true
|
||||
@@ -330,7 +326,7 @@ ge_negate :: proc(ge, a: ^Group_Element) {
|
||||
|
||||
// ge_scalarmult sets `ge = A * sc`.
|
||||
ge_scalarmult :: proc(ge, A: ^Group_Element, sc: ^Scalar) {
|
||||
_ge_assert_initialized([]^Group_Element{A})
|
||||
_ge_ensure_initialized([]^Group_Element{A})
|
||||
|
||||
grp.ge_scalarmult(&ge._p, &A._p, sc)
|
||||
ge._is_initialized = true
|
||||
@@ -344,7 +340,7 @@ ge_scalarmult_generator :: proc "contextless" (ge: ^Group_Element, sc: ^Scalar)
|
||||
|
||||
// ge_scalarmult_vartime sets `ge = A * sc` in variable time.
|
||||
ge_scalarmult_vartime :: proc(ge, A: ^Group_Element, sc: ^Scalar) {
|
||||
_ge_assert_initialized([]^Group_Element{A})
|
||||
_ge_ensure_initialized([]^Group_Element{A})
|
||||
|
||||
grp.ge_scalarmult_vartime(&ge._p, &A._p, sc)
|
||||
ge._is_initialized = true
|
||||
@@ -358,7 +354,7 @@ ge_double_scalarmult_generator_vartime :: proc(
|
||||
A: ^Group_Element,
|
||||
b: ^Scalar,
|
||||
) {
|
||||
_ge_assert_initialized([]^Group_Element{A})
|
||||
_ge_ensure_initialized([]^Group_Element{A})
|
||||
|
||||
grp.ge_double_scalarmult_basepoint_vartime(&ge._p, a, &A._p, b)
|
||||
ge._is_initialized = true
|
||||
@@ -367,7 +363,7 @@ ge_double_scalarmult_generator_vartime :: proc(
|
||||
// ge_cond_negate sets `ge = a` iff `ctrl == 0` and `ge = -a` iff `ctrl == 1`.
|
||||
// Behavior for all other values of ctrl is undefined.
|
||||
ge_cond_negate :: proc(ge, a: ^Group_Element, ctrl: int) {
|
||||
_ge_assert_initialized([]^Group_Element{a})
|
||||
_ge_ensure_initialized([]^Group_Element{a})
|
||||
|
||||
grp.ge_cond_negate(&ge._p, &a._p, ctrl)
|
||||
ge._is_initialized = true
|
||||
@@ -376,7 +372,7 @@ ge_cond_negate :: proc(ge, a: ^Group_Element, ctrl: int) {
|
||||
// ge_cond_assign sets `ge = ge` iff `ctrl == 0` and `ge = a` iff `ctrl == 1`.
|
||||
// Behavior for all other values of ctrl is undefined.
|
||||
ge_cond_assign :: proc(ge, a: ^Group_Element, ctrl: int) {
	// ge is both an input and the output here, so it must already be
	// initialized along with a.
	_ge_ensure_initialized([]^Group_Element{ge, a})

	grp.ge_cond_assign(&ge._p, &a._p, ctrl)
}
|
||||
@@ -384,7 +380,7 @@ ge_cond_assign :: proc(ge, a: ^Group_Element, ctrl: int) {
|
||||
// ge_cond_select sets `ge = a` iff `ctrl == 0` and `ge = b` iff `ctrl == 1`.
|
||||
// Behavior for all other values of ctrl is undefined.
|
||||
ge_cond_select :: proc(ge, a, b: ^Group_Element, ctrl: int) {
|
||||
_ge_assert_initialized([]^Group_Element{a, b})
|
||||
_ge_ensure_initialized([]^Group_Element{a, b})
|
||||
|
||||
grp.ge_cond_select(&ge._p, &a._p, &b._p, ctrl)
|
||||
ge._is_initialized = true
|
||||
@@ -393,7 +389,7 @@ ge_cond_select :: proc(ge, a, b: ^Group_Element, ctrl: int) {
|
||||
// ge_equal returns 1 iff `a == b`, and 0 otherwise.
|
||||
@(require_results)
|
||||
ge_equal :: proc(a, b: ^Group_Element) -> int {
|
||||
_ge_assert_initialized([]^Group_Element{a, b})
|
||||
_ge_ensure_initialized([]^Group_Element{a, b})
|
||||
|
||||
// CT_EQ(x1 * y2, y1 * x2) | CT_EQ(y1 * y2, x1 * x2)
|
||||
ax_by, ay_bx, ay_by, ax_bx: field.Tight_Field_Element = ---, ---, ---, ---
|
||||
@@ -501,10 +497,8 @@ ge_map :: proc "contextless" (ge: ^Group_Element, b: []byte) {
|
||||
}
|
||||
|
||||
// _ge_ensure_initialized checks that every group element in ges has
// been initialized, failing with a descriptive message on the first
// one that has not.
@(private)
_ge_ensure_initialized :: proc(ges: []^Group_Element) {
	for ge in ges {
		ensure(ge._is_initialized, "crypto/ristretto255: uninitialized group element")
	}
}
|
||||
|
||||
@@ -42,9 +42,7 @@ sc_set_bytes :: proc(sc: ^Scalar, b: []byte) -> bool {
|
||||
// scalar, from a wide (512-bit) byte string by interpreting b as a
|
||||
// little-endian value, and reducing it mod the group order.
|
||||
sc_set_bytes_wide :: proc(sc: ^Scalar, b: []byte) {
|
||||
if len(b) != WIDE_SCALAR_SIZE {
|
||||
panic("crypto/ristretto255: invalid wide input size")
|
||||
}
|
||||
ensure(len(b) == WIDE_SCALAR_SIZE, "crypto/ristretto255: invalid wide input size")
|
||||
|
||||
b_ := (^[WIDE_SCALAR_SIZE]byte)(raw_data(b))
|
||||
grp.sc_set_bytes_wide(sc, b_)
|
||||
@@ -52,9 +50,7 @@ sc_set_bytes_wide :: proc(sc: ^Scalar, b: []byte) {
|
||||
|
||||
// sc_bytes sets dst to the canonical encoding of sc.
|
||||
sc_bytes :: proc(sc: ^Scalar, dst: []byte) {
	// dst must be exactly SCALAR_SIZE bytes long.
	ensure(len(dst) == SCALAR_SIZE, "crypto/ristretto255: invalid destination size")

	grp.sc_bytes(dst, sc)
}
|
||||
|
||||
+34
-25
@@ -15,9 +15,9 @@ package sha2
|
||||
zhibog, dotbmp: Initial implementation.
|
||||
*/
|
||||
|
||||
import "core:encoding/endian"
|
||||
@(require) import "core:encoding/endian"
|
||||
import "core:math/bits"
|
||||
import "core:mem"
|
||||
@(require) import "core:mem"
|
||||
|
||||
// DIGEST_SIZE_224 is the SHA-224 digest size in bytes.
|
||||
DIGEST_SIZE_224 :: 28
|
||||
@@ -158,7 +158,7 @@ _init :: proc(ctx: ^$T) {
|
||||
|
||||
// update adds more data to the Context.
|
||||
update :: proc(ctx: ^$T, data: []byte) {
|
||||
assert(ctx.is_initialized)
|
||||
ensure(ctx.is_initialized)
|
||||
|
||||
when T == Context_256 {
|
||||
CURR_BLOCK_SIZE :: BLOCK_SIZE_256
|
||||
@@ -194,11 +194,8 @@ update :: proc(ctx: ^$T, data: []byte) {
|
||||
// Iff finalize_clone is set, final will work on a copy of the Context,
|
||||
// which is useful for calculating rolling digests.
|
||||
final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
|
||||
assert(ctx.is_initialized)
|
||||
|
||||
if len(hash) * 8 < ctx.md_bits {
|
||||
panic("crypto/sha2: invalid destination digest size")
|
||||
}
|
||||
ensure(ctx.is_initialized)
|
||||
ensure(len(hash) * 8 >= ctx.md_bits, "crypto/sha2: invalid destination digest size")
|
||||
|
||||
ctx := ctx
|
||||
if finalize_clone {
|
||||
@@ -238,7 +235,7 @@ final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
|
||||
endian.unchecked_put_u64be(pad[8:], length_lo)
|
||||
update(ctx, pad[0:16])
|
||||
}
|
||||
assert(ctx.bitlength == 0)
|
||||
assert(ctx.bitlength == 0) // Check for bugs
|
||||
|
||||
when T == Context_256 {
|
||||
for i := 0; i < ctx.md_bits / 32; i += 1 {
|
||||
@@ -270,8 +267,8 @@ reset :: proc(ctx: ^$T) {
|
||||
SHA2 implementation
|
||||
*/
|
||||
|
||||
@(private)
|
||||
sha256_k := [64]u32 {
|
||||
@(private, rodata)
|
||||
SHA256_K := [64]u32 {
|
||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
||||
@@ -290,8 +287,8 @@ sha256_k := [64]u32 {
|
||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
|
||||
}
|
||||
|
||||
@(private)
|
||||
sha512_k := [80]u64 {
|
||||
@(private, rodata)
|
||||
SHA512_K := [80]u64 {
|
||||
0x428a2f98d728ae22, 0x7137449123ef65cd,
|
||||
0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
|
||||
0x3956c25bf348b538, 0x59f111f1b605d019,
|
||||
@@ -334,6 +331,11 @@ sha512_k := [80]u64 {
|
||||
0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
|
||||
}
|
||||
|
||||
@(private)
|
||||
SHA256_ROUNDS :: 64
|
||||
@(private)
|
||||
SHA512_ROUNDS :: 80
|
||||
|
||||
@(private)
|
||||
SHA256_CH :: #force_inline proc "contextless" (x, y, z: u32) -> u32 {
|
||||
return (x & y) ~ (~x & z)
|
||||
@@ -395,22 +397,29 @@ SHA512_F4 :: #force_inline proc "contextless" (x: u64) -> u64 {
|
||||
}
|
||||
|
||||
@(private)
|
||||
sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) {
|
||||
sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) #no_bounds_check {
|
||||
when T == Context_256 {
|
||||
w: [64]u32
|
||||
if is_hardware_accelerated_256() {
|
||||
sha256_transf_hw(ctx, data)
|
||||
return
|
||||
}
|
||||
|
||||
w: [SHA256_ROUNDS]u32
|
||||
wv: [8]u32
|
||||
t1, t2: u32
|
||||
|
||||
CURR_BLOCK_SIZE :: BLOCK_SIZE_256
|
||||
} else when T == Context_512 {
|
||||
w: [80]u64
|
||||
w: [SHA512_ROUNDS]u64
|
||||
wv: [8]u64
|
||||
t1, t2: u64
|
||||
|
||||
CURR_BLOCK_SIZE :: BLOCK_SIZE_512
|
||||
}
|
||||
|
||||
data := data
|
||||
for len(data) >= CURR_BLOCK_SIZE {
|
||||
for i := 0; i < 16; i += 1 {
|
||||
for i in 0 ..< 16 {
|
||||
when T == Context_256 {
|
||||
w[i] = endian.unchecked_get_u32be(data[i * 4:])
|
||||
} else when T == Context_512 {
|
||||
@@ -419,22 +428,22 @@ sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) {
|
||||
}
|
||||
|
||||
when T == Context_256 {
|
||||
for i := 16; i < 64; i += 1 {
|
||||
for i in 16 ..< SHA256_ROUNDS {
|
||||
w[i] = SHA256_F4(w[i - 2]) + w[i - 7] + SHA256_F3(w[i - 15]) + w[i - 16]
|
||||
}
|
||||
} else when T == Context_512 {
|
||||
for i := 16; i < 80; i += 1 {
|
||||
for i in 16 ..< SHA512_ROUNDS {
|
||||
w[i] = SHA512_F4(w[i - 2]) + w[i - 7] + SHA512_F3(w[i - 15]) + w[i - 16]
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < 8; i += 1 {
|
||||
for i in 0 ..< 8 {
|
||||
wv[i] = ctx.h[i]
|
||||
}
|
||||
|
||||
when T == Context_256 {
|
||||
for i := 0; i < 64; i += 1 {
|
||||
t1 = wv[7] + SHA256_F2(wv[4]) + SHA256_CH(wv[4], wv[5], wv[6]) + sha256_k[i] + w[i]
|
||||
for i in 0 ..< SHA256_ROUNDS {
|
||||
t1 = wv[7] + SHA256_F2(wv[4]) + SHA256_CH(wv[4], wv[5], wv[6]) + SHA256_K[i] + w[i]
|
||||
t2 = SHA256_F1(wv[0]) + SHA256_MAJ(wv[0], wv[1], wv[2])
|
||||
wv[7] = wv[6]
|
||||
wv[6] = wv[5]
|
||||
@@ -446,8 +455,8 @@ sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) {
|
||||
wv[0] = t1 + t2
|
||||
}
|
||||
} else when T == Context_512 {
|
||||
for i := 0; i < 80; i += 1 {
|
||||
t1 = wv[7] + SHA512_F2(wv[4]) + SHA512_CH(wv[4], wv[5], wv[6]) + sha512_k[i] + w[i]
|
||||
for i in 0 ..< SHA512_ROUNDS {
|
||||
t1 = wv[7] + SHA512_F2(wv[4]) + SHA512_CH(wv[4], wv[5], wv[6]) + SHA512_K[i] + w[i]
|
||||
t2 = SHA512_F1(wv[0]) + SHA512_MAJ(wv[0], wv[1], wv[2])
|
||||
wv[7] = wv[6]
|
||||
wv[6] = wv[5]
|
||||
@@ -460,7 +469,7 @@ sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) {
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < 8; i += 1 {
|
||||
for i in 0 ..< 8 {
|
||||
ctx.h[i] += wv[i]
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
#+build !amd64
|
||||
package sha2
|
||||
|
||||
// ERR_HW_NOT_SUPPORTED is the panic message raised by sha256_transf_hw
// in this (non-amd64) build, which has no hardware SHA-256 backend.
@(private = "file")
|
||||
ERR_HW_NOT_SUPPORTED :: "crypto/sha2: hardware implementation unsupported"
|
||||
|
||||
// is_hardware_accelerated_256 reports whether a hardware accelerated
// SHA-224/SHA-256 implementation can be used.
//
// This file is only built for non-amd64 targets, for which no hardware
// backend is provided, so the answer is unconditionally false.
is_hardware_accelerated_256 :: proc "contextless" () -> bool {
	return false
}
|
||||
|
||||
// sha256_transf_hw is the placeholder for the hardware accelerated
// SHA-256 block transform on targets without one.  It ignores both
// arguments and always panics with ERR_HW_NOT_SUPPORTED; since
// is_hardware_accelerated_256 returns false in this build, callers that
// check it first never reach this procedure.
sha256_transf_hw :: proc "contextless" (ctx: ^Context_256, data: []byte) {
|
||||
panic_contextless(ERR_HW_NOT_SUPPORTED)
|
||||
}
|
||||
@@ -0,0 +1,260 @@
|
||||
#+build amd64
|
||||
package sha2
|
||||
|
||||
// Based on the public domain code by Jeffrey Walton, though
|
||||
// realistically, there only is one sensible way to write this
|
||||
// and Intel's whitepaper covers it.
|
||||
//
|
||||
// See: https://github.com/noloader/SHA-Intrinsics
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:simd"
|
||||
import "core:simd/x86"
|
||||
import "core:sys/info"
|
||||
|
||||
// MASK is the byte-shuffle control passed to _mm_shuffle_epi8 right after
// each 16-byte message load.  Its byte indices (3,2,1,0, 7,6,5,4, ...)
// reverse the four bytes inside every 32-bit lane, i.e. it byte-swaps
// each message word.
@(private = "file")
|
||||
MASK :: x86.__m128i{0x0405060700010203, 0x0c0d0e0f08090a0b}
|
||||
|
||||
// K_0 .. K_15 hold the 64 SHA-256 round constants, four per vector.
// Each u64x2 packs four consecutive 32-bit constants with the
// lowest-numbered one in the least significant 32 bits (for example
// K_0 = {K[1]:K[0], K[3]:K[2]}), so K_n is the addend for rounds
// 4n .. 4n+3 in sha256_transf_hw.
@(private = "file")
|
||||
K_0 :: simd.u64x2{0x71374491428a2f98, 0xe9b5dba5b5c0fbcf}
|
||||
@(private = "file")
|
||||
K_1 :: simd.u64x2{0x59f111f13956c25b, 0xab1c5ed5923f82a4}
|
||||
@(private = "file")
|
||||
K_2 :: simd.u64x2{0x12835b01d807aa98, 0x550c7dc3243185be}
|
||||
@(private = "file")
|
||||
K_3 :: simd.u64x2{0x80deb1fe72be5d74, 0xc19bf1749bdc06a7}
|
||||
@(private = "file")
|
||||
K_4 :: simd.u64x2{0xefbe4786e49b69c1, 0x240ca1cc0fc19dc6}
|
||||
@(private = "file")
|
||||
K_5 :: simd.u64x2{0x4a7484aa2de92c6f, 0x76f988da5cb0a9dc}
|
||||
@(private = "file")
|
||||
K_6 :: simd.u64x2{0xa831c66d983e5152, 0xbf597fc7b00327c8}
|
||||
@(private = "file")
|
||||
K_7 :: simd.u64x2{0xd5a79147c6e00bf3, 0x1429296706ca6351}
|
||||
@(private = "file")
|
||||
K_8 :: simd.u64x2{0x2e1b213827b70a85, 0x53380d134d2c6dfc}
|
||||
@(private = "file")
|
||||
K_9 :: simd.u64x2{0x766a0abb650a7354, 0x92722c8581c2c92e}
|
||||
@(private = "file")
|
||||
K_10 :: simd.u64x2{0xa81a664ba2bfe8a1, 0xc76c51a3c24b8b70}
|
||||
@(private = "file")
|
||||
K_11 :: simd.u64x2{0xd6990624d192e819, 0x106aa070f40e3585}
|
||||
@(private = "file")
|
||||
K_12 :: simd.u64x2{0x1e376c0819a4c116, 0x34b0bcb52748774c}
|
||||
@(private = "file")
|
||||
K_13 :: simd.u64x2{0x4ed8aa4a391c0cb3, 0x682e6ff35b9cca4f}
|
||||
@(private = "file")
|
||||
K_14 :: simd.u64x2{0x78a5636f748f82ee, 0x8cc7020884c87814}
|
||||
@(private = "file")
|
||||
K_15 :: simd.u64x2{0xa4506ceb90befffa, 0xc67178f2bef9a3f7}
|
||||
|
||||
|
||||
// is_hardware_accelerated_256 reports whether the hardware accelerated
// SHA-224/SHA-256 implementation can be used on the running CPU.
//
// It returns true iff CPU feature information is available and includes
// every instruction set extension the accelerated transform is compiled
// with (SSE2, SSSE3, SSE4.1 and the SHA extensions).
is_hardware_accelerated_256 :: proc "contextless" () -> bool {
	REQUIRED :: info.CPU_Features{.sse2, .ssse3, .sse41, .sha}

	// cpu_features is optional; without it nothing can be assumed.
	if detected, have_info := info.cpu_features.?; have_info {
		// Bit-set superset test: all required features must be present.
		return detected >= REQUIRED
	}
	return false
}
|
||||
|
||||
// sha256_transf_hw is the SHA-256 block transform written with the x86
// SHA extension intrinsics.
//
// Inputs:
// - ctx: hash context; ctx.h[0..<8] is loaded on entry and overwritten
//   with the updated chaining value on exit.
// - data: message bytes; every complete BLOCK_SIZE_256 chunk is consumed
//   in order, a trailing partial block is left unprocessed.
//
// The procedure is compiled with the sse2/ssse3/sse4.1/sha target
// features enabled, so it must only be called when
// is_hardware_accelerated_256() returns true.
@(private, enable_target_feature="sse2,ssse3,sse4.1,sha")
|
||||
sha256_transf_hw :: proc "contextless" (ctx: ^Context_256, data: []byte) #no_bounds_check {
|
||||
// Load the state
|
||||
tmp := intrinsics.unaligned_load((^x86.__m128i)(&ctx.h[0]))
|
||||
state_1 := intrinsics.unaligned_load((^x86.__m128i)(&ctx.h[4]))
|
||||
|
||||
// Rearrange the eight state words from the in-memory order into the
// ABEF / CDGH pairing used by the sha256rnds2 calls below (layout
// comments list the 32-bit lanes from most to least significant).
tmp = x86._mm_shuffle_epi32(tmp, 0xb1) // CDAB
|
||||
state_1 = x86._mm_shuffle_epi32(state_1, 0x1b) // EFGH
|
||||
state_0 := x86._mm_alignr_epi8(tmp, state_1, 8) // ABEF
|
||||
// state_1 = x86._mm_blend_epi16(state_1, tmp, 0xf0) // CDGH
|
||||
state_1 = kludge_mm_blend_epi16_0xf0(state_1, tmp)
|
||||
|
||||
data := data
|
||||
for len(data) >= BLOCK_SIZE_256 {
|
||||
// Keep the incoming state for the feed-forward addition that ends
// the block.
state_0_save, state_1_save := state_0, state_1
|
||||
|
||||
// Every 4-round group below follows the same shape: add the group's
// round constants to four message words, run two rounds, move the
// upper two sums into the low half (shuffle 0x0e), run two more
// rounds.  The sha256msg1/sha256msg2 calls interleaved with the
// rounds extend the message schedule for later groups.
// Rounds 0-3
|
||||
msg := intrinsics.unaligned_load((^x86.__m128i)(raw_data(data)))
|
||||
msg_0 := x86._mm_shuffle_epi8(msg, MASK)
|
||||
msg = x86._mm_add_epi32(msg_0, x86.__m128i(K_0))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0xe)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
|
||||
// Rounds 4-7
|
||||
msg_1 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(data[16:])))
|
||||
msg_1 = x86._mm_shuffle_epi8(msg_1, MASK)
|
||||
msg = x86._mm_add_epi32(msg_1, x86.__m128i(K_1))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0xe)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
msg_0 = x86._mm_sha256msg1_epu32(msg_0, msg_1)
|
||||
|
||||
// Rounds 8-11
|
||||
msg_2 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(data[32:])))
|
||||
msg_2 = x86._mm_shuffle_epi8(msg_2, MASK)
|
||||
msg = x86._mm_add_epi32(msg_2, x86.__m128i(K_2))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0xe)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
msg_1 = x86._mm_sha256msg1_epu32(msg_1, msg_2)
|
||||
|
||||
// Rounds 12-15
|
||||
msg_3 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(data[48:])))
|
||||
msg_3 = x86._mm_shuffle_epi8(msg_3, MASK)
|
||||
msg = x86._mm_add_epi32(msg_3, x86.__m128i(K_3))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
tmp = x86._mm_alignr_epi8(msg_3, msg_2, 4)
|
||||
msg_0 = x86._mm_add_epi32(msg_0, tmp)
|
||||
msg_0 = x86._mm_sha256msg2_epu32(msg_0, msg_3)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0x0e)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
msg_2 = x86._mm_sha256msg1_epu32(msg_2, msg_3)
|
||||
|
||||
// Rounds 16-19
|
||||
msg = x86._mm_add_epi32(msg_0, x86.__m128i(K_4))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
tmp = x86._mm_alignr_epi8(msg_0, msg_3, 4)
|
||||
msg_1 = x86._mm_add_epi32(msg_1, tmp)
|
||||
msg_1 = x86._mm_sha256msg2_epu32(msg_1, msg_0)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0x0e)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
msg_3 = x86._mm_sha256msg1_epu32(msg_3, msg_0)
|
||||
|
||||
// Rounds 20-23
|
||||
msg = x86._mm_add_epi32(msg_1, x86.__m128i(K_5))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
tmp = x86._mm_alignr_epi8(msg_1, msg_0, 4)
|
||||
msg_2 = x86._mm_add_epi32(msg_2, tmp)
|
||||
msg_2 = x86._mm_sha256msg2_epu32(msg_2, msg_1)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0x0e)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
msg_0 = x86._mm_sha256msg1_epu32(msg_0, msg_1)
|
||||
|
||||
// Rounds 24-27
|
||||
msg = x86._mm_add_epi32(msg_2, x86.__m128i(K_6))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
tmp = x86._mm_alignr_epi8(msg_2, msg_1, 4)
|
||||
msg_3 = x86._mm_add_epi32(msg_3, tmp)
|
||||
msg_3 = x86._mm_sha256msg2_epu32(msg_3, msg_2)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0x0e)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
msg_1 = x86._mm_sha256msg1_epu32(msg_1, msg_2)
|
||||
|
||||
// Rounds 28-31
|
||||
msg = x86._mm_add_epi32(msg_3, x86.__m128i(K_7))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
tmp = x86._mm_alignr_epi8(msg_3, msg_2, 4)
|
||||
msg_0 = x86._mm_add_epi32(msg_0, tmp)
|
||||
msg_0 = x86._mm_sha256msg2_epu32(msg_0, msg_3)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0x0e)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
msg_2 = x86._mm_sha256msg1_epu32(msg_2, msg_3)
|
||||
|
||||
// Rounds 32-35
|
||||
msg = x86._mm_add_epi32(msg_0, x86.__m128i(K_8))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
tmp = x86._mm_alignr_epi8(msg_0, msg_3, 4)
|
||||
msg_1 = x86._mm_add_epi32(msg_1, tmp)
|
||||
msg_1 = x86._mm_sha256msg2_epu32(msg_1, msg_0)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0x0e)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
msg_3 = x86._mm_sha256msg1_epu32(msg_3, msg_0)
|
||||
|
||||
// Rounds 36-39
|
||||
msg = x86._mm_add_epi32(msg_1, x86.__m128i(K_9))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
tmp = x86._mm_alignr_epi8(msg_1, msg_0, 4)
|
||||
msg_2 = x86._mm_add_epi32(msg_2, tmp)
|
||||
msg_2 = x86._mm_sha256msg2_epu32(msg_2, msg_1)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0x0e)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
msg_0 = x86._mm_sha256msg1_epu32(msg_0, msg_1)
|
||||
|
||||
// Rounds 40-43
|
||||
msg = x86._mm_add_epi32(msg_2, x86.__m128i(K_10))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
tmp = x86._mm_alignr_epi8(msg_2, msg_1, 4)
|
||||
msg_3 = x86._mm_add_epi32(msg_3, tmp)
|
||||
msg_3 = x86._mm_sha256msg2_epu32(msg_3, msg_2)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0x0e)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
msg_1 = x86._mm_sha256msg1_epu32(msg_1, msg_2)
|
||||
|
||||
// Rounds 44-47
|
||||
msg = x86._mm_add_epi32(msg_3, x86.__m128i(K_11))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
tmp = x86._mm_alignr_epi8(msg_3, msg_2, 4)
|
||||
msg_0 = x86._mm_add_epi32(msg_0, tmp)
|
||||
msg_0 = x86._mm_sha256msg2_epu32(msg_0, msg_3)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0x0e)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
msg_2 = x86._mm_sha256msg1_epu32(msg_2, msg_3)
|
||||
|
||||
// Rounds 48-51
|
||||
msg = x86._mm_add_epi32(msg_0, x86.__m128i(K_12))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
tmp = x86._mm_alignr_epi8(msg_0, msg_3, 4)
|
||||
msg_1 = x86._mm_add_epi32(msg_1, tmp)
|
||||
msg_1 = x86._mm_sha256msg2_epu32(msg_1, msg_0)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0x0e)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
msg_3 = x86._mm_sha256msg1_epu32(msg_3, msg_0)
|
||||
|
||||
// From here on no sha256msg1 step is issued: the remaining groups
// only finish schedule words that are already in flight.
// Rounds 52-55
|
||||
msg = x86._mm_add_epi32(msg_1, x86.__m128i(K_13))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
tmp = x86._mm_alignr_epi8(msg_1, msg_0, 4)
|
||||
msg_2 = x86._mm_add_epi32(msg_2, tmp)
|
||||
msg_2 = x86._mm_sha256msg2_epu32(msg_2, msg_1)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0x0e)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
|
||||
/* Rounds 56-59 */
|
||||
msg = x86._mm_add_epi32(msg_2, x86.__m128i(K_14))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
tmp = x86._mm_alignr_epi8(msg_2, msg_1, 4)
|
||||
msg_3 = x86._mm_add_epi32(msg_3, tmp)
|
||||
msg_3 = x86._mm_sha256msg2_epu32(msg_3, msg_2)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0x0e)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
|
||||
// Rounds 60-63
|
||||
msg = x86._mm_add_epi32(msg_3, x86.__m128i(K_15))
|
||||
state_1 = x86._mm_sha256rnds2_epu32(state_1, state_0, msg)
|
||||
msg = x86._mm_shuffle_epi32(msg, 0x0e)
|
||||
state_0 = x86._mm_sha256rnds2_epu32(state_0, state_1, msg)
|
||||
|
||||
// Feed-forward: add the state saved at the top of the block.
state_0 = x86._mm_add_epi32(state_0, state_0_save)
|
||||
state_1 = x86._mm_add_epi32(state_1, state_1_save)
|
||||
|
||||
data = data[BLOCK_SIZE_256:]
|
||||
}
|
||||
|
||||
// Write back the updated state
|
||||
// (inverse of the rearrangement done when the state was loaded)
tmp = x86._mm_shuffle_epi32(state_0, 0x1b) // FEBA
|
||||
state_1 = x86._mm_shuffle_epi32(state_1, 0xb1) // DCHG
|
||||
// state_0 = x86._mm_blend_epi16(tmp, state_1, 0xf0) // DCBA
|
||||
state_0 = kludge_mm_blend_epi16_0xf0(tmp, state_1)
|
||||
state_1 = x86._mm_alignr_epi8(state_1, tmp, 8) // HGFE
|
||||
|
||||
intrinsics.unaligned_store((^x86.__m128i)(&ctx.h[0]), state_0)
|
||||
intrinsics.unaligned_store((^x86.__m128i)(&ctx.h[4]), state_1)
|
||||
}
|
||||
|
||||
// kludge_mm_blend_epi16_0xf0 stands in for `_mm_blend_epi16(a, b, 0xf0)`:
// the result takes its low 64-bit lane from `a` and its high 64-bit lane
// from `b`.
@(private = "file")
kludge_mm_blend_epi16_0xf0 :: #force_inline proc "contextless"(a, b: x86.__m128i) -> x86.__m128i {
	// HACK HACK HACK: LLVM got rid of `llvm.x86.sse41.pblendw`.
	low_src, high_src := simd.to_array(a), simd.to_array(b)
	return x86.__m128i{low_src[0], high_src[1]}
}
|
||||
@@ -219,18 +219,14 @@ verify_4_8 :: proc {
|
||||
*/
|
||||
|
||||
init :: proc(ctx: ^Context, key: []byte, c_rounds, d_rounds: int) {
|
||||
if len(key) != KEY_SIZE {
|
||||
panic("crypto/siphash; invalid key size")
|
||||
}
|
||||
ensure(len(key) == KEY_SIZE,"crypto/siphash; invalid key size")
|
||||
ctx.c_rounds = c_rounds
|
||||
ctx.d_rounds = d_rounds
|
||||
is_valid_setting :=
|
||||
(ctx.c_rounds == 1 && ctx.d_rounds == 3) ||
|
||||
(ctx.c_rounds == 2 && ctx.d_rounds == 4) ||
|
||||
(ctx.c_rounds == 4 && ctx.d_rounds == 8)
|
||||
if !is_valid_setting {
|
||||
panic("crypto/siphash: incorrect rounds set up")
|
||||
}
|
||||
ensure(is_valid_setting, "crypto/siphash: incorrect rounds set up")
|
||||
ctx.k0 = endian.unchecked_get_u64le(key[:8])
|
||||
ctx.k1 = endian.unchecked_get_u64le(key[8:])
|
||||
ctx.v0 = 0x736f6d6570736575 ~ ctx.k0
|
||||
@@ -245,7 +241,7 @@ init :: proc(ctx: ^Context, key: []byte, c_rounds, d_rounds: int) {
|
||||
}
|
||||
|
||||
update :: proc(ctx: ^Context, data: []byte) {
|
||||
assert(ctx.is_initialized, "crypto/siphash: context is not initialized")
|
||||
ensure(ctx.is_initialized)
|
||||
|
||||
data := data
|
||||
ctx.total_length += len(data)
|
||||
@@ -269,7 +265,7 @@ update :: proc(ctx: ^Context, data: []byte) {
|
||||
}
|
||||
|
||||
final :: proc(ctx: ^Context, dst: ^u64) {
|
||||
assert(ctx.is_initialized, "crypto/siphash: context is not initialized")
|
||||
ensure(ctx.is_initialized)
|
||||
|
||||
tmp: [BLOCK_SIZE]byte
|
||||
copy(tmp[:], ctx.buf[:ctx.last_block])
|
||||
@@ -336,9 +332,8 @@ _get_byte :: #force_inline proc "contextless" (byte_num: byte, into: u64) -> byt
|
||||
|
||||
@(private)
|
||||
_collect_output :: #force_inline proc(dst: []byte, hash: u64) {
|
||||
if len(dst) < DIGEST_SIZE {
|
||||
panic("crypto/siphash: invalid tag size")
|
||||
}
|
||||
ensure(len(dst) >= DIGEST_SIZE, "crypto/siphash: invalid tag size")
|
||||
|
||||
dst[0] = _get_byte(7, hash)
|
||||
dst[1] = _get_byte(6, hash)
|
||||
dst[2] = _get_byte(5, hash)
|
||||
|
||||
@@ -53,7 +53,7 @@ init :: proc(ctx: ^Context) {
|
||||
|
||||
// update adds more data to the Context.
|
||||
update :: proc(ctx: ^Context, data: []byte) {
|
||||
assert(ctx.is_initialized)
|
||||
ensure(ctx.is_initialized)
|
||||
|
||||
data := data
|
||||
ctx.length += u64(len(data))
|
||||
@@ -83,11 +83,8 @@ update :: proc(ctx: ^Context, data: []byte) {
|
||||
// Iff finalize_clone is set, final will work on a copy of the Context,
|
||||
// which is useful for calculating rolling digests.
|
||||
final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
|
||||
assert(ctx.is_initialized)
|
||||
|
||||
if len(hash) < DIGEST_SIZE {
|
||||
panic("crypto/sm3: invalid destination digest size")
|
||||
}
|
||||
ensure(ctx.is_initialized)
|
||||
ensure(len(hash) >= DIGEST_SIZE, "crypto/sm3: invalid destination digest size")
|
||||
|
||||
ctx := ctx
|
||||
if finalize_clone {
|
||||
@@ -110,7 +107,7 @@ final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
|
||||
length <<= 3
|
||||
endian.unchecked_put_u64be(pad[:], length)
|
||||
update(ctx, pad[0:8])
|
||||
assert(ctx.bitlength == 0)
|
||||
assert(ctx.bitlength == 0) // Check for bugs
|
||||
|
||||
for i := 0; i < DIGEST_SIZE / 4; i += 1 {
|
||||
endian.unchecked_put_u32be(hash[i * 4:], ctx.state[i])
|
||||
@@ -136,7 +133,7 @@ reset :: proc(ctx: ^Context) {
|
||||
SM3 implementation
|
||||
*/
|
||||
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
IV := [8]u32 {
|
||||
0x7380166f, 0x4914b2b9, 0x172442d7, 0xda8a0600,
|
||||
0xa96f30bc, 0x163138aa, 0xe38dee4d, 0xb0fb0e4e,
|
||||
|
||||
@@ -15,7 +15,7 @@ SCALAR_SIZE :: 32
|
||||
// POINT_SIZE is the size of a X25519 point (public key/shared secret) in bytes.
|
||||
POINT_SIZE :: 32
|
||||
|
||||
@(private)
|
||||
@(private, rodata)
|
||||
_BASE_POINT: [32]byte = {9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
|
||||
|
||||
@(private)
|
||||
@@ -101,15 +101,9 @@ _scalarmult :: proc "contextless" (out, scalar, point: ^[32]byte) {
|
||||
// scalarmult "multiplies" the provided scalar and point, and writes the
|
||||
// resulting point to dst.
|
||||
scalarmult :: proc(dst, scalar, point: []byte) {
|
||||
if len(scalar) != SCALAR_SIZE {
|
||||
panic("crypto/x25519: invalid scalar size")
|
||||
}
|
||||
if len(point) != POINT_SIZE {
|
||||
panic("crypto/x25519: invalid point size")
|
||||
}
|
||||
if len(dst) != POINT_SIZE {
|
||||
panic("crypto/x25519: invalid destination point size")
|
||||
}
|
||||
ensure(len(scalar) == SCALAR_SIZE, "crypto/x25519: invalid scalar size")
|
||||
ensure(len(point) == POINT_SIZE, "crypto/x25519: invalid point size")
|
||||
ensure(len(dst) == POINT_SIZE, "crypto/x25519: invalid destination point size")
|
||||
|
||||
// "clamp" the scalar
|
||||
e: [32]byte = ---
|
||||
|
||||
@@ -0,0 +1,155 @@
|
||||
/*
|
||||
package x448 implements the X448 (aka curve448) Elliptic-Curve
|
||||
Diffie-Hellman key exchange protocol.
|
||||
|
||||
See:
|
||||
- [[ https://www.rfc-editor.org/rfc/rfc7748 ]]
|
||||
*/
|
||||
package x448
|
||||
|
||||
import field "core:crypto/_fiat/field_curve448"
|
||||
import "core:mem"
|
||||
|
||||
// SCALAR_SIZE is the size of a X448 scalar (private key) in bytes.
|
||||
SCALAR_SIZE :: 56
|
||||
// POINT_SIZE is the size of a X448 point (public key/shared secret) in bytes.
|
||||
POINT_SIZE :: 56
|
||||
|
||||
// _BASE_POINT is the point handed to scalarmult by scalarmult_basepoint:
// a POINT_SIZE byte string whose first byte is 5 and whose remaining bytes
// are zero, i.e. the little-endian encoding of the value 5 (the X448 base
// point u-coordinate given in RFC 7748).
@(private, rodata)
|
||||
_BASE_POINT: [56]byte = {
|
||||
5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
}
|
||||
|
||||
// _scalar_bit returns bit `i` of the little-endian scalar `s` as 0 or 1,
// where bit 0 is the least significant bit of s[0].  A negative index
// yields 0.
@(private)
_scalar_bit :: #force_inline proc "contextless" (s: ^[56]byte, i: int) -> u8 {
	if i < 0 {
		return 0
	}
	byte_index := i >> 3       // i / 8
	bit_offset := uint(i & 7)  // i % 8
	return (s[byte_index] >> bit_offset) & 1
}
|
||||
|
||||
// _scalarmult runs the X448 Montgomery ladder.
//
// Inputs:
// - out: receives the 56-byte encoding of the resulting u-coordinate.
// - scalar: 56-byte scalar; all 448 bits are processed, from bit 447 down
//   to bit 0.  No clamping is applied here (scalarmult clamps before
//   calling).
// - point: 56-byte encoding of the input u-coordinate.
//
// The per-bit choice is made through field.fe_cond_swap rather than by
// branching on the scalar bit, and every field element temporary is
// cleared with field.fe_clear_vec before returning.
@(private)
|
||||
_scalarmult :: proc "contextless" (out, scalar, point: ^[56]byte) {
|
||||
// Montgomery pseudo-multiplication, using the RFC 7748 formula.
|
||||
t1, t2: field.Loose_Field_Element = ---, ---
|
||||
|
||||
// x_1 = u
|
||||
// x_2 = 1
|
||||
// z_2 = 0
|
||||
// x_3 = u
|
||||
// z_3 = 1
|
||||
x1: field.Tight_Field_Element = ---
|
||||
field.fe_from_bytes(&x1, point)
|
||||
|
||||
x2, x3, z2, z3: field.Tight_Field_Element = ---, ---, ---, ---
|
||||
field.fe_one(&x2)
|
||||
field.fe_zero(&z2)
|
||||
field.fe_set(&x3, &x1)
|
||||
field.fe_one(&z3)
|
||||
|
||||
// swap = 0
|
||||
swap: int
|
||||
|
||||
// For t = bits-1 down to 0:
|
||||
for t := 448 - 1; t >= 0; t -= 1 {
|
||||
// k_t = (k >> t) & 1
|
||||
k_t := int(_scalar_bit(scalar, t))
|
||||
// swap ^= k_t
|
||||
swap ~= k_t
|
||||
// Conditional swap; see text below.
|
||||
// (x_2, x_3) = cswap(swap, x_2, x_3)
|
||||
field.fe_cond_swap(&x2, &x3, swap)
|
||||
// (z_2, z_3) = cswap(swap, z_2, z_3)
|
||||
field.fe_cond_swap(&z2, &z3, swap)
|
||||
// swap = k_t
|
||||
swap = k_t
|
||||
|
||||
// Note: This deliberately omits reductions after add/sub operations
|
||||
// if the result is only ever used as the input to a mul/square since
|
||||
// the implementations of those can deal with non-reduced inputs.
|
||||
//
|
||||
// fe_tighten_cast is only used to store a fully reduced
|
||||
// output in a Loose_Field_Element, or to provide such a
|
||||
// Loose_Field_Element as a Tight_Field_Element argument.
|
||||
|
||||
// The RFC's named intermediates are kept in reused storage: z2 and
// z3 are overwritten as soon as their old values have been consumed,
// so the statement order below matters.
// A = x_2 + z_2
|
||||
field.fe_add(&t1, &x2, &z2)
|
||||
// B = x_2 - z_2
|
||||
field.fe_sub(&t2, &x2, &z2)
|
||||
// D = x_3 - z_3
|
||||
field.fe_sub(field.fe_relax_cast(&z2), &x3, &z3) // (z2 unreduced)
|
||||
// DA = D * A
|
||||
field.fe_carry_mul(&x2, field.fe_relax_cast(&z2), &t1)
|
||||
// C = x_3 + z_3
|
||||
field.fe_add(field.fe_relax_cast(&z3), &x3, &z3) // (z3 unreduced)
|
||||
// CB = C * B
|
||||
field.fe_carry_mul(&x3, &t2, field.fe_relax_cast(&z3))
|
||||
// z_3 = x_1 * (DA - CB)^2
|
||||
field.fe_sub(field.fe_relax_cast(&z3), &x2, &x3) // (z3 unreduced)
|
||||
field.fe_carry_square(&z3, field.fe_relax_cast(&z3))
|
||||
field.fe_carry_mul(&z3, field.fe_relax_cast(&x1), field.fe_relax_cast(&z3))
|
||||
// x_3 = (DA + CB)^2
|
||||
field.fe_add(field.fe_relax_cast(&z2), &x2, &x3) // (z2 unreduced)
|
||||
field.fe_carry_square(&x3, field.fe_relax_cast(&z2))
|
||||
|
||||
// AA = A^2
|
||||
field.fe_carry_square(&z2, &t1)
|
||||
// BB = B^2
|
||||
field.fe_carry_square(field.fe_tighten_cast(&t1), &t2) // (t1 reduced)
|
||||
// x_2 = AA * BB
|
||||
field.fe_carry_mul(&x2, field.fe_relax_cast(&z2), &t1)
|
||||
// E = AA - BB
|
||||
field.fe_sub(&t2, &z2, field.fe_tighten_cast(&t1)) // (t1 (input) is reduced)
|
||||
// z_2 = E * (AA + a24 * E)
|
||||
// (39081 is the a24 constant of the formula above)
field.fe_carry_mul_small(field.fe_tighten_cast(&t1), &t2, 39081) // (t1 reduced)
|
||||
field.fe_add(&t1, &z2, field.fe_tighten_cast(&t1)) // (t1 (input) is reduced)
|
||||
field.fe_carry_mul(&z2, &t2, &t1)
|
||||
}
|
||||
|
||||
// Conditional swap; see text below.
|
||||
// (x_2, x_3) = cswap(swap, x_2, x_3)
|
||||
field.fe_cond_swap(&x2, &x3, swap)
|
||||
// (z_2, z_3) = cswap(swap, z_2, z_3)
|
||||
field.fe_cond_swap(&z2, &z3, swap)
|
||||
|
||||
// Return x_2 * (z_2^(p - 2))
|
||||
field.fe_carry_inv(&z2, field.fe_relax_cast(&z2))
|
||||
field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
|
||||
field.fe_to_bytes(out, &x2)
|
||||
|
||||
// Wipe every temporary that held scalar-dependent values.
field.fe_clear_vec([]^field.Tight_Field_Element{&x1, &x2, &x3, &z2, &z3})
|
||||
field.fe_clear_vec([]^field.Loose_Field_Element{&t1, &t2})
|
||||
}
|
||||
|
||||
// scalarmult "multiplies" the provided scalar and point, and writes the
// resulting point to dst.
//
// scalar must be exactly SCALAR_SIZE bytes, and point and dst exactly
// POINT_SIZE bytes; any other length fails the corresponding ensure.
// The caller's scalar is not modified: clamping is applied to a local
// copy, which is wiped (together with the local result) before returning.
scalarmult :: proc(dst, scalar, point: []byte) {
	ensure(len(scalar) == SCALAR_SIZE, "crypto/x448: invalid scalar size")
	ensure(len(point) == POINT_SIZE, "crypto/x448: invalid point size")
	ensure(len(dst) == POINT_SIZE, "crypto/x448: invalid destination point size")

	// Fixed-size working copies of the inputs.
	clamped: [SCALAR_SIZE]byte = ---
	u_coord: [POINT_SIZE]byte = ---
	copy_slice(clamped[:], scalar)
	copy_slice(u_coord[:], point)

	// "clamp" the scalar: clear the two lowest bits, set the highest bit.
	clamped[0] &= 0xfc
	clamped[SCALAR_SIZE - 1] |= 0x80

	result: [POINT_SIZE]byte = ---
	_scalarmult(&result, &clamped, &u_coord)
	copy_slice(dst, result[:])

	mem.zero_explicit(&clamped, size_of(clamped))
	mem.zero_explicit(&result, size_of(result))
}
|
||||
|
||||
// scalarmult_basepoint "multiplies" the provided scalar with the X448
// base point and writes the resulting point to dst.
//
// It is scalarmult with the point argument fixed to _BASE_POINT, so the
// same size requirements apply to dst and scalar.
scalarmult_basepoint :: proc(dst, scalar: []byte) {
	base_point := _BASE_POINT[:]
	scalarmult(dst, scalar, base_point)
}
|
||||
@@ -21,7 +21,7 @@ _mm_abs_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i {
|
||||
_mm_shuffle_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i {
|
||||
return transmute(__m128i)pshufb128(transmute(u8x16)a, transmute(u8x16)b)
|
||||
}
|
||||
@(require_results, enable_target_feature="ssse3")
|
||||
@(require_results, enable_target_feature="sse2,ssse3")
|
||||
_mm_alignr_epi8 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u32) -> __m128i {
|
||||
shift :: IMM8
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ CPU_Feature :: enum u64 {
|
||||
popcnt, // Hamming weight instruction POPCNT.
|
||||
rdrand, // RDRAND instruction (on-chip random number generator)
|
||||
rdseed, // RDSEED instruction (on-chip random number generator)
|
||||
sha, // SHA Extensions (SHA-1, SHA-224, SHA-256)
|
||||
sse2, // Streaming SIMD extension 2 (always available on amd64)
|
||||
sse3, // Streaming SIMD extension 3
|
||||
ssse3, // Supplemental streaming SIMD extension 3
|
||||
@@ -115,6 +116,7 @@ init_cpu_features :: proc "c" () {
|
||||
|
||||
_, ebx7, ecx7, edx7 := cpuid(7, 0)
|
||||
try_set(&set, .bmi1, 3, ebx7)
|
||||
try_set(&set, .sha, 29, ebx7)
|
||||
if os_supports_avx {
|
||||
try_set(&set, .avx2, 5, ebx7)
|
||||
}
|
||||
|
||||
@@ -26,12 +26,14 @@ import topological_sort "core:container/topological_sort"
|
||||
|
||||
import crypto "core:crypto"
|
||||
import aead "core:crypto/aead"
|
||||
import aegis "core:crypto/aegis"
|
||||
import aes "core:crypto/aes"
|
||||
import blake2b "core:crypto/blake2b"
|
||||
import blake2s "core:crypto/blake2s"
|
||||
import chacha20 "core:crypto/chacha20"
|
||||
import chacha20poly1305 "core:crypto/chacha20poly1305"
|
||||
import crypto_hash "core:crypto/hash"
|
||||
import deoxysii "core:crypto/deoxysii"
|
||||
import ed25519 "core:crypto/ed25519"
|
||||
import hkdf "core:crypto/hkdf"
|
||||
import hmac "core:crypto/hmac"
|
||||
@@ -48,6 +50,7 @@ import shake "core:crypto/shake"
|
||||
import sm3 "core:crypto/sm3"
|
||||
import tuplehash "core:crypto/tuplehash"
|
||||
import x25519 "core:crypto/x25519"
|
||||
import x448 "core:crypto/x448"
|
||||
|
||||
import pe "core:debug/pe"
|
||||
import trace "core:debug/trace"
|
||||
@@ -169,11 +172,13 @@ _ :: topological_sort
|
||||
_ :: crypto
|
||||
_ :: crypto_hash
|
||||
_ :: aead
|
||||
_ :: aegis
|
||||
_ :: aes
|
||||
_ :: blake2b
|
||||
_ :: blake2s
|
||||
_ :: chacha20
|
||||
_ :: chacha20poly1305
|
||||
_ :: deoxysii
|
||||
_ :: ed25519
|
||||
_ :: hmac
|
||||
_ :: hkdf
|
||||
@@ -190,6 +195,7 @@ _ :: shake
|
||||
_ :: sm3
|
||||
_ :: tuplehash
|
||||
_ :: x25519
|
||||
_ :: x448
|
||||
_ :: pe
|
||||
_ :: trace
|
||||
_ :: dynlib
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
package benchmark_core_crypto
|
||||
|
||||
import "base:runtime"
|
||||
import "core:crypto"
|
||||
import "core:testing"
|
||||
import "core:text/table"
|
||||
import "core:time"
|
||||
|
||||
import "core:crypto/aead"
|
||||
|
||||
// ITERS is the number of rounds requested for every benchmark run; it is
// also the divisor used to derive the reported per-iteration time.
@(private = "file")
|
||||
ITERS :: 10000
|
||||
// SIZES lists the payload sizes, in bytes, each algorithm is measured
// with (the IV size is added on top when sizing the input buffer).
@(private = "file")
|
||||
SIZES := []int{64, 1024, 65536}
|
||||
|
||||
// benchmark_crypto_aead measures sealing throughput for every algorithm in
// aead.Algorithm (skipping .Invalid) with each payload size in SIZES, and
// logs the results as one table with a row per (algorithm, size) pair.
@(test)
benchmark_crypto_aead :: proc(t: ^testing.T) {
	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()

	results: table.Table
	table.init(&results)
	defer table.destroy(&results)

	table.caption(&results, "AEAD")
	table.aligned_header_of_values(&results, .Right, "Algorithm", "Size", "Time", "Throughput")

	for algo, idx in aead.Algorithm {
		if algo == .Invalid {
			continue
		}
		// Separator row between algorithm groups.
		// NOTE(review): `idx > 1` presumably skips the separator before the
		// first real algorithm (index 1, after .Invalid) - confirm.
		if idx > 1 {
			table.row(&results)
		}

		name := aead.ALGORITHM_NAMES[algo]

		// Fresh random key of the size this algorithm expects.
		key := make([]byte, aead.KEY_SIZES[algo], context.temp_allocator)
		crypto.rand_bytes(key)

		// TODO: Benchmark all available implementations?
		ctx: aead.Context
		aead.init(&ctx, algo, key)

		for payload_size in SIZES {
			options := &time.Benchmark_Options{
				rounds = ITERS,
				// The input buffer carries the IV followed by the payload.
				bytes = aead.IV_SIZES[algo] + payload_size,
				setup = setup_sized_buf,
				bench = do_bench_aead,
				teardown = teardown_sized_buf,
			}
			// do_bench_aead picks the AEAD context up from context.user_ptr.
			context.user_ptr = &ctx

			err := time.benchmark(options, context.allocator)
			testing.expect(t, err == nil)

			per_iteration := options.duration / ITERS
			table.aligned_row_of_values(
				&results,
				.Right,
				name,
				table.format(&results, "%d", payload_size),
				table.format(&results, "%8M", per_iteration),
				table.format(&results, "%5.3f MiB/s", options.megabytes_per_second),
			)
		}
	}

	log_table(&results)
}
|
||||
|
||||
// do_bench_aead is the time.Benchmark_Options `bench` callback used by
// benchmark_crypto_aead.
//
// Inputs:
// - options: benchmark parameters.  options.input is split into an IV
//   (the first aead.iv_size(ctx) bytes) and the payload (the rest), which
//   is sealed in place options.rounds times.
//   NOTE(review): options.input is presumably allocated with
//   options.bytes bytes by the setup callback - confirm.
// - allocator: unused; present to match the callback signature.
// - context.user_ptr: must point at an initialized aead.Context.
//
// Returns:
// - err: always the zero value; sealing has no failure path here.
//
// On return options.count and options.processed describe exactly the work
// performed (payload bytes only, the IV is excluded).
@(private = "file")
do_bench_aead :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	tag_: [aead.MAX_TAG_SIZE]byte

	ctx := (^aead.Context)(context.user_ptr)
	iv_sz := aead.iv_size(ctx)

	iv := options.input[:iv_sz]
	buf := options.input[iv_sz:]
	tag := tag_[:aead.tag_size(ctx)]

	// Half-open range: run exactly options.rounds seal operations so the
	// timed work matches the count/processed figures reported below (an
	// inclusive `..=` range would execute one extra, unaccounted round).
	for _ in 0 ..< options.rounds {
		aead.seal_ctx(ctx, buf, tag, iv, nil, buf)
	}
	options.count = options.rounds
	options.processed = options.rounds * (options.bytes - iv_sz)

	return
}
|
||||
@@ -1,415 +0,0 @@
|
||||
package benchmark_core_crypto
|
||||
|
||||
import "base:runtime"
|
||||
import "core:encoding/hex"
|
||||
import "core:fmt"
|
||||
import "core:log"
|
||||
import "core:strings"
|
||||
import "core:testing"
|
||||
import "core:time"
|
||||
|
||||
import "core:crypto/aes"
|
||||
import "core:crypto/chacha20"
|
||||
import "core:crypto/chacha20poly1305"
|
||||
import "core:crypto/ed25519"
|
||||
import "core:crypto/poly1305"
|
||||
import "core:crypto/x25519"
|
||||
|
||||
// Cryptographic primitive benchmarks.
|
||||
|
||||
// benchmark_crypto runs every primitive benchmark in this file and logs the
// collected results as a single message when the procedure returns.
//
// The sized benchmarks (AES-CTR, ChaCha20, Poly1305, ChaCha20-Poly1305,
// AES-GCM) go through `time.benchmark`; the Ed25519 and X25519 operations
// are timed by hand over a fixed number of iterations.
@(test)
benchmark_crypto :: proc(t: ^testing.T) {
	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()

	// One benchmark run: the label to report and the input size in bytes.
	Sized_Case :: struct {
		name:  string,
		bytes: int,
	}

	// Iteration count for the hand-timed public key operations.
	iters :: 10000

	str: strings.Builder
	strings.builder_init(&str, context.allocator)
	defer {
		log.info(strings.to_string(str))
		strings.builder_destroy(&str)
	}

	{
		options := &time.Benchmark_Options {
			rounds   = 1_000,
			setup    = _setup_sized_buf,
			bench    = _benchmark_aes256_ctr,
			teardown = _teardown_sized_buf,
		}
		cases := [?]Sized_Case {
			{"AES256-CTR 64 bytes", 64},
			{"AES256-CTR 1024 bytes", 1024},
			{"AES256-CTR 65536 bytes", 65536},
		}
		for c in cases {
			options.bytes = c.bytes
			err := time.benchmark(options, context.allocator)
			testing.expect(t, err == nil, c.name)
			benchmark_print(&str, c.name, options)
		}
	}
	{
		options := &time.Benchmark_Options {
			rounds   = 1_000,
			setup    = _setup_sized_buf,
			bench    = _benchmark_chacha20,
			teardown = _teardown_sized_buf,
		}
		cases := [?]Sized_Case {
			{"ChaCha20 64 bytes", 64},
			{"ChaCha20 1024 bytes", 1024},
			{"ChaCha20 65536 bytes", 65536},
		}
		for c in cases {
			options.bytes = c.bytes
			err := time.benchmark(options, context.allocator)
			testing.expect(t, err == nil, c.name)
			benchmark_print(&str, c.name, options)
		}
	}
	{
		options := &time.Benchmark_Options {
			rounds   = 1_000,
			setup    = _setup_sized_buf,
			bench    = _benchmark_poly1305,
			teardown = _teardown_sized_buf,
		}
		cases := [?]Sized_Case {
			{"Poly1305 64 zero bytes", 64},
			{"Poly1305 1024 zero bytes", 1024},
		}
		for c in cases {
			options.bytes = c.bytes
			err := time.benchmark(options, context.allocator)
			testing.expect(t, err == nil, c.name)
			benchmark_print(&str, c.name, options)
		}
	}
	{
		options := &time.Benchmark_Options {
			rounds   = 1_000,
			setup    = _setup_sized_buf,
			bench    = _benchmark_chacha20poly1305,
			teardown = _teardown_sized_buf,
		}
		cases := [?]Sized_Case {
			{"chacha20poly1305 64 bytes", 64},
			{"chacha20poly1305 1024 bytes", 1024},
			{"chacha20poly1305 65536 bytes", 65536},
		}
		for c in cases {
			options.bytes = c.bytes
			err := time.benchmark(options, context.allocator)
			testing.expect(t, err == nil, c.name)
			benchmark_print(&str, c.name, options)
		}
	}
	{
		options := &time.Benchmark_Options {
			rounds   = 1_000,
			setup    = _setup_sized_buf,
			bench    = _benchmark_aes256_gcm,
			teardown = _teardown_sized_buf,
		}

		key := [aes.KEY_SIZE_256]byte {
			0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
			0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
			0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
			0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		}
		ctx: aes.Context_GCM
		aes.init_gcm(&ctx, key[:])

		// The GCM bench callback picks its context up from `context.user_ptr`.
		context.user_ptr = &ctx

		cases := [?]Sized_Case {
			{"AES256-GCM 64 bytes", 64},
			{"AES256-GCM 1024 bytes", 1024},
			{"AES256-GCM 65536 bytes", 65536},
		}
		for c in cases {
			options.bytes = c.bytes
			err := time.benchmark(options, context.allocator)
			testing.expect(t, err == nil, c.name)
			benchmark_print(&str, c.name, options)
		}
	}
	{
		priv_hex := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe"
		priv_raw, _ := hex.decode(transmute([]byte)(priv_hex), context.temp_allocator)

		// Private key deserialization.
		priv_key: ed25519.Private_Key
		began := time.now()
		for _ in 0 ..< iters {
			ok := ed25519.private_key_set_bytes(&priv_key, priv_raw)
			assert(ok, "private key should deserialize")
		}
		took := time.since(began)
		fmt.sbprintfln(&str,
			"ed25519.private_key_set_bytes: ~%f us/op",
			time.duration_microseconds(took) / iters,
		)

		// Public key deserialization, fed from the private key's internal
		// copy of the public key bytes.
		pub_raw := priv_key._pub_key._b[:]
		pub_key: ed25519.Public_Key
		began = time.now()
		for _ in 0 ..< iters {
			ok := ed25519.public_key_set_bytes(&pub_key, pub_raw)
			assert(ok, "public key should deserialize")
		}
		took = time.since(began)
		fmt.sbprintfln(&str,
			"ed25519.public_key_set_bytes: ~%f us/op",
			time.duration_microseconds(took) / iters,
		)

		// Signing.
		msg := "Got a job for you, 621."
		msg_raw := transmute([]byte)(msg)
		signature: [ed25519.SIGNATURE_SIZE]byte
		began = time.now()
		for _ in 0 ..< iters {
			ed25519.sign(&priv_key, msg_raw, signature[:])
		}
		took = time.since(began)
		fmt.sbprintfln(&str,
			"ed25519.sign: ~%f us/op",
			time.duration_microseconds(took) / iters,
		)

		// Verification of the signature produced above.
		began = time.now()
		for _ in 0 ..< iters {
			ok := ed25519.verify(&pub_key, msg_raw, signature[:])
			assert(ok, "signature should validate")
		}
		took = time.since(began)
		fmt.sbprintfln(&str,
			"ed25519.verify: ~%f us/op",
			time.duration_microseconds(took) / iters,
		)
	}
	{
		point_hex := "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"
		scalar_hex := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe"

		point, _ := hex.decode(transmute([]byte)(point_hex), context.temp_allocator)
		scalar, _ := hex.decode(transmute([]byte)(scalar_hex), context.temp_allocator)
		shared: [x25519.POINT_SIZE]byte = ---

		began := time.now()
		for _ in 0 ..< iters {
			x25519.scalarmult(shared[:], scalar[:], point[:])
		}
		took := time.since(began)

		fmt.sbprintfln(&str,
			"x25519.scalarmult: ~%f us/op",
			time.duration_microseconds(took) / iters,
		)
	}
}
|
||||
|
||||
// _setup_sized_buf is the `setup` callback shared by the sized benchmarks.
// It allocates `options.bytes` bytes from `allocator` into `options.input`.
//
// Returns `.Allocation_Error` when the resulting slice does not have the
// requested length, and nil otherwise.
@(private)
_setup_sized_buf :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	assert(options != nil)

	options.input = make([]u8, options.bytes, allocator)
	if len(options.input) != options.bytes {
		return .Allocation_Error
	}
	return nil
}
|
||||
|
||||
// _teardown_sized_buf is the `teardown` callback shared by the sized
// benchmarks. It releases `options.input`.
//
// Inputs:
// - options: benchmark state; must not be nil.
// - allocator: the allocator the buffer was created with. The matching
//   setup callback allocates from this same parameter, so the buffer is
//   returned to it rather than to whatever `context.allocator` happens
//   to be.
//
// Returns:
// - err: always nil.
@(private)
_teardown_sized_buf :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	assert(options != nil)

	delete(options.input, allocator)
	return nil
}
|
||||
|
||||
// _benchmark_chacha20 is the `bench` callback for the ChaCha20 benchmark.
// It XORs the keystream into `options.input` in place, `options.rounds`
// times, using a fixed key and an all-zero IV.
//
// Inputs:
// - options: benchmark state; `count` and `processed` are written on return.
// - allocator: unused, present to satisfy the callback signature.
//
// Returns:
// - err: always nil.
@(private)
_benchmark_chacha20 :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	buf := options.input
	key := [chacha20.KEY_SIZE]byte {
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
	}
	iv := [chacha20.IV_SIZE]byte {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00,
	}

	ctx: chacha20.Context = ---
	chacha20.init(&ctx, key[:], iv[:])

	// Half-open range: exactly `rounds` passes, matching the figures
	// reported below.
	for _ in 0 ..< options.rounds {
		chacha20.xor_bytes(&ctx, buf, buf)
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes
	return nil
}
|
||||
|
||||
// _benchmark_poly1305 is the `bench` callback for the Poly1305 benchmark.
// It computes a one-shot tag over `options.input`, `options.rounds` times,
// with a fixed key.
//
// Inputs:
// - options: benchmark state; `count` and `processed` are written on return.
// - allocator: unused, present to satisfy the callback signature.
//
// Returns:
// - err: always nil.
@(private)
_benchmark_poly1305 :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	buf := options.input
	key := [poly1305.KEY_SIZE]byte {
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
	}

	tag: [poly1305.TAG_SIZE]byte = ---
	// Half-open range: exactly `rounds` tags, matching the figures
	// reported below.
	for _ in 0 ..< options.rounds {
		poly1305.sum(tag[:], buf, key[:])
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes
	return nil
}
|
||||
|
||||
// _benchmark_chacha20poly1305 is the `bench` callback for the
// ChaCha20-Poly1305 benchmark. It seals `options.input` in place,
// `options.rounds` times, with a fixed key, an all-zero IV and no
// additional authenticated data.
//
// Inputs:
// - options: benchmark state; `count` and `processed` are written on return.
// - allocator: unused, present to satisfy the callback signature.
//
// Returns:
// - err: always nil.
@(private)
_benchmark_chacha20poly1305 :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	buf := options.input
	key := [chacha20.KEY_SIZE]byte {
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
	}
	iv := [chacha20.IV_SIZE]byte {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00,
	}

	ctx: chacha20poly1305.Context = ---
	chacha20poly1305.init(&ctx, key[:]) // Basically 0 overhead.

	tag: [chacha20poly1305.TAG_SIZE]byte = ---

	// Half-open range: exactly `rounds` seals, matching the figures
	// reported below.
	for _ in 0 ..< options.rounds {
		chacha20poly1305.seal(&ctx, buf, tag[:], iv[:], nil, buf)
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes
	return nil
}
|
||||
|
||||
// _benchmark_aes256_ctr is the `bench` callback for the AES-256-CTR
// benchmark. It XORs the keystream into `options.input` in place,
// `options.rounds` times, using a fixed key and an all-zero IV.
//
// Inputs:
// - options: benchmark state; `count` and `processed` are written on return.
// - allocator: unused, present to satisfy the callback signature.
//
// Returns:
// - err: always nil.
@(private)
_benchmark_aes256_ctr :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	buf := options.input
	key := [aes.KEY_SIZE_256]byte {
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
	}
	iv := [aes.CTR_IV_SIZE]byte {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	}

	ctx: aes.Context_CTR = ---
	aes.init_ctr(&ctx, key[:], iv[:])

	// Half-open range: exactly `rounds` passes, matching the figures
	// reported below.
	for _ in 0 ..< options.rounds {
		aes.xor_bytes_ctr(&ctx, buf, buf)
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes
	return nil
}
|
||||
|
||||
// _benchmark_aes256_gcm is the `bench` callback for the AES-256-GCM
// benchmark. It seals `options.input` in place, `options.rounds` times,
// with an all-zero IV and no additional authenticated data.
//
// The initialized `aes.Context_GCM` is read from `context.user_ptr`, so
// key setup is not part of the timed region.
//
// Inputs:
// - options: benchmark state; `count` and `processed` are written on return.
// - allocator: unused, present to satisfy the callback signature.
//
// Returns:
// - err: always nil.
_benchmark_aes256_gcm :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	buf := options.input
	iv: [aes.GCM_IV_SIZE]byte
	tag: [aes.GCM_TAG_SIZE]byte = ---

	ctx := (^aes.Context_GCM)(context.user_ptr)

	// Half-open range: exactly `rounds` seals, matching the figures
	// reported below.
	for _ in 0 ..< options.rounds {
		aes.seal_gcm(ctx, buf, tag[:], iv[:], nil, buf)
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes
	return nil
}
|
||||
|
||||
// benchmark_print appends a two-line summary of one finished benchmark to
// `str`: the label, round count, bytes processed and elapsed nanoseconds,
// followed by the rounds/s and MiB/s rates taken from `options`.
//
// `loc` is accepted but not used by the body.
@(private)
benchmark_print :: proc(str: ^strings.Builder, name: string, options: ^time.Benchmark_Options, loc := #caller_location) {
	elapsed_ns := time.duration_nanoseconds(options.duration)
	fmt.sbprintfln(
		str,
		"[%v] %v rounds, %v bytes processed in %v ns\n\t\t%5.3f rounds/s, %5.3f MiB/s\n",
		name,
		options.rounds,
		options.processed,
		elapsed_ns,
		options.rounds_per_second,
		options.megabytes_per_second,
	)
}
|
||||
@@ -0,0 +1,163 @@
|
||||
package benchmark_core_crypto
|
||||
|
||||
import "base:runtime"
|
||||
import "core:encoding/hex"
|
||||
import "core:testing"
|
||||
import "core:text/table"
|
||||
import "core:time"
|
||||
|
||||
import "core:crypto/ed25519"
|
||||
import "core:crypto/x25519"
|
||||
import "core:crypto/x448"
|
||||
|
||||
@(private = "file")
|
||||
ECDH_ITERS :: 10000
|
||||
@(private = "file")
|
||||
DSA_ITERS :: 10000
|
||||
|
||||
// benchmark_crypto_ecc is the test entry point for the elliptic curve
// benchmarks. It runs the ECDH table first and the signature table second;
// each helper logs its own results.
@(test)
benchmark_crypto_ecc :: proc(t: ^testing.T) {
	// Scope temporary allocations made by the helpers to this test.
	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()

	bench_ecdh()
	bench_dsa()
}
|
||||
|
||||
// bench_ecdh times the ECDH primitives (X25519, then X448) and logs one
// table with a row per algorithm: scalar-basepoint and scalar-point
// multiplication time.
@(private = "file")
bench_ecdh :: proc() {
	// Appends one result row for `algo_name`.
	add_row :: proc(tbl: ^table.Table, algo_name: string, bp, sc: time.Duration) {
		bp_cell := table.format(tbl, "%8M", bp)
		sc_cell := table.format(tbl, "%8M", sc)
		table.aligned_row_of_values(tbl, .Right, algo_name, bp_cell, sc_cell)
	}

	tbl: table.Table
	table.init(&tbl)
	defer table.destroy(&tbl)

	table.caption(&tbl, "ECDH")
	table.aligned_header_of_values(&tbl, .Right, "Algorithm", "Scalar-Basepoint", "Scalar-Point")

	x25519_bp, x25519_sc := bench_x25519()
	add_row(&tbl, "X25519", x25519_bp, x25519_sc)

	x448_bp, x448_sc := bench_x448()
	add_row(&tbl, "X448", x448_bp, x448_sc)

	log_table(&tbl)
}
|
||||
|
||||
// bench_x25519 measures X25519 over `ECDH_ITERS` iterations per operation.
//
// Returns:
// - bp: average time of one scalar-basepoint multiplication.
// - sc: average time of one scalar-point multiplication.
@(private = "file")
bench_x25519 :: proc() -> (bp, sc: time.Duration) {
	point_hex := "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"
	scalar_hex := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe"

	point, _ := hex.decode(transmute([]byte)(point_hex), context.temp_allocator)
	scalar, _ := hex.decode(transmute([]byte)(scalar_hex), context.temp_allocator)
	result: [x25519.POINT_SIZE]byte = ---

	// Scalar times the curve basepoint.
	began := time.tick_now()
	for _ in 0 ..< ECDH_ITERS {
		x25519.scalarmult_basepoint(result[:], scalar[:])
	}
	bp = time.tick_since(began) / ECDH_ITERS

	// Scalar times an arbitrary point.
	began = time.tick_now()
	for _ in 0 ..< ECDH_ITERS {
		x25519.scalarmult(result[:], scalar[:], point[:])
	}
	sc = time.tick_since(began) / ECDH_ITERS

	return
}
|
||||
|
||||
// bench_x448 measures X448 over `ECDH_ITERS` iterations per operation.
//
// Returns:
// - bp: average time of one scalar-basepoint multiplication.
// - sc: average time of one scalar-point multiplication.
@(private = "file")
bench_x448 :: proc() -> (bp, sc: time.Duration) {
	point_hex := "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"
	scalar_hex := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe"

	point, _ := hex.decode(transmute([]byte)(point_hex), context.temp_allocator)
	scalar, _ := hex.decode(transmute([]byte)(scalar_hex), context.temp_allocator)
	result: [x448.POINT_SIZE]byte = ---

	// Scalar times the curve basepoint.
	began := time.tick_now()
	for _ in 0 ..< ECDH_ITERS {
		x448.scalarmult_basepoint(result[:], scalar[:])
	}
	bp = time.tick_since(began) / ECDH_ITERS

	// Scalar times an arbitrary point.
	began = time.tick_now()
	for _ in 0 ..< ECDH_ITERS {
		x448.scalarmult(result[:], scalar[:], point[:])
	}
	sc = time.tick_since(began) / ECDH_ITERS

	return
}
|
||||
|
||||
// bench_dsa times the signature primitives and logs one table with a row
// per (algorithm, operation) pair. Only Ed25519 is measured here.
@(private = "file")
bench_dsa :: proc() {
	// Appends one result row for operation `op` of `algo_name`.
	add_row :: proc(tbl: ^table.Table, algo_name, op: string, t: time.Duration) {
		time_cell := table.format(tbl, "%8M", t)
		table.aligned_row_of_values(tbl, .Right, algo_name, op, time_cell)
	}

	tbl: table.Table
	table.init(&tbl)
	defer table.destroy(&tbl)

	table.caption(&tbl, "ECDSA/EdDSA")
	table.aligned_header_of_values(&tbl, .Right, "Algorithm", "Op", "Time")

	set_key_time, sign_time, verify_time := bench_ed25519()
	add_row(&tbl, "ed25519", "private_key_set_bytes", set_key_time)
	add_row(&tbl, "ed25519", "sign", sign_time)
	add_row(&tbl, "ed25519", "verify", verify_time)

	log_table(&tbl)
}
|
||||
|
||||
// bench_ed25519 measures Ed25519 over `DSA_ITERS` iterations per operation.
//
// Returns:
// - sk: average time to deserialize the private key.
// - sig: average time to sign a short fixed message.
// - verif: average time to verify that signature.
@(private = "file")
bench_ed25519 :: proc() -> (sk, sig, verif: time.Duration) {
	priv_hex := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe"
	priv_raw, _ := hex.decode(transmute([]byte)(priv_hex), context.temp_allocator)

	// Private key deserialization.
	priv_key: ed25519.Private_Key
	began := time.tick_now()
	for _ in 0 ..< DSA_ITERS {
		loaded := ed25519.private_key_set_bytes(&priv_key, priv_raw)
		assert(loaded, "private key should deserialize")
	}
	sk = time.tick_since(began) / DSA_ITERS

	// The public key is set up once, untimed, from the private key's
	// internal copy of the public key bytes.
	pub_raw := priv_key._pub_key._b[:]
	pub_key: ed25519.Public_Key
	pub_loaded := ed25519.public_key_set_bytes(&pub_key, pub_raw)
	assert(pub_loaded, "public key should deserialize")

	msg := "Got a job for you, 621."
	msg_raw := transmute([]byte)(msg)
	signature: [ed25519.SIGNATURE_SIZE]byte

	// Signing.
	began = time.tick_now()
	for _ in 0 ..< DSA_ITERS {
		ed25519.sign(&priv_key, msg_raw, signature[:])
	}
	sig = time.tick_since(began) / DSA_ITERS

	// Verification of the signature produced above.
	began = time.tick_now()
	for _ in 0 ..< DSA_ITERS {
		valid := ed25519.verify(&pub_key, msg_raw, signature[:])
		assert(valid, "signature should validate")
	}
	verif = time.tick_since(began) / DSA_ITERS

	return
}
|
||||
@@ -0,0 +1,101 @@
|
||||
package benchmark_core_crypto
|
||||
|
||||
import "base:runtime"
|
||||
import "core:testing"
|
||||
import "core:text/table"
|
||||
import "core:time"
|
||||
|
||||
import "core:crypto/hash"
|
||||
|
||||
@(private = "file")
|
||||
ITERS :: 10000
|
||||
@(private = "file")
|
||||
SIZES := []int{64, 1024, 65536}
|
||||
|
||||
// benchmark_crypto_hash benchmarks the commonly used hash algorithms at
// every size in `SIZES` and logs one table with a row per
// (algorithm, size) pair.
@(test)
benchmark_crypto_hash :: proc(t: ^testing.T) {
	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()

	tbl: table.Table
	table.init(&tbl)
	defer table.destroy(&tbl)

	table.caption(&tbl, "Hash")
	table.aligned_header_of_values(&tbl, .Right, "Algorithm", "Size", "Time", "Throughput")

	for algo, idx in hash.Algorithm {
		// Algorithms that are not benchmarked:
		// - the `.Invalid` sentinel,
		// - legacy Keccak (not worth using over SHA3),
		// - MD5 and SHA1 (not worth using at all),
		// - the uncommon SHA2/SHA3 output sizes,
		// - SM3.
		#partial switch algo {
		case .Invalid,
		     .Legacy_KECCAK_224, .Legacy_KECCAK_256, .Legacy_KECCAK_384, .Legacy_KECCAK_512,
		     .Insecure_MD5, .Insecure_SHA1,
		     .SHA224, .SHA384, .SHA3_224, .SHA3_384,
		     .SM3:
			continue
		}

		// Empty separator row ahead of every benchmarked algorithm whose
		// enumeration index is above 1.
		if idx > 1 {
			table.row(&tbl)
		}

		name := hash.ALGORITHM_NAMES[algo]

		for n in SIZES {
			// The bench callback reads the algorithm through
			// `context.user_ptr`; the loop variable is copied so that
			// its address can be taken.
			selected := algo
			context.user_ptr = &selected

			opts := time.Benchmark_Options{
				rounds   = ITERS,
				bytes    = n,
				setup    = setup_sized_buf,
				bench    = do_bench_hash,
				teardown = teardown_sized_buf,
			}

			err := time.benchmark(&opts, context.allocator)
			testing.expect(t, err == nil)

			per_iter := opts.duration / ITERS
			table.aligned_row_of_values(
				&tbl,
				.Right,
				name,
				table.format(&tbl, "%d", n),
				table.format(&tbl, "%8M", per_iter),
				table.format(&tbl, "%5.3f MiB/s", opts.megabytes_per_second),
			)
		}
	}

	log_table(&tbl)
}
|
||||
|
||||
// do_bench_hash is the `bench` callback for the hash benchmark. It hashes
// `options.input` in one shot, `options.rounds` times.
//
// The algorithm to use is read as a `hash.Algorithm` through
// `context.user_ptr`.
//
// Inputs:
// - options: benchmark state; `count` and `processed` are written on return.
// - allocator: unused, present to satisfy the callback signature.
//
// Returns:
// - err: always the zero value.
@(private = "file")
do_bench_hash :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	digest_: [hash.MAX_DIGEST_SIZE]byte

	buf := options.input
	algo := (^hash.Algorithm)(context.user_ptr)^
	// Trim the scratch buffer to the digest size of the chosen algorithm.
	digest := digest_[:hash.DIGEST_SIZES[algo]]

	// Half-open range: exactly `rounds` digests, matching the figures
	// reported below.
	for _ in 0 ..< options.rounds {
		hash.hash_bytes_to_buffer(algo, buf, digest)
	}
	options.count = options.rounds
	options.processed = options.rounds * (options.bytes)

	return
}
|
||||
@@ -0,0 +1,191 @@
|
||||
package benchmark_core_crypto
|
||||
|
||||
import "base:runtime"
|
||||
import "core:testing"
|
||||
import "core:text/table"
|
||||
import "core:time"
|
||||
|
||||
import "core:crypto/hmac"
|
||||
import "core:crypto/kmac"
|
||||
import "core:crypto/poly1305"
|
||||
|
||||
@(private = "file")
|
||||
ITERS :: 10000
|
||||
@(private = "file")
|
||||
SIZES := []int{64, 1024, 65536}
|
||||
@(private = "file")
|
||||
KMAC_KEY_SIZES := []int{128, 256}
|
||||
|
||||
// benchmark_crypto_mac benchmarks HMAC-SHA256, KMAC (once per entry in
// `KMAC_KEY_SIZES`) and Poly1305 at every size in `SIZES`, and logs one
// table with a row per (algorithm, size) pair.
@(test)
benchmark_crypto_mac :: proc(t: ^testing.T) {
	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()

	tbl: table.Table
	table.init(&tbl)
	defer table.destroy(&tbl)

	table.caption(&tbl, "MAC")
	table.aligned_header_of_values(&tbl, .Right, "Algorithm", "Size", "Time", "Throughput")

	// HMAC-SHA256
	for n in SIZES {
		opts := time.Benchmark_Options{
			rounds   = ITERS,
			bytes    = n,
			setup    = setup_sized_buf,
			bench    = do_bench_hmac_sha_256,
			teardown = teardown_sized_buf,
		}

		err := time.benchmark(&opts, context.allocator)
		testing.expect(t, err == nil)

		per_iter := opts.duration / ITERS
		table.aligned_row_of_values(
			&tbl,
			.Right,
			"HMAC-SHA256",
			table.format(&tbl, "%d", n),
			table.format(&tbl, "%8M", per_iter),
			table.format(&tbl, "%5.3f MiB/s", opts.megabytes_per_second),
		)
	}

	table.row(&tbl)

	// KMAC, with an empty separator row between the variants.
	for strength, idx in KMAC_KEY_SIZES {
		if idx > 0 {
			table.row(&tbl)
		}

		for n in SIZES {
			opts := time.Benchmark_Options{
				rounds    = ITERS,
				bytes     = n,
				// `processed` carries the security strength into the
				// bench callback, which overwrites it with the real
				// byte count before returning.
				processed = strength,
				setup     = setup_sized_buf,
				bench     = do_bench_kmac,
				teardown  = teardown_sized_buf,
			}

			err := time.benchmark(&opts, context.allocator)
			testing.expect(t, err == nil)

			per_iter := opts.duration / ITERS
			table.aligned_row_of_values(
				&tbl,
				.Right,
				table.format(&tbl, "KMAC%d", strength),
				table.format(&tbl, "%d", n),
				table.format(&tbl, "%8M", per_iter),
				table.format(&tbl, "%5.3f MiB/s", opts.megabytes_per_second),
			)
		}
	}

	table.row(&tbl)

	// Poly1305
	for n in SIZES {
		opts := time.Benchmark_Options{
			rounds   = ITERS,
			bytes    = n,
			setup    = setup_sized_buf,
			bench    = do_bench_poly1305,
			teardown = teardown_sized_buf,
		}

		err := time.benchmark(&opts, context.allocator)
		testing.expect(t, err == nil)

		per_iter := opts.duration / ITERS
		table.aligned_row_of_values(
			&tbl,
			.Right,
			"poly1305",
			table.format(&tbl, "%d", n),
			table.format(&tbl, "%8M", per_iter),
			table.format(&tbl, "%5.3f MiB/s", opts.megabytes_per_second),
		)
	}

	log_table(&tbl)
}
|
||||
|
||||
// do_bench_hmac_sha_256 is the `bench` callback for the HMAC-SHA256
// benchmark. It computes a one-shot tag over `options.input`,
// `options.rounds` times, with a fixed 32-byte key.
//
// Inputs:
// - options: benchmark state; `count` and `processed` are written on return.
// - allocator: unused, present to satisfy the callback signature.
//
// Returns:
// - err: always the zero value.
@(private = "file")
do_bench_hmac_sha_256 :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	buf := options.input
	key := [32]byte {
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
	}

	tag: [32]byte = ---
	// Half-open range: exactly `rounds` tags, matching the figures
	// reported below.
	for _ in 0 ..< options.rounds {
		hmac.sum(.SHA256, tag[:], buf, key[:])
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes

	return
}
|
||||
|
||||
// do_bench_kmac is the `bench` callback for the KMAC benchmark. It computes
// a one-shot tag over `options.input`, `options.rounds` times, with a fixed
// key and no domain separator.
//
// Inputs:
// - options: benchmark state. On entry `options.processed` holds the
//   security strength in bits (the caller passes it through that field);
//   on return `count` and `processed` hold the usual benchmark figures.
// - allocator: unused, present to satisfy the callback signature.
//
// Returns:
// - err: always the zero value.
@(private = "file")
do_bench_kmac :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	buf := options.input
	key := [kmac.MIN_KEY_SIZE_256]byte {
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
	}
	// Read the strength before `processed` is overwritten below.
	sec_strength := options.processed

	tag: [32]byte = ---
	// Half-open range: exactly `rounds` tags, matching the figures
	// reported below. The tag length is the strength converted to bytes.
	for _ in 0 ..< options.rounds {
		kmac.sum(sec_strength, tag[:sec_strength/8], buf, key[:], nil)
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes

	return
}
|
||||
|
||||
// do_bench_poly1305 is the `bench` callback for the Poly1305 benchmark.
// It computes a one-shot tag over `options.input`, `options.rounds` times,
// with a fixed key.
//
// Inputs:
// - options: benchmark state; `count` and `processed` are written on return.
// - allocator: unused, present to satisfy the callback signature.
//
// Returns:
// - err: always the zero value.
@(private = "file")
do_bench_poly1305 :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	buf := options.input
	key := [poly1305.KEY_SIZE]byte {
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
	}

	tag: [poly1305.TAG_SIZE]byte = ---
	// Half-open range: exactly `rounds` tags, matching the figures
	// reported below.
	for _ in 0 ..< options.rounds {
		poly1305.sum(tag[:], buf, key[:])
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes

	return
}
|
||||
@@ -0,0 +1,145 @@
|
||||
package benchmark_core_crypto
|
||||
|
||||
import "base:runtime"
|
||||
import "core:crypto"
|
||||
import "core:testing"
|
||||
import "core:text/table"
|
||||
import "core:time"
|
||||
|
||||
import "core:crypto/aes"
|
||||
import "core:crypto/chacha20"
|
||||
|
||||
@(private = "file")
|
||||
ITERS :: 10000
|
||||
@(private = "file")
|
||||
SIZES := []int{64, 1024, 65536}
|
||||
@(private = "file")
|
||||
AES_CTR_KEY_SIZES := []int{128, 192, 256}
|
||||
|
||||
// benchmark_crypto_stream benchmarks AES-CTR (once per entry in
// `AES_CTR_KEY_SIZES`) and ChaCha20 at every size in `SIZES`, and logs one
// table with a row per (algorithm, size) pair.
//
// Each cipher context is initialized once with a random key and IV and is
// handed to the bench callback through `context.user_ptr`.
@(test)
benchmark_crypto_stream :: proc(t: ^testing.T) {
	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()

	tbl: table.Table
	table.init(&tbl)
	defer table.destroy(&tbl)

	table.caption(&tbl, "Stream Cipher")
	table.aligned_header_of_values(&tbl, .Right, "Algorithm", "Size", "Time", "Throughput")

	// AES-CTR, with an empty separator row between the key sizes.
	for key_bits, idx in AES_CTR_KEY_SIZES {
		if idx > 0 {
			table.row(&tbl)
		}

		key := make([]byte, key_bits/8, context.temp_allocator)
		iv := make([]byte, aes.CTR_IV_SIZE, context.temp_allocator)
		crypto.rand_bytes(key)
		crypto.rand_bytes(iv)

		ctx: aes.Context_CTR
		aes.init_ctr(&ctx, key, iv)

		for n in SIZES {
			context.user_ptr = &ctx

			opts := time.Benchmark_Options{
				rounds   = ITERS,
				bytes    = n,
				setup    = setup_sized_buf,
				bench    = do_bench_aes_ctr,
				teardown = teardown_sized_buf,
			}

			err := time.benchmark(&opts, context.allocator)
			testing.expect(t, err == nil)

			per_iter := opts.duration / ITERS
			table.aligned_row_of_values(
				&tbl,
				.Right,
				table.format(&tbl, "AES%d-CTR", key_bits),
				table.format(&tbl, "%d", n),
				table.format(&tbl, "%8M", per_iter),
				table.format(&tbl, "%5.3f MiB/s", opts.megabytes_per_second),
			)
		}
	}

	table.row(&tbl)

	// ChaCha20
	{
		key := make([]byte, chacha20.KEY_SIZE, context.temp_allocator)
		iv := make([]byte, chacha20.IV_SIZE, context.temp_allocator)
		crypto.rand_bytes(key)
		crypto.rand_bytes(iv)

		ctx: chacha20.Context
		chacha20.init(&ctx, key, iv)

		for n in SIZES {
			context.user_ptr = &ctx

			opts := time.Benchmark_Options{
				rounds   = ITERS,
				bytes    = n,
				setup    = setup_sized_buf,
				bench    = do_bench_chacha20,
				teardown = teardown_sized_buf,
			}

			err := time.benchmark(&opts, context.allocator)
			testing.expect(t, err == nil)

			per_iter := opts.duration / ITERS
			table.aligned_row_of_values(
				&tbl,
				.Right,
				"chacha20",
				table.format(&tbl, "%d", n),
				table.format(&tbl, "%8M", per_iter),
				table.format(&tbl, "%5.3f MiB/s", opts.megabytes_per_second),
			)
		}
	}

	log_table(&tbl)
}
|
||||
|
||||
// do_bench_aes_ctr is the `bench` callback for the AES-CTR benchmark. It
// XORs the keystream into `options.input` in place, `options.rounds` times.
//
// The initialized `aes.Context_CTR` is read from `context.user_ptr`.
//
// Inputs:
// - options: benchmark state; `count` and `processed` are written on return.
// - allocator: unused, present to satisfy the callback signature.
//
// Returns:
// - err: always the zero value.
@(private = "file")
do_bench_aes_ctr :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	ctx := (^aes.Context_CTR)(context.user_ptr)

	buf := options.input

	// Half-open range: exactly `rounds` passes, matching the figures
	// reported below.
	for _ in 0 ..< options.rounds {
		aes.xor_bytes_ctr(ctx, buf, buf)
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes

	return
}
|
||||
|
||||
// do_bench_chacha20 is the `bench` callback for the ChaCha20 benchmark. It
// XORs the keystream into `options.input` in place, `options.rounds` times.
//
// The initialized `chacha20.Context` is read from `context.user_ptr`.
//
// Inputs:
// - options: benchmark state; `count` and `processed` are written on return.
// - allocator: unused, present to satisfy the callback signature.
//
// Returns:
// - err: always the zero value.
@(private = "file")
do_bench_chacha20 :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	ctx := (^chacha20.Context)(context.user_ptr)

	buf := options.input

	// Half-open range: exactly `rounds` passes, matching the figures
	// reported below.
	for _ in 0 ..< options.rounds {
		chacha20.xor_bytes(ctx, buf, buf)
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes

	return
}
|
||||
@@ -0,0 +1,50 @@
|
||||
package benchmark_core_crypto
|
||||
|
||||
import "core:crypto"
|
||||
import "core:fmt"
|
||||
import "core:log"
|
||||
import "core:strings"
|
||||
import "core:text/table"
|
||||
import "core:time"
|
||||
|
||||
// log_table renders `tbl` as a plain-text table into a temporary string
// builder and emits the result as a single `log.info` message. A newline
// is written ahead of the table.
@(private)
log_table :: #force_inline proc(tbl: ^table.Table) {
	out := strings.builder_make()
	defer strings.builder_destroy(&out)

	fmt.sbprintln(&out)
	table.write_plain_table(strings.to_writer(&out), tbl)

	log.info(strings.to_string(out))
}
|
||||
|
||||
// setup_sized_buf is the `setup` callback shared by the sized benchmarks.
// It allocates `options.bytes` bytes from `allocator` into `options.input`
// and, when the buffer is non-empty, fills it with random bytes.
//
// Returns `.Allocation_Error` when the resulting slice does not have the
// requested length, and nil otherwise.
@(private)
setup_sized_buf :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	assert(options != nil)

	options.input = make([]u8, options.bytes, allocator)
	if len(options.input) != 0 {
		crypto.rand_bytes(options.input)
	}

	if len(options.input) != options.bytes {
		return .Allocation_Error
	}
	return nil
}
|
||||
|
||||
// teardown_sized_buf is the `teardown` callback shared by the sized
// benchmarks. It releases `options.input`.
//
// Inputs:
// - options: benchmark state; must not be nil.
// - allocator: the allocator the buffer was created with. The matching
//   setup callback allocates from this same parameter, so the buffer is
//   returned to it rather than to whatever `context.allocator` happens
//   to be.
//
// Returns:
// - err: always nil.
@(private)
teardown_sized_buf :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	assert(options != nil)

	delete(options.input, allocator)
	return nil
}
|
||||
@@ -1,7 +1,10 @@
|
||||
package test_core_crypto
|
||||
|
||||
import "base:runtime"
|
||||
import "core:crypto/aes"
|
||||
import "core:crypto/aegis"
|
||||
import "core:crypto/aead"
|
||||
import "core:crypto/deoxysii"
|
||||
import "core:encoding/hex"
|
||||
import "core:testing"
|
||||
|
||||
@@ -17,6 +20,14 @@ test_aead :: proc(t: ^testing.T) {
|
||||
for impl in supported_chacha_impls() {
|
||||
append(&chacha_impls, impl)
|
||||
}
|
||||
aegis_impls := make([dynamic]aead.Implementation, context.temp_allocator)
|
||||
for impl in supported_aegis_impls() {
|
||||
append(&aegis_impls, impl)
|
||||
}
|
||||
deoxysii_impls := make([dynamic]aead.Implementation, context.temp_allocator)
|
||||
for impl in supported_deoxysii_impls() {
|
||||
append(&deoxysii_impls, impl)
|
||||
}
|
||||
impls := [aead.Algorithm][dynamic]aead.Implementation{
|
||||
.Invalid = nil,
|
||||
.AES_GCM_128 = aes_impls,
|
||||
@@ -24,6 +35,11 @@ test_aead :: proc(t: ^testing.T) {
|
||||
.AES_GCM_256 = aes_impls,
|
||||
.CHACHA20POLY1305 = chacha_impls,
|
||||
.XCHACHA20POLY1305 = chacha_impls,
|
||||
.AEGIS_128L = aegis_impls,
|
||||
.AEGIS_128L_256 = aegis_impls,
|
||||
.AEGIS_256 = aegis_impls,
|
||||
.AEGIS_256_256 = aegis_impls,
|
||||
.DEOXYS_II_256 = deoxysii_impls,
|
||||
}
|
||||
|
||||
test_vectors := []struct{
|
||||
@@ -224,6 +240,263 @@ test_aead :: proc(t: ^testing.T) {
|
||||
"bd6d179d3e83d43b9576579493c0e939572a1700252bfaccbed2902c21396cbb731c7f1b0b4aa6440bf3a82f4eda7e39ae64c6708c54c216cb96b72e1213b4522f8c9ba40db5d945b11b69b982c1bb9e3f3fac2bc369488f76b2383565d3fff921f9664c97637da9768812f615c68b13b52e",
|
||||
"c0875924c1c7987947deafd8780acf49",
|
||||
},
|
||||
// AEGIS-128L
|
||||
// https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-11.txt
|
||||
{
|
||||
.AEGIS_128L,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"",
|
||||
"00000000000000000000000000000000",
|
||||
"c1c0e58bd913006feba00f4b3cc3594e",
|
||||
"abe0ece80c24868a226a35d16bdae37a",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L_256,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"",
|
||||
"00000000000000000000000000000000",
|
||||
"c1c0e58bd913006feba00f4b3cc3594e",
|
||||
"25835bfbb21632176cf03840687cb968cace4617af1bd0f7d064c639a5c79ee4",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"c2b879a67def9d74e6c14f708bbcc9b4",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L_256,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"1360dc9db8ae42455f6e5b6a9d488ea4f2184c4e12120249335c4ee84bafe25d",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f",
|
||||
"79d94593d8c2119d7e8fd9b8fc77845c5c077a05b2528b6ac54b563aed8efe84",
|
||||
"cc6f3372f6aa1bb82388d695c3962d9a",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L_256,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f",
|
||||
"79d94593d8c2119d7e8fd9b8fc77845c5c077a05b2528b6ac54b563aed8efe84",
|
||||
"022cb796fe7e0ae1197525ff67e309484cfbab6528ddef89f17d74ef8ecd82b3",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d",
|
||||
"79d94593d8c2119d7e8fd9b8fc77",
|
||||
"5c04b3dba849b2701effbe32c7f0fab7",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L_256,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d",
|
||||
"79d94593d8c2119d7e8fd9b8fc77",
|
||||
"86f1b80bfb463aba711d15405d094baf4a55a15dbfec81a76f35ed0b9c8b04ac",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829",
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637",
|
||||
"b31052ad1cca4e291abcf2df3502e6bdb1bfd6db36798be3607b1f94d34478aa7ede7f7a990fec10",
|
||||
"7542a745733014f9474417b337399507",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L_256,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829",
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637",
|
||||
"b31052ad1cca4e291abcf2df3502e6bdb1bfd6db36798be3607b1f94d34478aa7ede7f7a990fec10",
|
||||
"b91e2947a33da8bee89b6794e647baf0fc835ff574aca3fc27c33be0db2aff98",
|
||||
},
|
||||
// AEGIS-256
|
||||
// https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-11.txt
|
||||
{
|
||||
.AEGIS_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"",
|
||||
"00000000000000000000000000000000",
|
||||
"754fc3d8c973246dcc6d741412a4b236",
|
||||
"3fe91994768b332ed7f570a19ec5896e",
|
||||
},
|
||||
{
|
||||
.AEGIS_256_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"",
|
||||
"00000000000000000000000000000000",
|
||||
"754fc3d8c973246dcc6d741412a4b236",
|
||||
"1181a1d18091082bf0266f66297d167d2e68b845f61a3b0527d31fc7b7b89f13",
|
||||
},
|
||||
{
|
||||
.AEGIS_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"e3def978a0f054afd1e761d7553afba3",
|
||||
},
|
||||
{
|
||||
.AEGIS_256_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"6a348c930adbd654896e1666aad67de989ea75ebaa2b82fb588977b1ffec864a",
|
||||
},
|
||||
{
|
||||
.AEGIS_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f",
|
||||
"f373079ed84b2709faee373584585d60accd191db310ef5d8b11833df9dec711",
|
||||
"8d86f91ee606e9ff26a01b64ccbdd91d",
|
||||
},
|
||||
{
|
||||
.AEGIS_256_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f",
|
||||
"f373079ed84b2709faee373584585d60accd191db310ef5d8b11833df9dec711",
|
||||
"b7d28d0c3c0ebd409fd22b44160503073a547412da0854bfb9723020dab8da1a",
|
||||
},
|
||||
{
|
||||
.AEGIS_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d",
|
||||
"f373079ed84b2709faee37358458",
|
||||
"c60b9c2d33ceb058f96e6dd03c215652",
|
||||
},
|
||||
{
|
||||
.AEGIS_256_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d",
|
||||
"f373079ed84b2709faee37358458",
|
||||
"8c1cc703c81281bee3f6d9966e14948b4a175b2efbdc31e61a98b4465235c2d9",
|
||||
},
|
||||
{
|
||||
.AEGIS_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829",
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637",
|
||||
"57754a7d09963e7c787583a2e7b859bb24fa1e04d49fd550b2511a358e3bca252a9b1b8b30cc4a67",
|
||||
"ab8a7d53fd0e98d727accca94925e128",
|
||||
},
|
||||
{
|
||||
.AEGIS_256_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829",
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637",
|
||||
"57754a7d09963e7c787583a2e7b859bb24fa1e04d49fd550b2511a358e3bca252a9b1b8b30cc4a67",
|
||||
"a3aca270c006094d71c20e6910b5161c0826df233d08919a566ec2c05990f734",
|
||||
},
|
||||
// Deoxys-II-256
|
||||
{
|
||||
.DEOXYS_II_256,
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f",
|
||||
"202122232425262728292a2b2c2d2e",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"2b97bd77712f0cde975309959dfe1d7c",
|
||||
},
|
||||
{
|
||||
.DEOXYS_II_256,
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f",
|
||||
"202122232425262728292a2b2c2d2e",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f",
|
||||
"",
|
||||
"",
|
||||
"54708ae5565a71f147bdb94d7ba3aed7",
|
||||
},
|
||||
{
|
||||
.DEOXYS_II_256,
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f",
|
||||
"202122232425262728292a2b2c2d2e",
|
||||
"f495c9c03d29989695d98ff5d430650125805c1e0576d06f26cbda42b1f82238b8",
|
||||
"",
|
||||
"",
|
||||
"3277689dc4208cc1ff59d15434a1baf1",
|
||||
},
|
||||
{
|
||||
.DEOXYS_II_256,
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f",
|
||||
"202122232425262728292a2b2c2d2e",
|
||||
"",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f",
|
||||
"9da20db1c2781f6669257d87e2a4d9be1970f7581bef2c995e1149331e5e8cc1",
|
||||
"92ce3aec3a4b72ff9eab71c2a93492fa",
|
||||
},
|
||||
{
|
||||
.DEOXYS_II_256,
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f",
|
||||
"202122232425262728292a2b2c2d2e",
|
||||
"",
|
||||
"15cd77732f9d0c4c6e581ef400876ad9188c5b8850ebd38224da95d7cdc99f7acc",
|
||||
"e5ffd2abc5b459a73667756eda6443ede86c0883fc51dd75d22bb14992c684618c",
|
||||
"5fa78d57308f19d0252072ee39df5ecc",
|
||||
},
|
||||
{
|
||||
.DEOXYS_II_256,
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f",
|
||||
"202122232425262728292a2b2c2d2e",
|
||||
"000102030405060708090a0b0c0d0e0f",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f",
|
||||
"109f8a168b36dfade02628a9e129d5257f03cc7912aefa79729b67b186a2b08f",
|
||||
"6549f9bf10acba0a451dbb2484a60d90",
|
||||
},
|
||||
{
|
||||
.DEOXYS_II_256,
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f",
|
||||
"202122232425262728292a2b2c2d2e",
|
||||
"000102030405060708090a0b0c0d0e0f10",
|
||||
"422857fb165af0a35c03199fb895604dca9cea6d788954962c419e0d5c225c0327",
|
||||
"7d772203fa38be296d8d20d805163130c69aba8cb16ed845c2296c61a8f34b394e",
|
||||
"0b3f10e3933c78190b24b33008bf80e9",
|
||||
},
|
||||
{
|
||||
.DEOXYS_II_256,
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f",
|
||||
"202122232425262728292a2b2c2d2e",
|
||||
"3290bb8441279dc6083a43e9048c3dc08966ab30d7a6b35759e7a13339f124918f3b5ab1affa65e6c0e3680eb33a6ec82424ab1ce5a40b8654e13d845c29b13896a1466a75fc875acba4527ded37ed00c600a357c9a6e586c74cf3d85cd3258c813218f319d12b82480e5124ff19ec00bda1fbb8bd25eeb3de9fcbf3296deba250caf7e9f4ef0be1918e24221dd0be888c59c166ad761d7b58462a1b1d44b04265b45827172c133dd5b6c870b9af7b21368d12a88f4efa1751047543d584382d9ec22e7550d50ecddba27d1f65453f1f3398de54ee8c1f4ac8e16f5523d89641e99a632380af0f0b1e6b0e192ec29bf1d8714978ff9fbfb93604142393e9a82c3aaebbbe15e3b4e5cfd18bdfe309315c9f9f830deebe2edcdc24f8eca90fda49f6646e789c5041fb5be933fa843278e95f3a54f8eb41f14777ea949d5ea442b01249e64816151a325769e264ed4acd5c3f21700ca755d5bc0c2c5f9453419510bc74f2d71621dcecb9efc9c24791b4bb560fb70a8231521d6560af89d8d50144d9c080863f043781153bcd59030e60bd17a6d7aa083211b67b581fa4f74cce4d030d1e8f9429fd725c110040d41eb6989ffb1595c72cbe3c9b78a8ab80d71a6a5283da77b89cae295bb13c14fbe466b617f4da8ad60b085e2ea153f6713ae0046aa31e0ba44e43ef36a111bf05c073a4e3624cd35f63a546f9142b35aa81b8826d",
|
||||
"83dab23b1379e090755c99079cfe918cb737e989f2d720ccaff493a744927644fec3653211fa75306a83486e5c34ecfe63870c97251a73e4b9033ae374809711b211ed5d293a592e466a81170f1d85750b5ca025ccd4579947edbae9ec132bfb1a7233ad79fae30006a6699f143893861b975226ed9d3cfb8a240be232fbf4e83755d59d20bc2faa2ea5e5b0428427485cca5e76a89fe32bdd59ab4177ad7cb1899c101e3c4f7535129591390ebdf30140846078b13867bbb2efd6cf434afe356eb18d716b21fd664c26c908496534bf2cde6d6b897799016594fb6d9f830ae5f44ccec26d42ff0d1a21b80cdbe8c8c170a5f766fad884abcc781b5b8ebc0f559bfeaa4557b04d977d51411a7f47bf437d0280cf9f92bc4f9cd6226337a492320851955adae2cafea22a89c3132dd252e4728328eda05555dff3241404341b8aa502d45c456113af42a8e91a85e4b4e9555028982ec3d144722af0eb04a6d3b8127c3040629de53f5fd187048198e8f8e8cc857afcbae45c693fec12fc2149d5e7587d0121b1717d0147f6979f75e8f085293f705c3399a6cc8df7057bf481e6c374edf0a0af7479f858045357b7fe21021c3fabdaf012652bf2e5db257bd9490ce637a81477bd3f9814a2198fdb9afa9344321f2393798670e588c47a1924d592cda3eb5a96754dfd92d87ee1ffa9d4ee586c85d7518c5d2db57d0451c33de0",
|
||||
"88294fcef65a1bdfd7baaa472816c64ef5bef2622b88c1ec5a739396157ef4935f3aa76449e391c32da28ee2857f399ac3dd95aed30cfb26cc0063cd4cd8f7431108176fbf370123856662b000a8348e5925fbb97c9ec0c737758330a7983f06b51590c1d2f5e5faaf0eb58e34e19e5fc85cec03d3926dd46a79ba7026e83dec24e07484c9103dd0cdb0edb505500caca5e1d5dbc71348cf00648821488ebaab7f9d84bbbf91b3c521dbef30110e7bd94f8dad5ab8e0cc5411ca9682d210d5d80c0c4bdbba8181789a4273d6deb80899fdcd976ca6f3a9770b54305f586a04256cfbeb4c11254e88559f294db3b9a94b80ab9f9a02cb4c0748de0af7818685521691dba5738be546dba13a56016fb8635af9dff50f25d1b17ad21707db2640a76a741e65e559b2afaaec0f37e18436bf02008f84dbd7b2698687a22376b65dc7524fca8a28709eee3f3caee3b28ed1173d1e08ee849e2ca63d2c90d555755c8fbafd5d2f4b37f06a1dbd6852ee2ffcfe79d510152e98fc4f3094f740a4aede9ee378b606d34576776bf5f1269f5385a84b3928433bfca177550ccfcd22cd0331bbc595e38c2758b2662476fa66354c4e84c7b360405aa3f5b2a48621bdca1a90c69b21789c91b5b8c568e3c741d99e22f6d7e26f2abed045f1d578b782ab4a5cf2af636d842b3012e180e4b045d8d15b057b69c92398a517053daf9be7c2935e",
|
||||
"a616f0c218e18b526cf2a3f8c115e262",
|
||||
},
|
||||
}
|
||||
for v, _ in test_vectors {
|
||||
algo_name := aead.ALGORITHM_NAMES[v.algo]
|
||||
@@ -337,3 +610,23 @@ test_aead :: proc(t: ^testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// supported_aegis_impls returns the list of implementations to exercise
// for AEGIS: always the portable one, plus the hardware one when
// `aegis.is_hardware_accelerated()` reports true. The list is allocated
// from `context.temp_allocator`.
supported_aegis_impls :: proc() -> [dynamic]aes.Implementation {
	// Capacity 2: at most the portable and hardware entries are appended.
	available := make([dynamic]aes.Implementation, 0, 2, context.temp_allocator)
	append(&available, aes.Implementation.Portable)

	if !aegis.is_hardware_accelerated() {
		return available
	}

	append(&available, aes.Implementation.Hardware)
	return available
}
|
||||
|
||||
// supported_deoxysii_impls returns the list of implementations to exercise
// for Deoxys-II: always the portable one, plus the hardware one when
// `deoxysii.is_hardware_accelerated()` reports true. The list is allocated
// from `context.temp_allocator`.
supported_deoxysii_impls :: proc() -> [dynamic]aes.Implementation {
	// Capacity 2: at most the portable and hardware entries are appended.
	available := make([dynamic]aes.Implementation, 0, 2, context.temp_allocator)
	append(&available, aes.Implementation.Portable)

	if !deoxysii.is_hardware_accelerated() {
		return available
	}

	append(&available, aes.Implementation.Hardware)
	return available
}
|
||||
|
||||
+63
@@ -7,6 +7,7 @@ import field "core:crypto/_fiat/field_curve25519"
|
||||
import "core:crypto/ed25519"
|
||||
import "core:crypto/ristretto255"
|
||||
import "core:crypto/x25519"
|
||||
import "core:crypto/x448"
|
||||
|
||||
@(test)
|
||||
test_sqrt_ratio_m1 :: proc(t: ^testing.T) {
|
||||
@@ -684,6 +685,68 @@ test_x25519 :: proc(t: ^testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// test_x448 checks `x448.scalarmult` against known scalar/point/product
// triples, and cross-checks `x448.scalarmult_basepoint` against an explicit
// multiplication by a local copy of the base point.
@(test)
test_x448 :: proc(t: ^testing.T) {
	// Local copy of the base point (first byte 5, the rest zero) so that
	// the package's own base point doesn't need to be exported.
	base_point: [56]byte
	base_point[0] = 5

	Vector :: struct {
		scalar:  string,
		point:   string,
		product: string,
	}

	vectors := []Vector {
		// Test vectors from RFC 7748
		{
			"3d262fddf9ec8e88495266fea19a34d28882acef045104d0d1aae121700a779c984c24f8cdd78fbff44943eba368f54b29259a4f1c600ad3",
			"06fce640fa3487bfda5f6cf2d5263f8aad88334cbd07437f020f08f9814dc031ddbdc38c19c6da2583fa5429db94ada18aa7a7fb4ef8a086",
			"ce3e4ff95a60dc6697da1db1d85e6afbdf79b50a2412d7546d5f239fe14fbaadeb445fc66a01b0779d98223961111e21766282f73dd96b6f",
		},
		{
			"203d494428b8399352665ddca42f9de8fef600908e0d461cb021f8c538345dd77c3e4806e25f46d3315c44e0a5b4371282dd2c8d5be3095f",
			"0fbcc2f993cd56d3305b0b7d9e55d4c1a8fb5dbb52f8e9a1e9b6201b165d015894e56c4d3570bee52fe205e28a78b91cdfbde71ce8d157db",
			"884a02576239ff7a2f2f63b2db6a9ff37047ac13568e1e30fe63c4a7ad1b3ee3a5700df34321d62077e63633c575c1c954514e99da7c179d",
		},
	}

	for vec in vectors {
		// Decode errors are ignored; the vectors above are fixed hex literals.
		scalar_bytes, _ := hex.decode(transmute([]byte)(vec.scalar), context.temp_allocator)
		point_bytes, _ := hex.decode(transmute([]byte)(vec.point), context.temp_allocator)

		// scalar * point must match the expected product.
		product: [x448.POINT_SIZE]byte
		x448.scalarmult(product[:], scalar_bytes, point_bytes)
		product_hex := string(hex.encode(product[:], context.temp_allocator))

		testing.expectf(
			t,
			product_hex == vec.product,
			"Expected %s for %s * %s, but got %s instead",
			vec.product,
			vec.scalar,
			vec.point,
			product_hex,
		)

		// Abuse the test vectors to sanity-check the scalar-basepoint
		// multiply: the dedicated routine and the generic multiply by the
		// base point must agree.
		via_basepoint, via_generic: [x448.POINT_SIZE]byte
		x448.scalarmult_basepoint(via_basepoint[:], scalar_bytes)
		x448.scalarmult(via_generic[:], scalar_bytes, base_point[:])
		via_basepoint_hex := string(hex.encode(via_basepoint[:], context.temp_allocator))
		via_generic_hex := string(hex.encode(via_generic[:], context.temp_allocator))

		testing.expectf(
			t,
			via_basepoint_hex == via_generic_hex,
			"Expected %s for %s * basepoint, but got %s instead",
			via_generic_hex,
			vec.scalar,
			via_basepoint_hex,
		)
	}
}
|
||||
|
||||
@(private)
|
||||
ge_str :: proc(ge: ^ristretto255.Group_Element) -> string {
|
||||
b: [ristretto255.ELEMENT_SIZE]byte
|
||||
Reference in New Issue
Block a user