From 1a7a6a9116c7d9ed0e9ced208d0373ea62ad46c3 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Sat, 6 Nov 2021 04:21:24 +0000 Subject: [PATCH] core/crypto: Add x25519 This package implements the X25519 key agreement scheme as specified in RFC 7748, using routines taken from fiat-crypto and Monocypher. --- core/crypto/_fiat/README.md | 35 + core/crypto/_fiat/fiat.odin | 24 + core/crypto/_fiat/field_curve25519/field.odin | 138 ++++ .../_fiat/field_curve25519/field51.odin | 616 ++++++++++++++++++ core/crypto/x25519/x25519.odin | 126 ++++ tests/core/crypto/test_core_crypto.odin | 5 + .../core/crypto/test_core_crypto_modern.odin | 95 +++ 7 files changed, 1039 insertions(+) create mode 100644 core/crypto/_fiat/README.md create mode 100644 core/crypto/_fiat/fiat.odin create mode 100644 core/crypto/_fiat/field_curve25519/field.odin create mode 100644 core/crypto/_fiat/field_curve25519/field51.odin create mode 100644 core/crypto/x25519/x25519.odin create mode 100644 tests/core/crypto/test_core_crypto_modern.odin diff --git a/core/crypto/_fiat/README.md b/core/crypto/_fiat/README.md new file mode 100644 index 000000000..cd510d442 --- /dev/null +++ b/core/crypto/_fiat/README.md @@ -0,0 +1,35 @@ +# fiat + +This package contains low level arithmetic required to implement certain +cryptographic primitives, ported from the [fiat-crypto project][1] +along with some higher-level helpers. + +## Notes + +fiat-crypto gives the choice of 3 licenses for derived works. The 1-Clause +BSD license is chosen as it is compatible with Odin's existing licensing. + +The routines are intended to be timing-safe, as long as the underlying +integer arithmetic is constant time. This is true on most systems commonly +used today, with the notable exception of WASM. + +While fiat-crypto provides output targeting both 32-bit and 64-bit +architectures, only the 64-bit versions were used, as 32-bit architectures +are becoming increasingly uncommon and irrelevant. 
+ +With the current Odin syntax, the Go output is trivially ported in most +cases and was used as the basis of the port. + +In the future, it would be better to auto-generate Odin either directly +by adding an appropriate code-gen backend written in Coq, or perhaps by +parsing the JSON output. + +As this is a port rather than autogenerated output, none of fiat-crypto's +formal verification guarantees apply, unless it is possible to prove binary +equivalence. + +For the most part, alterations to the base fiat-crypto generated code were +kept to a minimum, to aid auditability. This results in a somewhat +idiosyncratic style, and in some cases minor performance penalties. + +[1]: https://github.com/mit-plv/fiat-crypto diff --git a/core/crypto/_fiat/fiat.odin b/core/crypto/_fiat/fiat.odin new file mode 100644 index 000000000..ae9727149 --- /dev/null +++ b/core/crypto/_fiat/fiat.odin @@ -0,0 +1,24 @@ +package fiat + +// This package provides various helpers and types common to all of the +// fiat-crypto derived backends. + +// This code only works on a two's complement system. 
+#assert((-1 & 3) == 3) + +u1 :: distinct u8 +i1 :: distinct i8 + +cmovznz_u64 :: #force_inline proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) { + x1 := (u64(arg1) * 0xffffffffffffffff) + x2 := ((x1 & arg3) | ((~x1) & arg2)) + out1 = x2 + return +} + +cmovznz_u32 :: #force_inline proc "contextless" (arg1: u1, arg2, arg3: u32) -> (out1: u32) { + x1 := (u32(arg1) * 0xffffffff) + x2 := ((x1 & arg3) | ((~x1) & arg2)) + out1 = x2 + return +} diff --git a/core/crypto/_fiat/field_curve25519/field.odin b/core/crypto/_fiat/field_curve25519/field.odin new file mode 100644 index 000000000..faf8ae3f7 --- /dev/null +++ b/core/crypto/_fiat/field_curve25519/field.odin @@ -0,0 +1,138 @@ +package field_curve25519 + +import "core:crypto" +import "core:mem" + +fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element { + return transmute(^Loose_Field_Element)(arg1) +} + +fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element { + return transmute(^Tight_Field_Element)(arg1) +} + +fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) { + // Ignore the unused bit by copying the input and masking the bit off + // prior to deserialization. 
+ tmp1: [32]byte = --- + copy_slice(tmp1[:], arg1[:]) + tmp1[31] &= 127 + + _fe_from_bytes(out1, &tmp1) + + mem.zero_explicit(&tmp1, size_of(tmp1)) +} + +fe_equal :: proc "contextless" (arg1, arg2: ^Tight_Field_Element) -> int { + tmp2: [32]byte = --- + + fe_to_bytes(&tmp2, arg2) + ret := fe_equal_bytes(arg1, &tmp2) + + mem.zero_explicit(&tmp2, size_of(tmp2)) + + return ret +} + +fe_equal_bytes :: proc "contextless" (arg1: ^Tight_Field_Element, arg2: ^[32]byte) -> int { + tmp1: [32]byte = --- + + fe_to_bytes(&tmp1, arg1) + + ret := crypto.compare_constant_time(tmp1[:], arg2[:]) + + mem.zero_explicit(&tmp1, size_of(tmp1)) + + return ret +} + +fe_carry_pow2k :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, arg2: uint) { + // Special case: `arg1^(2 * 0) = 1`, though this should never happen. + if arg2 == 0 { + fe_one(out1) + return + } + + fe_carry_square(out1, arg1) + for _ in 1.. int { + // Inverse square root taken from Monocypher. + + tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, --- + + // t0 = x^((p-5)/8) + // Can be achieved with a simple double & add ladder, + // but it would be slower. 
+ fe_carry_pow2k(&tmp1, arg1, 1) + fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 2) + fe_carry_mul(&tmp2, arg1, fe_relax_cast(&tmp2)) + fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&tmp2)) + fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 1) + fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1)) + fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 5) + fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1)) + fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 10) + fe_carry_mul(&tmp2, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1)) + fe_carry_pow2k(&tmp3, fe_relax_cast(&tmp2), 20) + fe_carry_mul(&tmp2, fe_relax_cast(&tmp3), fe_relax_cast(&tmp2)) + fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 10) + fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1)) + fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 50) + fe_carry_mul(&tmp2, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1)) + fe_carry_pow2k(&tmp3, fe_relax_cast(&tmp2), 100) + fe_carry_mul(&tmp2, fe_relax_cast(&tmp3), fe_relax_cast(&tmp2)) + fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 50) + fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1)) + fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 2) + fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1) + + // quartic = x^((p-1)/4) + quartic := &tmp2 + fe_carry_square(quartic, fe_relax_cast(&tmp1)) + fe_carry_mul(quartic, fe_relax_cast(quartic), arg1) + + // Serialize quartic once to save on repeated serialization/sanitization. 
+ quartic_buf: [32]byte = --- + fe_to_bytes(&quartic_buf, quartic) + check := &tmp3 + + fe_one(check) + p1 := fe_equal_bytes(check, &quartic_buf) + fe_carry_opp(check, check) + m1 := fe_equal_bytes(check, &quartic_buf) + fe_carry_opp(check, &SQRT_M1) + ms := fe_equal_bytes(check, &quartic_buf) + + // if quartic == -1 or sqrt(-1) + // then isr = x^((p-1)/4) * sqrt(-1) + // else isr = x^((p-1)/4) + fe_carry_mul(out1, fe_relax_cast(&tmp1), fe_relax_cast(&SQRT_M1)) + fe_cond_assign(out1, &tmp1, (m1|ms) ~ 1) + + mem.zero_explicit(&tmp1, size_of(tmp1)) + mem.zero_explicit(&tmp2, size_of(tmp2)) + mem.zero_explicit(&tmp3, size_of(tmp3)) + mem.zero_explicit(&quartic_buf, size_of(quartic_buf)) + + return p1 | m1 +} + +fe_carry_inv :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { + tmp1: Tight_Field_Element + + fe_carry_square(&tmp1, arg1) + _ = fe_carry_invsqrt(&tmp1, fe_relax_cast(&tmp1)) + fe_carry_square(&tmp1, fe_relax_cast(&tmp1)) + fe_carry_mul(out1, fe_relax_cast(&tmp1), arg1) + + mem.zero_explicit(&tmp1, size_of(tmp1)) +} diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin new file mode 100644 index 000000000..e4ca98b57 --- /dev/null +++ b/core/crypto/_fiat/field_curve25519/field51.odin @@ -0,0 +1,616 @@ +// The BSD 1-Clause License (BSD-1-Clause) +// +// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file) +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL Berkeley Software Design, +// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package field_curve25519 + +// This file provides arithmetic on the field Z/(2^255-19) using +// unsaturated 64-bit integer arithmetic. It is derived primarily +// from the machine-generated Golang output from the fiat-crypto project. +// +// While the base implementation is provably correct, this implementation +// makes no such claims as the port and optimizations were done by hand. +// At some point, it may be worth adding support to fiat-crypto for +// generating Odin output. +// +// TODO: +// * When fiat-crypto supports it, using saturated 64-bit limbs +// instead of 51-bit limbs will be faster, though the gains are +// minimal unless adcx/adox/mulx are used. 
+ +import fiat "core:crypto/_fiat" +import "core:math/bits" + +Loose_Field_Element :: distinct [5]u64 +Tight_Field_Element :: distinct [5]u64 + +SQRT_M1 := Tight_Field_Element{ + 1718705420411056, + 234908883556509, + 2233514472574048, + 2117202627021982, + 765476049583133, +} + +_addcarryx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) { + x1 := ((u64(arg1) + arg2) + arg3) + x2 := (x1 & 0x7ffffffffffff) + x3 := fiat.u1((x1 >> 51)) + out1 = x2 + out2 = x3 + return +} + +_subborrowx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) { + x1 := ((i64(arg2) - i64(arg1)) - i64(arg3)) + x2 := fiat.i1((x1 >> 51)) + x3 := (u64(x1) & 0x7ffffffffffff) + out1 = x3 + out2 = (0x0 - fiat.u1(x2)) + return +} + +fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) { + x2, x1 := bits.mul_u64(arg1[4], (arg2[4] * 0x13)) + x4, x3 := bits.mul_u64(arg1[4], (arg2[3] * 0x13)) + x6, x5 := bits.mul_u64(arg1[4], (arg2[2] * 0x13)) + x8, x7 := bits.mul_u64(arg1[4], (arg2[1] * 0x13)) + x10, x9 := bits.mul_u64(arg1[3], (arg2[4] * 0x13)) + x12, x11 := bits.mul_u64(arg1[3], (arg2[3] * 0x13)) + x14, x13 := bits.mul_u64(arg1[3], (arg2[2] * 0x13)) + x16, x15 := bits.mul_u64(arg1[2], (arg2[4] * 0x13)) + x18, x17 := bits.mul_u64(arg1[2], (arg2[3] * 0x13)) + x20, x19 := bits.mul_u64(arg1[1], (arg2[4] * 0x13)) + x22, x21 := bits.mul_u64(arg1[4], arg2[0]) + x24, x23 := bits.mul_u64(arg1[3], arg2[1]) + x26, x25 := bits.mul_u64(arg1[3], arg2[0]) + x28, x27 := bits.mul_u64(arg1[2], arg2[2]) + x30, x29 := bits.mul_u64(arg1[2], arg2[1]) + x32, x31 := bits.mul_u64(arg1[2], arg2[0]) + x34, x33 := bits.mul_u64(arg1[1], arg2[3]) + x36, x35 := bits.mul_u64(arg1[1], arg2[2]) + x38, x37 := bits.mul_u64(arg1[1], arg2[1]) + x40, x39 := bits.mul_u64(arg1[1], arg2[0]) + x42, x41 := bits.mul_u64(arg1[0], arg2[4]) + x44, x43 := bits.mul_u64(arg1[0], arg2[3]) + x46, x45 := 
bits.mul_u64(arg1[0], arg2[2]) + x48, x47 := bits.mul_u64(arg1[0], arg2[1]) + x50, x49 := bits.mul_u64(arg1[0], arg2[0]) + x51, x52 := bits.add_u64(x13, x7, u64(0x0)) + x53, _ := bits.add_u64(x14, x8, u64(fiat.u1(x52))) + x55, x56 := bits.add_u64(x17, x51, u64(0x0)) + x57, _ := bits.add_u64(x18, x53, u64(fiat.u1(x56))) + x59, x60 := bits.add_u64(x19, x55, u64(0x0)) + x61, _ := bits.add_u64(x20, x57, u64(fiat.u1(x60))) + x63, x64 := bits.add_u64(x49, x59, u64(0x0)) + x65, _ := bits.add_u64(x50, x61, u64(fiat.u1(x64))) + x67 := ((x63 >> 51) | ((x65 << 13) & 0xffffffffffffffff)) + x68 := (x63 & 0x7ffffffffffff) + x69, x70 := bits.add_u64(x23, x21, u64(0x0)) + x71, _ := bits.add_u64(x24, x22, u64(fiat.u1(x70))) + x73, x74 := bits.add_u64(x27, x69, u64(0x0)) + x75, _ := bits.add_u64(x28, x71, u64(fiat.u1(x74))) + x77, x78 := bits.add_u64(x33, x73, u64(0x0)) + x79, _ := bits.add_u64(x34, x75, u64(fiat.u1(x78))) + x81, x82 := bits.add_u64(x41, x77, u64(0x0)) + x83, _ := bits.add_u64(x42, x79, u64(fiat.u1(x82))) + x85, x86 := bits.add_u64(x25, x1, u64(0x0)) + x87, _ := bits.add_u64(x26, x2, u64(fiat.u1(x86))) + x89, x90 := bits.add_u64(x29, x85, u64(0x0)) + x91, _ := bits.add_u64(x30, x87, u64(fiat.u1(x90))) + x93, x94 := bits.add_u64(x35, x89, u64(0x0)) + x95, _ := bits.add_u64(x36, x91, u64(fiat.u1(x94))) + x97, x98 := bits.add_u64(x43, x93, u64(0x0)) + x99, _ := bits.add_u64(x44, x95, u64(fiat.u1(x98))) + x101, x102 := bits.add_u64(x9, x3, u64(0x0)) + x103, _ := bits.add_u64(x10, x4, u64(fiat.u1(x102))) + x105, x106 := bits.add_u64(x31, x101, u64(0x0)) + x107, _ := bits.add_u64(x32, x103, u64(fiat.u1(x106))) + x109, x110 := bits.add_u64(x37, x105, u64(0x0)) + x111, _ := bits.add_u64(x38, x107, u64(fiat.u1(x110))) + x113, x114 := bits.add_u64(x45, x109, u64(0x0)) + x115, _ := bits.add_u64(x46, x111, u64(fiat.u1(x114))) + x117, x118 := bits.add_u64(x11, x5, u64(0x0)) + x119, _ := bits.add_u64(x12, x6, u64(fiat.u1(x118))) + x121, x122 := bits.add_u64(x15, x117, u64(0x0)) + 
x123, _ := bits.add_u64(x16, x119, u64(fiat.u1(x122))) + x125, x126 := bits.add_u64(x39, x121, u64(0x0)) + x127, _ := bits.add_u64(x40, x123, u64(fiat.u1(x126))) + x129, x130 := bits.add_u64(x47, x125, u64(0x0)) + x131, _ := bits.add_u64(x48, x127, u64(fiat.u1(x130))) + x133, x134 := bits.add_u64(x67, x129, u64(0x0)) + x135 := (u64(fiat.u1(x134)) + x131) + x136 := ((x133 >> 51) | ((x135 << 13) & 0xffffffffffffffff)) + x137 := (x133 & 0x7ffffffffffff) + x138, x139 := bits.add_u64(x136, x113, u64(0x0)) + x140 := (u64(fiat.u1(x139)) + x115) + x141 := ((x138 >> 51) | ((x140 << 13) & 0xffffffffffffffff)) + x142 := (x138 & 0x7ffffffffffff) + x143, x144 := bits.add_u64(x141, x97, u64(0x0)) + x145 := (u64(fiat.u1(x144)) + x99) + x146 := ((x143 >> 51) | ((x145 << 13) & 0xffffffffffffffff)) + x147 := (x143 & 0x7ffffffffffff) + x148, x149 := bits.add_u64(x146, x81, u64(0x0)) + x150 := (u64(fiat.u1(x149)) + x83) + x151 := ((x148 >> 51) | ((x150 << 13) & 0xffffffffffffffff)) + x152 := (x148 & 0x7ffffffffffff) + x153 := (x151 * 0x13) + x154 := (x68 + x153) + x155 := (x154 >> 51) + x156 := (x154 & 0x7ffffffffffff) + x157 := (x155 + x137) + x158 := fiat.u1((x157 >> 51)) + x159 := (x157 & 0x7ffffffffffff) + x160 := (u64(x158) + x142) + out1[0] = x156 + out1[1] = x159 + out1[2] = x160 + out1[3] = x147 + out1[4] = x152 +} + +fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { + x1 := (arg1[4] * 0x13) + x2 := (x1 * 0x2) + x3 := (arg1[4] * 0x2) + x4 := (arg1[3] * 0x13) + x5 := (x4 * 0x2) + x6 := (arg1[3] * 0x2) + x7 := (arg1[2] * 0x2) + x8 := (arg1[1] * 0x2) + x10, x9 := bits.mul_u64(arg1[4], x1) + x12, x11 := bits.mul_u64(arg1[3], x2) + x14, x13 := bits.mul_u64(arg1[3], x4) + x16, x15 := bits.mul_u64(arg1[2], x2) + x18, x17 := bits.mul_u64(arg1[2], x5) + x20, x19 := bits.mul_u64(arg1[2], arg1[2]) + x22, x21 := bits.mul_u64(arg1[1], x2) + x24, x23 := bits.mul_u64(arg1[1], x6) + x26, x25 := bits.mul_u64(arg1[1], x7) + x28, x27 := bits.mul_u64(arg1[1], 
arg1[1]) + x30, x29 := bits.mul_u64(arg1[0], x3) + x32, x31 := bits.mul_u64(arg1[0], x6) + x34, x33 := bits.mul_u64(arg1[0], x7) + x36, x35 := bits.mul_u64(arg1[0], x8) + x38, x37 := bits.mul_u64(arg1[0], arg1[0]) + x39, x40 := bits.add_u64(x21, x17, u64(0x0)) + x41, _ := bits.add_u64(x22, x18, u64(fiat.u1(x40))) + x43, x44 := bits.add_u64(x37, x39, u64(0x0)) + x45, _ := bits.add_u64(x38, x41, u64(fiat.u1(x44))) + x47 := ((x43 >> 51) | ((x45 << 13) & 0xffffffffffffffff)) + x48 := (x43 & 0x7ffffffffffff) + x49, x50 := bits.add_u64(x23, x19, u64(0x0)) + x51, _ := bits.add_u64(x24, x20, u64(fiat.u1(x50))) + x53, x54 := bits.add_u64(x29, x49, u64(0x0)) + x55, _ := bits.add_u64(x30, x51, u64(fiat.u1(x54))) + x57, x58 := bits.add_u64(x25, x9, u64(0x0)) + x59, _ := bits.add_u64(x26, x10, u64(fiat.u1(x58))) + x61, x62 := bits.add_u64(x31, x57, u64(0x0)) + x63, _ := bits.add_u64(x32, x59, u64(fiat.u1(x62))) + x65, x66 := bits.add_u64(x27, x11, u64(0x0)) + x67, _ := bits.add_u64(x28, x12, u64(fiat.u1(x66))) + x69, x70 := bits.add_u64(x33, x65, u64(0x0)) + x71, _ := bits.add_u64(x34, x67, u64(fiat.u1(x70))) + x73, x74 := bits.add_u64(x15, x13, u64(0x0)) + x75, _ := bits.add_u64(x16, x14, u64(fiat.u1(x74))) + x77, x78 := bits.add_u64(x35, x73, u64(0x0)) + x79, _ := bits.add_u64(x36, x75, u64(fiat.u1(x78))) + x81, x82 := bits.add_u64(x47, x77, u64(0x0)) + x83 := (u64(fiat.u1(x82)) + x79) + x84 := ((x81 >> 51) | ((x83 << 13) & 0xffffffffffffffff)) + x85 := (x81 & 0x7ffffffffffff) + x86, x87 := bits.add_u64(x84, x69, u64(0x0)) + x88 := (u64(fiat.u1(x87)) + x71) + x89 := ((x86 >> 51) | ((x88 << 13) & 0xffffffffffffffff)) + x90 := (x86 & 0x7ffffffffffff) + x91, x92 := bits.add_u64(x89, x61, u64(0x0)) + x93 := (u64(fiat.u1(x92)) + x63) + x94 := ((x91 >> 51) | ((x93 << 13) & 0xffffffffffffffff)) + x95 := (x91 & 0x7ffffffffffff) + x96, x97 := bits.add_u64(x94, x53, u64(0x0)) + x98 := (u64(fiat.u1(x97)) + x55) + x99 := ((x96 >> 51) | ((x98 << 13) & 0xffffffffffffffff)) + x100 := (x96 & 
0x7ffffffffffff) + x101 := (x99 * 0x13) + x102 := (x48 + x101) + x103 := (x102 >> 51) + x104 := (x102 & 0x7ffffffffffff) + x105 := (x103 + x85) + x106 := fiat.u1((x105 >> 51)) + x107 := (x105 & 0x7ffffffffffff) + x108 := (u64(x106) + x90) + out1[0] = x104 + out1[1] = x107 + out1[2] = x108 + out1[3] = x95 + out1[4] = x100 +} + +fe_carry :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { + x1 := arg1[0] + x2 := ((x1 >> 51) + arg1[1]) + x3 := ((x2 >> 51) + arg1[2]) + x4 := ((x3 >> 51) + arg1[3]) + x5 := ((x4 >> 51) + arg1[4]) + x6 := ((x1 & 0x7ffffffffffff) + ((x5 >> 51) * 0x13)) + x7 := (u64(fiat.u1((x6 >> 51))) + (x2 & 0x7ffffffffffff)) + x8 := (x6 & 0x7ffffffffffff) + x9 := (x7 & 0x7ffffffffffff) + x10 := (u64(fiat.u1((x7 >> 51))) + (x3 & 0x7ffffffffffff)) + x11 := (x4 & 0x7ffffffffffff) + x12 := (x5 & 0x7ffffffffffff) + out1[0] = x8 + out1[1] = x9 + out1[2] = x10 + out1[3] = x11 + out1[4] = x12 +} + +fe_add :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) { + x1 := (arg1[0] + arg2[0]) + x2 := (arg1[1] + arg2[1]) + x3 := (arg1[2] + arg2[2]) + x4 := (arg1[3] + arg2[3]) + x5 := (arg1[4] + arg2[4]) + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 + out1[4] = x5 +} + +fe_sub :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) { + x1 := ((0xfffffffffffda + arg1[0]) - arg2[0]) + x2 := ((0xffffffffffffe + arg1[1]) - arg2[1]) + x3 := ((0xffffffffffffe + arg1[2]) - arg2[2]) + x4 := ((0xffffffffffffe + arg1[3]) - arg2[3]) + x5 := ((0xffffffffffffe + arg1[4]) - arg2[4]) + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 + out1[4] = x5 +} + +fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) { + x1 := (0xfffffffffffda - arg1[0]) + x2 := (0xffffffffffffe - arg1[1]) + x3 := (0xffffffffffffe - arg1[2]) + x4 := (0xffffffffffffe - arg1[3]) + x5 := (0xffffffffffffe - arg1[4]) + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = 
x4 + out1[4] = x5 +} + +fe_cond_assign :: proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: int) { + x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0]) + x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1]) + x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2]) + x4 := fiat.cmovznz_u64(fiat.u1(arg2), out1[3], arg1[3]) + x5 := fiat.cmovznz_u64(fiat.u1(arg2), out1[4], arg1[4]) + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 + out1[4] = x5 +} + +fe_to_bytes :: proc "contextless" (out1: ^[32]byte, arg1: ^Tight_Field_Element) { + x1, x2 := _subborrowx_u51(0x0, arg1[0], 0x7ffffffffffed) + x3, x4 := _subborrowx_u51(x2, arg1[1], 0x7ffffffffffff) + x5, x6 := _subborrowx_u51(x4, arg1[2], 0x7ffffffffffff) + x7, x8 := _subborrowx_u51(x6, arg1[3], 0x7ffffffffffff) + x9, x10 := _subborrowx_u51(x8, arg1[4], 0x7ffffffffffff) + x11 := fiat.cmovznz_u64(x10, u64(0x0), 0xffffffffffffffff) + x12, x13 := _addcarryx_u51(0x0, x1, (x11 & 0x7ffffffffffed)) + x14, x15 := _addcarryx_u51(x13, x3, (x11 & 0x7ffffffffffff)) + x16, x17 := _addcarryx_u51(x15, x5, (x11 & 0x7ffffffffffff)) + x18, x19 := _addcarryx_u51(x17, x7, (x11 & 0x7ffffffffffff)) + x20, _ := _addcarryx_u51(x19, x9, (x11 & 0x7ffffffffffff)) + x22 := (x20 << 4) + x23 := (x18 * u64(0x2)) + x24 := (x16 << 6) + x25 := (x14 << 3) + x26 := (u8(x12) & 0xff) + x27 := (x12 >> 8) + x28 := (u8(x27) & 0xff) + x29 := (x27 >> 8) + x30 := (u8(x29) & 0xff) + x31 := (x29 >> 8) + x32 := (u8(x31) & 0xff) + x33 := (x31 >> 8) + x34 := (u8(x33) & 0xff) + x35 := (x33 >> 8) + x36 := (u8(x35) & 0xff) + x37 := u8((x35 >> 8)) + x38 := (x25 + u64(x37)) + x39 := (u8(x38) & 0xff) + x40 := (x38 >> 8) + x41 := (u8(x40) & 0xff) + x42 := (x40 >> 8) + x43 := (u8(x42) & 0xff) + x44 := (x42 >> 8) + x45 := (u8(x44) & 0xff) + x46 := (x44 >> 8) + x47 := (u8(x46) & 0xff) + x48 := (x46 >> 8) + x49 := (u8(x48) & 0xff) + x50 := u8((x48 >> 8)) + x51 := (x24 + u64(x50)) + x52 := (u8(x51) & 0xff) + x53 := (x51 >> 8) + x54 := (u8(x53) & 0xff) + 
x55 := (x53 >> 8) + x56 := (u8(x55) & 0xff) + x57 := (x55 >> 8) + x58 := (u8(x57) & 0xff) + x59 := (x57 >> 8) + x60 := (u8(x59) & 0xff) + x61 := (x59 >> 8) + x62 := (u8(x61) & 0xff) + x63 := (x61 >> 8) + x64 := (u8(x63) & 0xff) + x65 := fiat.u1((x63 >> 8)) + x66 := (x23 + u64(x65)) + x67 := (u8(x66) & 0xff) + x68 := (x66 >> 8) + x69 := (u8(x68) & 0xff) + x70 := (x68 >> 8) + x71 := (u8(x70) & 0xff) + x72 := (x70 >> 8) + x73 := (u8(x72) & 0xff) + x74 := (x72 >> 8) + x75 := (u8(x74) & 0xff) + x76 := (x74 >> 8) + x77 := (u8(x76) & 0xff) + x78 := u8((x76 >> 8)) + x79 := (x22 + u64(x78)) + x80 := (u8(x79) & 0xff) + x81 := (x79 >> 8) + x82 := (u8(x81) & 0xff) + x83 := (x81 >> 8) + x84 := (u8(x83) & 0xff) + x85 := (x83 >> 8) + x86 := (u8(x85) & 0xff) + x87 := (x85 >> 8) + x88 := (u8(x87) & 0xff) + x89 := (x87 >> 8) + x90 := (u8(x89) & 0xff) + x91 := u8((x89 >> 8)) + out1[0] = x26 + out1[1] = x28 + out1[2] = x30 + out1[3] = x32 + out1[4] = x34 + out1[5] = x36 + out1[6] = x39 + out1[7] = x41 + out1[8] = x43 + out1[9] = x45 + out1[10] = x47 + out1[11] = x49 + out1[12] = x52 + out1[13] = x54 + out1[14] = x56 + out1[15] = x58 + out1[16] = x60 + out1[17] = x62 + out1[18] = x64 + out1[19] = x67 + out1[20] = x69 + out1[21] = x71 + out1[22] = x73 + out1[23] = x75 + out1[24] = x77 + out1[25] = x80 + out1[26] = x82 + out1[27] = x84 + out1[28] = x86 + out1[29] = x88 + out1[30] = x90 + out1[31] = x91 +} + +_fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) { + x1 := (u64(arg1[31]) << 44) + x2 := (u64(arg1[30]) << 36) + x3 := (u64(arg1[29]) << 28) + x4 := (u64(arg1[28]) << 20) + x5 := (u64(arg1[27]) << 12) + x6 := (u64(arg1[26]) << 4) + x7 := (u64(arg1[25]) << 47) + x8 := (u64(arg1[24]) << 39) + x9 := (u64(arg1[23]) << 31) + x10 := (u64(arg1[22]) << 23) + x11 := (u64(arg1[21]) << 15) + x12 := (u64(arg1[20]) << 7) + x13 := (u64(arg1[19]) << 50) + x14 := (u64(arg1[18]) << 42) + x15 := (u64(arg1[17]) << 34) + x16 := (u64(arg1[16]) << 26) + x17 := 
(u64(arg1[15]) << 18) + x18 := (u64(arg1[14]) << 10) + x19 := (u64(arg1[13]) << 2) + x20 := (u64(arg1[12]) << 45) + x21 := (u64(arg1[11]) << 37) + x22 := (u64(arg1[10]) << 29) + x23 := (u64(arg1[9]) << 21) + x24 := (u64(arg1[8]) << 13) + x25 := (u64(arg1[7]) << 5) + x26 := (u64(arg1[6]) << 48) + x27 := (u64(arg1[5]) << 40) + x28 := (u64(arg1[4]) << 32) + x29 := (u64(arg1[3]) << 24) + x30 := (u64(arg1[2]) << 16) + x31 := (u64(arg1[1]) << 8) + x32 := arg1[0] + x33 := (x31 + u64(x32)) + x34 := (x30 + x33) + x35 := (x29 + x34) + x36 := (x28 + x35) + x37 := (x27 + x36) + x38 := (x26 + x37) + x39 := (x38 & 0x7ffffffffffff) + x40 := u8((x38 >> 51)) + x41 := (x25 + u64(x40)) + x42 := (x24 + x41) + x43 := (x23 + x42) + x44 := (x22 + x43) + x45 := (x21 + x44) + x46 := (x20 + x45) + x47 := (x46 & 0x7ffffffffffff) + x48 := u8((x46 >> 51)) + x49 := (x19 + u64(x48)) + x50 := (x18 + x49) + x51 := (x17 + x50) + x52 := (x16 + x51) + x53 := (x15 + x52) + x54 := (x14 + x53) + x55 := (x13 + x54) + x56 := (x55 & 0x7ffffffffffff) + x57 := u8((x55 >> 51)) + x58 := (x12 + u64(x57)) + x59 := (x11 + x58) + x60 := (x10 + x59) + x61 := (x9 + x60) + x62 := (x8 + x61) + x63 := (x7 + x62) + x64 := (x63 & 0x7ffffffffffff) + x65 := u8((x63 >> 51)) + x66 := (x6 + u64(x65)) + x67 := (x5 + x66) + x68 := (x4 + x67) + x69 := (x3 + x68) + x70 := (x2 + x69) + x71 := (x1 + x70) + out1[0] = x39 + out1[1] = x47 + out1[2] = x56 + out1[3] = x64 + out1[4] = x71 +} + +fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) { + x1 := arg1[0] + x2 := arg1[1] + x3 := arg1[2] + x4 := arg1[3] + x5 := arg1[4] + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 + out1[4] = x5 +} + +fe_carry_scmul_121666 :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { + x2, x1 := bits.mul_u64(0x1db42, arg1[4]) + x4, x3 := bits.mul_u64(0x1db42, arg1[3]) + x6, x5 := bits.mul_u64(0x1db42, arg1[2]) + x8, x7 := bits.mul_u64(0x1db42, arg1[1]) + x10, x9 := bits.mul_u64(0x1db42, 
arg1[0]) + x11 := ((x9 >> 51) | ((x10 << 13) & 0xffffffffffffffff)) + x12 := (x9 & 0x7ffffffffffff) + x13, x14 := bits.add_u64(x11, x7, u64(0x0)) + x15 := (u64(fiat.u1(x14)) + x8) + x16 := ((x13 >> 51) | ((x15 << 13) & 0xffffffffffffffff)) + x17 := (x13 & 0x7ffffffffffff) + x18, x19 := bits.add_u64(x16, x5, u64(0x0)) + x20 := (u64(fiat.u1(x19)) + x6) + x21 := ((x18 >> 51) | ((x20 << 13) & 0xffffffffffffffff)) + x22 := (x18 & 0x7ffffffffffff) + x23, x24 := bits.add_u64(x21, x3, u64(0x0)) + x25 := (u64(fiat.u1(x24)) + x4) + x26 := ((x23 >> 51) | ((x25 << 13) & 0xffffffffffffffff)) + x27 := (x23 & 0x7ffffffffffff) + x28, x29 := bits.add_u64(x26, x1, u64(0x0)) + x30 := (u64(fiat.u1(x29)) + x2) + x31 := ((x28 >> 51) | ((x30 << 13) & 0xffffffffffffffff)) + x32 := (x28 & 0x7ffffffffffff) + x33 := (x31 * 0x13) + x34 := (x12 + x33) + x35 := fiat.u1((x34 >> 51)) + x36 := (x34 & 0x7ffffffffffff) + x37 := (u64(x35) + x17) + x38 := fiat.u1((x37 >> 51)) + x39 := (x37 & 0x7ffffffffffff) + x40 := (u64(x38) + x22) + out1[0] = x36 + out1[1] = x39 + out1[2] = x40 + out1[3] = x27 + out1[4] = x32 +} + +// The following routines were added by hand, and do not come from fiat-crypto. 
+ +fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) { + out1[0] = 0 + out1[1] = 0 + out1[2] = 0 + out1[3] = 0 + out1[4] = 0 +} + +fe_one :: proc "contextless" (out1: ^Tight_Field_Element) { + out1[0] = 1 + out1[1] = 0 + out1[2] = 0 + out1[3] = 0 + out1[4] = 0 +} + +fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) { + x1 := arg1[0] + x2 := arg1[1] + x3 := arg1[2] + x4 := arg1[3] + x5 := arg1[4] + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 + out1[4] = x5 +} + +fe_cond_swap :: proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) { + mask := -u64(arg1) + x := (out1[0] ~ out2[0]) & mask + x1, y1 := out1[0] ~ x, out2[0] ~ x + x = (out1[1] ~ out2[1]) & mask + x2, y2 := out1[1] ~ x, out2[1] ~ x + x = (out1[2] ~ out2[2]) & mask + x3, y3 := out1[2] ~ x, out2[2] ~ x + x = (out1[3] ~ out2[3]) & mask + x4, y4 := out1[3] ~ x, out2[3] ~ x + x = (out1[4] ~ out2[4]) & mask + x5, y5 := out1[4] ~ x, out2[4] ~ x + out1[0], out2[0] = x1, y1 + out1[1], out2[1] = x2, y2 + out1[2], out2[2] = x3, y3 + out1[3], out2[3] = x4, y4 + out1[4], out2[4] = x5, y5 +} diff --git a/core/crypto/x25519/x25519.odin b/core/crypto/x25519/x25519.odin new file mode 100644 index 000000000..dfc8daa47 --- /dev/null +++ b/core/crypto/x25519/x25519.odin @@ -0,0 +1,126 @@ +package x25519 + +import field "core:crypto/_fiat/field_curve25519" +import "core:mem" + +SCALAR_SIZE :: 32 +POINT_SIZE :: 32 + +_BASE_POINT: [32]byte = {9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + +_scalar_bit :: #force_inline proc "contextless" (s: ^[32]byte, i: int) -> u8 { + if i < 0 { + return 0 + } + return (s[i>>3] >> uint(i&7)) & 1 +} + +_scalarmult :: proc (out, scalar, point: ^[32]byte) { + // Montgomery pseduo-multiplication taken from Monocypher. 
+ + // computes the scalar product + x1: field.Tight_Field_Element = --- + field.fe_from_bytes(&x1, point) + + // computes the actual scalar product (the result is in x2 and z2) + x2, x3, z2, z3: field.Tight_Field_Element = ---, ---, ---, --- + t0, t1: field.Loose_Field_Element = ---, --- + + // Montgomery ladder + // In projective coordinates, to avoid divisions: x = X / Z + // We don't care about the y coordinate, it's only 1 bit of information + field.fe_one(&x2) // "zero" point + field.fe_zero(&z2) + field.fe_set(&x3, &x1) // "one" point + field.fe_one(&z3) + + swap: int + for pos := 255-1; pos >= 0; pos = pos - 1 { + // constant time conditional swap before ladder step + b := int(_scalar_bit(scalar, pos)) + swap ~= b // xor trick avoids swapping at the end of the loop + field.fe_cond_swap(&x2, &x3, swap) + field.fe_cond_swap(&z2, &z3, swap) + swap = b // anticipates one last swap after the loop + + // Montgomery ladder step: replaces (P2, P3) by (P2*2, P2+P3) + // with differential addition + // + // Note: This deliberately omits reductions after add/sub operations + // if the result is only ever used as the input to a mul/square since + // the implementations of those can deal with non-reduced inputs. + // + // fe_tighten_cast is only used to store a fully reduced + // output in a Loose_Field_Element, or to provide such a + // Loose_Field_Element as a Tight_Field_Element argument. 
+ field.fe_sub(&t0, &x3, &z3) + field.fe_sub(&t1, &x2, &z2) + field.fe_add(field.fe_relax_cast(&x2), &x2, &z2) // x2 - unreduced + field.fe_add(field.fe_relax_cast(&z2), &x3, &z3) // z2 - unreduced + field.fe_carry_mul(&z3, &t0, field.fe_relax_cast(&x2)) + field.fe_carry_mul(&z2, field.fe_relax_cast(&z2), &t1) // z2 - reduced + field.fe_carry_square(field.fe_tighten_cast(&t0), &t1) // t0 - reduced + field.fe_carry_square(field.fe_tighten_cast(&t1), field.fe_relax_cast(&x2)) // t1 - reduced + field.fe_add(field.fe_relax_cast(&x3), &z3, &z2) // x3 - unreduced + field.fe_sub(field.fe_relax_cast(&z2), &z3, &z2) // z2 - unreduced + field.fe_carry_mul(&x2, &t1, &t0) // x2 - reduced + field.fe_sub(&t1, field.fe_tighten_cast(&t1), field.fe_tighten_cast(&t0)) // safe - t1/t0 is reduced + field.fe_carry_square(&z2, field.fe_relax_cast(&z2)) // z2 - reduced + field.fe_carry_scmul_121666(&z3, &t1) + field.fe_carry_square(&x3, field.fe_relax_cast(&x3)) // x3 - reduced + field.fe_add(&t0, field.fe_tighten_cast(&t0), &z3) // safe - t0 is reduced + field.fe_carry_mul(&z3, field.fe_relax_cast(&x1), field.fe_relax_cast(&z2)) + field.fe_carry_mul(&z2, &t1, &t0) + } + // last swap is necessary to compensate for the xor trick + // Note: after this swap, P3 == P2 + P1. 
+	field.fe_cond_swap(&x2, &x3, swap)
+	field.fe_cond_swap(&z2, &z3, swap)
+
+	// normalises the coordinates: x == X / Z
+	field.fe_carry_inv(&z2, field.fe_relax_cast(&z2))
+	field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
+	field.fe_to_bytes(out, &x2)
+
+	mem.zero_explicit(&x1, size_of(x1))
+	mem.zero_explicit(&x2, size_of(x2))
+	mem.zero_explicit(&x3, size_of(x3))
+	mem.zero_explicit(&z2, size_of(z2))
+	mem.zero_explicit(&z3, size_of(z3))
+	mem.zero_explicit(&t0, size_of(t0))
+	mem.zero_explicit(&t1, size_of(t1))
+}
+
+scalarmult :: proc (dst, scalar, point: []byte) {
+	if len(scalar) != SCALAR_SIZE {
+		panic("crypto/x25519: invalid scalar size")
+	}
+	if len(point) != POINT_SIZE {
+		panic("crypto/x25519: invalid point size")
+	}
+	if len(dst) != POINT_SIZE {
+		panic("crypto/x25519: invalid destination point size")
+	}
+
+	// "clamp" the scalar
+	e: [32]byte = ---
+	copy_slice(e[:], scalar)
+	e[0] &= 248
+	e[31] &= 127
+	e[31] |= 64
+
+	p: [32]byte = ---
+	copy_slice(p[:], point)
+
+	d: [32]byte = ---
+	_scalarmult(&d, &e, &p)
+	copy_slice(dst, d[:])
+
+	mem.zero_explicit(&e, size_of(e))
+	mem.zero_explicit(&d, size_of(d))
+}
+
+scalarmult_basepoint :: proc (dst, scalar: []byte) {
+	// TODO/perf: Switch to using a precomputed table.
+	scalarmult(dst, scalar, _BASE_POINT[:])
+}
diff --git a/tests/core/crypto/test_core_crypto.odin b/tests/core/crypto/test_core_crypto.odin
index df9920552..768ba242f 100644
--- a/tests/core/crypto/test_core_crypto.odin
+++ b/tests/core/crypto/test_core_crypto.odin
@@ -115,6 +115,11 @@ main :: proc() {
 	test_haval_224(&t)
 	test_haval_256(&t)
 
+	// "modern" crypto tests
+	test_x25519(&t)
+
+	bench_modern(&t)
+
 	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 }
 
diff --git a/tests/core/crypto/test_core_crypto_modern.odin b/tests/core/crypto/test_core_crypto_modern.odin
new file mode 100644
index 000000000..4d7f08bb1
--- /dev/null
+++ b/tests/core/crypto/test_core_crypto_modern.odin
@@ -0,0 +1,120 @@
+package test_core_crypto
+
+import "core:testing"
+import "core:fmt"
+import "core:time"
+
+import "core:crypto/x25519"
+
+// _digit_value maps an ASCII digit or letter to its numeric value:
+// '0'..'9' give 0..9, and 'a'..'z' / 'A'..'Z' give 10..35.  Any other
+// rune gives 16, so hex decoders must reject every result above 15.
+_digit_value :: proc(r: rune) -> int {
+	ri := int(r)
+	v: int = 16
+	switch r {
+	case '0'..='9': v = ri-'0'
+	case 'a'..='z': v = ri-'a'+10
+	case 'A'..='Z': v = ri-'A'+10
+	}
+	return v
+}
+
+// _decode_hex32 decodes up to 64 hex characters into a 32-byte array.
+// Bytes past the end of a shorter input are left as zero.
+//
+// Panics on an odd-length input, an input longer than 64 characters, or
+// a non-hex character, so that a mistyped test vector fails loudly
+// instead of indexing out of bounds or decoding to garbage.
+_decode_hex32 :: proc(s: string) -> [32]byte {
+	if len(s) % 2 != 0 || len(s) > 64 {
+		panic("test_core_crypto: hex string must have an even length of at most 64")
+	}
+
+	b: [32]byte
+	for i := 0; i < len(s); i = i + 2 {
+		hi := _digit_value(rune(s[i]))
+		lo := _digit_value(rune(s[i+1]))
+		if hi > 15 || lo > 15 {
+			panic("test_core_crypto: invalid hex digit")
+		}
+		b[i/2] = byte(hi << 4 | lo)
+	}
+	return b
+}
+
+// TestECDH is one X25519 test vector, with every field hex encoded:
+// product is the expected result of multiplying point by scalar.
+TestECDH :: struct {
+	scalar:  string,
+	point:   string,
+	product: string,
+}
+
+// test_x25519 checks scalarmult against the RFC 7748 test vectors, and
+// checks that scalarmult_basepoint agrees with scalarmult applied to the
+// base point.
+@(test)
+test_x25519 :: proc(t: ^testing.T) {
+	log(t, "Testing X25519")
+
+	test_vectors := [?]TestECDH {
+		// Test vectors from RFC 7748
+		TestECDH{
+			"a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4",
+			"e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c",
+			"c3da55379de9c6908e94ea4df28d084f32eccf03491c71f754b4075577a28552",
+		},
+		TestECDH{
+			"4b66e9d4d1b4673c5ad22691957d6af5c11b6421e0ea01d42ca4169e7918ba0d",
+			"e5210f12786811d3f4b7959d0538ae2c31dbe7106fc03c3efc4cd549c715a493",
+			"95cbde9476e8907d7aade45cb4b873f88b595a68799fa152e6f8f7647aac7957",
+		},
+	}
+	for v in test_vectors {
+		scalar := _decode_hex32(v.scalar)
+		point := _decode_hex32(v.point)
+
+		derived_point: [x25519.POINT_SIZE]byte
+		x25519.scalarmult(derived_point[:], scalar[:], point[:])
+		derived_point_str := hex_string(derived_point[:])
+
+		expect(t, derived_point_str == v.product, fmt.tprintf("Expected %s for %s * %s, but got %s instead", v.product, v.scalar, v.point, derived_point_str))
+
+		// Abuse the test vectors to sanity-check the scalar-basepoint multiply.
+		p1, p2: [x25519.POINT_SIZE]byte
+		x25519.scalarmult_basepoint(p1[:], scalar[:])
+		x25519.scalarmult(p2[:], scalar[:], x25519._BASE_POINT[:])
+		p1_str, p2_str := hex_string(p1[:]), hex_string(p2[:])
+		expect(t, p1_str == p2_str, fmt.tprintf("Expected %s for %s * basepoint, but got %s instead", p2_str, v.scalar, p1_str))
+	}
+
+	// TODO/tests: Run the wycheproof test vectors, once I figure out
+	// how to work with JSON.
+}
+
+// bench_modern runs the benchmarks for the "modern" primitives; it is
+// called from main and is also marked as a test.
+@(test)
+bench_modern :: proc(t: ^testing.T) {
+	fmt.println("Starting benchmarks:")
+
+	bench_x25519(t)
+}
+
+// bench_x25519 times 10000 scalarmult calls on fixed inputs and logs the
+// mean time per call in microseconds.
+bench_x25519 :: proc(t: ^testing.T) {
+	point := _decode_hex32("deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
+	scalar := _decode_hex32("cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe")
+	out: [x25519.POINT_SIZE]byte = ---
+
+	iters :: 10000
+	start := time.now()
+	for i := 0; i < iters; i = i + 1 {
+		x25519.scalarmult(out[:], scalar[:], point[:])
+	}
+	elapsed := time.since(start)
+
+	log(t, fmt.tprintf("x25519.scalarmult: ~%f us/op", time.duration_microseconds(elapsed) / iters))
+}