Files
Odin/core/crypto/_aes/ct64/ct64.odin
T
2025-03-23 19:14:33 +09:00

261 lines
6.7 KiB
Odin

// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package aes_ct64
// Bitsliced AES for 64-bit general purpose (integer) registers. Each
// invocation will process up to 4 blocks at a time. This implementation
// is derived from the BearSSL ct64 code, and distributed under a 1-clause
// BSD license with permission from the original author.
//
// WARNING: "hic sunt dracones"
//
// This package also deliberately exposes enough internals to be able to
// function as a replacement for `AESENC` and `AESDEC` from AES-NI, to
// allow the implementation of non-AES primitives that use the AES round
// function such as AEGIS and Deoxys-II. This should ONLY be done when
// implementing something other than AES itself.
sub_bytes :: proc "contextless" (q: ^[8]u64) {
// Applies the S-box circuit below to the eight words of `q`, in place.
// The body consists solely of bitwise XOR, AND and NOT on whole words,
// so each of the 64 bit positions is processed independently and in the
// same way, without any branch or table lookup.
//
// This S-box implementation is a straightforward translation of
// the circuit described by Boyar and Peralta in "A new
// combinational logic minimization technique with applications
// to cryptology" (https://eprint.iacr.org/2009/191.pdf).
//
// Note that variables x* (input) and s* (output) are numbered
// in "reverse" order (x0 is the high bit, x7 is the low bit).
//
// Load the inputs: q[7] supplies x0 and q[0] supplies x7.
x0 := q[7]
x1 := q[6]
x2 := q[5]
x3 := q[4]
x4 := q[3]
x5 := q[2]
x6 := q[1]
x7 := q[0]
// Top linear transformation.
//
// XOR only: derives the signals y1..y21 (plus the shared temporaries
// t0 and t1) from the eight inputs.
y14 := x3 ~ x5
y13 := x0 ~ x6
y9 := x0 ~ x3
y8 := x0 ~ x5
t0 := x1 ~ x2
y1 := t0 ~ x7
y4 := y1 ~ x3
y12 := y13 ~ y14
y2 := y1 ~ x0
y5 := y1 ~ x6
y3 := y5 ~ y8
t1 := x4 ~ y12
y15 := t1 ~ x5
y20 := t1 ~ x1
y6 := y15 ~ x7
y10 := y15 ~ t0
y11 := y20 ~ y9
y7 := x7 ~ y11
y17 := y10 ~ y11
y19 := y10 ~ y8
y16 := t0 ~ y11
y21 := y13 ~ y16
y18 := x0 ~ y16
// Non-linear section.
//
// Every AND of the circuit is in this section (within t2..t39 and in
// z0..z17); the two linear sections use XOR (and, at the bottom, NOT)
// only.
t2 := y12 & y15
t3 := y3 & y6
t4 := t3 ~ t2
t5 := y4 & x7
t6 := t5 ~ t2
t7 := y13 & y16
t8 := y5 & y1
t9 := t8 ~ t7
t10 := y2 & y7
t11 := t10 ~ t7
t12 := y9 & y11
t13 := y14 & y17
t14 := t13 ~ t12
t15 := y8 & y10
t16 := t15 ~ t12
t17 := t4 ~ t14
t18 := t6 ~ t16
t19 := t9 ~ t14
t20 := t11 ~ t16
t21 := t17 ~ y20
t22 := t18 ~ y19
t23 := t19 ~ y21
t24 := t20 ~ y18
t25 := t21 ~ t22
t26 := t21 & t23
t27 := t24 ~ t26
t28 := t25 & t27
t29 := t28 ~ t22
t30 := t23 ~ t24
t31 := t22 ~ t26
t32 := t31 & t30
t33 := t32 ~ t24
t34 := t23 ~ t33
t35 := t27 ~ t33
t36 := t24 & t35
t37 := t36 ~ t34
t38 := t27 ~ t36
t39 := t29 & t38
t40 := t25 ~ t39
t41 := t40 ~ t37
t42 := t29 ~ t33
t43 := t29 ~ t40
t44 := t33 ~ t37
t45 := t42 ~ t41
// Each of t29, t33, t37, t40..t45 is ANDed with two different signals
// from the top linear layer, giving the eighteen products z0..z17.
z0 := t44 & y15
z1 := t37 & y6
z2 := t33 & x7
z3 := t43 & y16
z4 := t40 & y1
z5 := t29 & y7
z6 := t42 & y11
z7 := t45 & y17
z8 := t41 & y10
z9 := t44 & y12
z10 := t37 & y3
z11 := t33 & y4
z12 := t43 & y13
z13 := t40 & y5
z14 := t29 & y2
z15 := t42 & y9
z16 := t45 & y14
z17 := t41 & y8
// Bottom linear transformation.
//
// Combines z0..z17 into the outputs s0..s7.  In Odin, binary `~` is
// XOR and unary `~` is bitwise NOT, so `a ~ ~b` below is the
// complement of `a XOR b`; this form is used for s1, s2, s6 and s7.
t46 := z15 ~ z16
t47 := z10 ~ z11
t48 := z5 ~ z13
t49 := z9 ~ z10
t50 := z2 ~ z12
t51 := z2 ~ z5
t52 := z7 ~ z8
t53 := z0 ~ z3
t54 := z6 ~ z7
t55 := z16 ~ z17
t56 := z12 ~ t48
t57 := t50 ~ t53
t58 := z4 ~ t46
t59 := z3 ~ t54
t60 := t46 ~ t57
t61 := z14 ~ t57
t62 := t52 ~ t58
t63 := t49 ~ t58
t64 := z4 ~ t59
t65 := t61 ~ t62
t66 := z1 ~ t63
s0 := t59 ~ t63
s6 := t56 ~ ~t62
s7 := t48 ~ ~t60
t67 := t64 ~ t65
s3 := t53 ~ t66
s4 := t51 ~ t66
s5 := t47 ~ t65
// s1 is derived from the already-computed output s3.
s1 := t64 ~ ~s3
s2 := t55 ~ ~t67
// Store the outputs with the same reversed numbering as the inputs:
// s0 goes to q[7] and s7 goes to q[0].
q[7] = s0
q[6] = s1
q[5] = s2
q[4] = s3
q[3] = s4
q[2] = s5
q[1] = s6
q[0] = s7
}
// orthogonalize permutes the bits of the eight words of `q` in place.
//
// Three rounds of masked exchanges are performed, with group widths of
// 1, 2 and 4 bits.  The net effect is an 8x8 bit transposition inside
// every byte lane: for each byte index k, bit (8k + j) of q[i] trades
// places with bit (8k + i) of q[j].  Applying the procedure twice
// therefore restores the original contents.
orthogonalize :: proc "contextless" (q: ^[8]u64) {
	// Returns the pair (a', b') where the groups of `b` selected by
	// `lo` move up by `shift` into `a`, and the groups of `a` selected
	// by `hi` move down by `shift` into `b`.
	exchange :: #force_inline proc "contextless" (a, b: u64, lo, hi: u64, shift: uint) -> (u64, u64) {
		a_next := (a & lo) | ((b & lo) << shift)
		b_next := ((a & hi) >> shift) | (b & hi)
		return a_next, b_next
	}

	// Round 1: single bits, between adjacent words.
	LO_1 :: 0x5555555555555555
	HI_1 :: 0xAAAAAAAAAAAAAAAA
	q[0], q[1] = exchange(q[0], q[1], LO_1, HI_1, 1)
	q[2], q[3] = exchange(q[2], q[3], LO_1, HI_1, 1)
	q[4], q[5] = exchange(q[4], q[5], LO_1, HI_1, 1)
	q[6], q[7] = exchange(q[6], q[7], LO_1, HI_1, 1)

	// Round 2: bit pairs, between words two apart.
	LO_2 :: 0x3333333333333333
	HI_2 :: 0xCCCCCCCCCCCCCCCC
	q[0], q[2] = exchange(q[0], q[2], LO_2, HI_2, 2)
	q[1], q[3] = exchange(q[1], q[3], LO_2, HI_2, 2)
	q[4], q[6] = exchange(q[4], q[6], LO_2, HI_2, 2)
	q[5], q[7] = exchange(q[5], q[7], LO_2, HI_2, 2)

	// Round 3: nibbles, between words four apart.
	LO_4 :: 0x0F0F0F0F0F0F0F0F
	HI_4 :: 0xF0F0F0F0F0F0F0F0
	q[0], q[4] = exchange(q[0], q[4], LO_4, HI_4, 4)
	q[1], q[5] = exchange(q[1], q[5], LO_4, HI_4, 4)
	q[2], q[6] = exchange(q[2], q[6], LO_4, HI_4, 4)
	q[3], q[7] = exchange(q[3], q[7], LO_4, HI_4, 4)
}
// interleave_in packs four 32-bit words into two 64-bit words, byte by
// byte.
//
// Byte n of `w0` lands in byte 2n of `q0` and byte n of `w2` in byte
// 2n+1 of `q0`; `q1` is built the same way from `w1` and `w3`.
@(require_results)
interleave_in :: proc "contextless" (w0, w1, w2, w3: u32) -> (q0, q1: u64) #no_bounds_check {
	// Widens `w` to 64 bits and moves byte n of `w` to byte 2n of the
	// result, leaving every odd byte zero.
	spread :: #force_inline proc "contextless" (w: u32) -> u64 {
		HALVES :: 0x0000FFFF0000FFFF
		BYTES  :: 0x00FF00FF00FF00FF

		v := u64(w)
		v = (v | (v << 16)) & HALVES
		v = (v | (v << 8)) & BYTES
		return v
	}

	even0, even1 := spread(w0), spread(w1)
	odd0, odd1 := spread(w2) << 8, spread(w3) << 8

	q0 = even0 | odd0
	q1 = even1 | odd1
	return
}
// interleave_out splits two 64-bit words into four 32-bit words, byte
// by byte.
//
// The even-numbered bytes of `q0` form `w0` and its odd-numbered bytes
// form `w2`; `w1` and `w3` are taken the same way from `q1`.
@(require_results)
interleave_out :: proc "contextless" (q0, q1: u64) -> (w0, w1, w2, w3: u32) {
	// Gathers bytes 0, 2, 4 and 6 of `v` into one 32-bit word, with
	// byte 2n of `v` becoming byte n of the result.
	compact :: #force_inline proc "contextless" (v: u64) -> u32 {
		BYTES  :: 0x00FF00FF00FF00FF
		HALVES :: 0x0000FFFF0000FFFF

		x := v & BYTES
		x = (x | (x >> 8)) & HALVES
		return u32(x) | u32(x >> 16)
	}

	w0 = compact(q0)
	w1 = compact(q1)
	// Shifting by one byte brings the odd bytes into the even slots.
	w2 = compact(q0 >> 8)
	w3 = compact(q1 >> 8)
	return
}
// rotr32 rotates the 64-bit value `x` by 32 bits, i.e. it exchanges the
// upper and lower 32-bit halves.
@(private)
rotr32 :: #force_inline proc "contextless" (x: u64) -> u64 {
	upper_to_lower := x >> 32
	lower_to_upper := x << 32
	return lower_to_upper | upper_to_lower
}