mirror of
https://github.com/Ed94/Odin.git
synced 2026-06-22 21:54:59 -07:00
big: Add internal_int_exponent_mod_fast.
This commit is contained in:
@@ -208,15 +208,17 @@ int_to_byte_little :: proc(v: ^Int) {
|
||||
}
|
||||
}
|
||||
|
||||
// printf :: fmt.printf;
|
||||
|
||||
demo :: proc() {
|
||||
a, b, c, d, e, f, res := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
|
||||
defer destroy(a, b, c, d, e, f, res);
|
||||
|
||||
set(a, 42);
|
||||
set(b, 6);
|
||||
set(c, 5);
|
||||
set(c, 131);
|
||||
|
||||
if err := internal_int_exponent_mod(res, a, b, c, 0); err != nil {
|
||||
if err := internal_int_exponent_mod_fast(res, a, b, c, 0); err != nil {
|
||||
fmt.printf("Error: %v\n", err);
|
||||
}
|
||||
|
||||
|
||||
@@ -991,13 +991,21 @@ internal_int_mod_bits :: proc(remainder, numerator: ^Int, bits: int, allocator :
|
||||
public ones that have already satisfied these constraints.
|
||||
*/
|
||||
|
||||
/*
|
||||
This procedure returns the allocated capacity of an Int.
|
||||
Assumes `a` not to be `nil`.
|
||||
*/
|
||||
internal_int_allocated_cap :: #force_inline proc(a: ^Int) -> (cap: int) {
|
||||
raw := transmute(mem.Raw_Dynamic_Array)a.digit;
|
||||
return raw.cap;
|
||||
}
|
||||
|
||||
/*
|
||||
This procedure will return `true` if the `Int` is initialized, `false` if not.
|
||||
Assumes `a` not to be `nil`.
|
||||
*/
|
||||
internal_int_is_initialized :: #force_inline proc(a: ^Int) -> (initialized: bool) {
|
||||
raw := transmute(mem.Raw_Dynamic_Array)a.digit;
|
||||
return raw.cap >= _MIN_DIGIT_COUNT;
|
||||
return internal_int_allocated_cap(a) >= _MIN_DIGIT_COUNT;
|
||||
}
|
||||
internal_is_initialized :: proc { internal_int_is_initialized, };
|
||||
|
||||
@@ -1650,8 +1658,7 @@ internal_int_destroy :: proc(integers: ..^Int) {
|
||||
integers := integers;
|
||||
|
||||
for a in &integers {
|
||||
raw := transmute(mem.Raw_Dynamic_Array)a.digit;
|
||||
if raw.cap > 0 {
|
||||
if internal_int_allocated_cap(a) > 0 {
|
||||
mem.zero_slice(a.digit[:]);
|
||||
free(&a.digit[0]);
|
||||
}
|
||||
@@ -1913,23 +1920,23 @@ internal_int_shrink :: proc(a: ^Int) -> (err: Error) {
|
||||
internal_shrink :: proc { internal_int_shrink, };
|
||||
|
||||
internal_int_grow :: proc(a: ^Int, digits: int, allow_shrink := false, allocator := context.allocator) -> (err: Error) {
|
||||
raw := transmute(mem.Raw_Dynamic_Array)a.digit;
|
||||
|
||||
/*
|
||||
We need at least _MIN_DIGIT_COUNT or a.used digits, whichever is bigger.
|
||||
The caller is asking for `digits`. Let's be accomodating.
|
||||
*/
|
||||
cap := internal_int_allocated_cap(a);
|
||||
|
||||
needed := max(_MIN_DIGIT_COUNT, a.used, digits);
|
||||
if !allow_shrink {
|
||||
needed = max(needed, raw.cap);
|
||||
needed = max(needed, cap);
|
||||
}
|
||||
|
||||
/*
|
||||
If not yet iniialized, initialize the `digit` backing with the allocator we were passed.
|
||||
*/
|
||||
if raw.cap == 0 {
|
||||
if cap == 0 {
|
||||
a.digit = make([dynamic]DIGIT, needed, allocator);
|
||||
} else if raw.cap != needed {
|
||||
} else if cap != needed {
|
||||
/*
|
||||
`[dynamic]DIGIT` already knows what allocator was used for it, so resize will do the right thing.
|
||||
*/
|
||||
|
||||
+281
-2
@@ -144,7 +144,7 @@ internal_int_montgomery_calc_normalization :: proc(a, b: ^Int, allocator := cont
|
||||
power := ((b.used - 1) * _DIGIT_BITS) + bits - 1;
|
||||
internal_int_power_of_two(a, power) or_return;
|
||||
} else {
|
||||
internal_one(a);
|
||||
internal_one(a) or_return;
|
||||
bits = 1;
|
||||
}
|
||||
|
||||
@@ -187,7 +187,8 @@ internal_int_montgomery_setup :: proc(n: ^Int) -> (rho: DIGIT, err: Error) {
|
||||
x := (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
|
||||
x *= 2 - (b * x); /* here x*a==1 mod 2**8 */
|
||||
x *= 2 - (b * x); /* here x*a==1 mod 2**16 */
|
||||
when _WORD_TYPE_BITS == 64 {
|
||||
|
||||
when _DIGIT_TYPE_BITS == 64 {
|
||||
x *= 2 - (b * x); /* here x*a==1 mod 2**32 */
|
||||
x *= 2 - (b * x); /* here x*a==1 mod 2**64 */
|
||||
}
|
||||
@@ -473,6 +474,10 @@ internal_int_exponent_mod :: proc(res, G, X, P: ^Int, redmode: int, allocator :=
|
||||
M := [_TAB_SIZE]Int{};
|
||||
winsize: uint;
|
||||
|
||||
/*
|
||||
Use a pointer to the reduction algorithm.
|
||||
This allows us to use one of many reduction algorithms without modding the guts of the code with if statements everywhere.
|
||||
*/
|
||||
redux: #type proc(x, m, mu: ^Int, allocator := context.allocator) -> (err: Error);
|
||||
|
||||
defer {
|
||||
@@ -686,6 +691,280 @@ internal_int_exponent_mod :: proc(res, G, X, P: ^Int, redmode: int, allocator :=
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
Computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
|
||||
|
||||
Uses a left-to-right `k`-ary sliding window to compute the modular exponentiation.
|
||||
The value of `k` changes based on the size of the exponent.
|
||||
|
||||
Uses Montgomery or Diminished Radix reduction [whichever appropriate]
|
||||
|
||||
Assumes `res`, `G`, `X` and `P` to not be `nil` and for `G`, `X` and `P` to have been initialized.
|
||||
*/
|
||||
internal_int_exponent_mod_fast :: proc(res, G, X, P: ^Int, redmode: int, allocator := context.allocator) -> (err: Error) {
|
||||
context.allocator = allocator;
|
||||
|
||||
M := [_TAB_SIZE]Int{};
|
||||
winsize: uint;
|
||||
|
||||
/*
|
||||
Use a pointer to the reduction algorithm.
|
||||
This allows us to use one of many reduction algorithms without modding the guts of the code with if statements everywhere.
|
||||
*/
|
||||
redux: #type proc(x, n: ^Int, rho: DIGIT, allocator := context.allocator) -> (err: Error);
|
||||
|
||||
defer {
|
||||
internal_destroy(&M[1]);
|
||||
for x := 1 << (winsize - 1); x < (1 << winsize); x += 1 {
|
||||
internal_destroy(&M[x]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Find window size.
|
||||
*/
|
||||
x := internal_count_bits(X);
|
||||
switch {
|
||||
case x <= 7:
|
||||
winsize = 2;
|
||||
case x <= 36:
|
||||
winsize = 3;
|
||||
case x <= 140:
|
||||
winsize = 4;
|
||||
case x <= 450:
|
||||
winsize = 5;
|
||||
case x <= 1303:
|
||||
winsize = 6;
|
||||
case x <= 3529:
|
||||
winsize = 7;
|
||||
case:
|
||||
winsize = 8;
|
||||
}
|
||||
|
||||
winsize = min(_MAX_WIN_SIZE, winsize) if _MAX_WIN_SIZE > 0 else winsize;
|
||||
|
||||
/*
|
||||
Init M array
|
||||
Init first cell.
|
||||
*/
|
||||
cap := internal_int_allocated_cap(P);
|
||||
internal_grow(&M[1], cap) or_return;
|
||||
|
||||
/*
|
||||
Now init the second half of the array.
|
||||
*/
|
||||
for x = 1 << (winsize - 1); x < (1 << winsize); x += 1 {
|
||||
internal_grow(&M[x], cap) or_return;
|
||||
}
|
||||
|
||||
/*
|
||||
Determine and setup reduction code.
|
||||
*/
|
||||
rho: DIGIT;
|
||||
|
||||
if redmode == 0 {
|
||||
/*
|
||||
Now setup Montgomery.
|
||||
*/
|
||||
rho = internal_int_montgomery_setup(P) or_return;
|
||||
|
||||
/*
|
||||
Automatically pick the comba one if available (saves quite a few calls/ifs).
|
||||
*/
|
||||
if ((P.used * 2) + 1) < _WARRAY && P.used < _MAX_COMBA {
|
||||
redux = _private_montgomery_reduce_comba;
|
||||
} else {
|
||||
/*
|
||||
Use slower baseline Montgomery method.
|
||||
*/
|
||||
redux = internal_int_montgomery_reduce;
|
||||
}
|
||||
} else if redmode == 1 {
|
||||
/*
|
||||
if (MP_HAS(MP_DR_SETUP) && MP_HAS(MP_DR_REDUCE)) {
|
||||
/* setup DR reduction for moduli of the form B**k - b */
|
||||
mp_dr_setup(P, &mp);
|
||||
redux = mp_dr_reduce;
|
||||
} else {
|
||||
err = MP_VAL;
|
||||
goto LBL_M;
|
||||
}
|
||||
*/
|
||||
return .Unimplemented;
|
||||
} else {
|
||||
/*
|
||||
Setup DR reduction for moduli of the form 2**k - b.
|
||||
*/
|
||||
rho = internal_int_reduce_2k_setup(P) or_return;
|
||||
redux = internal_int_reduce_2k;
|
||||
}
|
||||
|
||||
/*
|
||||
Setup result.
|
||||
*/
|
||||
internal_grow(res, cap) or_return;
|
||||
|
||||
/*
|
||||
Create M table
|
||||
The first half of the table is not computed, though, except for M[0] and M[1]
|
||||
*/
|
||||
|
||||
if redmode == 0 {
|
||||
/*
|
||||
Now we need R mod m.
|
||||
*/
|
||||
internal_int_montgomery_calc_normalization(res, P) or_return;
|
||||
|
||||
/*
|
||||
Now set M[1] to G * R mod m.
|
||||
*/
|
||||
internal_mulmod(&M[1], G, res, P) or_return;
|
||||
} else {
|
||||
internal_one(res) or_return;
|
||||
internal_mod(&M[1], G, P) or_return;
|
||||
}
|
||||
|
||||
/*
|
||||
Compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times.
|
||||
*/
|
||||
slot := 1 << (winsize - 1);
|
||||
internal_copy(&M[slot], &M[1]) or_return;
|
||||
|
||||
for x = 0; x < int(winsize - 1); x += 1 {
|
||||
internal_sqr(&M[slot], &M[slot]) or_return;
|
||||
print("slot: ", &M[slot]);
|
||||
redux(&M[slot], P, rho) or_return;
|
||||
print("slot redux: ", &M[slot]);
|
||||
}
|
||||
|
||||
/*
|
||||
Create upper table.
|
||||
*/
|
||||
for x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x += 1 {
|
||||
internal_mul(&M[x], &M[x - 1], &M[1]) or_return;
|
||||
redux(&M[x], P, rho) or_return;
|
||||
}
|
||||
|
||||
/*
|
||||
Set initial mode and bit cnt.
|
||||
*/
|
||||
mode := 0;
|
||||
bitcnt := 1;
|
||||
buf := DIGIT(0);
|
||||
digidx := X.used - 1;
|
||||
bitcpy := 0;
|
||||
bitbuf := DIGIT(0);
|
||||
|
||||
for {
|
||||
/*
|
||||
Grab next digit as required.
|
||||
*/
|
||||
bitcnt -= 1;
|
||||
if bitcnt == 0 {
|
||||
/*
|
||||
If digidx == -1 we are out of digits so break.
|
||||
*/
|
||||
if digidx == -1 { break; }
|
||||
|
||||
/*
|
||||
Read next digit and reset the bitcnt.
|
||||
*/
|
||||
buf = X.digit[digidx];
|
||||
digidx -= 1;
|
||||
bitcnt = _DIGIT_BITS;
|
||||
}
|
||||
|
||||
/*
|
||||
Grab the next msb from the exponent.
|
||||
*/
|
||||
y := (buf >> (_DIGIT_BITS - 1)) & 1;
|
||||
buf <<= 1;
|
||||
|
||||
/*
|
||||
If the bit is zero and mode == 0 then we ignore it.
|
||||
These represent the leading zero bits before the first 1 bit in the exponent.
|
||||
Technically this opt is not required but it does lower the # of trivial squaring/reductions used.
|
||||
*/
|
||||
if mode == 0 && y == 0 { continue; }
|
||||
|
||||
/*
|
||||
If the bit is zero and mode == 1 then we square.
|
||||
*/
|
||||
if mode == 1 && y == 0 {
|
||||
internal_sqr(res, res) or_return;
|
||||
redux(res, P, rho) or_return;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
Else we add it to the window.
|
||||
*/
|
||||
bitcpy += 1;
|
||||
bitbuf |= (y << (winsize - uint(bitcpy)));
|
||||
mode = 2;
|
||||
|
||||
if bitcpy == int(winsize) {
|
||||
/*
|
||||
Window is filled so square as required and multiply
|
||||
Square first.
|
||||
*/
|
||||
for x = 0; x < int(winsize); x += 1 {
|
||||
internal_sqr(res, res) or_return;
|
||||
redux(res, P, rho) or_return;
|
||||
}
|
||||
|
||||
/*
|
||||
Then multiply.
|
||||
*/
|
||||
internal_mul(res, res, &M[bitbuf]) or_return;
|
||||
redux(res, P, rho) or_return;
|
||||
|
||||
/*
|
||||
Empty window and reset.
|
||||
*/
|
||||
bitcpy = 0;
|
||||
bitbuf = 0;
|
||||
mode = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
If bits remain then square/multiply.
|
||||
*/
|
||||
if mode == 2 && bitcpy > 0 {
|
||||
/*
|
||||
Square then multiply if the bit is set.
|
||||
*/
|
||||
for x = 0; x < bitcpy; x += 1 {
|
||||
internal_sqr(res, res) or_return;
|
||||
redux(res, P, rho) or_return;
|
||||
|
||||
/*
|
||||
Get next bit of the window.
|
||||
*/
|
||||
bitbuf <<= 1;
|
||||
if bitbuf & (1 << winsize) != 0 {
|
||||
/*
|
||||
Then multiply.
|
||||
*/
|
||||
internal_mul(res, res, &M[1]) or_return;
|
||||
redux(res, P, rho) or_return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if redmode == 0 {
|
||||
/*
|
||||
Fixup result if Montgomery reduction is used.
|
||||
Recall that any value in a Montgomery system is actually multiplied by R mod n.
|
||||
So we have to reduce one more time to cancel out the factor of R.
|
||||
*/
|
||||
redux(res, P, rho) or_return;
|
||||
}
|
||||
|
||||
return nil;
|
||||
}
|
||||
|
||||
/*
|
||||
Returns the number of Rabin-Miller trials needed for a given bit size.
|
||||
*/
|
||||
|
||||
@@ -1730,9 +1730,6 @@ _private_int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) -
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
Computes xR**-1 == x (mod N) via Montgomery Reduction.
|
||||
This is an optimized implementation of `internal_montgomery_reduce`
|
||||
@@ -1753,7 +1750,7 @@ _private_montgomery_reduce_comba :: proc(x, n: ^Int, rho: DIGIT, allocator := co
|
||||
/*
|
||||
Grow `x` as required.
|
||||
*/
|
||||
internal_grow(x, n.used + 1) or_return;
|
||||
internal_grow(x, n.used + 1) or_return;
|
||||
|
||||
/*
|
||||
First we have to get the digits of the input into an array of double precision words W[...]
|
||||
|
||||
Reference in New Issue
Block a user