diff --git a/base/runtime/entry_unix.odin b/base/runtime/entry_unix.odin index 233007936..e49698e6e 100644 --- a/base/runtime/entry_unix.odin +++ b/base/runtime/entry_unix.odin @@ -1,5 +1,5 @@ //+private -//+build linux, darwin, freebsd, openbsd +//+build linux, darwin, freebsd, openbsd, haiku //+no-instrumentation package runtime diff --git a/base/runtime/heap_allocator_unix.odin b/base/runtime/heap_allocator_unix.odin index bfbbb5303..2b6698885 100644 --- a/base/runtime/heap_allocator_unix.odin +++ b/base/runtime/heap_allocator_unix.odin @@ -1,4 +1,4 @@ -//+build linux, darwin, freebsd, openbsd +//+build linux, darwin, freebsd, openbsd, haiku //+private package runtime @@ -35,4 +35,4 @@ _heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr { _heap_free :: proc(ptr: rawptr) { _unix_free(ptr) -} \ No newline at end of file +} diff --git a/base/runtime/os_specific_haiku.odin b/base/runtime/os_specific_haiku.odin new file mode 100644 index 000000000..f8dafac3d --- /dev/null +++ b/base/runtime/os_specific_haiku.odin @@ -0,0 +1,21 @@ +//+build haiku +//+private +package runtime + +foreign import libc "system:c" + +foreign libc { + @(link_name="write") + _unix_write :: proc(fd: i32, buf: rawptr, size: int) -> int --- + + _errnop :: proc() -> ^i32 --- +} + +_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) { + ret := _unix_write(2, raw_data(data), len(data)) + if ret < len(data) { + err := _errnop() + return int(ret), _OS_Errno(err^ if err != nil else 0) + } + return int(ret), 0 +} diff --git a/build_odin.sh b/build_odin.sh index fab6c5fd1..93319b4ef 100755 --- a/build_odin.sh +++ b/build_odin.sh @@ -82,6 +82,11 @@ OpenBSD) LDFLAGS="$LDFLAGS -liconv" LDFLAGS="$LDFLAGS $($LLVM_CONFIG --libs core native --system-libs)" ;; +Haiku) + CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags) -I/system/develop/headers/private/shared -I/system/develop/headers/private/kernel" + LDFLAGS="$LDFLAGS -liconv" + LDFLAGS="$LDFLAGS $($LLVM_CONFIG --libs core 
native --system-libs)" + ;; *) error "Platform \"$OS_NAME\" unsupported" ;; diff --git a/core/c/libc/errno.odin b/core/c/libc/errno.odin index fe6fbb073..7af763706 100644 --- a/core/c/libc/errno.odin +++ b/core/c/libc/errno.odin @@ -80,6 +80,24 @@ when ODIN_OS == .Darwin { ERANGE :: 34 } +when ODIN_OS == .Haiku { + @(private="file") + @(default_calling_convention="c") + foreign libc { + @(link_name="_errnop") + _get_errno :: proc() -> ^int --- + } + + @(private="file") + B_GENERAL_ERROR_BASE :: min(i32) + @(private="file") + B_POSIX_ERROR_BASE :: B_GENERAL_ERROR_BASE + 0x7000 + + EDOM :: B_POSIX_ERROR_BASE + 16 + EILSEQ :: B_POSIX_ERROR_BASE + 38 + ERANGE :: B_POSIX_ERROR_BASE + 17 +} + // Odin has no way to make an identifier "errno" behave as a function call to // read the value, or to produce an lvalue such that you can assign a different // error value to errno. To work around this, just expose it as a function like diff --git a/core/c/libc/stdio.odin b/core/c/libc/stdio.odin index 39969e4a8..b83ddecc8 100644 --- a/core/c/libc/stdio.odin +++ b/core/c/libc/stdio.odin @@ -163,6 +163,36 @@ when ODIN_OS == .Darwin { } } +when ODIN_OS == .Haiku { + fpos_t :: distinct i64 + + _IOFBF :: 0 + _IOLBF :: 1 + _IONBF :: 2 + + BUFSIZ :: 8192 + + EOF :: int(-1) + + FOPEN_MAX :: 128 + + FILENAME_MAX :: 256 + + L_tmpnam :: 512 + + SEEK_SET :: 0 + SEEK_CUR :: 1 + SEEK_END :: 2 + + TMP_MAX :: 32768 + + foreign libc { + stderr: ^FILE + stdin: ^FILE + stdout: ^FILE + } +} + @(default_calling_convention="c") foreign libc { // 7.21.4 Operations on files diff --git a/core/c/libc/time.odin b/core/c/libc/time.odin index 72b899546..4c4280f30 100644 --- a/core/c/libc/time.odin +++ b/core/c/libc/time.odin @@ -45,7 +45,7 @@ when ODIN_OS == .Windows { } } -when ODIN_OS == .Linux || ODIN_OS == .FreeBSD || ODIN_OS == .Darwin || ODIN_OS == .OpenBSD { +when ODIN_OS == .Linux || ODIN_OS == .FreeBSD || ODIN_OS == .Darwin || ODIN_OS == .OpenBSD || ODIN_OS == .Haiku { 
@(default_calling_convention="c") foreign libc { // 7.27.2 Time manipulation functions diff --git a/core/c/libc/wctype.odin b/core/c/libc/wctype.odin index 43aee9dc6..cbce220d4 100644 --- a/core/c/libc/wctype.odin +++ b/core/c/libc/wctype.odin @@ -29,7 +29,11 @@ when ODIN_OS == .Windows { } else when ODIN_OS == .FreeBSD { wctrans_t :: distinct int wctype_t :: distinct ulong - + +} else when ODIN_OS == .Haiku { + wctrans_t :: distinct i32 + wctype_t :: distinct i32 + } @(default_calling_convention="c") diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index e3e7a2bb5..02803f882 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -2814,10 +2814,10 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) { value := runtime.map_cell_index_dynamic(vs, info.map_info.vs, bucket_index) fmt_arg(&Info{writer = fi.writer}, any{rawptr(key), info.key.id}, verb) - if verb == 'v' { - io.write_string(fi.writer, "=", &fi.n) - } else { + if hash { io.write_string(fi.writer, " = ", &fi.n) + } else { + io.write_string(fi.writer, "=", &fi.n) } fmt_arg(fi, any{rawptr(value), info.value.id}, verb) diff --git a/core/math/big/internal.odin b/core/math/big/internal.odin index 35c95f465..03623e7f2 100644 --- a/core/math/big/internal.odin +++ b/core/math/big/internal.odin @@ -1181,28 +1181,18 @@ internal_cmp_digit :: internal_compare_digit */ internal_int_compare_magnitude :: #force_inline proc(a, b: ^Int) -> (comparison: int) { assert_if_nil(a, b) - /* - Compare based on used digits. - */ + + // Compare based on used digits. if a.used != b.used { - if a.used > b.used { - return +1 - } - return -1 + return +1 if a.used > b.used else -1 } - /* - Same number of used digits, compare based on their value. - */ + // Same number of used digits, compare based on their value. 
#no_bounds_check for n := a.used - 1; n >= 0; n -= 1 { if a.digit[n] != b.digit[n] { - if a.digit[n] > b.digit[n] { - return +1 - } - return -1 + return +1 if a.digit[n] > b.digit[n] else -1 } } - return 0 } internal_compare_magnitude :: proc { internal_int_compare_magnitude, } diff --git a/core/math/big/private.odin b/core/math/big/private.odin index d045b4239..2ee6cfafa 100644 --- a/core/math/big/private.odin +++ b/core/math/big/private.odin @@ -1,3402 +1,3355 @@ -/* - Copyright 2021 Jeroen van Rijn . - Made available under Odin's BSD-3 license. - - An arbitrary precision mathematics implementation in Odin. - For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. - The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. - - ============================= Private procedures ============================= - - Private procedures used by the above low-level routines follow. - - Don't call these yourself unless you really know what you're doing. - They include implementations that are optimimal for certain ranges of input only. - - These aren't exported for the same reasons. -*/ - - -package math_big - -import "base:intrinsics" -import "core:mem" - -/* - Multiplies |a| * |b| and only computes upto digs digits of result. - HAC pp. 595, Algorithm 14.12 Modified so you can control how - many digits of output are created. -*/ -_private_int_mul :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - /* - Can we use the fast multiplier? - */ - if digits < _WARRAY && min(a.used, b.used) < _MAX_COMBA { - return #force_inline _private_int_mul_comba(dest, a, b, digits) - } - - /* - Set up temporary output `Int`, which we'll swap for `dest` when done. - */ - - t := &Int{} - - internal_grow(t, max(digits, _DEFAULT_DIGIT_COUNT)) or_return - t.used = digits - - /* - Compute the digits of the product directly. 
- */ - pa := a.used - for ix := 0; ix < pa; ix += 1 { - /* - Limit ourselves to `digits` DIGITs of output. - */ - pb := min(b.used, digits - ix) - carry := _WORD(0) - iy := 0 - - /* - Compute the column of the output and propagate the carry. - */ - #no_bounds_check for iy = 0; iy < pb; iy += 1 { - /* - Compute the column as a _WORD. - */ - column := _WORD(t.digit[ix + iy]) + _WORD(a.digit[ix]) * _WORD(b.digit[iy]) + carry - - /* - The new column is the lower part of the result. - */ - t.digit[ix + iy] = DIGIT(column & _WORD(_MASK)) - - /* - Get the carry word from the result. - */ - carry = column >> _DIGIT_BITS - } - /* - Set carry if it is placed below digits - */ - if ix + iy < digits { - t.digit[ix + pb] = DIGIT(carry) - } - } - - internal_swap(dest, t) - internal_destroy(t) - return internal_clamp(dest) -} - - -/* - Multiplication using the Toom-Cook 3-way algorithm. - - Much more complicated than Karatsuba but has a lower asymptotic running time of O(N**1.464). - This algorithm is only particularly useful on VERY large inputs. - (We're talking 1000s of digits here...). - - This file contains code from J. Arndt's book "Matters Computational" - and the accompanying FXT-library with permission of the author. - - Setup from: - Chung, Jaewook, and M. Anwar Hasan. "Asymmetric squaring formulae." - 18th IEEE Symposium on Computer Arithmetic (ARITH'07). IEEE, 2007. - - The interpolation from above needed one temporary variable more than the interpolation here: - - Bodrato, Marco, and Alberto Zanoni. "What about Toom-Cook matrices optimality." - Centro Vito Volterra Universita di Roma Tor Vergata (2006) -*/ -_private_int_mul_toom :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - S1, S2, T1, a0, a1, a2, b0, b1, b2 := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{} - defer internal_destroy(S1, S2, T1, a0, a1, a2, b0, b1, b2) - - /* - Init temps. 
- */ - internal_init_multi(S1, S2, T1) or_return - - /* - B - */ - B := min(a.used, b.used) / 3 - - /* - a = a2 * x^2 + a1 * x + a0; - */ - internal_grow(a0, B) or_return - internal_grow(a1, B) or_return - internal_grow(a2, a.used - 2 * B) or_return - - a0.used, a1.used = B, B - a2.used = a.used - 2 * B - - internal_copy_digits(a0, a, a0.used) or_return - internal_copy_digits(a1, a, a1.used, B) or_return - internal_copy_digits(a2, a, a2.used, 2 * B) or_return - - internal_clamp(a0) - internal_clamp(a1) - internal_clamp(a2) - - /* - b = b2 * x^2 + b1 * x + b0; - */ - internal_grow(b0, B) or_return - internal_grow(b1, B) or_return - internal_grow(b2, b.used - 2 * B) or_return - - b0.used, b1.used = B, B - b2.used = b.used - 2 * B - - internal_copy_digits(b0, b, b0.used) or_return - internal_copy_digits(b1, b, b1.used, B) or_return - internal_copy_digits(b2, b, b2.used, 2 * B) or_return - - internal_clamp(b0) - internal_clamp(b1) - internal_clamp(b2) - - - /* - \\ S1 = (a2+a1+a0) * (b2+b1+b0); - */ - internal_add(T1, a2, a1) or_return /* T1 = a2 + a1; */ - internal_add(S2, T1, a0) or_return /* S2 = T1 + a0; */ - internal_add(dest, b2, b1) or_return /* dest = b2 + b1; */ - internal_add(S1, dest, b0) or_return /* S1 = c + b0; */ - internal_mul(S1, S1, S2) or_return /* S1 = S1 * S2; */ - - /* - \\S2 = (4*a2+2*a1+a0) * (4*b2+2*b1+b0); - */ - internal_add(T1, T1, a2) or_return /* T1 = T1 + a2; */ - internal_int_shl1(T1, T1) or_return /* T1 = T1 << 1; */ - internal_add(T1, T1, a0) or_return /* T1 = T1 + a0; */ - internal_add(dest, dest, b2) or_return /* c = c + b2; */ - internal_int_shl1(dest, dest) or_return /* c = c << 1; */ - internal_add(dest, dest, b0) or_return /* c = c + b0; */ - internal_mul(S2, T1, dest) or_return /* S2 = T1 * c; */ - - /* - \\S3 = (a2-a1+a0) * (b2-b1+b0); - */ - internal_sub(a1, a2, a1) or_return /* a1 = a2 - a1; */ - internal_add(a1, a1, a0) or_return /* a1 = a1 + a0; */ - internal_sub(b1, b2, b1) or_return /* b1 = b2 - b1; */ - internal_add(b1, 
b1, b0) or_return /* b1 = b1 + b0; */ - internal_mul(a1, a1, b1) or_return /* a1 = a1 * b1; */ - internal_mul(b1, a2, b2) or_return /* b1 = a2 * b2; */ - - /* - \\S2 = (S2 - S3) / 3; - */ - internal_sub(S2, S2, a1) or_return /* S2 = S2 - a1; */ - _private_int_div_3(S2, S2) or_return /* S2 = S2 / 3; \\ this is an exact division */ - internal_sub(a1, S1, a1) or_return /* a1 = S1 - a1; */ - internal_int_shr1(a1, a1) or_return /* a1 = a1 >> 1; */ - internal_mul(a0, a0, b0) or_return /* a0 = a0 * b0; */ - internal_sub(S1, S1, a0) or_return /* S1 = S1 - a0; */ - internal_sub(S2, S2, S1) or_return /* S2 = S2 - S1; */ - internal_int_shr1(S2, S2) or_return /* S2 = S2 >> 1; */ - internal_sub(S1, S1, a1) or_return /* S1 = S1 - a1; */ - internal_sub(S1, S1, b1) or_return /* S1 = S1 - b1; */ - internal_int_shl1(T1, b1) or_return /* T1 = b1 << 1; */ - internal_sub(S2, S2, T1) or_return /* S2 = S2 - T1; */ - internal_sub(a1, a1, S2) or_return /* a1 = a1 - S2; */ - - /* - P = b1*x^4+ S2*x^3+ S1*x^2+ a1*x + a0; - */ - _private_int_shl_leg(b1, 4 * B) or_return - _private_int_shl_leg(S2, 3 * B) or_return - internal_add(b1, b1, S2) or_return - _private_int_shl_leg(S1, 2 * B) or_return - internal_add(b1, b1, S1) or_return - _private_int_shl_leg(a1, 1 * B) or_return - internal_add(b1, b1, a1) or_return - internal_add(dest, b1, a0) or_return - - /* - a * b - P - */ - return nil -} - -/* - product = |a| * |b| using Karatsuba Multiplication using three half size multiplications. - - Let `B` represent the radix [e.g. 2**_DIGIT_BITS] and let `n` represent - half of the number of digits in the min(a,b) - - `a` = `a1` * `B`**`n` + `a0` - `b` = `b`1 * `B`**`n` + `b0` - - Then, a * b => 1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 - - Note that a1b1 and a0b0 are used twice and only need to be computed once. 
- So in total three half size (half # of digit) multiplications are performed, - a0b0, a1b1 and (a1+b1)(a0+b0) - - Note that a multiplication of half the digits requires 1/4th the number of - single precision multiplications, so in total after one call 25% of the - single precision multiplications are saved. - - Note also that the call to `internal_mul` can end up back in this function - if the a0, a1, b0, or b1 are above the threshold. - - This is known as divide-and-conquer and leads to the famous O(N**lg(3)) or O(N**1.584) - work which is asymptopically lower than the standard O(N**2) that the - baseline/comba methods use. Generally though, the overhead of this method doesn't pay off - until a certain size is reached, of around 80 used DIGITs. -*/ -_private_int_mul_karatsuba :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - x0, x1, y0, y1, t1, x0y0, x1y1 := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{} - defer internal_destroy(x0, x1, y0, y1, t1, x0y0, x1y1) - - /* - min # of digits, divided by two. - */ - B := min(a.used, b.used) >> 1 - - /* - Init all the temps. - */ - internal_grow(x0, B) or_return - internal_grow(x1, a.used - B) or_return - internal_grow(y0, B) or_return - internal_grow(y1, b.used - B) or_return - internal_grow(t1, B * 2) or_return - internal_grow(x0y0, B * 2) or_return - internal_grow(x1y1, B * 2) or_return - - /* - Now shift the digits. - */ - x0.used, y0.used = B, B - x1.used = a.used - B - y1.used = b.used - B - - /* - We copy the digits directly instead of using higher level functions - since we also need to shift the digits. - */ - internal_copy_digits(x0, a, x0.used) - internal_copy_digits(y0, b, y0.used) - internal_copy_digits(x1, a, x1.used, B) - internal_copy_digits(y1, b, y1.used, B) - - /* - Only need to clamp the lower words since by definition the - upper words x1/y1 must have a known number of digits. 
- */ - clamp(x0) - clamp(y0) - - /* - Now calc the products x0y0 and x1y1, - after this x0 is no longer required, free temp [x0==t2]! - */ - internal_mul(x0y0, x0, y0) or_return /* x0y0 = x0*y0 */ - internal_mul(x1y1, x1, y1) or_return /* x1y1 = x1*y1 */ - internal_add(t1, x1, x0) or_return /* now calc x1+x0 and */ - internal_add(x0, y1, y0) or_return /* t2 = y1 + y0 */ - internal_mul(t1, t1, x0) or_return /* t1 = (x1 + x0) * (y1 + y0) */ - - /* - Add x0y0. - */ - internal_add(x0, x0y0, x1y1) or_return /* t2 = x0y0 + x1y1 */ - internal_sub(t1, t1, x0) or_return /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ - - /* - shift by B. - */ - _private_int_shl_leg(t1, B) or_return /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))< (err: Error) { - context.allocator = allocator - - /* - Set up array. - */ - W: [_WARRAY]DIGIT = --- - - /* - Grow the destination as required. - */ - internal_grow(dest, digits) or_return - - /* - Number of output digits to produce. - */ - pa := min(digits, a.used + b.used) - - /* - Clear the carry - */ - _W := _WORD(0) - - ix: int - for ix = 0; ix < pa; ix += 1 { - tx, ty, iy, iz: int - - /* - Get offsets into the two bignums. - */ - ty = min(b.used - 1, ix) - tx = ix - ty - - /* - This is the number of times the loop will iterate, essentially. - while (tx++ < a->used && ty-- >= 0) { ... } - */ - - iy = min(a.used - tx, ty + 1) - - /* - Execute loop. - */ - #no_bounds_check for iz = 0; iz < iy; iz += 1 { - _W += _WORD(a.digit[tx + iz]) * _WORD(b.digit[ty - iz]) - } - - /* - Store term. - */ - W[ix] = DIGIT(_W) & _MASK - - /* - Make next carry. - */ - _W = _W >> _WORD(_DIGIT_BITS) - } - - /* - Setup dest. - */ - old_used := dest.used - dest.used = pa - - /* - Now extract the previous digit [below the carry]. - */ - copy_slice(dest.digit[0:], W[:pa]) - - /* - Clear unused digits [that existed in the old copy of dest]. - */ - internal_zero_unused(dest, old_used) - - /* - Adjust dest.used based on leading zeroes. 
- */ - - return internal_clamp(dest) -} - -/* - Multiplies |a| * |b| and does not compute the lower digs digits - [meant to get the higher part of the product] -*/ -_private_int_mul_high :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - /* - Can we use the fast multiplier? - */ - if a.used + b.used + 1 < _WARRAY && min(a.used, b.used) < _MAX_COMBA { - return _private_int_mul_high_comba(dest, a, b, digits) - } - - internal_grow(dest, a.used + b.used + 1) or_return - dest.used = a.used + b.used + 1 - - pa := a.used - pb := b.used - for ix := 0; ix < pa; ix += 1 { - carry := DIGIT(0) - - for iy := digits - ix; iy < pb; iy += 1 { - /* - Calculate the double precision result. - */ - r := _WORD(dest.digit[ix + iy]) + _WORD(a.digit[ix]) * _WORD(b.digit[iy]) + _WORD(carry) - - /* - Get the lower part. - */ - dest.digit[ix + iy] = DIGIT(r & _WORD(_MASK)) - - /* - Carry the carry. - */ - carry = DIGIT(r >> _WORD(_DIGIT_BITS)) - } - dest.digit[ix + pb] = carry - } - return internal_clamp(dest) -} - -/* - This is a modified version of `_private_int_mul_comba` that only produces output digits *above* `digits`. - See the comments for `_private_int_mul_comba` to see how it works. - - This is used in the Barrett reduction since for one of the multiplications - only the higher digits were needed. This essentially halves the work. - - Based on Algorithm 14.12 on pp.595 of HAC. -*/ -_private_int_mul_high_comba :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - W: [_WARRAY]DIGIT = --- - _W: _WORD = 0 - - /* - Number of output digits to produce. Grow the destination as required. - */ - pa := a.used + b.used - internal_grow(dest, pa) or_return - - ix: int - for ix = digits; ix < pa; ix += 1 { - /* - Get offsets into the two bignums. 
- */ - ty := min(b.used - 1, ix) - tx := ix - ty - - /* - This is the number of times the loop will iterrate, essentially it's - while (tx++ < a->used && ty-- >= 0) { ... } - */ - iy := min(a.used - tx, ty + 1) - - /* - Execute loop. - */ - for iz := 0; iz < iy; iz += 1 { - _W += _WORD(a.digit[tx + iz]) * _WORD(b.digit[ty - iz]) - } - - /* - Store term. - */ - W[ix] = DIGIT(_W) & DIGIT(_MASK) - - /* - Make next carry. - */ - _W = _W >> _WORD(_DIGIT_BITS) - } - - /* - Setup dest - */ - old_used := dest.used - dest.used = pa - - for ix = digits; ix < pa; ix += 1 { - /* - Now extract the previous digit [below the carry]. - */ - dest.digit[ix] = W[ix] - } - - /* - Zero remainder. - */ - internal_zero_unused(dest, old_used) - - /* - Adjust dest.used based on leading zeroes. - */ - return internal_clamp(dest) -} - -/* - Single-digit multiplication with the smaller number as the single-digit. -*/ -_private_int_mul_balance :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - a, b := a, b - - a0, tmp, r := &Int{}, &Int{}, &Int{} - defer internal_destroy(a0, tmp, r) - - b_size := min(a.used, b.used) - n_blocks := max(a.used, b.used) / b_size - - internal_grow(a0, b_size + 2) or_return - internal_init_multi(tmp, r) or_return - - /* - Make sure that `a` is the larger one. - */ - if a.used < b.used { - a, b = b, a - } - assert(a.used >= b.used) - - i, j := 0, 0 - for ; i < n_blocks; i += 1 { - /* - Cut a slice off of `a`. - */ - - a0.used = b_size - internal_copy_digits(a0, a, a0.used, j) - j += a0.used - internal_clamp(a0) - - /* - Multiply with `b`. - */ - internal_mul(tmp, a0, b) or_return - - /* - Shift `tmp` to the correct position. - */ - _private_int_shl_leg(tmp, b_size * i) or_return - - /* - Add to output. No carry needed. - */ - internal_add(r, r, tmp) or_return - } - - /* - The left-overs; there are always left-overs. 
- */ - if j < a.used { - a0.used = a.used - j - internal_copy_digits(a0, a, a0.used, j) - j += a0.used - internal_clamp(a0) - - internal_mul(tmp, a0, b) or_return - _private_int_shl_leg(tmp, b_size * i) or_return - internal_add(r, r, tmp) or_return - } - - internal_swap(dest, r) - return -} - -/* - Low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 - Assumes `dest` and `src` to not be `nil`, and `src` to have been initialized. -*/ -_private_int_sqr :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - pa := src.used - - t := &Int{}; ix, iy: int - /* - Grow `t` to maximum needed size, or `_DEFAULT_DIGIT_COUNT`, whichever is bigger. - */ - internal_grow(t, max((2 * pa) + 1, _DEFAULT_DIGIT_COUNT)) or_return - t.used = (2 * pa) + 1 - - #no_bounds_check for ix = 0; ix < pa; ix += 1 { - carry := DIGIT(0) - /* - First calculate the digit at 2*ix; calculate double precision result. - */ - r := _WORD(t.digit[ix+ix]) + (_WORD(src.digit[ix]) * _WORD(src.digit[ix])) - - /* - Store lower part in result. - */ - t.digit[ix+ix] = DIGIT(r & _WORD(_MASK)) - /* - Get the carry. - */ - carry = DIGIT(r >> _DIGIT_BITS) - - #no_bounds_check for iy = ix + 1; iy < pa; iy += 1 { - /* - First calculate the product. - */ - r = _WORD(src.digit[ix]) * _WORD(src.digit[iy]) - - /* Now calculate the double precision result. Nóte we use - * addition instead of *2 since it's easier to optimize - */ - r = _WORD(t.digit[ix+iy]) + r + r + _WORD(carry) - - /* - Store lower part. - */ - t.digit[ix+iy] = DIGIT(r & _WORD(_MASK)) - - /* - Get carry. - */ - carry = DIGIT(r >> _DIGIT_BITS) - } - /* - Propagate upwards. - */ - #no_bounds_check for carry != 0 { - r = _WORD(t.digit[ix+iy]) + _WORD(carry) - t.digit[ix+iy] = DIGIT(r & _WORD(_MASK)) - carry = DIGIT(r >> _WORD(_DIGIT_BITS)) - iy += 1 - } - } - - err = internal_clamp(t) - internal_swap(dest, t) - internal_destroy(t) - return err -} - -/* - The jist of squaring...
- You do like mult except the offset of the tmpx [one that starts closer to zero] can't equal the offset of tmpy. - So basically you set up iy like before then you min it with (ty-tx) so that it never happens. - You double all those you add in the inner loop. After that loop you do the squares and add them in. - - Assumes `dest` and `src` not to be `nil` and `src` to have been initialized. -*/ -_private_int_sqr_comba :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - W: [_WARRAY]DIGIT = --- - - /* - Grow the destination as required. - */ - pa := uint(src.used) + uint(src.used) - internal_grow(dest, int(pa)) or_return - - /* - Number of output digits to produce. - */ - W1 := _WORD(0) - _W : _WORD = --- - ix := uint(0) - - #no_bounds_check for ; ix < pa; ix += 1 { - /* - Clear counter. - */ - _W = {} - - /* - Get offsets into the two bignums. - */ - ty := min(uint(src.used) - 1, ix) - tx := ix - ty - - /* - This is the number of times the loop will iterate, - essentially while (tx++ < a->used && ty-- >= 0) { ... } - */ - iy := min(uint(src.used) - tx, ty + 1) - - /* - Now for squaring, tx can never equal ty. - We halve the distance since they approach at a rate of 2x, - and we have to round because odd cases need to be executed. - */ - iy = min(iy, ((ty - tx) + 1) >> 1 ) - - /* - Execute loop. - */ - #no_bounds_check for iz := uint(0); iz < iy; iz += 1 { - _W += _WORD(src.digit[tx + iz]) * _WORD(src.digit[ty - iz]) - } - - /* - Double the inner product and add carry. - */ - _W = _W + _W + W1 - - /* - Even columns have the square term in them. - */ - if ix & 1 == 0 { - _W += _WORD(src.digit[ix >> 1]) * _WORD(src.digit[ix >> 1]) - } - - /* - Store it. - */ - W[ix] = DIGIT(_W & _WORD(_MASK)) - - /* - Make next carry. - */ - W1 = _W >> _DIGIT_BITS - } - - /* - Setup dest. 
- */ - old_used := dest.used - dest.used = src.used + src.used - - #no_bounds_check for ix = 0; ix < pa; ix += 1 { - dest.digit[ix] = W[ix] & _MASK - } - - /* - Clear unused digits [that existed in the old copy of dest]. - */ - internal_zero_unused(dest, old_used) - - return internal_clamp(dest) -} - -/* - Karatsuba squaring, computes `dest` = `src` * `src` using three half-size squarings. - - See comments of `_private_int_mul_karatsuba` for details. - It is essentially the same algorithm but merely tuned to perform recursive squarings. -*/ -_private_int_sqr_karatsuba :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - x0, x1, t1, t2, x0x0, x1x1 := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{} - defer internal_destroy(x0, x1, t1, t2, x0x0, x1x1) - - /* - Min # of digits, divided by two. - */ - B := src.used >> 1 - - /* - Init temps. - */ - internal_grow(x0, B) or_return - internal_grow(x1, src.used - B) or_return - internal_grow(t1, src.used * 2) or_return - internal_grow(t2, src.used * 2) or_return - internal_grow(x0x0, B * 2 ) or_return - internal_grow(x1x1, (src.used - B) * 2) or_return - - /* - Now shift the digits. - */ - x0.used = B - x1.used = src.used - B - - #force_inline internal_copy_digits(x0, src, x0.used) - #force_inline mem.copy_non_overlapping(&x1.digit[0], &src.digit[B], size_of(DIGIT) * x1.used) - #force_inline internal_clamp(x0) - - /* - Now calc the products x0*x0 and x1*x1. - */ - internal_sqr(x0x0, x0) or_return - internal_sqr(x1x1, x1) or_return - - /* - Now calc (x1+x0)^2 - */ - internal_add(t1, x0, x1) or_return - internal_sqr(t1, t1) or_return - - /* - Add x0y0 - */ - internal_add(t2, x0x0, x1x1) or_return - internal_sub(t1, t1, t2) or_return - - /* - Shift by B. 
- */ - _private_int_shl_leg(t1, B) or_return - _private_int_shl_leg(x1x1, B * 2) or_return - internal_add(t1, t1, x0x0) or_return - internal_add(dest, t1, x1x1) or_return - - return #force_inline internal_clamp(dest) -} - -/* - Squaring using Toom-Cook 3-way algorithm. - - Setup and interpolation from algorithm SQR_3 in Chung, Jaewook, and M. Anwar Hasan. "Asymmetric squaring formulae." - 18th IEEE Symposium on Computer Arithmetic (ARITH'07). IEEE, 2007. -*/ -_private_int_sqr_toom :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - S0, a0, a1, a2 := &Int{}, &Int{}, &Int{}, &Int{} - defer internal_destroy(S0, a0, a1, a2) - - /* - Init temps. - */ - internal_zero(S0) or_return - - /* - B - */ - B := src.used / 3 - - /* - a = a2 * x^2 + a1 * x + a0; - */ - internal_grow(a0, B) or_return - internal_grow(a1, B) or_return - internal_grow(a2, src.used - (2 * B)) or_return - - a0.used = B - a1.used = B - a2.used = src.used - 2 * B - - #force_inline mem.copy_non_overlapping(&a0.digit[0], &src.digit[ 0], size_of(DIGIT) * a0.used) - #force_inline mem.copy_non_overlapping(&a1.digit[0], &src.digit[ B], size_of(DIGIT) * a1.used) - #force_inline mem.copy_non_overlapping(&a2.digit[0], &src.digit[2 * B], size_of(DIGIT) * a2.used) - - internal_clamp(a0) - internal_clamp(a1) - internal_clamp(a2) - - /** S0 = a0^2; */ - internal_sqr(S0, a0) or_return - - /** \\S1 = (a2 + a1 + a0)^2 */ - /** \\S2 = (a2 - a1 + a0)^2 */ - /** \\S1 = a0 + a2; */ - /** a0 = a0 + a2; */ - internal_add(a0, a0, a2) or_return - /** \\S2 = S1 - a1; */ - /** b = a0 - a1; */ - internal_sub(dest, a0, a1) or_return - /** \\S1 = S1 + a1; */ - /** a0 = a0 + a1; */ - internal_add(a0, a0, a1) or_return - /** \\S1 = S1^2; */ - /** a0 = a0^2; */ - internal_sqr(a0, a0) or_return - /** \\S2 = S2^2; */ - /** b = b^2; */ - internal_sqr(dest, dest) or_return - /** \\ S3 = 2 * a1 * a2 */ - /** \\S3 = a1 * a2; */ - /** a1 = a1 * a2; */ - internal_mul(a1, a1, a2) 
or_return - /** \\S3 = S3 << 1; */ - /** a1 = a1 << 1; */ - internal_shl(a1, a1, 1) or_return - /** \\S4 = a2^2; */ - /** a2 = a2^2; */ - internal_sqr(a2, a2) or_return - /** \\ tmp = (S1 + S2)/2 */ - /** \\tmp = S1 + S2; */ - /** b = a0 + b; */ - internal_add(dest, a0, dest) or_return - /** \\tmp = tmp >> 1; */ - /** b = b >> 1; */ - internal_shr(dest, dest, 1) or_return - /** \\ S1 = S1 - tmp - S3 */ - /** \\S1 = S1 - tmp; */ - /** a0 = a0 - b; */ - internal_sub(a0, a0, dest) or_return - /** \\S1 = S1 - S3; */ - /** a0 = a0 - a1; */ - internal_sub(a0, a0, a1) or_return - /** \\S2 = tmp - S4 -S0 */ - /** \\S2 = tmp - S4; */ - /** b = b - a2; */ - internal_sub(dest, dest, a2) or_return - /** \\S2 = S2 - S0; */ - /** b = b - S0; */ - internal_sub(dest, dest, S0) or_return - /** \\P = S4*x^4 + S3*x^3 + S2*x^2 + S1*x + S0; */ - /** P = a2*x^4 + a1*x^3 + b*x^2 + a0*x + S0; */ - _private_int_shl_leg( a2, 4 * B) or_return - _private_int_shl_leg( a1, 3 * B) or_return - _private_int_shl_leg(dest, 2 * B) or_return - _private_int_shl_leg( a0, 1 * B) or_return - - internal_add(a2, a2, a1) or_return - internal_add(dest, dest, a2) or_return - internal_add(dest, dest, a0) or_return - internal_add(dest, dest, S0) or_return - /** a^2 - P */ - - return #force_inline internal_clamp(dest) -} - -/* - Divide by three (based on routine from MPI and the GMP manual). -*/ -_private_int_div_3 :: proc(quotient, numerator: ^Int, allocator := context.allocator) -> (remainder: DIGIT, err: Error) { - context.allocator = allocator - - /* - b = 2^_DIGIT_BITS / 3 - */ - b := _WORD(1) << _WORD(_DIGIT_BITS) / _WORD(3) - - q := &Int{} - internal_grow(q, numerator.used) or_return - q.used = numerator.used - q.sign = numerator.sign - - w, t: _WORD - #no_bounds_check for ix := numerator.used; ix >= 0; ix -= 1 { - w = (w << _WORD(_DIGIT_BITS)) | _WORD(numerator.digit[ix]) - if w >= 3 { - /* - Multiply w by [1/3]. 
- */ - t = (w * b) >> _WORD(_DIGIT_BITS) - - /* - Now subtract 3 * [w/3] from w, to get the remainder. - */ - w -= t+t+t - - /* - Fixup the remainder as required since the optimization is not exact. - */ - for w >= 3 { - t += 1 - w -= 3 - } - } else { - t = 0 - } - q.digit[ix] = DIGIT(t) - } - remainder = DIGIT(w) - - /* - [optional] store the quotient. - */ - if quotient != nil { - err = clamp(q) - internal_swap(q, quotient) - } - internal_destroy(q) - return remainder, nil -} - -/* - Signed Integer Division - - c*b + d == a [i.e. a/b, c=quotient, d=remainder], HAC pp.598 Algorithm 14.20 - - Note that the description in HAC is horribly incomplete. - For example, it doesn't consider the case where digits are removed from 'x' in - the inner loop. - - It also doesn't consider the case that y has fewer than three digits, etc. - The overall algorithm is as described as 14.20 from HAC but fixed to treat these cases. -*/ -_private_int_div_school :: proc(quotient, remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - error_if_immutable(quotient, remainder) or_return - - q, x, y, t1, t2 := &Int{}, &Int{}, &Int{}, &Int{}, &Int{} - defer internal_destroy(q, x, y, t1, t2) - - internal_grow(q, numerator.used + 2) or_return - q.used = numerator.used + 2 - - internal_init_multi(t1, t2) or_return - internal_copy(x, numerator) or_return - internal_copy(y, denominator) or_return - - /* - Fix the sign. - */ - neg := numerator.sign != denominator.sign - x.sign = .Zero_or_Positive - y.sign = .Zero_or_Positive - - /* - Normalize both x and y, ensure that y >= b/2, [b == 2**MP_DIGIT_BIT] - */ - norm := internal_count_bits(y) % _DIGIT_BITS - - if norm < _DIGIT_BITS - 1 { - norm = (_DIGIT_BITS - 1) - norm - internal_shl(x, x, norm) or_return - internal_shl(y, y, norm) or_return - } else { - norm = 0 - } - - /* - Note: HAC does 0 based, so if used==5 then it's 0,1,2,3,4, i.e. 
use 4 - */ - n := x.used - 1 - t := y.used - 1 - - /* - while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } - y = y*b**{n-t} - */ - - _private_int_shl_leg(y, n - t) or_return - - gte := internal_gte(x, y) - for gte { - q.digit[n - t] += 1 - internal_sub(x, x, y) or_return - gte = internal_gte(x, y) - } - - /* - Reset y by shifting it back down. - */ - _private_int_shr_leg(y, n - t) - - /* - Step 3. for i from n down to (t + 1). - */ - #no_bounds_check for i := n; i >= (t + 1); i -= 1 { - if i > x.used { continue } - - /* - step 3.1 if xi == yt then set q{i-t-1} to b-1, otherwise set q{i-t-1} to (xi*b + x{i-1})/yt - */ - if x.digit[i] == y.digit[t] { - q.digit[(i - t) - 1] = 1 << (_DIGIT_BITS - 1) - } else { - - tmp := _WORD(x.digit[i]) << _DIGIT_BITS - tmp |= _WORD(x.digit[i - 1]) - tmp /= _WORD(y.digit[t]) - if tmp > _WORD(_MASK) { - tmp = _WORD(_MASK) - } - q.digit[(i - t) - 1] = DIGIT(tmp & _WORD(_MASK)) - } - - /* while (q{i-t-1} * (yt * b + y{t-1})) > - xi * b**2 + xi-1 * b + xi-2 - - do q{i-t-1} -= 1; - */ - - iter := 0 - - q.digit[(i - t) - 1] = (q.digit[(i - t) - 1] + 1) & _MASK - #no_bounds_check for { - q.digit[(i - t) - 1] = (q.digit[(i - t) - 1] - 1) & _MASK - - /* - Find left hand. - */ - internal_zero(t1) - t1.digit[0] = ((t - 1) < 0) ? 0 : y.digit[t - 1] - t1.digit[1] = y.digit[t] - t1.used = 2 - internal_mul(t1, t1, q.digit[(i - t) - 1]) or_return - - /* - Find right hand. - */ - t2.digit[0] = ((i - 2) < 0) ? 
0 : x.digit[i - 2] - t2.digit[1] = x.digit[i - 1] /* i >= 1 always holds */ - t2.digit[2] = x.digit[i] - t2.used = 3 - - if internal_lte(t1, t2) { - break - } - iter += 1; if iter > 100 { - return .Max_Iterations_Reached - } - } - - /* - Step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} - */ - int_mul_digit(t1, y, q.digit[(i - t) - 1]) or_return - _private_int_shl_leg(t1, (i - t) - 1) or_return - internal_sub(x, x, t1) or_return - - /* - if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } - */ - if x.sign == .Negative { - internal_copy(t1, y) or_return - _private_int_shl_leg(t1, (i - t) - 1) or_return - internal_add(x, x, t1) or_return - - q.digit[(i - t) - 1] = (q.digit[(i - t) - 1] - 1) & _MASK - } - } - - /* - Now q is the quotient and x is the remainder, [which we have to normalize] - Get sign before writing to c. - */ - z, _ := is_zero(x) - x.sign = .Zero_or_Positive if z else numerator.sign - - if quotient != nil { - internal_clamp(q) - internal_swap(q, quotient) - quotient.sign = .Negative if neg else .Zero_or_Positive - } - - if remainder != nil { - internal_shr(x, x, norm) or_return - internal_swap(x, remainder) - } - - return nil -} - -/* - Direct implementation of algorithms 1.8 "RecursiveDivRem" and 1.9 "UnbalancedDivision" from: - - Brent, Richard P., and Paul Zimmermann. "Modern computer arithmetic" - Vol. 18. Cambridge University Press, 2010 - Available online at https://arxiv.org/pdf/1004.4710 - - pages 19ff. in the above online document. 
-*/ -_private_div_recursion :: proc(quotient, remainder, a, b: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - A1, A2, B1, B0, Q1, Q0, R1, R0, t := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{} - defer internal_destroy(A1, A2, B1, B0, Q1, Q0, R1, R0, t) - - m := a.used - b.used - k := m / 2 - - if m < MUL_KARATSUBA_CUTOFF { - return _private_int_div_school(quotient, remainder, a, b) - } - - internal_init_multi(A1, A2, B1, B0, Q1, Q0, R1, R0, t) or_return - - /* - `B1` = `b` / `beta`^`k`, `B0` = `b` % `beta`^`k` - */ - internal_shrmod(B1, B0, b, k * _DIGIT_BITS) or_return - - /* - (Q1, R1) = RecursiveDivRem(A / beta^(2k), B1) - */ - internal_shrmod(A1, t, a, 2 * k * _DIGIT_BITS) or_return - _private_div_recursion(Q1, R1, A1, B1) or_return - - /* - A1 = (R1 * beta^(2k)) + (A % beta^(2k)) - (Q1 * B0 * beta^k) - */ - _private_int_shl_leg(R1, 2 * k) or_return - internal_add(A1, R1, t) or_return - internal_mul(t, Q1, B0) or_return - - /* - While A1 < 0 do Q1 = Q1 - 1, A1 = A1 + (beta^k * B) - */ - if internal_lt(A1, 0) { - internal_shl(t, b, k * _DIGIT_BITS) or_return - - for { - internal_decr(Q1) or_return - internal_add(A1, A1, t) or_return - if internal_gte(A1, 0) { break } - } - } - - /* - (Q0, R0) = RecursiveDivRem(A1 / beta^(k), B1) - */ - internal_shrmod(A1, t, A1, k * _DIGIT_BITS) or_return - _private_div_recursion(Q0, R0, A1, B1) or_return - - /* - A2 = (R0*beta^k) + (A1 % beta^k) - (Q0*B0) - */ - _private_int_shl_leg(R0, k) or_return - internal_add(A2, R0, t) or_return - internal_mul(t, Q0, B0) or_return - internal_sub(A2, A2, t) or_return - - /* - While A2 < 0 do Q0 = Q0 - 1, A2 = A2 + B. - */ - for internal_is_negative(A2) { // internal_lt(A2, 0) { - internal_decr(Q0) or_return - internal_add(A2, A2, b) or_return - } - - /* - Return q = (Q1*beta^k) + Q0, r = A2. 
- */ - _private_int_shl_leg(Q1, k) or_return - internal_add(quotient, Q1, Q0) or_return - - return internal_copy(remainder, A2) -} - -_private_int_div_recursive :: proc(quotient, remainder, a, b: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - A, B, Q, Q1, R, A_div, A_mod := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{} - defer internal_destroy(A, B, Q, Q1, R, A_div, A_mod) - - internal_init_multi(A, B, Q, Q1, R, A_div, A_mod) or_return - - /* - Most significant bit of a limb. - Assumes _DIGIT_MAX < (sizeof(DIGIT) * sizeof(u8)). - */ - msb := (_DIGIT_MAX + DIGIT(1)) >> 1 - sigma := 0 - msb_b := b.digit[b.used - 1] - for msb_b < msb { - sigma += 1 - msb_b <<= 1 - } - - /* - Use that sigma to normalize B. - */ - internal_shl(B, b, sigma) or_return - internal_shl(A, a, sigma) or_return - - /* - Fix the sign. - */ - neg := a.sign != b.sign - A.sign = .Zero_or_Positive; B.sign = .Zero_or_Positive - - /* - If the magnitude of "A" is not more more than twice that of "B" we can work - on them directly, otherwise we need to work at "A" in chunks. - */ - n := B.used - m := A.used - B.used - - /* - Q = 0. We already ensured that when we called `internal_init_multi`. - */ - for m > n { - /* - (q, r) = RecursiveDivRem(A / (beta^(m-n)), B) - */ - j := (m - n) * _DIGIT_BITS - internal_shrmod(A_div, A_mod, A, j) or_return - _private_div_recursion(Q1, R, A_div, B) or_return - - /* - Q = (Q*beta!(n)) + q - */ - internal_shl(Q, Q, n * _DIGIT_BITS) or_return - internal_add(Q, Q, Q1) or_return - - /* - A = (r * beta^(m-n)) + (A % beta^(m-n)) - */ - internal_shl(R, R, (m - n) * _DIGIT_BITS) or_return - internal_add(A, R, A_mod) or_return - - /* - m = m - n - */ - m -= n - } - - /* - (q, r) = RecursiveDivRem(A, B) - */ - _private_div_recursion(Q1, R, A, B) or_return - - /* - Q = (Q * beta^m) + q, R = r - */ - internal_shl(Q, Q, m * _DIGIT_BITS) or_return - internal_add(Q, Q, Q1) or_return - - /* - Get sign before writing to dest. 
- */ - R.sign = .Zero_or_Positive if internal_is_zero(Q) else a.sign - - if quotient != nil { - swap(quotient, Q) - quotient.sign = .Negative if neg else .Zero_or_Positive - } - if remainder != nil { - /* - De-normalize the remainder. - */ - internal_shrmod(R, nil, R, sigma) or_return - swap(remainder, R) - } - return nil -} - -/* - Slower bit-bang division... also smaller. -*/ -@(deprecated="Use `_int_div_school`, it's 3.5x faster.") -_private_int_div_small :: proc(quotient, remainder, numerator, denominator: ^Int) -> (err: Error) { - - ta, tb, tq, q := &Int{}, &Int{}, &Int{}, &Int{} - - defer internal_destroy(ta, tb, tq, q) - - for { - internal_one(tq) or_return - - num_bits, _ := count_bits(numerator) - den_bits, _ := count_bits(denominator) - n := num_bits - den_bits - - abs(ta, numerator) or_return - abs(tb, denominator) or_return - shl(tb, tb, n) or_return - shl(tq, tq, n) or_return - - for n >= 0 { - if internal_gte(ta, tb) { - // ta -= tb - sub(ta, ta, tb) or_return - // q += tq - add( q, q, tq) or_return - } - shr1(tb, tb) or_return - shr1(tq, tq) or_return - - n -= 1 - } - - /* - Now q == quotient and ta == remainder. 
- */ - neg := numerator.sign != denominator.sign - if quotient != nil { - swap(quotient, q) - z, _ := is_zero(quotient) - quotient.sign = .Negative if neg && !z else .Zero_or_Positive - } - if remainder != nil { - swap(remainder, ta) - z, _ := is_zero(numerator) - remainder.sign = .Zero_or_Positive if z else numerator.sign - } - - break - } - return err -} - - - -/* - Binary split factorial algo due to: http://www.luschny.de/math/factorial/binarysplitfact.html -*/ -_private_int_factorial_binary_split :: proc(res: ^Int, n: int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - inner, outer, start, stop, temp := &Int{}, &Int{}, &Int{}, &Int{}, &Int{} - defer internal_destroy(inner, outer, start, stop, temp) - - internal_one(inner, false) or_return - internal_one(outer, false) or_return - - bits_used := ilog2(n) - - for i := bits_used; i >= 0; i -= 1 { - start := (n >> (uint(i) + 1)) + 1 | 1 - stop := (n >> uint(i)) + 1 | 1 - _private_int_recursive_product(temp, start, stop, 0) or_return - internal_mul(inner, inner, temp) or_return - internal_mul(outer, outer, inner) or_return - } - shift := n - intrinsics.count_ones(n) - - return internal_shl(res, outer, int(shift)) -} - -/* - Recursive product used by binary split factorial algorithm. 
-*/ -_private_int_recursive_product :: proc(res: ^Int, start, stop: int, level := int(0), allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - t1, t2 := &Int{}, &Int{} - defer internal_destroy(t1, t2) - - if level > FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS { - return .Max_Iterations_Reached - } - - num_factors := (stop - start) >> 1 - if num_factors == 2 { - internal_set(t1, start, false) or_return - when true { - internal_grow(t2, t1.used + 1, false) or_return - internal_add(t2, t1, 2) or_return - } else { - internal_add(t2, t1, 2) or_return - } - return internal_mul(res, t1, t2) - } - - if num_factors > 1 { - mid := (start + num_factors) | 1 - _private_int_recursive_product(t1, start, mid, level + 1) or_return - _private_int_recursive_product(t2, mid, stop, level + 1) or_return - return internal_mul(res, t1, t2) - } - - if num_factors == 1 { - return #force_inline internal_set(res, start, true) - } - - return #force_inline internal_one(res, true) -} - -/* - Internal function computing both GCD using the binary method, - and, if target isn't `nil`, also LCM. - - Expects the `a` and `b` to have been initialized - and one or both of `res_gcd` or `res_lcm` not to be `nil`. - - If both `a` and `b` are zero, return zero. - If either `a` or `b`, return the other one. - - The `gcd` and `lcm` wrappers have already done this test, - but `gcd_lcm` wouldn't have, so we still need to perform it. - - If neither result is wanted, we have nothing to do. -*/ -_private_int_gcd_lcm :: proc(res_gcd, res_lcm, a, b: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - if res_gcd == nil && res_lcm == nil { - return nil - } - - /* - We need a temporary because `res_gcd` is allowed to be `nil`. - */ - if a.used == 0 && b.used == 0 { - /* - GCD(0, 0) and LCM(0, 0) are both 0. 
- */ - if res_gcd != nil { - internal_zero(res_gcd) or_return - } - if res_lcm != nil { - internal_zero(res_lcm) or_return - } - return nil - } else if a.used == 0 { - /* - We can early out with GCD = B and LCM = 0 - */ - if res_gcd != nil { - internal_abs(res_gcd, b) or_return - } - if res_lcm != nil { - internal_zero(res_lcm) or_return - } - return nil - } else if b.used == 0 { - /* - We can early out with GCD = A and LCM = 0 - */ - if res_gcd != nil { - internal_abs(res_gcd, a) or_return - } - if res_lcm != nil { - internal_zero(res_lcm) or_return - } - return nil - } - - temp_gcd_res := &Int{} - defer internal_destroy(temp_gcd_res) - - /* - If neither `a` or `b` was zero, we need to compute `gcd`. - Get copies of `a` and `b` we can modify. - */ - u, v := &Int{}, &Int{} - defer internal_destroy(u, v) - internal_copy(u, a) or_return - internal_copy(v, b) or_return - - /* - Must be positive for the remainder of the algorithm. - */ - u.sign = .Zero_or_Positive; v.sign = .Zero_or_Positive - - /* - B1. Find the common power of two for `u` and `v`. - */ - u_lsb, _ := internal_count_lsb(u) - v_lsb, _ := internal_count_lsb(v) - k := min(u_lsb, v_lsb) - - if k > 0 { - /* - Divide the power of two out. - */ - internal_shr(u, u, k) or_return - internal_shr(v, v, k) or_return - } - - /* - Divide any remaining factors of two out. - */ - if u_lsb != k { - internal_shr(u, u, u_lsb - k) or_return - } - if v_lsb != k { - internal_shr(v, v, v_lsb - k) or_return - } - - for v.used != 0 { - /* - Make sure `v` is the largest. - */ - if internal_gt(u, v) { - /* - Swap `u` and `v` to make sure `v` is >= `u`. - */ - internal_swap(u, v) - } - - /* - Subtract smallest from largest. - */ - internal_sub(v, v, u) or_return - - /* - Divide out all factors of two. - */ - b, _ := internal_count_lsb(v) - internal_shr(v, v, b) or_return - } - - /* - Multiply by 2**k which we divided out at the beginning. 
- */ - internal_shl(temp_gcd_res, u, k) or_return - temp_gcd_res.sign = .Zero_or_Positive - - /* - We've computed `gcd`, either the long way, or because one of the inputs was zero. - If we don't want `lcm`, we're done. - */ - if res_lcm == nil { - internal_swap(temp_gcd_res, res_gcd) - return nil - } - - /* - Computes least common multiple as `|a*b|/gcd(a,b)` - Divide the smallest by the GCD. - */ - if internal_lt_abs(a, b) { - /* - Store quotient in `t2` such that `t2 * b` is the LCM. - */ - internal_div(res_lcm, a, temp_gcd_res) or_return - err = internal_mul(res_lcm, res_lcm, b) - } else { - /* - Store quotient in `t2` such that `t2 * a` is the LCM. - */ - internal_div(res_lcm, b, temp_gcd_res) or_return - err = internal_mul(res_lcm, res_lcm, a) - } - - if res_gcd != nil { - internal_swap(temp_gcd_res, res_gcd) - } - - /* - Fix the sign to positive and return. - */ - res_lcm.sign = .Zero_or_Positive - return err -} - -/* - Internal implementation of log. - Assumes `a` not to be `nil` and to have been initialized. -*/ -_private_int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) -> (res: int, err: Error) { - bracket_low, bracket_high, bracket_mid, t, bi_base := &Int{}, &Int{}, &Int{}, &Int{}, &Int{} - defer internal_destroy(bracket_low, bracket_high, bracket_mid, t, bi_base) - - ic := #force_inline internal_cmp(a, base) - if ic == -1 || ic == 0 { - return 1 if ic == 0 else 0, nil - } - defer if err != nil { - res = -1 - } - - internal_set(bi_base, base, true, allocator) or_return - internal_clear(bracket_mid, false, allocator) or_return - internal_clear(t, false, allocator) or_return - internal_one(bracket_low, false, allocator) or_return - internal_set(bracket_high, base, false, allocator) or_return - - low := 0; high := 1 - - /* - A kind of Giant-step/baby-step algorithm. 
- Idea shamelessly stolen from https://programmingpraxis.com/2010/05/07/integer-logarithms/2/ - The effect is asymptotic, hence needs benchmarks to test if the Giant-step should be skipped - for small n. - */ - - for { - /* - Iterate until `a` is bracketed between low + high. - */ - if #force_inline internal_gte(bracket_high, a) { break } - - low = high - #force_inline internal_copy(bracket_low, bracket_high) or_return - high <<= 1 - #force_inline internal_sqr(bracket_high, bracket_high) or_return - } - - for (high - low) > 1 { - mid := (high + low) >> 1 - - #force_inline internal_pow(t, bi_base, mid - low) or_return - - #force_inline internal_mul(bracket_mid, bracket_low, t) or_return - - mc := #force_inline internal_cmp(a, bracket_mid) - switch mc { - case -1: - high = mid - internal_swap(bracket_mid, bracket_high) - case 0: - return mid, nil - case 1: - low = mid - internal_swap(bracket_mid, bracket_low) - } - } - - fc := #force_inline internal_cmp(bracket_high, a) - res = high if fc == 0 else low - - return -} - -/* - Computes xR**-1 == x (mod N) via Montgomery Reduction. - This is an optimized implementation of `internal_montgomery_reduce` - which uses the comba method to quickly calculate the columns of the reduction. - Based on Algorithm 14.32 on pp.601 of HAC. -*/ -_private_montgomery_reduce_comba :: proc(x, n: ^Int, rho: DIGIT, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - W: [_WARRAY]_WORD = --- - - if x.used > _WARRAY { return .Invalid_Argument } - - /* - Get old used count. - */ - old_used := x.used - - /* - Grow `x` as required. - */ - internal_grow(x, n.used + 1) or_return - - /* - First we have to get the digits of the input into an array of double precision words W[...] - Copy the digits of `x` into W[0..`x.used` - 1] - */ - ix: int - for ix = 0; ix < x.used; ix += 1 { - W[ix] = _WORD(x.digit[ix]) - } - - /* - Zero the high words of W[a->used..m->used*2]. 
- */ - zero_upper := (n.used * 2) + 1 - if ix < zero_upper { - for ix = x.used; ix < zero_upper; ix += 1 { - W[ix] = {} - } - } - - /* - Now we proceed to zero successive digits from the least significant upwards. - */ - for ix = 0; ix < n.used; ix += 1 { - /* - `mu = ai * m' mod b` - - We avoid a double precision multiplication (which isn't required) - by casting the value down to a DIGIT. Note this requires - that W[ix-1] have the carry cleared (see after the inner loop) - */ - mu := ((W[ix] & _WORD(_MASK)) * _WORD(rho)) & _WORD(_MASK) - - /* - `a = a + mu * m * b**i` - - This is computed in place and on the fly. The multiplication - by b**i is handled by offseting which columns the results - are added to. - - Note the comba method normally doesn't handle carries in the - inner loop In this case we fix the carry from the previous - column since the Montgomery reduction requires digits of the - result (so far) [see above] to work. - - This is handled by fixing up one carry after the inner loop. - The carry fixups are done in order so after these loops the - first m->used words of W[] have the carries fixed. - */ - for iy := 0; iy < n.used; iy += 1 { - W[ix + iy] += mu * _WORD(n.digit[iy]) - } - - /* - Now fix carry for next digit, W[ix+1]. - */ - W[ix + 1] += (W[ix] >> _DIGIT_BITS) - } - - /* - Now we have to propagate the carries and shift the words downward - [all those least significant digits we zeroed]. - */ - - for ; ix < n.used * 2; ix += 1 { - W[ix + 1] += (W[ix] >> _DIGIT_BITS) - } - - /* copy out, A = A/b**n - * - * The result is A/b**n but instead of converting from an - * array of mp_word to mp_digit than calling mp_rshd - * we just copy them in the right order - */ - - for ix = 0; ix < (n.used + 1); ix += 1 { - x.digit[ix] = DIGIT(W[n.used + ix] & _WORD(_MASK)) - } - - /* - Set the max used. - */ - x.used = n.used + 1 - - /* - Zero old_used digits, if the input a was larger than m->used+1 we'll have to clear the digits. 
- */ - internal_zero_unused(x, old_used) - internal_clamp(x) - - /* - if A >= m then A = A - m - */ - if internal_gte_abs(x, n) { - return internal_sub(x, x, n) - } - return nil -} - -/* - Computes xR**-1 == x (mod N) via Montgomery Reduction. - Assumes `x` and `n` not to be nil. -*/ -_private_int_montgomery_reduce :: proc(x, n: ^Int, rho: DIGIT, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - /* - Can the fast reduction [comba] method be used? - Note that unlike in mul, you're safely allowed *less* than the available columns [255 per default], - since carries are fixed up in the inner loop. - */ - internal_clear_if_uninitialized(x, n) or_return - - digs := (n.used * 2) + 1 - if digs < _WARRAY && x.used <= _WARRAY && n.used < _MAX_COMBA { - return _private_montgomery_reduce_comba(x, n, rho) - } - - /* - Grow the input as required - */ - internal_grow(x, digs) or_return - x.used = digs - - for ix := 0; ix < n.used; ix += 1 { - /* - `mu = ai * rho mod b` - The value of rho must be precalculated via `int_montgomery_setup()`, - such that it equals -1/n0 mod b this allows the following inner loop - to reduce the input one digit at a time. - */ - - mu := DIGIT((_WORD(x.digit[ix]) * _WORD(rho)) & _WORD(_MASK)) - - /* - a = a + mu * m * b**i - Multiply and add in place. - */ - u := DIGIT(0) - iy := int(0) - for ; iy < n.used; iy += 1 { - /* - Compute product and sum. - */ - r := (_WORD(mu) * _WORD(n.digit[iy]) + _WORD(u) + _WORD(x.digit[ix + iy])) - - /* - Get carry. - */ - u = DIGIT(r >> _DIGIT_BITS) - - /* - Fix digit. - */ - x.digit[ix + iy] = DIGIT(r & _WORD(_MASK)) - } - - /* - At this point the ix'th digit of x should be zero. - Propagate carries upwards as required. 
- */ - for u != 0 { - x.digit[ix + iy] += u - u = x.digit[ix + iy] >> _DIGIT_BITS - x.digit[ix + iy] &= _MASK - iy += 1 - } - } - - /* - At this point the n.used'th least significant digits of x are all zero, - which means we can shift x to the right by n.used digits and the - residue is unchanged. - - x = x/b**n.used. - */ - internal_clamp(x) - _private_int_shr_leg(x, n.used) - - /* - if x >= n then x = x - n - */ - if internal_gte_abs(x, n) { - return internal_sub(x, x, n) - } - - return nil -} - -/* - Shifts with subtractions when the result is greater than b. - - The method is slightly modified to shift B unconditionally upto just under - the leading bit of b. This saves alot of multiple precision shifting. - - Assumes `a` and `b` not to be `nil`. -*/ -_private_int_montgomery_calc_normalization :: proc(a, b: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - /* - How many bits of last digit does b use. - */ - internal_clear_if_uninitialized(a, b) or_return - - bits := internal_count_bits(b) % _DIGIT_BITS - - if b.used > 1 { - power := ((b.used - 1) * _DIGIT_BITS) + bits - 1 - internal_int_power_of_two(a, power) or_return - } else { - internal_one(a) or_return - bits = 1 - } - - /* - Now compute C = A * B mod b. - */ - for x := bits - 1; x < _DIGIT_BITS; x += 1 { - internal_int_shl1(a, a) or_return - if internal_gte_abs(a, b) { - internal_sub(a, a, b) or_return - } - } - return nil -} - -/* - Sets up the Montgomery reduction stuff. 
-*/ -_private_int_montgomery_setup :: proc(n: ^Int, allocator := context.allocator) -> (rho: DIGIT, err: Error) { - /* - Fast inversion mod 2**k - Based on the fact that: - - XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) - => 2*X*A - X*X*A*A = 1 - => 2*(1) - (1) = 1 - */ - internal_clear_if_uninitialized(n, allocator) or_return - - b := n.digit[0] - if b & 1 == 0 { return 0, .Invalid_Argument } - - x := (((b + 2) & 4) << 1) + b /* here x*a==1 mod 2**4 */ - x *= 2 - (b * x) /* here x*a==1 mod 2**8 */ - x *= 2 - (b * x) /* here x*a==1 mod 2**16 */ - - when _DIGIT_TYPE_BITS == 64 { - x *= 2 - (b * x) /* here x*a==1 mod 2**32 */ - x *= 2 - (b * x) /* here x*a==1 mod 2**64 */ - } - - /* - rho = -1/m mod b - */ - rho = DIGIT(((_WORD(1) << _WORD(_DIGIT_BITS)) - _WORD(x)) & _WORD(_MASK)) - return rho, nil -} - -/* - Reduces `x` mod `m`, assumes 0 < x < m**2, mu is precomputed via reduce_setup. - From HAC pp.604 Algorithm 14.42 - - Assumes `x`, `m` and `mu` all not to be `nil` and have been initialized. -*/ -_private_int_reduce :: proc(x, m, mu: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - q := &Int{} - defer internal_destroy(q) - um := m.used - - /* - q = x - */ - internal_copy(q, x) or_return - - /* - q1 = x / b**(k-1) - */ - _private_int_shr_leg(q, um - 1) - - /* - According to HAC this optimization is ok. - */ - if DIGIT(um) > DIGIT(1) << (_DIGIT_BITS - 1) { - internal_mul(q, q, mu) or_return - } else { - _private_int_mul_high(q, q, mu, um) or_return - } - - /* - q3 = q2 / b**(k+1) - */ - _private_int_shr_leg(q, um + 1) - - /* - x = x mod b**(k+1), quick (no division) - */ - internal_int_mod_bits(x, x, _DIGIT_BITS * (um + 1)) or_return - - /* - q = q * m mod b**(k+1), quick (no division) - */ - _private_int_mul(q, q, m, um + 1) or_return - - /* - x = x - q - */ - internal_sub(x, x, q) or_return - - /* - If x < 0, add b**(k+1) to it. 
- */ - if internal_is_negative(x) { - internal_set(q, 1) or_return - _private_int_shl_leg(q, um + 1) or_return - internal_add(x, x, q) or_return - } - - /* - Back off if it's too big. - */ - for internal_gte(x, m) { - internal_sub(x, x, m) or_return - } - - return nil -} - -/* - Reduces `a` modulo `n`, where `n` is of the form 2**p - d. -*/ -_private_int_reduce_2k :: proc(a, n: ^Int, d: DIGIT, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - q := &Int{} - defer internal_destroy(q) - - internal_zero(q) or_return - - p := internal_count_bits(n) - - for { - /* - q = a/2**p, a = a mod 2**p - */ - internal_shrmod(q, a, a, p) or_return - - if d != 1 { - /* - q = q * d - */ - internal_mul(q, q, d) or_return - } - - /* - a = a + q - */ - internal_add(a, a, q) or_return - if internal_lt_abs(a, n) { break } - internal_sub(a, a, n) or_return - } - - return nil -} - -/* - Reduces `a` modulo `n` where `n` is of the form 2**p - d - This differs from reduce_2k since "d" can be larger than a single digit. -*/ -_private_int_reduce_2k_l :: proc(a, n, d: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - q := &Int{} - defer internal_destroy(q) - - internal_zero(q) or_return - - p := internal_count_bits(n) - - for { - /* - q = a/2**p, a = a mod 2**p - */ - internal_shrmod(q, a, a, p) or_return - - /* - q = q * d - */ - internal_mul(q, q, d) or_return - - /* - a = a + q - */ - internal_add(a, a, q) or_return - if internal_lt_abs(a, n) { break } - internal_sub(a, a, n) or_return - } - - return nil -} - -/* - Determines if `internal_int_reduce_2k` can be used. - Asssumes `a` not to be `nil` and to have been initialized. 
-*/ -_private_int_reduce_is_2k :: proc(a: ^Int) -> (reducible: bool, err: Error) { - assert_if_nil(a) - - if internal_is_zero(a) { - return false, nil - } else if a.used == 1 { - return true, nil - } else if a.used > 1 { - iy := internal_count_bits(a) - iw := 1 - iz := DIGIT(1) - - /* - Test every bit from the second digit up, must be 1. - */ - for ix := _DIGIT_BITS; ix < iy; ix += 1 { - if a.digit[iw] & iz == 0 { - return false, nil - } - - iz <<= 1 - if iz > _DIGIT_MAX { - iw += 1 - iz = 1 - } - } - return true, nil - } else { - return true, nil - } -} - -/* - Determines if `internal_int_reduce_2k_l` can be used. - Asssumes `a` not to be `nil` and to have been initialized. -*/ -_private_int_reduce_is_2k_l :: proc(a: ^Int) -> (reducible: bool, err: Error) { - assert_if_nil(a) - - if internal_int_is_zero(a) { - return false, nil - } else if a.used == 1 { - return true, nil - } else if a.used > 1 { - /* - If more than half of the digits are -1 we're sold. - */ - ix := 0 - iy := 0 - - for ; ix < a.used; ix += 1 { - if a.digit[ix] == _DIGIT_MAX { - iy += 1 - } - } - return iy >= (a.used / 2), nil - } else { - return false, nil - } -} - -/* - Determines the setup value. - Assumes `a` is not `nil`. -*/ -_private_int_reduce_2k_setup :: proc(a: ^Int, allocator := context.allocator) -> (d: DIGIT, err: Error) { - context.allocator = allocator - - tmp := &Int{} - defer internal_destroy(tmp) - internal_zero(tmp) or_return - - internal_int_power_of_two(tmp, internal_count_bits(a)) or_return - internal_sub(tmp, tmp, a) or_return - - return tmp.digit[0], nil -} - -/* - Determines the setup value. - Assumes `mu` and `P` are not `nil`. 
- - d := (1 << a.bits) - a; -*/ -_private_int_reduce_2k_setup_l :: proc(mu, P: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - tmp := &Int{} - defer internal_destroy(tmp) - internal_zero(tmp) or_return - - internal_int_power_of_two(tmp, internal_count_bits(P)) or_return - internal_sub(mu, tmp, P) or_return - - return nil -} - -/* - Pre-calculate the value required for Barrett reduction. - For a given modulus "P" it calulates the value required in "mu" - Assumes `mu` and `P` are not `nil`. -*/ -_private_int_reduce_setup :: proc(mu, P: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - internal_int_power_of_two(mu, P.used * 2 * _DIGIT_BITS) or_return - return internal_int_div(mu, mu, P) -} - -/* - Determines the setup value. - Assumes `a` to not be `nil` and to have been initialized. -*/ -_private_int_dr_setup :: proc(a: ^Int) -> (d: DIGIT) { - /* - The casts are required if _DIGIT_BITS is one less than - the number of bits in a DIGIT [e.g. _DIGIT_BITS==31]. - */ - return DIGIT((1 << _DIGIT_BITS) - a.digit[0]) -} - -/* - Determines if a number is a valid DR modulus. - Assumes `a` to not be `nil` and to have been initialized. -*/ -_private_dr_is_modulus :: proc(a: ^Int) -> (res: bool) { - /* - Must be at least two digits. - */ - if a.used < 2 { return false } - - /* - Must be of the form b**k - a [a <= b] so all but the first digit must be equal to -1 (mod b). - */ - for ix := 1; ix < a.used; ix += 1 { - if a.digit[ix] != _MASK { - return false - } - } - return true -} - -/* - Reduce "x" in place modulo "n" using the Diminished Radix algorithm. - Based on algorithm from the paper - - "Generating Efficient Primes for Discrete Log Cryptosystems" - Chae Hoon Lim, Pil Joong Lee, - POSTECH Information Research Laboratories - - The modulus must be of a special format [see manual]. 
- Has been modified to use algorithm 7.10 from the LTM book instead - - Input x must be in the range 0 <= x <= (n-1)**2 - Assumes `x` and `n` to not be `nil` and to have been initialized. -*/ -_private_int_dr_reduce :: proc(x, n: ^Int, k: DIGIT, allocator := context.allocator) -> (err: Error) { - /* - m = digits in modulus. - */ - m := n.used - - /* - Ensure that "x" has at least 2m digits. - */ - internal_grow(x, m + m) or_return - - /* - Top of loop, this is where the code resumes if another reduction pass is required. - */ - for { - i: int - mu := DIGIT(0) - - /* - Compute (x mod B**m) + k * [x/B**m] inline and inplace. - */ - for i = 0; i < m; i += 1 { - r := _WORD(x.digit[i + m]) * _WORD(k) + _WORD(x.digit[i] + mu) - x.digit[i] = DIGIT(r & _WORD(_MASK)) - mu = DIGIT(r >> _WORD(_DIGIT_BITS)) - } - - /* - Set final carry. - */ - x.digit[i] = mu - - /* - Zero words above m. - */ - mem.zero_slice(x.digit[m + 1:][:x.used - m]) - - /* - Clamp, sub and return. - */ - internal_clamp(x) or_return - - /* - If x >= n then subtract and reduce again. - Each successive "recursion" makes the input smaller and smaller. - */ - if internal_lt_abs(x, n) { break } - - internal_sub(x, x, n) or_return - } - return nil -} - -/* - Computes res == G**X mod P. - Assumes `res`, `G`, `X` and `P` to not be `nil` and for `G`, `X` and `P` to have been initialized. -*/ -_private_int_exponent_mod :: proc(res, G, X, P: ^Int, redmode: int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - M := [_TAB_SIZE]Int{} - winsize: uint - - /* - Use a pointer to the reduction algorithm. - This allows us to use one of many reduction algorithms without modding the guts of the code with if statements everywhere. - */ - redux: #type proc(x, m, mu: ^Int, allocator := context.allocator) -> (err: Error) - - defer { - internal_destroy(&M[1]) - for x := 1 << (winsize - 1); x < (1 << winsize); x += 1 { - internal_destroy(&M[x]) - } - } - - /* - Find window size. 
- */ - x := internal_count_bits(X) - switch { - case x <= 7: - winsize = 2 - case x <= 36: - winsize = 3 - case x <= 140: - winsize = 4 - case x <= 450: - winsize = 5 - case x <= 1303: - winsize = 6 - case x <= 3529: - winsize = 7 - case: - winsize = 8 - } - - winsize = min(_MAX_WIN_SIZE, winsize) if _MAX_WIN_SIZE > 0 else winsize - - /* - Init M array. - Init first cell. - */ - internal_zero(&M[1]) or_return - - /* - Now init the second half of the array. - */ - for x = 1 << (winsize - 1); x < (1 << winsize); x += 1 { - internal_zero(&M[x]) or_return - } - - /* - Create `mu`, used for Barrett reduction. - */ - mu := &Int{} - defer internal_destroy(mu) - internal_zero(mu) or_return - - if redmode == 0 { - _private_int_reduce_setup(mu, P) or_return - redux = _private_int_reduce - } else { - _private_int_reduce_2k_setup_l(mu, P) or_return - redux = _private_int_reduce_2k_l - } - - /* - Create M table. - - The M table contains powers of the base, e.g. M[x] = G**x mod P. - The first half of the table is not computed, though, except for M[0] and M[1]. - */ - internal_int_mod(&M[1], G, P) or_return - - /* - Compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times. - - TODO: This can probably be replaced by computing the power and using `pow` to raise to it - instead of repeated squaring. - */ - slot := 1 << (winsize - 1) - internal_copy(&M[slot], &M[1]) or_return - - for x = 0; x < int(winsize - 1); x += 1 { - /* - Square it. - */ - internal_sqr(&M[slot], &M[slot]) or_return - - /* - Reduce modulo P - */ - redux(&M[slot], P, mu) or_return - } - - /* - Create upper table, that is M[x] = M[x-1] * M[1] (mod P) - for x = (2**(winsize - 1) + 1) to (2**winsize - 1) - */ - for x = slot + 1; x < (1 << winsize); x += 1 { - internal_mul(&M[x], &M[x - 1], &M[1]) or_return - redux(&M[x], P, mu) or_return - } - - /* - Setup result. - */ - internal_one(res) or_return - - /* - Set initial mode and bit cnt. 
- */ - mode := 0 - bitcnt := 1 - buf := DIGIT(0) - digidx := X.used - 1 - bitcpy := uint(0) - bitbuf := DIGIT(0) - - for { - /* - Grab next digit as required. - */ - bitcnt -= 1 - if bitcnt == 0 { - /* - If digidx == -1 we are out of digits. - */ - if digidx == -1 { break } - - /* - Read next digit and reset the bitcnt. - */ - buf = X.digit[digidx] - digidx -= 1 - bitcnt = _DIGIT_BITS - } - - /* - Grab the next msb from the exponent. - */ - y := buf >> (_DIGIT_BITS - 1) & 1 - buf <<= 1 - - /* - If the bit is zero and mode == 0 then we ignore it. - These represent the leading zero bits before the first 1 bit - in the exponent. Technically this opt is not required but it - does lower the # of trivial squaring/reductions used. - */ - if mode == 0 && y == 0 { - continue - } - - /* - If the bit is zero and mode == 1 then we square. - */ - if mode == 1 && y == 0 { - internal_sqr(res, res) or_return - redux(res, P, mu) or_return - continue - } - - /* - Else we add it to the window. - */ - bitcpy += 1 - bitbuf |= (y << (winsize - bitcpy)) - mode = 2 - - if (bitcpy == winsize) { - /* - Window is filled so square as required and multiply. - Square first. - */ - for x = 0; x < int(winsize); x += 1 { - internal_sqr(res, res) or_return - redux(res, P, mu) or_return - } - - /* - Then multiply. - */ - internal_mul(res, res, &M[bitbuf]) or_return - redux(res, P, mu) or_return - - /* - Empty window and reset. - */ - bitcpy = 0 - bitbuf = 0 - mode = 1 - } - } - - /* - If bits remain then square/multiply. - */ - if mode == 2 && bitcpy > 0 { - /* - Square then multiply if the bit is set. - */ - for x = 0; x < int(bitcpy); x += 1 { - internal_sqr(res, res) or_return - redux(res, P, mu) or_return - - bitbuf <<= 1 - if ((bitbuf & (1 << winsize)) != 0) { - /* - Then multiply. 
- */ - internal_mul(res, res, &M[1]) or_return - redux(res, P, mu) or_return - } - } - } - return err -} - -/* - Computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 - - Uses a left-to-right `k`-ary sliding window to compute the modular exponentiation. - The value of `k` changes based on the size of the exponent. - - Uses Montgomery or Diminished Radix reduction [whichever appropriate] - - Assumes `res`, `G`, `X` and `P` to not be `nil` and for `G`, `X` and `P` to have been initialized. -*/ -_private_int_exponent_mod_fast :: proc(res, G, X, P: ^Int, redmode: int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - M := [_TAB_SIZE]Int{} - winsize: uint - - /* - Use a pointer to the reduction algorithm. - This allows us to use one of many reduction algorithms without modding the guts of the code with if statements everywhere. - */ - redux: #type proc(x, n: ^Int, rho: DIGIT, allocator := context.allocator) -> (err: Error) - - defer { - internal_destroy(&M[1]) - for x := 1 << (winsize - 1); x < (1 << winsize); x += 1 { - internal_destroy(&M[x]) - } - } - - /* - Find window size. - */ - x := internal_count_bits(X) - switch { - case x <= 7: - winsize = 2 - case x <= 36: - winsize = 3 - case x <= 140: - winsize = 4 - case x <= 450: - winsize = 5 - case x <= 1303: - winsize = 6 - case x <= 3529: - winsize = 7 - case: - winsize = 8 - } - - winsize = min(_MAX_WIN_SIZE, winsize) if _MAX_WIN_SIZE > 0 else winsize - - /* - Init M array - Init first cell. - */ - cap := internal_int_allocated_cap(P) - internal_grow(&M[1], cap) or_return - - /* - Now init the second half of the array. - */ - for x = 1 << (winsize - 1); x < (1 << winsize); x += 1 { - internal_grow(&M[x], cap) or_return - } - - /* - Determine and setup reduction code. - */ - rho: DIGIT - - if redmode == 0 { - /* - Now setup Montgomery. - */ - rho = _private_int_montgomery_setup(P) or_return - - /* - Automatically pick the comba one if available (saves quite a few calls/ifs). 
- */ - if ((P.used * 2) + 1) < _WARRAY && P.used < _MAX_COMBA { - redux = _private_montgomery_reduce_comba - } else { - /* - Use slower baseline Montgomery method. - */ - redux = _private_int_montgomery_reduce - } - } else if redmode == 1 { - /* - Setup DR reduction for moduli of the form B**k - b. - */ - rho = _private_int_dr_setup(P) - redux = _private_int_dr_reduce - } else { - /* - Setup DR reduction for moduli of the form 2**k - b. - */ - rho = _private_int_reduce_2k_setup(P) or_return - redux = _private_int_reduce_2k - } - - /* - Setup result. - */ - internal_grow(res, cap) or_return - - /* - Create M table - The first half of the table is not computed, though, except for M[0] and M[1] - */ - - if redmode == 0 { - /* - Now we need R mod m. - */ - _private_int_montgomery_calc_normalization(res, P) or_return - - /* - Now set M[1] to G * R mod m. - */ - internal_mulmod(&M[1], G, res, P) or_return - } else { - internal_one(res) or_return - internal_mod(&M[1], G, P) or_return - } - - /* - Compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times. - */ - slot := 1 << (winsize - 1) - internal_copy(&M[slot], &M[1]) or_return - - for x = 0; x < int(winsize - 1); x += 1 { - internal_sqr(&M[slot], &M[slot]) or_return - redux(&M[slot], P, rho) or_return - } - - /* - Create upper table. - */ - for x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x += 1 { - internal_mul(&M[x], &M[x - 1], &M[1]) or_return - redux(&M[x], P, rho) or_return - } - - /* - Set initial mode and bit cnt. - */ - mode := 0 - bitcnt := 1 - buf := DIGIT(0) - digidx := X.used - 1 - bitcpy := 0 - bitbuf := DIGIT(0) - - for { - /* - Grab next digit as required. - */ - bitcnt -= 1 - if bitcnt == 0 { - /* - If digidx == -1 we are out of digits so break. - */ - if digidx == -1 { break } - - /* - Read next digit and reset the bitcnt. - */ - buf = X.digit[digidx] - digidx -= 1 - bitcnt = _DIGIT_BITS - } - - /* - Grab the next msb from the exponent. 
- */ - y := (buf >> (_DIGIT_BITS - 1)) & 1 - buf <<= 1 - - /* - If the bit is zero and mode == 0 then we ignore it. - These represent the leading zero bits before the first 1 bit in the exponent. - Technically this opt is not required but it does lower the # of trivial squaring/reductions used. - */ - if mode == 0 && y == 0 { continue } - - /* - If the bit is zero and mode == 1 then we square. - */ - if mode == 1 && y == 0 { - internal_sqr(res, res) or_return - redux(res, P, rho) or_return - continue - } - - /* - Else we add it to the window. - */ - bitcpy += 1 - bitbuf |= (y << (winsize - uint(bitcpy))) - mode = 2 - - if bitcpy == int(winsize) { - /* - Window is filled so square as required and multiply - Square first. - */ - for x = 0; x < int(winsize); x += 1 { - internal_sqr(res, res) or_return - redux(res, P, rho) or_return - } - - /* - Then multiply. - */ - internal_mul(res, res, &M[bitbuf]) or_return - redux(res, P, rho) or_return - - /* - Empty window and reset. - */ - bitcpy = 0 - bitbuf = 0 - mode = 1 - } - } - - /* - If bits remain then square/multiply. - */ - if mode == 2 && bitcpy > 0 { - /* - Square then multiply if the bit is set. - */ - for x = 0; x < bitcpy; x += 1 { - internal_sqr(res, res) or_return - redux(res, P, rho) or_return - - /* - Get next bit of the window. - */ - bitbuf <<= 1 - if bitbuf & (1 << winsize) != 0 { - /* - Then multiply. - */ - internal_mul(res, res, &M[1]) or_return - redux(res, P, rho) or_return - } - } - } - - if redmode == 0 { - /* - Fixup result if Montgomery reduction is used. - Recall that any value in a Montgomery system is actually multiplied by R mod n. - So we have to reduce one more time to cancel out the factor of R. 
- */ - redux(res, P, rho) or_return - } - - return nil -} - -/* - hac 14.61, pp608 -*/ -_private_inverse_modulo :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - x, y, u, v, A, B, C, D := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{} - defer internal_destroy(x, y, u, v, A, B, C, D) - - /* - `b` cannot be negative. - */ - if b.sign == .Negative || internal_is_zero(b) { - return .Invalid_Argument - } - - /* - init temps. - */ - internal_init_multi(x, y, u, v, A, B, C, D) or_return - - /* - `x` = `a` % `b`, `y` = `b` - */ - internal_mod(x, a, b) or_return - internal_copy(y, b) or_return - - /* - 2. [modified] if x,y are both even then return an error! - */ - if internal_is_even(x) && internal_is_even(y) { - return .Invalid_Argument - } - - /* - 3. u=x, v=y, A=1, B=0, C=0, D=1 - */ - internal_copy(u, x) or_return - internal_copy(v, y) or_return - internal_one(A) or_return - internal_one(D) or_return - - for { - /* - 4. while `u` is even do: - */ - for internal_is_even(u) { - /* - 4.1 `u` = `u` / 2 - */ - internal_int_shr1(u, u) or_return - - /* - 4.2 if `A` or `B` is odd then: - */ - if internal_is_odd(A) || internal_is_odd(B) { - /* - `A` = (`A`+`y`) / 2, `B` = (`B`-`x`) / 2 - */ - internal_add(A, A, y) or_return - internal_add(B, B, x) or_return - } - /* - `A` = `A` / 2, `B` = `B` / 2 - */ - internal_int_shr1(A, A) or_return - internal_int_shr1(B, B) or_return - } - - /* - 5. while `v` is even do: - */ - for internal_is_even(v) { - /* - 5.1 `v` = `v` / 2 - */ - internal_int_shr1(v, v) or_return - - /* - 5.2 if `C` or `D` is odd then: - */ - if internal_is_odd(C) || internal_is_odd(D) { - /* - `C` = (`C`+`y`) / 2, `D` = (`D`-`x`) / 2 - */ - internal_add(C, C, y) or_return - internal_add(D, D, x) or_return - } - /* - `C` = `C` / 2, `D` = `D` / 2 - */ - internal_int_shr1(C, C) or_return - internal_int_shr1(D, D) or_return - } - - /* - 6. 
if `u` >= `v` then: - */ - if internal_cmp(u, v) != -1 { - /* - `u` = `u` - `v`, `A` = `A` - `C`, `B` = `B` - `D` - */ - internal_sub(u, u, v) or_return - internal_sub(A, A, C) or_return - internal_sub(B, B, D) or_return - } else { - /* v - v - u, C = C - A, D = D - B */ - internal_sub(v, v, u) or_return - internal_sub(C, C, A) or_return - internal_sub(D, D, B) or_return - } - - /* - If not zero goto step 4 - */ - if internal_is_zero(u) { - break - } - } - - /* - Now `a` = `C`, `b` = `D`, `gcd` == `g`*`v` - */ - - /* - If `v` != `1` then there is no inverse. - */ - if !internal_eq(v, 1) { - return .Invalid_Argument - } - - /* - If its too low. - */ - if internal_is_negative(C) { - internal_add(C, C, b) or_return - } - - /* - Too big. - */ - if internal_gte(C, 0) { - internal_sub(C, C, b) or_return - } - - /* - `C` is now the inverse. - */ - swap(dest, C) - - return -} - -/* - Computes the modular inverse via binary extended Euclidean algorithm, that is `dest` = 1 / `a` mod `b`. - - Based on slow invmod except this is optimized for the case where `b` is odd, - as per HAC Note 14.64 on pp. 610. -*/ -_private_inverse_modulo_odd :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - x, y, u, v, B, D := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{} - defer internal_destroy(x, y, u, v, B, D) - - sign: Sign - - /* - 2. [modified] `b` must be odd. - */ - if internal_is_even(b) { return .Invalid_Argument } - - /* - Init all our temps. - */ - internal_init_multi(x, y, u, v, B, D) or_return - - /* - `x` == modulus, `y` == value to invert. - */ - internal_copy(x, b) or_return - - /* - We need `y` = `|a|`. - */ - internal_mod(y, a, b) or_return - - /* - If one of `x`, `y` is zero return an error! - */ - if internal_is_zero(x) || internal_is_zero(y) { return .Invalid_Argument } - - /* - 3. 
`u` = `x`, `v` = `y`, `A` = 1, `B` = 0, `C` = 0, `D` = 1 - */ - internal_copy(u, x) or_return - internal_copy(v, y) or_return - - internal_one(D) or_return - - for { - /* - 4. while `u` is even do. - */ - for internal_is_even(u) { - /* - 4.1 `u` = `u` / 2 - */ - internal_int_shr1(u, u) or_return - - /* - 4.2 if `B` is odd then: - */ - if internal_is_odd(B) { - /* - `B` = (`B` - `x`) / 2 - */ - internal_sub(B, B, x) or_return - } - - /* - `B` = `B` / 2 - */ - internal_int_shr1(B, B) or_return - } - - /* - 5. while `v` is even do: - */ - for internal_is_even(v) { - /* - 5.1 `v` = `v` / 2 - */ - internal_int_shr1(v, v) or_return - - /* - 5.2 if `D` is odd then: - */ - if internal_is_odd(D) { - /* - `D` = (`D` - `x`) / 2 - */ - internal_sub(D, D, x) or_return - } - /* - `D` = `D` / 2 - */ - internal_int_shr1(D, D) or_return - } - - /* - 6. if `u` >= `v` then: - */ - if internal_cmp(u, v) != -1 { - /* - `u` = `u` - `v`, `B` = `B` - `D` - */ - internal_sub(u, u, v) or_return - internal_sub(B, B, D) or_return - } else { - /* - `v` - `v` - `u`, `D` = `D` - `B` - */ - internal_sub(v, v, u) or_return - internal_sub(D, D, B) or_return - } - - /* - If not zero goto step 4. - */ - if internal_is_zero(u) { break } - } - - /* - Now `a` = C, `b` = D, gcd == g*v - */ - - /* - if `v` != 1 then there is no inverse - */ - if internal_cmp(v, 1) != 0 { - return .Invalid_Argument - } - - /* - `b` is now the inverse. - */ - sign = a.sign - for internal_int_is_negative(D) { - internal_add(D, D, b) or_return - } - - /* - Too big. - */ - for internal_gte_abs(D, b) { - internal_sub(D, D, b) or_return - } - - swap(dest, D) - dest.sign = sign - return nil -} - - -/* - Returns the log2 of an `Int`. - Assumes `a` not to be `nil` and to have been initialized. - Also assumes `base` is a power of two. 
-*/ -_private_log_power_of_two :: proc(a: ^Int, base: DIGIT) -> (log: int, err: Error) { - base := base - y: int - for y = 0; base & 1 == 0; { - y += 1 - base >>= 1 - } - log = internal_count_bits(a) - return (log - 1) / y, err -} - -/* - Copies DIGITs from `src` to `dest`. - Assumes `src` and `dest` to not be `nil` and have been initialized. -*/ -_private_copy_digits :: proc(dest, src: ^Int, digits: int, offset := int(0)) -> (err: Error) { - digits := digits - /* - If dest == src, do nothing - */ - if dest == src { - return nil - } - - digits = min(digits, len(src.digit), len(dest.digit)) - mem.copy_non_overlapping(&dest.digit[0], &src.digit[offset], size_of(DIGIT) * digits) - return nil -} - - -/* - Shift left by `digits` * _DIGIT_BITS bits. -*/ -_private_int_shl_leg :: proc(quotient: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - if digits <= 0 { return nil } - - /* - No need to shift a zero. - */ - if #force_inline internal_is_zero(quotient) { - return nil - } - - /* - Resize `quotient` to accomodate extra digits. - */ - #force_inline internal_grow(quotient, quotient.used + digits) or_return - - /* - Increment the used by the shift amount then copy upwards. - */ - - /* - Much like `_private_int_shr_leg`, this is implemented using a sliding window, - except the window goes the other way around. - */ - #no_bounds_check for x := quotient.used; x > 0; x -= 1 { - quotient.digit[x+digits-1] = quotient.digit[x-1] - } - - quotient.used += digits - mem.zero_slice(quotient.digit[:digits]) - return nil -} - -/* - Shift right by `digits` * _DIGIT_BITS bits. -*/ -_private_int_shr_leg :: proc(quotient: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { - context.allocator = allocator - - if digits <= 0 { return nil } - - /* - If digits > used simply zero and return. 
- */ - if digits > quotient.used { return internal_zero(quotient) } - - /* - Much like `int_shl_digit`, this is implemented using a sliding window, - except the window goes the other way around. - - b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> - /\ | ----> - \-------------------/ ----> - */ - - #no_bounds_check for x := 0; x < (quotient.used - digits); x += 1 { - quotient.digit[x] = quotient.digit[x + digits] - } - quotient.used -= digits - internal_zero_unused(quotient) - return internal_clamp(quotient) -} - -/* - ======================== End of private procedures ======================= - - =============================== Private tables =============================== - - Tables used by `internal_*` and `_*`. -*/ - -_private_int_rem_128 := [?]DIGIT{ - 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, - 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, - 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, - 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, - 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, - 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, - 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, - 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, -} -#assert(128 * size_of(DIGIT) == size_of(_private_int_rem_128)) - -_private_int_rem_105 := [?]DIGIT{ - 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, - 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, - 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, - 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, - 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, - 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, -} -#assert(105 * size_of(DIGIT) == size_of(_private_int_rem_105)) - -_PRIME_TAB_SIZE :: 256 -_private_prime_table := [_PRIME_TAB_SIZE]DIGIT{ - 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, - 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, - 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, - 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083, 
- 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, - 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, - 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, - 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, - - 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, - 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, - 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, - 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, - 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, - 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, - 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, - 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, - - 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, - 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, - 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, - 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, - 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, - 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, - 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, - 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, - - 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, - 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, - 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, - 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, - 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, - 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, - 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, - 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653, -} -#assert(_PRIME_TAB_SIZE * size_of(DIGIT) == size_of(_private_prime_table)) - -when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && 
size_of(rawptr) == 8) { - _factorial_table := [35]_WORD{ -/* f(00): */ 1, -/* f(01): */ 1, -/* f(02): */ 2, -/* f(03): */ 6, -/* f(04): */ 24, -/* f(05): */ 120, -/* f(06): */ 720, -/* f(07): */ 5_040, -/* f(08): */ 40_320, -/* f(09): */ 362_880, -/* f(10): */ 3_628_800, -/* f(11): */ 39_916_800, -/* f(12): */ 479_001_600, -/* f(13): */ 6_227_020_800, -/* f(14): */ 87_178_291_200, -/* f(15): */ 1_307_674_368_000, -/* f(16): */ 20_922_789_888_000, -/* f(17): */ 355_687_428_096_000, -/* f(18): */ 6_402_373_705_728_000, -/* f(19): */ 121_645_100_408_832_000, -/* f(20): */ 2_432_902_008_176_640_000, -/* f(21): */ 51_090_942_171_709_440_000, -/* f(22): */ 1_124_000_727_777_607_680_000, -/* f(23): */ 25_852_016_738_884_976_640_000, -/* f(24): */ 620_448_401_733_239_439_360_000, -/* f(25): */ 15_511_210_043_330_985_984_000_000, -/* f(26): */ 403_291_461_126_605_635_584_000_000, -/* f(27): */ 10_888_869_450_418_352_160_768_000_000, -/* f(28): */ 304_888_344_611_713_860_501_504_000_000, -/* f(29): */ 8_841_761_993_739_701_954_543_616_000_000, -/* f(30): */ 265_252_859_812_191_058_636_308_480_000_000, -/* f(31): */ 8_222_838_654_177_922_817_725_562_880_000_000, -/* f(32): */ 263_130_836_933_693_530_167_218_012_160_000_000, -/* f(33): */ 8_683_317_618_811_886_495_518_194_401_280_000_000, -/* f(34): */ 295_232_799_039_604_140_847_618_609_643_520_000_000, - } -} else { - _factorial_table := [21]_WORD{ -/* f(00): */ 1, -/* f(01): */ 1, -/* f(02): */ 2, -/* f(03): */ 6, -/* f(04): */ 24, -/* f(05): */ 120, -/* f(06): */ 720, -/* f(07): */ 5_040, -/* f(08): */ 40_320, -/* f(09): */ 362_880, -/* f(10): */ 3_628_800, -/* f(11): */ 39_916_800, -/* f(12): */ 479_001_600, -/* f(13): */ 6_227_020_800, -/* f(14): */ 87_178_291_200, -/* f(15): */ 1_307_674_368_000, -/* f(16): */ 20_922_789_888_000, -/* f(17): */ 355_687_428_096_000, -/* f(18): */ 6_402_373_705_728_000, -/* f(19): */ 121_645_100_408_832_000, -/* f(20): */ 2_432_902_008_176_640_000, - } -} - -/* - ========================= 
- End of private tables ======================== +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-3 license. + + An arbitrary precision mathematics implementation in Odin. + For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. + The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. + + ============================= Private procedures ============================= + + Private procedures used by the above low-level routines follow. + + Don't call these yourself unless you really know what you're doing. + They include implementations that are optimal for certain ranges of input only. + + These aren't exported for the same reasons. +*/ + + +package math_big + +import "base:intrinsics" +import "core:mem" + +/* + Multiplies |a| * |b| and only computes up to `digits` digits of the result. + HAC pp. 595, Algorithm 14.12 Modified so you can control how + many digits of output are created. +*/ +_private_int_mul :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + /* + Can we use the fast multiplier? + */ + if digits < _WARRAY && min(a.used, b.used) < _MAX_COMBA { + return #force_inline _private_int_mul_comba(dest, a, b, digits) + } + + /* + Set up temporary output `Int`, which we'll swap for `dest` when done. + */ + + t := &Int{} + + internal_grow(t, max(digits, _DEFAULT_DIGIT_COUNT)) or_return + t.used = digits + + /* + Compute the digits of the product directly. + */ + pa := a.used + for ix := 0; ix < pa; ix += 1 { + /* + Limit ourselves to `digits` DIGITs of output. + */ + pb := min(b.used, digits - ix) + carry := _WORD(0) + iy := 0 + + /* + Compute the column of the output and propagate the carry. + */ + #no_bounds_check for iy = 0; iy < pb; iy += 1 { + /* + Compute the column as a _WORD.
+ */ + column := _WORD(t.digit[ix + iy]) + _WORD(a.digit[ix]) * _WORD(b.digit[iy]) + carry + + /* + The new column is the lower part of the result. + */ + t.digit[ix + iy] = DIGIT(column & _WORD(_MASK)) + + /* + Get the carry word from the result. + */ + carry = column >> _DIGIT_BITS + } + /* + Store the final carry only if its column lies below `digits`. + */ + if ix + iy < digits { + t.digit[ix + pb] = DIGIT(carry) + } + } + + internal_swap(dest, t) + internal_destroy(t) + return internal_clamp(dest) +} + + +/* + Multiplication using the Toom-Cook 3-way algorithm. + + Much more complicated than Karatsuba but has a lower asymptotic running time of O(N**1.464). + This algorithm is only particularly useful on VERY large inputs. + (We're talking 1000s of digits here...). + + This file contains code from J. Arndt's book "Matters Computational" + and the accompanying FXT-library with permission of the author. + + Setup from: + Chung, Jaewook, and M. Anwar Hasan. "Asymmetric squaring formulae." + 18th IEEE Symposium on Computer Arithmetic (ARITH'07). IEEE, 2007. + + The interpolation from above needed one temporary variable more than the interpolation here: + + Bodrato, Marco, and Alberto Zanoni. "What about Toom-Cook matrices optimality." + Centro Vito Volterra Universita di Roma Tor Vergata (2006) +*/ +_private_int_mul_toom :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + S1, S2, T1, a0, a1, a2, b0, b1, b2 := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{} + defer internal_destroy(S1, S2, T1, a0, a1, a2, b0, b1, b2) + + /* + Init temps.
+ */ + internal_init_multi(S1, S2, T1) or_return + + /* + B = number of digits in each of the two low parts (a0, a1, b0, b1). + */ + B := min(a.used, b.used) / 3 + + /* + a = a2 * x^2 + a1 * x + a0; + */ + internal_grow(a0, B) or_return + internal_grow(a1, B) or_return + internal_grow(a2, a.used - 2 * B) or_return + + a0.used, a1.used = B, B + a2.used = a.used - 2 * B + + internal_copy_digits(a0, a, a0.used) or_return + internal_copy_digits(a1, a, a1.used, B) or_return + internal_copy_digits(a2, a, a2.used, 2 * B) or_return + + internal_clamp(a0) + internal_clamp(a1) + internal_clamp(a2) + + /* + b = b2 * x^2 + b1 * x + b0; + */ + internal_grow(b0, B) or_return + internal_grow(b1, B) or_return + internal_grow(b2, b.used - 2 * B) or_return + + b0.used, b1.used = B, B + b2.used = b.used - 2 * B + + internal_copy_digits(b0, b, b0.used) or_return + internal_copy_digits(b1, b, b1.used, B) or_return + internal_copy_digits(b2, b, b2.used, 2 * B) or_return + + internal_clamp(b0) + internal_clamp(b1) + internal_clamp(b2) + + + /* + \\ S1 = (a2+a1+a0) * (b2+b1+b0); + */ + internal_add(T1, a2, a1) or_return /* T1 = a2 + a1; */ + internal_add(S2, T1, a0) or_return /* S2 = T1 + a0; */ + internal_add(dest, b2, b1) or_return /* dest = b2 + b1; */ + internal_add(S1, dest, b0) or_return /* S1 = dest + b0; */ + internal_mul(S1, S1, S2) or_return /* S1 = S1 * S2; */ + + /* + \\S2 = (4*a2+2*a1+a0) * (4*b2+2*b1+b0); + */ + internal_add(T1, T1, a2) or_return /* T1 = T1 + a2; */ + internal_int_shl1(T1, T1) or_return /* T1 = T1 << 1; */ + internal_add(T1, T1, a0) or_return /* T1 = T1 + a0; */ + internal_add(dest, dest, b2) or_return /* dest = dest + b2; */ + internal_int_shl1(dest, dest) or_return /* dest = dest << 1; */ + internal_add(dest, dest, b0) or_return /* dest = dest + b0; */ + internal_mul(S2, T1, dest) or_return /* S2 = T1 * dest; */ + + /* + \\S3 = (a2-a1+a0) * (b2-b1+b0); + */ + internal_sub(a1, a2, a1) or_return /* a1 = a2 - a1; */ + internal_add(a1, a1, a0) or_return /* a1 = a1 + a0; */ + internal_sub(b1, b2, b1) or_return /* b1 = b2 - b1; */ + internal_add(b1,
+ b1, b0) or_return /* b1 = b1 + b0; */ + internal_mul(a1, a1, b1) or_return /* a1 = a1 * b1; */ + internal_mul(b1, a2, b2) or_return /* b1 = a2 * b2; */ + + /* + \\S2 = (S2 - S3) / 3; + */ + internal_sub(S2, S2, a1) or_return /* S2 = S2 - a1; */ + _private_int_div_3(S2, S2) or_return /* S2 = S2 / 3; \\ this is an exact division */ + internal_sub(a1, S1, a1) or_return /* a1 = S1 - a1; */ + internal_int_shr1(a1, a1) or_return /* a1 = a1 >> 1; */ + internal_mul(a0, a0, b0) or_return /* a0 = a0 * b0; */ + internal_sub(S1, S1, a0) or_return /* S1 = S1 - a0; */ + internal_sub(S2, S2, S1) or_return /* S2 = S2 - S1; */ + internal_int_shr1(S2, S2) or_return /* S2 = S2 >> 1; */ + internal_sub(S1, S1, a1) or_return /* S1 = S1 - a1; */ + internal_sub(S1, S1, b1) or_return /* S1 = S1 - b1; */ + internal_int_shl1(T1, b1) or_return /* T1 = b1 << 1; */ + internal_sub(S2, S2, T1) or_return /* S2 = S2 - T1; */ + internal_sub(a1, a1, S2) or_return /* a1 = a1 - S2; */ + + /* + P = b1*x^4 + S2*x^3 + S1*x^2 + a1*x + a0, where multiplying by x is a shift of `B` digits. + */ + _private_int_shl_leg(b1, 4 * B) or_return + _private_int_shl_leg(S2, 3 * B) or_return + internal_add(b1, b1, S2) or_return + _private_int_shl_leg(S1, 2 * B) or_return + internal_add(b1, b1, S1) or_return + _private_int_shl_leg(a1, 1 * B) or_return + internal_add(b1, b1, a1) or_return + internal_add(dest, b1, a0) or_return + + /* + `dest` now holds P, i.e. a * b - P == 0. + */ + return nil +} + +/* + product = |a| * |b| using Karatsuba Multiplication using three half size multiplications. + + Let `B` represent the radix [e.g. 2**_DIGIT_BITS] and let `n` represent + half of the number of digits in the min(a,b) + + `a` = `a1` * `B`**`n` + `a0` + `b` = `b1` * `B`**`n` + `b0` + + Then, a * b => a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B**n + a0b0 + + Note that a1b1 and a0b0 are used twice and only need to be computed once.
+ So in total three half size (half # of digit) multiplications are performed, + a0b0, a1b1 and (a1+a0)(b1+b0) + + Note that a multiplication of half the digits requires 1/4th the number of + single precision multiplications, so in total after one call 25% of the + single precision multiplications are saved. + + Note also that the call to `internal_mul` can end up back in this function + if the a0, a1, b0, or b1 are above the threshold. + + This is known as divide-and-conquer and leads to the famous O(N**lg(3)) or O(N**1.584) + work which is asymptotically lower than the standard O(N**2) that the + baseline/comba methods use. Generally though, the overhead of this method doesn't pay off + until a certain size is reached, of around 80 used DIGITs. +*/ +_private_int_mul_karatsuba :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + x0, x1, y0, y1, t1, x0y0, x1y1 := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{} + defer internal_destroy(x0, x1, y0, y1, t1, x0y0, x1y1) + + /* + min # of digits, divided by two. + */ + B := min(a.used, b.used) >> 1 + + /* + Init all the temps. + */ + internal_grow(x0, B) or_return + internal_grow(x1, a.used - B) or_return + internal_grow(y0, B) or_return + internal_grow(y1, b.used - B) or_return + internal_grow(t1, B * 2) or_return + internal_grow(x0y0, B * 2) or_return + internal_grow(x1y1, B * 2) or_return + + /* + Now shift the digits. + */ + x0.used, y0.used = B, B + x1.used = a.used - B + y1.used = b.used - B + + /* + We copy the digits directly instead of using higher level functions + since we also need to shift the digits. Copy errors are propagated, as in `_private_int_mul_toom`. + */ + internal_copy_digits(x0, a, x0.used) or_return + internal_copy_digits(y0, b, y0.used) or_return + internal_copy_digits(x1, a, x1.used, B) or_return + internal_copy_digits(y1, b, y1.used, B) or_return + + /* + Only need to clamp the lower words since by definition the + upper words x1/y1 must have a known number of digits.
+ */ + clamp(x0) + clamp(y0) + + /* + Now calc the products x0y0 and x1y1, + after this x0 is no longer required, free temp [x0==t2]! + */ + internal_mul(x0y0, x0, y0) or_return /* x0y0 = x0*y0 */ + internal_mul(x1y1, x1, y1) or_return /* x1y1 = x1*y1 */ + internal_add(t1, x1, x0) or_return /* t1 = x1 + x0 */ + internal_add(x0, y1, y0) or_return /* t2 = y1 + y0 */ + internal_mul(t1, t1, x0) or_return /* t1 = (x1 + x0) * (y1 + y0) */ + + /* + Subtract (x0y0 + x1y1). + */ + internal_add(x0, x0y0, x1y1) or_return /* t2 = x0y0 + x1y1 */ + internal_sub(t1, t1, x0) or_return /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ + + /* + shift by B. + */ + _private_int_shl_leg(t1, B) or_return /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))< (err: Error) { + context.allocator = allocator + + /* + Set up array. + */ + W: [_WARRAY]DIGIT = --- + + /* + Grow the destination as required. + */ + internal_grow(dest, digits) or_return + + /* + Number of output digits to produce. + */ + pa := min(digits, a.used + b.used) + + /* + Clear the carry + */ + _W := _WORD(0) + + ix: int + for ix = 0; ix < pa; ix += 1 { + tx, ty, iy, iz: int + + /* + Get offsets into the two bignums. + */ + ty = min(b.used - 1, ix) + tx = ix - ty + + /* + This is the number of times the loop will iterate, essentially. + while (tx++ < a->used && ty-- >= 0) { ... } + */ + + iy = min(a.used - tx, ty + 1) + + /* + Execute loop. + */ + #no_bounds_check for iz = 0; iz < iy; iz += 1 { + _W += _WORD(a.digit[tx + iz]) * _WORD(b.digit[ty - iz]) + } + + /* + Store term. + */ + W[ix] = DIGIT(_W) & _MASK + + /* + Make next carry. + */ + _W = _W >> _WORD(_DIGIT_BITS) + } + + /* + Setup dest. + */ + old_used := dest.used + dest.used = pa + + /* + Now extract the previous digit [below the carry]. + */ + copy_slice(dest.digit[0:], W[:pa]) + + /* + Clear unused digits [that existed in the old copy of dest]. + */ + internal_zero_unused(dest, old_used) + + /* + Adjust dest.used based on leading zeroes.
+ */ + + return internal_clamp(dest) +} + +/* + Multiplies |a| * |b| and does not compute the lower digs digits + [meant to get the higher part of the product] +*/ +_private_int_mul_high :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + /* + Can we use the fast multiplier? + */ + if a.used + b.used + 1 < _WARRAY && min(a.used, b.used) < _MAX_COMBA { + return _private_int_mul_high_comba(dest, a, b, digits) + } + + internal_grow(dest, a.used + b.used + 1) or_return + dest.used = a.used + b.used + 1 + + pa := a.used + pb := b.used + for ix := 0; ix < pa; ix += 1 { + carry := DIGIT(0) + + for iy := digits - ix; iy < pb; iy += 1 { + /* + Calculate the double precision result. + */ + r := _WORD(dest.digit[ix + iy]) + _WORD(a.digit[ix]) * _WORD(b.digit[iy]) + _WORD(carry) + + /* + Get the lower part. + */ + dest.digit[ix + iy] = DIGIT(r & _WORD(_MASK)) + + /* + Carry the carry. + */ + carry = DIGIT(r >> _WORD(_DIGIT_BITS)) + } + dest.digit[ix + pb] = carry + } + return internal_clamp(dest) +} + +/* + This is a modified version of `_private_int_mul_comba` that only produces output digits *above* `digits`. + See the comments for `_private_int_mul_comba` to see how it works. + + This is used in the Barrett reduction since for one of the multiplications + only the higher digits were needed. This essentially halves the work. + + Based on Algorithm 14.12 on pp.595 of HAC. +*/ +_private_int_mul_high_comba :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + W: [_WARRAY]DIGIT = --- + _W: _WORD = 0 + + /* + Number of output digits to produce. Grow the destination as required. + */ + pa := a.used + b.used + internal_grow(dest, pa) or_return + + ix: int + for ix = digits; ix < pa; ix += 1 { + /* + Get offsets into the two bignums. 
+ */ + ty := min(b.used - 1, ix) + tx := ix - ty + + /* + This is the number of times the loop will iterrate, essentially it's + while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy := min(a.used - tx, ty + 1) + + /* + Execute loop. + */ + for iz := 0; iz < iy; iz += 1 { + _W += _WORD(a.digit[tx + iz]) * _WORD(b.digit[ty - iz]) + } + + /* + Store term. + */ + W[ix] = DIGIT(_W) & DIGIT(_MASK) + + /* + Make next carry. + */ + _W = _W >> _WORD(_DIGIT_BITS) + } + + /* + Setup dest + */ + old_used := dest.used + dest.used = pa + + for ix = digits; ix < pa; ix += 1 { + /* + Now extract the previous digit [below the carry]. + */ + dest.digit[ix] = W[ix] + } + + /* + Zero remainder. + */ + internal_zero_unused(dest, old_used) + + /* + Adjust dest.used based on leading zeroes. + */ + return internal_clamp(dest) +} + +/* + Single-digit multiplication with the smaller number as the single-digit. +*/ +_private_int_mul_balance :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + a, b := a, b + + a0, tmp, r := &Int{}, &Int{}, &Int{} + defer internal_destroy(a0, tmp, r) + + b_size := min(a.used, b.used) + n_blocks := max(a.used, b.used) / b_size + + internal_grow(a0, b_size + 2) or_return + internal_init_multi(tmp, r) or_return + + /* + Make sure that `a` is the larger one. + */ + if a.used < b.used { + a, b = b, a + } + assert(a.used >= b.used) + + i, j := 0, 0 + for ; i < n_blocks; i += 1 { + /* + Cut a slice off of `a`. + */ + + a0.used = b_size + internal_copy_digits(a0, a, a0.used, j) + j += a0.used + internal_clamp(a0) + + /* + Multiply with `b`. + */ + internal_mul(tmp, a0, b) or_return + + /* + Shift `tmp` to the correct position. + */ + _private_int_shl_leg(tmp, b_size * i) or_return + + /* + Add to output. No carry needed. + */ + internal_add(r, r, tmp) or_return + } + + /* + The left-overs; there are always left-overs. 
+ */ + if j < a.used { + a0.used = a.used - j + internal_copy_digits(a0, a, a0.used, j) + j += a0.used + internal_clamp(a0) + + internal_mul(tmp, a0, b) or_return + _private_int_shl_leg(tmp, b_size * i) or_return + internal_add(r, r, tmp) or_return + } + + internal_swap(dest, r) + return +} + +/* + Low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 + Assumes `dest` and `src` to not be `nil`, and `src` to have been initialized. +*/ +_private_int_sqr :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + pa := src.used + + t := &Int{}; ix, iy: int + /* + Grow `t` to maximum needed size, or `_DEFAULT_DIGIT_COUNT`, whichever is bigger. + */ + internal_grow(t, max((2 * pa) + 1, _DEFAULT_DIGIT_COUNT)) or_return + t.used = (2 * pa) + 1 + + #no_bounds_check for ix = 0; ix < pa; ix += 1 { + carry := DIGIT(0) + /* + First calculate the digit at 2*ix; calculate double precision result. + */ + r := _WORD(t.digit[ix+ix]) + (_WORD(src.digit[ix]) * _WORD(src.digit[ix])) + + /* + Store lower part in result. + */ + t.digit[ix+ix] = DIGIT(r & _WORD(_MASK)) + /* + Get the carry. + */ + carry = DIGIT(r >> _DIGIT_BITS) + + #no_bounds_check for iy = ix + 1; iy < pa; iy += 1 { + /* + First calculate the product. + */ + r = _WORD(src.digit[ix]) * _WORD(src.digit[iy]) + + /* Now calculate the double precision result. NĂ³te we use + * addition instead of *2 since it's easier to optimize + */ + r = _WORD(t.digit[ix+iy]) + r + r + _WORD(carry) + + /* + Store lower part. + */ + t.digit[ix+iy] = DIGIT(r & _WORD(_MASK)) + + /* + Get carry. + */ + carry = DIGIT(r >> _DIGIT_BITS) + } + /* + Propagate upwards. + */ + #no_bounds_check for carry != 0 { + r = _WORD(t.digit[ix+iy]) + _WORD(carry) + t.digit[ix+iy] = DIGIT(r & _WORD(_MASK)) + carry = DIGIT(r >> _WORD(_DIGIT_BITS)) + iy += 1 + } + } + + err = internal_clamp(t) + internal_swap(dest, t) + internal_destroy(t) + return err +} + +/* + The jist of squaring... 
+ You do like mult except the offset of the tmpx [one that starts closer to zero] can't equal the offset of tmpy. + So basically you set up iy like before then you min it with (ty-tx) so that it never happens. + You double all those you add in the inner loop. After that loop you do the squares and add them in. + + Assumes `dest` and `src` not to be `nil` and `src` to have been initialized. +*/ +_private_int_sqr_comba :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + W: [_WARRAY]DIGIT = --- + + /* + Grow the destination as required. + */ + pa := uint(src.used) + uint(src.used) + internal_grow(dest, int(pa)) or_return + + /* + Number of output digits to produce. + */ + W1 := _WORD(0) + _W : _WORD = --- + ix := uint(0) + + #no_bounds_check for ; ix < pa; ix += 1 { + /* + Clear counter. + */ + _W = {} + + /* + Get offsets into the two bignums. + */ + ty := min(uint(src.used) - 1, ix) + tx := ix - ty + + /* + This is the number of times the loop will iterate, + essentially while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy := min(uint(src.used) - tx, ty + 1) + + /* + Now for squaring, tx can never equal ty. + We halve the distance since they approach at a rate of 2x, + and we have to round because odd cases need to be executed. + */ + iy = min(iy, ((ty - tx) + 1) >> 1 ) + + /* + Execute loop. + */ + #no_bounds_check for iz := uint(0); iz < iy; iz += 1 { + _W += _WORD(src.digit[tx + iz]) * _WORD(src.digit[ty - iz]) + } + + /* + Double the inner product and add carry. + */ + _W = _W + _W + W1 + + /* + Even columns have the square term in them. + */ + if ix & 1 == 0 { + _W += _WORD(src.digit[ix >> 1]) * _WORD(src.digit[ix >> 1]) + } + + /* + Store it. + */ + W[ix] = DIGIT(_W & _WORD(_MASK)) + + /* + Make next carry. + */ + W1 = _W >> _DIGIT_BITS + } + + /* + Setup dest. 
+ */ + old_used := dest.used + dest.used = src.used + src.used + + #no_bounds_check for ix = 0; ix < pa; ix += 1 { + dest.digit[ix] = W[ix] & _MASK + } + + /* + Clear unused digits [that existed in the old copy of dest]. + */ + internal_zero_unused(dest, old_used) + + return internal_clamp(dest) +} + +/* + Karatsuba squaring, computes `dest` = `src` * `src` using three half-size squarings. + + See comments of `_private_int_mul_karatsuba` for details. + It is essentially the same algorithm but merely tuned to perform recursive squarings. +*/ +_private_int_sqr_karatsuba :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + x0, x1, t1, t2, x0x0, x1x1 := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{} + defer internal_destroy(x0, x1, t1, t2, x0x0, x1x1) + + /* + Min # of digits, divided by two. + */ + B := src.used >> 1 + + /* + Init temps. + */ + internal_grow(x0, B) or_return + internal_grow(x1, src.used - B) or_return + internal_grow(t1, src.used * 2) or_return + internal_grow(t2, src.used * 2) or_return + internal_grow(x0x0, B * 2 ) or_return + internal_grow(x1x1, (src.used - B) * 2) or_return + + /* + Now shift the digits. + */ + x0.used = B + x1.used = src.used - B + + #force_inline internal_copy_digits(x0, src, x0.used) + #force_inline mem.copy_non_overlapping(&x1.digit[0], &src.digit[B], size_of(DIGIT) * x1.used) + #force_inline internal_clamp(x0) + + /* + Now calc the products x0*x0 and x1*x1. + */ + internal_sqr(x0x0, x0) or_return + internal_sqr(x1x1, x1) or_return + + /* + Now calc (x1+x0)^2 + */ + internal_add(t1, x0, x1) or_return + internal_sqr(t1, t1) or_return + + /* + Add x0y0 + */ + internal_add(t2, x0x0, x1x1) or_return + internal_sub(t1, t1, t2) or_return + + /* + Shift by B. 
+ */ + _private_int_shl_leg(t1, B) or_return + _private_int_shl_leg(x1x1, B * 2) or_return + internal_add(t1, t1, x0x0) or_return + internal_add(dest, t1, x1x1) or_return + + return #force_inline internal_clamp(dest) +} + +/* + Squaring using Toom-Cook 3-way algorithm. + + Setup and interpolation from algorithm SQR_3 in Chung, Jaewook, and M. Anwar Hasan. "Asymmetric squaring formulae." + 18th IEEE Symposium on Computer Arithmetic (ARITH'07). IEEE, 2007. +*/ +_private_int_sqr_toom :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + S0, a0, a1, a2 := &Int{}, &Int{}, &Int{}, &Int{} + defer internal_destroy(S0, a0, a1, a2) + + /* + Init temps. + */ + internal_zero(S0) or_return + + /* + B + */ + B := src.used / 3 + + /* + a = a2 * x^2 + a1 * x + a0; + */ + internal_grow(a0, B) or_return + internal_grow(a1, B) or_return + internal_grow(a2, src.used - (2 * B)) or_return + + a0.used = B + a1.used = B + a2.used = src.used - 2 * B + + #force_inline mem.copy_non_overlapping(&a0.digit[0], &src.digit[ 0], size_of(DIGIT) * a0.used) + #force_inline mem.copy_non_overlapping(&a1.digit[0], &src.digit[ B], size_of(DIGIT) * a1.used) + #force_inline mem.copy_non_overlapping(&a2.digit[0], &src.digit[2 * B], size_of(DIGIT) * a2.used) + + internal_clamp(a0) + internal_clamp(a1) + internal_clamp(a2) + + /** S0 = a0^2; */ + internal_sqr(S0, a0) or_return + + /** \\S1 = (a2 + a1 + a0)^2 */ + /** \\S2 = (a2 - a1 + a0)^2 */ + /** \\S1 = a0 + a2; */ + /** a0 = a0 + a2; */ + internal_add(a0, a0, a2) or_return + /** \\S2 = S1 - a1; */ + /** b = a0 - a1; */ + internal_sub(dest, a0, a1) or_return + /** \\S1 = S1 + a1; */ + /** a0 = a0 + a1; */ + internal_add(a0, a0, a1) or_return + /** \\S1 = S1^2; */ + /** a0 = a0^2; */ + internal_sqr(a0, a0) or_return + /** \\S2 = S2^2; */ + /** b = b^2; */ + internal_sqr(dest, dest) or_return + /** \\ S3 = 2 * a1 * a2 */ + /** \\S3 = a1 * a2; */ + /** a1 = a1 * a2; */ + internal_mul(a1, a1, a2) 
or_return
	/** \\S3 = S3 << 1; */
	/** a1 = a1 << 1; */
	internal_shl(a1, a1, 1) or_return
	/** \\S4 = a2^2; */
	/** a2 = a2^2; */
	internal_sqr(a2, a2) or_return
	/** \\ tmp = (S1 + S2)/2 */
	/** \\tmp = S1 + S2; */
	/** b = a0 + b; */
	internal_add(dest, a0, dest) or_return
	/** \\tmp = tmp >> 1; */
	/** b = b >> 1; */
	internal_shr(dest, dest, 1) or_return
	/** \\ S1 = S1 - tmp - S3 */
	/** \\S1 = S1 - tmp; */
	/** a0 = a0 - b; */
	internal_sub(a0, a0, dest) or_return
	/** \\S1 = S1 - S3; */
	/** a0 = a0 - a1; */
	internal_sub(a0, a0, a1) or_return
	/** \\S2 = tmp - S4 -S0 */
	/** \\S2 = tmp - S4; */
	/** b = b - a2; */
	internal_sub(dest, dest, a2) or_return
	/** \\S2 = S2 - S0; */
	/** b = b - S0; */
	internal_sub(dest, dest, S0) or_return
	/** \\P = S4*x^4 + S3*x^3 + S2*x^2 + S1*x + S0; */
	/** P = a2*x^4 + a1*x^3 + b*x^2 + a0*x + S0; */
	_private_int_shl_leg(  a2, 4 * B) or_return
	_private_int_shl_leg(  a1, 3 * B) or_return
	_private_int_shl_leg(dest, 2 * B) or_return
	_private_int_shl_leg(  a0, 1 * B) or_return

	internal_add(a2, a2, a1) or_return
	internal_add(dest, dest, a2) or_return
	internal_add(dest, dest, a0) or_return
	internal_add(dest, dest, S0) or_return
	/** a^2 - P */

	return #force_inline internal_clamp(dest)
}

/*
	Divide by three (based on routine from MPI and the GMP manual).

	Walks the digits of `numerator` from most to least significant, carrying the
	running remainder in `w`. Each quotient digit is first approximated by a
	multiplication with `b = 2^_DIGIT_BITS / 3` and then corrected by repeated subtraction.

	`quotient` may be `nil`, in which case only the remainder is produced.
	Otherwise it receives the clamped quotient, which carries `numerator.sign`.

	Returns the final remainder digit, and any error from growing or clamping the temporary.
*/
_private_int_div_3 :: proc(quotient, numerator: ^Int, allocator := context.allocator) -> (remainder: DIGIT, err: Error) {
	context.allocator = allocator

	/*
		b = 2^_DIGIT_BITS / 3
	*/
	b := (_WORD(1) << _WORD(_DIGIT_BITS)) / _WORD(3)

	q := &Int{}
	defer internal_destroy(q)

	internal_grow(q, numerator.used) or_return
	q.used = numerator.used
	q.sign = numerator.sign

	/*
		Start at the most significant *used* digit, index `used - 1`.
		Starting at `used` would read `numerator.digit` and write `q.digit` one past
		the digits that were asked for, and bounds checks are disabled here.
	*/
	w, t: _WORD
	#no_bounds_check for ix := numerator.used - 1; ix >= 0; ix -= 1 {
		w = (w << _WORD(_DIGIT_BITS)) | _WORD(numerator.digit[ix])
		if w >= 3 {
			/*
				Multiply w by [1/3].
			*/
			t = (w * b) >> _WORD(_DIGIT_BITS)

			/*
				Now subtract 3 * [w/3] from w, to get the remainder.
			*/
			w -= t+t+t

			/*
				Fixup the remainder as required since the optimization is not exact.
			*/
			for w >= 3 {
				t += 1
				w -= 3
			}
		} else {
			t = 0
		}
		q.digit[ix] = DIGIT(t)
	}
	remainder = DIGIT(w)

	/*
		[optional] store the quotient.
		A failing `clamp` is reported to the caller instead of being overwritten by `nil`.
	*/
	if quotient != nil {
		clamp(q) or_return
		internal_swap(q, quotient)
	}
	return remainder, nil
}

/*
	Signed Integer Division

	c*b + d == a [i.e. a/b, c=quotient, d=remainder], HAC pp.598 Algorithm 14.20

	Note that the description in HAC is horribly incomplete.
	For example, it doesn't consider the case where digits are removed from 'x' in
	the inner loop.

	It also doesn't consider the case that y has fewer than three digits, etc.
	The overall algorithm is as described as 14.20 from HAC but fixed to treat these cases.

	`quotient` and `remainder` may each be `nil`; only the non-`nil` ones are written.
	The quotient is negative when the operand signs differ, and a non-zero remainder
	takes the sign of `numerator`.
*/
_private_int_div_school :: proc(quotient, remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) {
	context.allocator = allocator

	error_if_immutable(quotient, remainder) or_return

	q, x, y, t1, t2 := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}
	defer internal_destroy(q, x, y, t1, t2)

	internal_grow(q, numerator.used + 2) or_return
	q.used = numerator.used + 2

	internal_init_multi(t1, t2) or_return
	internal_copy(x, numerator) or_return
	internal_copy(y, denominator) or_return

	/*
		Fix the sign.
	*/
	neg   := numerator.sign != denominator.sign
	x.sign = .Zero_or_Positive
	y.sign = .Zero_or_Positive

	/*
		Normalize both x and y, ensure that y >= b/2, [b == 2**MP_DIGIT_BIT]
	*/
	norm := internal_count_bits(y) % _DIGIT_BITS

	if norm < _DIGIT_BITS - 1 {
		norm = (_DIGIT_BITS - 1) - norm
		internal_shl(x, x, norm) or_return
		internal_shl(y, y, norm) or_return
	} else {
		norm = 0
	}

	/*
		Note: HAC does 0 based, so if used==5 then it's 0,1,2,3,4, i.e. use 4
	*/
	n := x.used - 1
	t := y.used - 1

	/*
		while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} }
		y = y*b**{n-t}
	*/

	_private_int_shl_leg(y, n - t) or_return

	gte := internal_gte(x, y)
	for gte {
		q.digit[n - t] += 1
		internal_sub(x, x, y) or_return
		gte = internal_gte(x, y)
	}

	/*
		Reset y by shifting it back down.
	*/
	_private_int_shr_leg(y, n - t)

	/*
		Step 3. for i from n down to (t + 1).
	*/
	#no_bounds_check for i := n; i >= (t + 1); i -= 1 {
		if i > x.used { continue }

		/*
			Index of the quotient digit estimated in this iteration.
		*/
		qi := (i - t) - 1

		/*
			step 3.1 if xi == yt then set q{i-t-1} to b-1, otherwise set q{i-t-1} to (xi*b + x{i-1})/yt

			`_MASK` is b-1, the largest value a digit can hold. The refinement loop
			below only ever lowers the estimate, so it must not start below b-1 here.
		*/
		if x.digit[i] == y.digit[t] {
			q.digit[qi] = _MASK
		} else {
			tmp := _WORD(x.digit[i]) << _DIGIT_BITS
			tmp |= _WORD(x.digit[i - 1])
			tmp /= _WORD(y.digit[t])
			if tmp > _WORD(_MASK) {
				tmp = _WORD(_MASK)
			}
			q.digit[qi] = DIGIT(tmp & _WORD(_MASK))
		}

		/* while (q{i-t-1} * (yt * b + y{t-1})) >
		         xi * b**2 + xi-1 * b + xi-2

		   do q{i-t-1} -= 1;
		*/

		iter := 0

		/*
			Pre-increment so the first pass through the loop tests the estimate itself.
		*/
		q.digit[qi] = (q.digit[qi] + 1) & _MASK
		#no_bounds_check for {
			q.digit[qi] = (q.digit[qi] - 1) & _MASK

			/*
				Find left hand.
			*/
			internal_zero(t1)
			t1.digit[0] = ((t - 1) < 0) ? 0 : y.digit[t - 1]
			t1.digit[1] = y.digit[t]
			t1.used = 2
			internal_mul(t1, t1, q.digit[qi]) or_return

			/*
				Find right hand.
			*/
			t2.digit[0] = ((i - 2) < 0) ? 0 : x.digit[i - 2]
			t2.digit[1] = x.digit[i - 1] /* i >= 1 always holds */
			t2.digit[2] = x.digit[i]
			t2.used = 3

			if internal_lte(t1, t2) {
				break
			}
			iter += 1; if iter > 100 {
				return .Max_Iterations_Reached
			}
		}

		/*
			Step 3.3 x = x - q{i-t-1} * y * b**{i-t-1}
		*/
		int_mul_digit(t1, y, q.digit[qi]) or_return
		_private_int_shl_leg(t1, qi) or_return
		internal_sub(x, x, t1) or_return

		/*
			if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; }
		*/
		if x.sign == .Negative {
			internal_copy(t1, y) or_return
			_private_int_shl_leg(t1, qi) or_return
			internal_add(x, x, t1) or_return

			q.digit[qi] = (q.digit[qi] - 1) & _MASK
		}
	}

	/*
		Now q is the quotient and x is the remainder, [which we have to normalize]
		Get sign before writing to c.
	*/
	z, _ := is_zero(x)
	x.sign = .Zero_or_Positive if z else numerator.sign

	if quotient != nil {
		internal_clamp(q)
		internal_swap(q, quotient)
		quotient.sign = .Negative if neg else .Zero_or_Positive
	}

	if remainder != nil {
		/*
			Undo the normalization shift applied to `x` above.
		*/
		internal_shr(x, x, norm) or_return
		internal_swap(x, remainder)
	}

	return nil
}

/*
	Direct implementation of algorithms 1.8 "RecursiveDivRem" and 1.9 "UnbalancedDivision" from:

	Brent, Richard P., and Paul Zimmermann. "Modern computer arithmetic"
	Vol. 18. Cambridge University Press, 2010
	Available online at https://arxiv.org/pdf/1004.4710

	pages 19ff. in the above online document.
*/
_private_div_recursion :: proc(quotient, remainder, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
	context.allocator = allocator

	A1, A2, B1, B0, Q1, Q0, R1, R0, t := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}
	defer internal_destroy(A1, A2, B1, B0, Q1, Q0, R1, R0, t)

	m := a.used - b.used
	k := m / 2

	/*
		Below the cutoff the recursion stops and schoolbook division does the work.
	*/
	if m < MUL_KARATSUBA_CUTOFF {
		return _private_int_div_school(quotient, remainder, a, b)
	}

	internal_init_multi(A1, A2, B1, B0, Q1, Q0, R1, R0, t) or_return

	/*
		`B1` = `b` / `beta`^`k`, `B0` = `b` % `beta`^`k`
	*/
	internal_shrmod(B1, B0, b, k * _DIGIT_BITS) or_return

	/*
		(Q1, R1) = RecursiveDivRem(A / beta^(2k), B1)
	*/
	internal_shrmod(A1, t, a, 2 * k * _DIGIT_BITS) or_return
	_private_div_recursion(Q1, R1, A1, B1) or_return

	/*
		A1 = (R1 * beta^(2k)) + (A % beta^(2k)) - (Q1 * B0 * beta^k)

		The product Q1 * B0 has to be shifted up by `k` digits and subtracted from `A1`.
		Without that step `t` is overwritten before it is ever read, `A1` can never go
		negative, and the correction loop below never runs.
	*/
	_private_int_shl_leg(R1, 2 * k) or_return
	internal_add(A1, R1, t)         or_return
	internal_mul(t, Q1, B0)         or_return
	_private_int_shl_leg(t, k)      or_return
	internal_sub(A1, A1, t)         or_return

	/*
		While A1 < 0 do Q1 = Q1 - 1, A1 = A1 + (beta^k * B)
	*/
	if internal_lt(A1, 0) {
		internal_shl(t, b, k * _DIGIT_BITS) or_return

		for {
			internal_decr(Q1) or_return
			internal_add(A1, A1, t) or_return
			if internal_gte(A1, 0) { break }
		}
	}

	/*
		(Q0, R0) = RecursiveDivRem(A1 / beta^(k), B1)
	*/
	internal_shrmod(A1, t, A1, k * _DIGIT_BITS) or_return
	_private_div_recursion(Q0, R0, A1, B1) or_return

	/*
		A2 = (R0*beta^k) + (A1 % beta^k) - (Q0*B0)
	*/
	_private_int_shl_leg(R0, k) or_return
	internal_add(A2, R0, t) or_return
	internal_mul(t, Q0, B0) or_return
	internal_sub(A2, A2, t) or_return

	/*
		While A2 < 0 do Q0 = Q0 - 1, A2 = A2 + B.
	*/
	for internal_is_negative(A2) { // internal_lt(A2, 0) {
		internal_decr(Q0) or_return
		internal_add(A2, A2, b) or_return
	}

	/*
		Return q = (Q1*beta^k) + Q0, r = A2.
	*/
	_private_int_shl_leg(Q1, k) or_return
	internal_add(quotient, Q1, Q0) or_return

	return internal_copy(remainder, A2)
}

_private_int_div_recursive :: proc(quotient, remainder, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
	context.allocator = allocator

	A, B, Q, Q1, R, A_div, A_mod := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}
	defer internal_destroy(A, B, Q, Q1, R, A_div, A_mod)

	internal_init_multi(A, B, Q, Q1, R, A_div, A_mod) or_return

	/*
		Most significant bit of a limb.
		Assumes _DIGIT_MAX < (sizeof(DIGIT) * sizeof(u8)).
	*/
	msb := (_DIGIT_MAX + DIGIT(1)) >> 1
	sigma := 0
	msb_b := b.digit[b.used - 1]
	for msb_b < msb {
		sigma += 1
		msb_b <<= 1
	}

	/*
		Use that sigma to normalize B.
	*/
	internal_shl(B, b, sigma) or_return
	internal_shl(A, a, sigma) or_return

	/*
		Fix the sign.
	*/
	neg := a.sign != b.sign
	A.sign = .Zero_or_Positive; B.sign = .Zero_or_Positive

	/*
		If the magnitude of "A" is not more than twice that of "B" we can work
		on them directly, otherwise we need to work at "A" in chunks.
	*/
	n := B.used
	m := A.used - B.used

	/*
		Q = 0. We already ensured that when we called `internal_init_multi`.
	*/
	for m > n {
		/*
			(q, r) = RecursiveDivRem(A / (beta^(m-n)), B)
		*/
		j := (m - n) * _DIGIT_BITS
		internal_shrmod(A_div, A_mod, A, j) or_return
		_private_div_recursion(Q1, R, A_div, B) or_return

		/*
			Q = (Q*beta!(n)) + q
		*/
		internal_shl(Q, Q, n * _DIGIT_BITS) or_return
		internal_add(Q, Q, Q1) or_return

		/*
			A = (r * beta^(m-n)) + (A % beta^(m-n))
		*/
		internal_shl(R, R, (m - n) * _DIGIT_BITS) or_return
		internal_add(A, R, A_mod) or_return

		/*
			m = m - n
		*/
		m -= n
	}

	/*
		(q, r) = RecursiveDivRem(A, B)
	*/
	_private_div_recursion(Q1, R, A, B) or_return

	/*
		Q = (Q * beta^m) + q, R = r
	*/
	internal_shl(Q, Q, m * _DIGIT_BITS) or_return
	internal_add(Q, Q, Q1) or_return

	/*
		Get sign before writing to dest.
*/
	R.sign = .Zero_or_Positive if internal_is_zero(Q) else a.sign

	if quotient != nil {
		swap(quotient, Q)
		quotient.sign = .Negative if neg else .Zero_or_Positive
	}
	if remainder != nil {
		/*
			De-normalize the remainder.
		*/
		internal_shrmod(R, nil, R, sigma) or_return
		swap(remainder, R)
	}
	return nil
}

/*
	Slower bit-bang division... also smaller.

	Shift-and-subtract division: the denominator and a single set bit `tq` are shifted
	up by the difference in bit counts, then walked back down one bit at a time.
	Whenever the running numerator `ta` is at least the shifted denominator `tb`,
	`tb` is subtracted from it and `tq` is added to the quotient.

	`quotient` and `remainder` may each be `nil`; only the non-`nil` ones are written.
	A zero result is always stored with a non-negative sign.
*/
@(deprecated="Use `_int_div_school`, it's 3.5x faster.")
_private_int_div_small :: proc(quotient, remainder, numerator, denominator: ^Int) -> (err: Error) {
	ta, tb, tq, q := &Int{}, &Int{}, &Int{}, &Int{}
	defer internal_destroy(ta, tb, tq, q)

	internal_one(tq) or_return

	num_bits, _ := count_bits(numerator)
	den_bits, _ := count_bits(denominator)
	n := num_bits - den_bits

	abs(ta, numerator)   or_return
	abs(tb, denominator) or_return
	shl(tb, tb, n)       or_return
	shl(tq, tq, n)       or_return

	for n >= 0 {
		if internal_gte(ta, tb) {
			// ta -= tb
			sub(ta, ta, tb) or_return
			// q += tq
			add( q, q, tq) or_return
		}
		shr1(tb, tb) or_return
		shr1(tq, tq) or_return

		n -= 1
	}

	/*
		Now q == quotient and ta == remainder.
	*/
	neg := numerator.sign != denominator.sign
	if quotient != nil {
		swap(quotient, q)
		z, _ := is_zero(quotient)
		quotient.sign = .Negative if neg && !z else .Zero_or_Positive
	}
	if remainder != nil {
		swap(remainder, ta)
		/*
			Test the remainder itself, not the numerator: a non-zero negative numerator
			that divides evenly must not leave behind a negative zero.
		*/
		z, _ := is_zero(remainder)
		remainder.sign = .Zero_or_Positive if z else numerator.sign
	}
	return nil
}



/*
	Binary split factorial algo due to: http://www.luschny.de/math/factorial/binarysplitfact.html

	For every bit position of `n`, from the highest down, the product over the range
	[`start`, `stop`) is obtained from `_private_int_recursive_product` and folded into
	`inner`, which is in turn folded into `outer`. The result stored in `res` is `outer`
	shifted left by `n - count_ones(n)` bits.
*/
_private_int_factorial_binary_split :: proc(res: ^Int, n: int, allocator := context.allocator) -> (err: Error) {
	context.allocator = allocator

	inner, outer, temp := &Int{}, &Int{}, &Int{}
	defer internal_destroy(inner, outer, temp)

	internal_one(inner, false) or_return
	internal_one(outer, false) or_return

	bits_used := ilog2(n)

	for i := bits_used; i >= 0; i -= 1 {
		/*
			`+` and `|` share a precedence level and associate left to right;
			the parentheses only make the existing grouping explicit.
		*/
		start := ((n >> (uint(i) + 1)) + 1) | 1
		stop  := ((n >> uint(i)) + 1) | 1
		_private_int_recursive_product(temp, start, stop, 0) or_return
		internal_mul(inner, inner, temp) or_return
		internal_mul(outer, outer, inner) or_return
	}
	shift := n - intrinsics.count_ones(n)

	return internal_shl(res, outer, int(shift))
}

/*
	Recursive product used by binary split factorial algorithm.
+*/ +_private_int_recursive_product :: proc(res: ^Int, start, stop: int, level := int(0), allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + t1, t2 := &Int{}, &Int{} + defer internal_destroy(t1, t2) + + if level > FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS { + return .Max_Iterations_Reached + } + + num_factors := (stop - start) >> 1 + if num_factors == 2 { + internal_set(t1, start, false) or_return + when true { + internal_grow(t2, t1.used + 1, false) or_return + internal_add(t2, t1, 2) or_return + } else { + internal_add(t2, t1, 2) or_return + } + return internal_mul(res, t1, t2) + } + + if num_factors > 1 { + mid := (start + num_factors) | 1 + _private_int_recursive_product(t1, start, mid, level + 1) or_return + _private_int_recursive_product(t2, mid, stop, level + 1) or_return + return internal_mul(res, t1, t2) + } + + if num_factors == 1 { + return #force_inline internal_set(res, start, true) + } + + return #force_inline internal_one(res, true) +} + +/* + Internal function computing both GCD using the binary method, + and, if target isn't `nil`, also LCM. + + Expects the `a` and `b` to have been initialized + and one or both of `res_gcd` or `res_lcm` not to be `nil`. + + If both `a` and `b` are zero, return zero. + If either `a` or `b`, return the other one. + + The `gcd` and `lcm` wrappers have already done this test, + but `gcd_lcm` wouldn't have, so we still need to perform it. + + If neither result is wanted, we have nothing to do. +*/ +_private_int_gcd_lcm :: proc(res_gcd, res_lcm, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + if res_gcd == nil && res_lcm == nil { + return nil + } + + /* + We need a temporary because `res_gcd` is allowed to be `nil`. + */ + if a.used == 0 && b.used == 0 { + /* + GCD(0, 0) and LCM(0, 0) are both 0. 
+ */ + if res_gcd != nil { + internal_zero(res_gcd) or_return + } + if res_lcm != nil { + internal_zero(res_lcm) or_return + } + return nil + } else if a.used == 0 { + /* + We can early out with GCD = B and LCM = 0 + */ + if res_gcd != nil { + internal_abs(res_gcd, b) or_return + } + if res_lcm != nil { + internal_zero(res_lcm) or_return + } + return nil + } else if b.used == 0 { + /* + We can early out with GCD = A and LCM = 0 + */ + if res_gcd != nil { + internal_abs(res_gcd, a) or_return + } + if res_lcm != nil { + internal_zero(res_lcm) or_return + } + return nil + } + + temp_gcd_res := &Int{} + defer internal_destroy(temp_gcd_res) + + /* + If neither `a` or `b` was zero, we need to compute `gcd`. + Get copies of `a` and `b` we can modify. + */ + u, v := &Int{}, &Int{} + defer internal_destroy(u, v) + internal_copy(u, a) or_return + internal_copy(v, b) or_return + + /* + Must be positive for the remainder of the algorithm. + */ + u.sign = .Zero_or_Positive; v.sign = .Zero_or_Positive + + /* + B1. Find the common power of two for `u` and `v`. + */ + u_lsb, _ := internal_count_lsb(u) + v_lsb, _ := internal_count_lsb(v) + k := min(u_lsb, v_lsb) + + if k > 0 { + /* + Divide the power of two out. + */ + internal_shr(u, u, k) or_return + internal_shr(v, v, k) or_return + } + + /* + Divide any remaining factors of two out. + */ + if u_lsb != k { + internal_shr(u, u, u_lsb - k) or_return + } + if v_lsb != k { + internal_shr(v, v, v_lsb - k) or_return + } + + for v.used != 0 { + /* + Make sure `v` is the largest. + */ + if internal_gt(u, v) { + /* + Swap `u` and `v` to make sure `v` is >= `u`. + */ + internal_swap(u, v) + } + + /* + Subtract smallest from largest. + */ + internal_sub(v, v, u) or_return + + /* + Divide out all factors of two. + */ + b, _ := internal_count_lsb(v) + internal_shr(v, v, b) or_return + } + + /* + Multiply by 2**k which we divided out at the beginning. 
*/
	internal_shl(temp_gcd_res, u, k) or_return
	temp_gcd_res.sign = .Zero_or_Positive

	/*
		We've computed `gcd`, either the long way, or because one of the inputs was zero.
		If we don't want `lcm`, we're done.
	*/
	if res_lcm == nil {
		internal_swap(temp_gcd_res, res_gcd)
		return nil
	}

	/*
		Computes least common multiple as `|a*b|/gcd(a,b)`
		Divide the smallest by the GCD.
	*/
	if internal_lt_abs(a, b) {
		/*
			Store quotient in `t2` such that `t2 * b` is the LCM.
		*/
		internal_div(res_lcm, a, temp_gcd_res) or_return
		err = internal_mul(res_lcm, res_lcm, b)
	} else {
		/*
			Store quotient in `t2` such that `t2 * a` is the LCM.
		*/
		internal_div(res_lcm, b, temp_gcd_res) or_return
		err = internal_mul(res_lcm, res_lcm, a)
	}

	if res_gcd != nil {
		internal_swap(temp_gcd_res, res_gcd)
	}

	/*
		Fix the sign to positive and return.
	*/
	res_lcm.sign = .Zero_or_Positive
	return err
}

/*
	Internal implementation of log.
	Assumes `a` not to be `nil` and to have been initialized.

	Returns 0 when `a` compares below `base` and 1 when it compares equal.
	Otherwise the exponent is bracketed by repeated squaring and then narrowed by bisection.
	On error `res` is set to -1.
*/
_private_int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) -> (res: int, err: Error) {
	/*
		Route every allocation through `allocator`, including those made by the
		`internal_copy`, `internal_sqr`, `internal_pow` and `internal_mul` calls below,
		which are not passed an allocator explicitly.
	*/
	context.allocator = allocator

	bracket_low, bracket_high, bracket_mid, t, bi_base := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}
	defer internal_destroy(bracket_low, bracket_high, bracket_mid, t, bi_base)

	ic := #force_inline internal_cmp(a, base)
	if ic == -1 || ic == 0 {
		return 1 if ic == 0 else 0, nil
	}
	defer if err != nil {
		res = -1
	}

	internal_set(bi_base, base, true, allocator)         or_return
	internal_clear(bracket_mid, false, allocator)        or_return
	internal_clear(t, false, allocator)                  or_return
	internal_one(bracket_low, false, allocator)          or_return
	internal_set(bracket_high, base, false, allocator)   or_return

	low := 0; high := 1

	/*
		A kind of Giant-step/baby-step algorithm.
		Idea shamelessly stolen from https://programmingpraxis.com/2010/05/07/integer-logarithms/2/
		The effect is asymptotic, hence needs benchmarks to test if the Giant-step should be skipped
		for small n.
	*/

	for {
		/*
			Iterate until `a` is bracketed between low + high.
		*/
		if #force_inline internal_gte(bracket_high, a) { break }

		low = high
		#force_inline internal_copy(bracket_low, bracket_high) or_return
		high <<= 1
		#force_inline internal_sqr(bracket_high, bracket_high) or_return
	}

	/*
		Bisect the exponent range: `bracket_mid` = `bracket_low` * base^(mid - low).
	*/
	for (high - low) > 1 {
		mid := (high + low) >> 1

		#force_inline internal_pow(t, bi_base, mid - low) or_return

		#force_inline internal_mul(bracket_mid, bracket_low, t) or_return

		mc := #force_inline internal_cmp(a, bracket_mid)
		switch mc {
		case -1:
			high = mid
			internal_swap(bracket_mid, bracket_high)
		case  0:
			return mid, nil
		case  1:
			low = mid
			internal_swap(bracket_mid, bracket_low)
		}
	}

	fc := #force_inline internal_cmp(bracket_high, a)
	res = high if fc == 0 else low

	return
}

/*
	Computes xR**-1 == x (mod N) via Montgomery Reduction.
	This is an optimized implementation of `internal_montgomery_reduce`
	which uses the comba method to quickly calculate the columns of the reduction.
	Based on Algorithm 14.32 on pp.601 of HAC.
*/
_private_montgomery_reduce_comba :: proc(x, n: ^Int, rho: DIGIT, allocator := context.allocator) -> (err: Error) {
	context.allocator = allocator
	W: [_WARRAY]_WORD = ---

	if x.used > _WARRAY { return .Invalid_Argument }

	/*
		Get old used count.
	*/
	old_used := x.used

	/*
		Grow `x` as required.
	*/
	internal_grow(x, n.used + 1) or_return

	/*
		First we have to get the digits of the input into an array of double precision words W[...]
		Copy the digits of `x` into W[0..`x.used` - 1]
	*/
	ix: int
	for ix = 0; ix < x.used; ix += 1 {
		W[ix] = _WORD(x.digit[ix])
	}

	/*
		Zero the high words of W[a->used..m->used*2].
+ */ + zero_upper := (n.used * 2) + 1 + if ix < zero_upper { + for ix = x.used; ix < zero_upper; ix += 1 { + W[ix] = {} + } + } + + /* + Now we proceed to zero successive digits from the least significant upwards. + */ + for ix = 0; ix < n.used; ix += 1 { + /* + `mu = ai * m' mod b` + + We avoid a double precision multiplication (which isn't required) + by casting the value down to a DIGIT. Note this requires + that W[ix-1] have the carry cleared (see after the inner loop) + */ + mu := ((W[ix] & _WORD(_MASK)) * _WORD(rho)) & _WORD(_MASK) + + /* + `a = a + mu * m * b**i` + + This is computed in place and on the fly. The multiplication + by b**i is handled by offseting which columns the results + are added to. + + Note the comba method normally doesn't handle carries in the + inner loop In this case we fix the carry from the previous + column since the Montgomery reduction requires digits of the + result (so far) [see above] to work. + + This is handled by fixing up one carry after the inner loop. + The carry fixups are done in order so after these loops the + first m->used words of W[] have the carries fixed. + */ + for iy := 0; iy < n.used; iy += 1 { + W[ix + iy] += mu * _WORD(n.digit[iy]) + } + + /* + Now fix carry for next digit, W[ix+1]. + */ + W[ix + 1] += (W[ix] >> _DIGIT_BITS) + } + + /* + Now we have to propagate the carries and shift the words downward + [all those least significant digits we zeroed]. + */ + + for ; ix < n.used * 2; ix += 1 { + W[ix + 1] += (W[ix] >> _DIGIT_BITS) + } + + /* copy out, A = A/b**n + * + * The result is A/b**n but instead of converting from an + * array of mp_word to mp_digit than calling mp_rshd + * we just copy them in the right order + */ + + for ix = 0; ix < (n.used + 1); ix += 1 { + x.digit[ix] = DIGIT(W[n.used + ix] & _WORD(_MASK)) + } + + /* + Set the max used. + */ + x.used = n.used + 1 + + /* + Zero old_used digits, if the input a was larger than m->used+1 we'll have to clear the digits. 
+ */ + internal_zero_unused(x, old_used) + internal_clamp(x) + + /* + if A >= m then A = A - m + */ + if internal_gte_abs(x, n) { + return internal_sub(x, x, n) + } + return nil +} + +/* + Computes xR**-1 == x (mod N) via Montgomery Reduction. + Assumes `x` and `n` not to be nil. +*/ +_private_int_montgomery_reduce :: proc(x, n: ^Int, rho: DIGIT, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + /* + Can the fast reduction [comba] method be used? + Note that unlike in mul, you're safely allowed *less* than the available columns [255 per default], + since carries are fixed up in the inner loop. + */ + internal_clear_if_uninitialized(x, n) or_return + + digs := (n.used * 2) + 1 + if digs < _WARRAY && x.used <= _WARRAY && n.used < _MAX_COMBA { + return _private_montgomery_reduce_comba(x, n, rho) + } + + /* + Grow the input as required + */ + internal_grow(x, digs) or_return + x.used = digs + + for ix := 0; ix < n.used; ix += 1 { + /* + `mu = ai * rho mod b` + The value of rho must be precalculated via `int_montgomery_setup()`, + such that it equals -1/n0 mod b this allows the following inner loop + to reduce the input one digit at a time. + */ + + mu := DIGIT((_WORD(x.digit[ix]) * _WORD(rho)) & _WORD(_MASK)) + + /* + a = a + mu * m * b**i + Multiply and add in place. + */ + u := DIGIT(0) + iy := int(0) + for ; iy < n.used; iy += 1 { + /* + Compute product and sum. + */ + r := (_WORD(mu) * _WORD(n.digit[iy]) + _WORD(u) + _WORD(x.digit[ix + iy])) + + /* + Get carry. + */ + u = DIGIT(r >> _DIGIT_BITS) + + /* + Fix digit. + */ + x.digit[ix + iy] = DIGIT(r & _WORD(_MASK)) + } + + /* + At this point the ix'th digit of x should be zero. + Propagate carries upwards as required. 
+ */ + for u != 0 { + x.digit[ix + iy] += u + u = x.digit[ix + iy] >> _DIGIT_BITS + x.digit[ix + iy] &= _MASK + iy += 1 + } + } + + /* + At this point the n.used'th least significant digits of x are all zero, + which means we can shift x to the right by n.used digits and the + residue is unchanged. + + x = x/b**n.used. + */ + internal_clamp(x) + _private_int_shr_leg(x, n.used) + + /* + if x >= n then x = x - n + */ + if internal_gte_abs(x, n) { + return internal_sub(x, x, n) + } + + return nil +} + +/* + Shifts with subtractions when the result is greater than b. + + The method is slightly modified to shift B unconditionally upto just under + the leading bit of b. This saves alot of multiple precision shifting. + + Assumes `a` and `b` not to be `nil`. +*/ +_private_int_montgomery_calc_normalization :: proc(a, b: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + /* + How many bits of last digit does b use. + */ + internal_clear_if_uninitialized(a, b) or_return + + bits := internal_count_bits(b) % _DIGIT_BITS + + if b.used > 1 { + power := ((b.used - 1) * _DIGIT_BITS) + bits - 1 + internal_int_power_of_two(a, power) or_return + } else { + internal_one(a) or_return + bits = 1 + } + + /* + Now compute C = A * B mod b. + */ + for x := bits - 1; x < _DIGIT_BITS; x += 1 { + internal_int_shl1(a, a) or_return + if internal_gte_abs(a, b) { + internal_sub(a, a, b) or_return + } + } + return nil +} + +/* + Sets up the Montgomery reduction stuff. 
*/
_private_int_montgomery_setup :: proc(n: ^Int, allocator := context.allocator) -> (rho: DIGIT, err: Error) {
	/*
		Fast inversion mod 2**k
		Based on the fact that:

		XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
		                  =>  2*X*A - X*X*A*A = 1
		                  =>  2*(1) - (1)     = 1
	*/
	internal_clear_if_uninitialized(n, allocator) or_return

	/*
		Only an odd modulus has an inverse modulo a power of two.
	*/
	b := n.digit[0]
	if b & 1 == 0 { return 0, .Invalid_Argument }

	x := (((b + 2) & 4) << 1) + b /* here x*a==1 mod 2**4  */
	x *= 2 - (b * x)              /* here x*a==1 mod 2**8  */
	x *= 2 - (b * x)              /* here x*a==1 mod 2**16 */

	/*
		This step is needed for every digit width above 16 bits, not only for 64-bit digit types.
		An extra iteration is harmless: once `x*b == 1` holds it keeps holding.
	*/
	x *= 2 - (b * x)              /* here x*a==1 mod 2**32 */

	when _DIGIT_TYPE_BITS == 64 {
		x *= 2 - (b * x)          /* here x*a==1 mod 2**64 */
	}

	/*
		rho = -1/m mod b
	*/
	rho = DIGIT(((_WORD(1) << _WORD(_DIGIT_BITS)) - _WORD(x)) & _WORD(_MASK))
	return rho, nil
}

/*
	Reduces `x` mod `m`, assumes 0 < x < m**2, mu is precomputed via reduce_setup.
	From HAC pp.604 Algorithm 14.42

	Assumes `x`, `m` and `mu` all not to be `nil` and have been initialized.
	The result is written back into `x`.
*/
_private_int_reduce :: proc(x, m, mu: ^Int, allocator := context.allocator) -> (err: Error) {
	context.allocator = allocator

	q := &Int{}
	defer internal_destroy(q)
	um := m.used

	/*
		q = x
	*/
	internal_copy(q, x) or_return

	/*
		q1 = x / b**(k-1)
	*/
	_private_int_shr_leg(q, um - 1) or_return

	/*
		According to HAC this optimization is ok.
	*/
	if DIGIT(um) > DIGIT(1) << (_DIGIT_BITS - 1) {
		internal_mul(q, q, mu) or_return
	} else {
		_private_int_mul_high(q, q, mu, um) or_return
	}

	/*
		q3 = q2 / b**(k+1)
	*/
	_private_int_shr_leg(q, um + 1) or_return

	/*
		x = x mod b**(k+1), quick (no division)
	*/
	internal_int_mod_bits(x, x, _DIGIT_BITS * (um + 1)) or_return

	/*
		q = q * m mod b**(k+1), quick (no division)
	*/
	_private_int_mul(q, q, m, um + 1) or_return

	/*
		x = x - q
	*/
	internal_sub(x, x, q) or_return

	/*
		If x < 0, add b**(k+1) to it.
	*/
	if internal_is_negative(x) {
		internal_set(q, 1)               or_return
		_private_int_shl_leg(q, um + 1) or_return
		internal_add(x, x, q)            or_return
	}

	/*
		Back off if it's too big.
	*/
	for internal_gte(x, m) {
		internal_sub(x, x, m) or_return
	}

	return nil
}

/*
	Reduces `a` modulo `n`, where `n` is of the form 2**p - d.
	The result is written back into `a`.
*/
_private_int_reduce_2k :: proc(a, n: ^Int, d: DIGIT, allocator := context.allocator) -> (err: Error) {
	context.allocator = allocator

	q := &Int{}
	defer internal_destroy(q)

	internal_zero(q) or_return

	p := internal_count_bits(n)

	for {
		/*
			q = a/2**p, a = a mod 2**p
		*/
		internal_shrmod(q, a, a, p) or_return

		if d != 1 {
			/*
				q = q * d
			*/
			internal_mul(q, q, d) or_return
		}

		/*
			a = a + q
		*/
		internal_add(a, a, q) or_return
		if internal_lt_abs(a, n) { break }
		internal_sub(a, a, n) or_return
	}

	return nil
}

/*
	Reduces `a` modulo `n` where `n` is of the form 2**p - d
	This differs from reduce_2k since "d" can be larger than a single digit.
*/
_private_int_reduce_2k_l :: proc(a, n, d: ^Int, allocator := context.allocator) -> (err: Error) {
	context.allocator = allocator

	q := &Int{}
	defer internal_destroy(q)

	internal_zero(q) or_return

	p := internal_count_bits(n)

	for {
		/*
			q = a/2**p, a = a mod 2**p
		*/
		internal_shrmod(q, a, a, p) or_return

		/*
			q = q * d
		*/
		internal_mul(q, q, d) or_return

		/*
			a = a + q
		*/
		internal_add(a, a, q) or_return
		if internal_lt_abs(a, n) { break }
		internal_sub(a, a, n) or_return
	}

	return nil
}

/*
	Determines if `internal_int_reduce_2k` can be used.
	Assumes `a` not to be `nil` and to have been initialized.
*/
_private_int_reduce_is_2k :: proc(a: ^Int) -> (reducible: bool, err: Error) {
	assert_if_nil(a)

	/*
		Zero is never reducible this way.
	*/
	if internal_is_zero(a) { return false, nil }

	/*
		Any other value with at most one digit is accepted outright.
	*/
	if a.used <= 1 { return true, nil }

	/*
		Walk every bit from the second digit up to the top bit; each one has to be set.
	*/
	bit_count := internal_count_bits(a)
	digit_idx := 1
	bit_mask  := DIGIT(1)

	for bit := _DIGIT_BITS; bit < bit_count; bit += 1 {
		if a.digit[digit_idx] & bit_mask == 0 { return false, nil }

		/*
			Move to the next bit, rolling over into the next digit once the mask exceeds `_DIGIT_MAX`.
		*/
		bit_mask <<= 1
		if bit_mask > _DIGIT_MAX {
			digit_idx += 1
			bit_mask   = 1
		}
	}
	return true, nil
}

/*
	Tells whether `internal_int_reduce_2k_l` is applicable to `a`.
	`a` must not be `nil` and must have been initialized.
*/
_private_int_reduce_is_2k_l :: proc(a: ^Int) -> (reducible: bool, err: Error) {
	assert_if_nil(a)

	if internal_int_is_zero(a) { return false, nil }
	if a.used == 1             { return true,  nil }
	if a.used <  1             { return false, nil }

	/*
		Count the digits equal to `_DIGIT_MAX`; at least half of all used digits must qualify.
	*/
	saturated := 0
	for idx := 0; idx < a.used; idx += 1 {
		if a.digit[idx] == _DIGIT_MAX {
			saturated += 1
		}
	}
	return saturated >= (a.used / 2), nil
}

/*
	Computes the setup digit `d`: the lowest digit of `2**count_bits(a) - a`.
	`a` must not be `nil`.
*/
_private_int_reduce_2k_setup :: proc(a: ^Int, allocator := context.allocator) -> (d: DIGIT, err: Error) {
	context.allocator = allocator

	pow2 := &Int{}
	defer internal_destroy(pow2)
	internal_zero(pow2) or_return

	bits := internal_count_bits(a)
	internal_int_power_of_two(pow2, bits) or_return
	internal_sub(pow2, pow2, a)           or_return

	d = pow2.digit[0]
	return d, nil
}

/*
	Determines the setup value.
	Assumes `mu` and `P` are not `nil`.
+ + d := (1 << a.bits) - a; +*/ +_private_int_reduce_2k_setup_l :: proc(mu, P: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + tmp := &Int{} + defer internal_destroy(tmp) + internal_zero(tmp) or_return + + internal_int_power_of_two(tmp, internal_count_bits(P)) or_return + internal_sub(mu, tmp, P) or_return + + return nil +} + +/* + Pre-calculate the value required for Barrett reduction. + For a given modulus "P" it calulates the value required in "mu" + Assumes `mu` and `P` are not `nil`. +*/ +_private_int_reduce_setup :: proc(mu, P: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + internal_int_power_of_two(mu, P.used * 2 * _DIGIT_BITS) or_return + return internal_int_div(mu, mu, P) +} + +/* + Determines the setup value. + Assumes `a` to not be `nil` and to have been initialized. +*/ +_private_int_dr_setup :: proc(a: ^Int) -> (d: DIGIT) { + /* + The casts are required if _DIGIT_BITS is one less than + the number of bits in a DIGIT [e.g. _DIGIT_BITS==31]. + */ + return DIGIT((1 << _DIGIT_BITS) - a.digit[0]) +} + +/* + Determines if a number is a valid DR modulus. + Assumes `a` to not be `nil` and to have been initialized. +*/ +_private_dr_is_modulus :: proc(a: ^Int) -> (res: bool) { + /* + Must be at least two digits. + */ + if a.used < 2 { return false } + + /* + Must be of the form b**k - a [a <= b] so all but the first digit must be equal to -1 (mod b). + */ + for ix := 1; ix < a.used; ix += 1 { + if a.digit[ix] != _MASK { + return false + } + } + return true +} + +/* + Reduce "x" in place modulo "n" using the Diminished Radix algorithm. + Based on algorithm from the paper + + "Generating Efficient Primes for Discrete Log Cryptosystems" + Chae Hoon Lim, Pil Joong Lee, + POSTECH Information Research Laboratories + + The modulus must be of a special format [see manual]. 
+ Has been modified to use algorithm 7.10 from the LTM book instead + + Input x must be in the range 0 <= x <= (n-1)**2 + Assumes `x` and `n` to not be `nil` and to have been initialized. +*/ +_private_int_dr_reduce :: proc(x, n: ^Int, k: DIGIT, allocator := context.allocator) -> (err: Error) { + /* + m = digits in modulus. + */ + m := n.used + + /* + Ensure that "x" has at least 2m digits. + */ + internal_grow(x, m + m) or_return + + /* + Top of loop, this is where the code resumes if another reduction pass is required. + */ + for { + i: int + mu := DIGIT(0) + + /* + Compute (x mod B**m) + k * [x/B**m] inline and inplace. + */ + for i = 0; i < m; i += 1 { + r := _WORD(x.digit[i + m]) * _WORD(k) + _WORD(x.digit[i] + mu) + x.digit[i] = DIGIT(r & _WORD(_MASK)) + mu = DIGIT(r >> _WORD(_DIGIT_BITS)) + } + + /* + Set final carry. + */ + x.digit[i] = mu + + /* + Zero words above m. + */ + mem.zero_slice(x.digit[m + 1:][:x.used - m]) + + /* + Clamp, sub and return. + */ + internal_clamp(x) or_return + + /* + If x >= n then subtract and reduce again. + Each successive "recursion" makes the input smaller and smaller. + */ + if internal_lt_abs(x, n) { break } + + internal_sub(x, x, n) or_return + } + return nil +} + +/* + Computes res == G**X mod P. + Assumes `res`, `G`, `X` and `P` to not be `nil` and for `G`, `X` and `P` to have been initialized. +*/ +_private_int_exponent_mod :: proc(res, G, X, P: ^Int, redmode: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + M := [_TAB_SIZE]Int{} + winsize: uint + + /* + Use a pointer to the reduction algorithm. + This allows us to use one of many reduction algorithms without modding the guts of the code with if statements everywhere. + */ + redux: #type proc(x, m, mu: ^Int, allocator := context.allocator) -> (err: Error) + + defer { + internal_destroy(&M[1]) + for x := 1 << (winsize - 1); x < (1 << winsize); x += 1 { + internal_destroy(&M[x]) + } + } + + /* + Find window size. 
+ */ + x := internal_count_bits(X) + switch { + case x <= 7: + winsize = 2 + case x <= 36: + winsize = 3 + case x <= 140: + winsize = 4 + case x <= 450: + winsize = 5 + case x <= 1303: + winsize = 6 + case x <= 3529: + winsize = 7 + case: + winsize = 8 + } + + winsize = min(_MAX_WIN_SIZE, winsize) if _MAX_WIN_SIZE > 0 else winsize + + /* + Init M array. + Init first cell. + */ + internal_zero(&M[1]) or_return + + /* + Now init the second half of the array. + */ + for x = 1 << (winsize - 1); x < (1 << winsize); x += 1 { + internal_zero(&M[x]) or_return + } + + /* + Create `mu`, used for Barrett reduction. + */ + mu := &Int{} + defer internal_destroy(mu) + internal_zero(mu) or_return + + if redmode == 0 { + _private_int_reduce_setup(mu, P) or_return + redux = _private_int_reduce + } else { + _private_int_reduce_2k_setup_l(mu, P) or_return + redux = _private_int_reduce_2k_l + } + + /* + Create M table. + + The M table contains powers of the base, e.g. M[x] = G**x mod P. + The first half of the table is not computed, though, except for M[0] and M[1]. + */ + internal_int_mod(&M[1], G, P) or_return + + /* + Compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times. + + TODO: This can probably be replaced by computing the power and using `pow` to raise to it + instead of repeated squaring. + */ + slot := 1 << (winsize - 1) + internal_copy(&M[slot], &M[1]) or_return + + for x = 0; x < int(winsize - 1); x += 1 { + /* + Square it. + */ + internal_sqr(&M[slot], &M[slot]) or_return + + /* + Reduce modulo P + */ + redux(&M[slot], P, mu) or_return + } + + /* + Create upper table, that is M[x] = M[x-1] * M[1] (mod P) + for x = (2**(winsize - 1) + 1) to (2**winsize - 1) + */ + for x = slot + 1; x < (1 << winsize); x += 1 { + internal_mul(&M[x], &M[x - 1], &M[1]) or_return + redux(&M[x], P, mu) or_return + } + + /* + Setup result. + */ + internal_one(res) or_return + + /* + Set initial mode and bit cnt. 
+ */ + mode := 0 + bitcnt := 1 + buf := DIGIT(0) + digidx := X.used - 1 + bitcpy := uint(0) + bitbuf := DIGIT(0) + + for { + /* + Grab next digit as required. + */ + bitcnt -= 1 + if bitcnt == 0 { + /* + If digidx == -1 we are out of digits. + */ + if digidx == -1 { break } + + /* + Read next digit and reset the bitcnt. + */ + buf = X.digit[digidx] + digidx -= 1 + bitcnt = _DIGIT_BITS + } + + /* + Grab the next msb from the exponent. + */ + y := buf >> (_DIGIT_BITS - 1) & 1 + buf <<= 1 + + /* + If the bit is zero and mode == 0 then we ignore it. + These represent the leading zero bits before the first 1 bit + in the exponent. Technically this opt is not required but it + does lower the # of trivial squaring/reductions used. + */ + if mode == 0 && y == 0 { + continue + } + + /* + If the bit is zero and mode == 1 then we square. + */ + if mode == 1 && y == 0 { + internal_sqr(res, res) or_return + redux(res, P, mu) or_return + continue + } + + /* + Else we add it to the window. + */ + bitcpy += 1 + bitbuf |= (y << (winsize - bitcpy)) + mode = 2 + + if (bitcpy == winsize) { + /* + Window is filled so square as required and multiply. + Square first. + */ + for x = 0; x < int(winsize); x += 1 { + internal_sqr(res, res) or_return + redux(res, P, mu) or_return + } + + /* + Then multiply. + */ + internal_mul(res, res, &M[bitbuf]) or_return + redux(res, P, mu) or_return + + /* + Empty window and reset. + */ + bitcpy = 0 + bitbuf = 0 + mode = 1 + } + } + + /* + If bits remain then square/multiply. + */ + if mode == 2 && bitcpy > 0 { + /* + Square then multiply if the bit is set. + */ + for x = 0; x < int(bitcpy); x += 1 { + internal_sqr(res, res) or_return + redux(res, P, mu) or_return + + bitbuf <<= 1 + if ((bitbuf & (1 << winsize)) != 0) { + /* + Then multiply. 
+ */ + internal_mul(res, res, &M[1]) or_return + redux(res, P, mu) or_return + } + } + } + return err +} + +/* + Computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 + + Uses a left-to-right `k`-ary sliding window to compute the modular exponentiation. + The value of `k` changes based on the size of the exponent. + + Uses Montgomery or Diminished Radix reduction [whichever appropriate] + + Assumes `res`, `G`, `X` and `P` to not be `nil` and for `G`, `X` and `P` to have been initialized. +*/ +_private_int_exponent_mod_fast :: proc(res, G, X, P: ^Int, redmode: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + + M := [_TAB_SIZE]Int{} + winsize: uint + + /* + Use a pointer to the reduction algorithm. + This allows us to use one of many reduction algorithms without modding the guts of the code with if statements everywhere. + */ + redux: #type proc(x, n: ^Int, rho: DIGIT, allocator := context.allocator) -> (err: Error) + + defer { + internal_destroy(&M[1]) + for x := 1 << (winsize - 1); x < (1 << winsize); x += 1 { + internal_destroy(&M[x]) + } + } + + /* + Find window size. + */ + x := internal_count_bits(X) + switch { + case x <= 7: + winsize = 2 + case x <= 36: + winsize = 3 + case x <= 140: + winsize = 4 + case x <= 450: + winsize = 5 + case x <= 1303: + winsize = 6 + case x <= 3529: + winsize = 7 + case: + winsize = 8 + } + + winsize = min(_MAX_WIN_SIZE, winsize) if _MAX_WIN_SIZE > 0 else winsize + + /* + Init M array + Init first cell. + */ + cap := internal_int_allocated_cap(P) + internal_grow(&M[1], cap) or_return + + /* + Now init the second half of the array. + */ + for x = 1 << (winsize - 1); x < (1 << winsize); x += 1 { + internal_grow(&M[x], cap) or_return + } + + /* + Determine and setup reduction code. + */ + rho: DIGIT + + if redmode == 0 { + /* + Now setup Montgomery. + */ + rho = _private_int_montgomery_setup(P) or_return + + /* + Automatically pick the comba one if available (saves quite a few calls/ifs). 
+ */ + if ((P.used * 2) + 1) < _WARRAY && P.used < _MAX_COMBA { + redux = _private_montgomery_reduce_comba + } else { + /* + Use slower baseline Montgomery method. + */ + redux = _private_int_montgomery_reduce + } + } else if redmode == 1 { + /* + Setup DR reduction for moduli of the form B**k - b. + */ + rho = _private_int_dr_setup(P) + redux = _private_int_dr_reduce + } else { + /* + Setup DR reduction for moduli of the form 2**k - b. + */ + rho = _private_int_reduce_2k_setup(P) or_return + redux = _private_int_reduce_2k + } + + /* + Setup result. + */ + internal_grow(res, cap) or_return + + /* + Create M table + The first half of the table is not computed, though, except for M[0] and M[1] + */ + + if redmode == 0 { + /* + Now we need R mod m. + */ + _private_int_montgomery_calc_normalization(res, P) or_return + + /* + Now set M[1] to G * R mod m. + */ + internal_mulmod(&M[1], G, res, P) or_return + } else { + internal_one(res) or_return + internal_mod(&M[1], G, P) or_return + } + + /* + Compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times. + */ + slot := 1 << (winsize - 1) + internal_copy(&M[slot], &M[1]) or_return + + for x = 0; x < int(winsize - 1); x += 1 { + internal_sqr(&M[slot], &M[slot]) or_return + redux(&M[slot], P, rho) or_return + } + + /* + Create upper table. + */ + for x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x += 1 { + internal_mul(&M[x], &M[x - 1], &M[1]) or_return + redux(&M[x], P, rho) or_return + } + + /* + Set initial mode and bit cnt. + */ + mode := 0 + bitcnt := 1 + buf := DIGIT(0) + digidx := X.used - 1 + bitcpy := 0 + bitbuf := DIGIT(0) + + for { + /* + Grab next digit as required. + */ + bitcnt -= 1 + if bitcnt == 0 { + /* + If digidx == -1 we are out of digits so break. + */ + if digidx == -1 { break } + + /* + Read next digit and reset the bitcnt. + */ + buf = X.digit[digidx] + digidx -= 1 + bitcnt = _DIGIT_BITS + } + + /* + Grab the next msb from the exponent. 
+ */ + y := (buf >> (_DIGIT_BITS - 1)) & 1 + buf <<= 1 + + /* + If the bit is zero and mode == 0 then we ignore it. + These represent the leading zero bits before the first 1 bit in the exponent. + Technically this opt is not required but it does lower the # of trivial squaring/reductions used. + */ + if mode == 0 && y == 0 { continue } + + /* + If the bit is zero and mode == 1 then we square. + */ + if mode == 1 && y == 0 { + internal_sqr(res, res) or_return + redux(res, P, rho) or_return + continue + } + + /* + Else we add it to the window. + */ + bitcpy += 1 + bitbuf |= (y << (winsize - uint(bitcpy))) + mode = 2 + + if bitcpy == int(winsize) { + /* + Window is filled so square as required and multiply + Square first. + */ + for x = 0; x < int(winsize); x += 1 { + internal_sqr(res, res) or_return + redux(res, P, rho) or_return + } + + /* + Then multiply. + */ + internal_mul(res, res, &M[bitbuf]) or_return + redux(res, P, rho) or_return + + /* + Empty window and reset. + */ + bitcpy = 0 + bitbuf = 0 + mode = 1 + } + } + + /* + If bits remain then square/multiply. + */ + if mode == 2 && bitcpy > 0 { + /* + Square then multiply if the bit is set. + */ + for x = 0; x < bitcpy; x += 1 { + internal_sqr(res, res) or_return + redux(res, P, rho) or_return + + /* + Get next bit of the window. + */ + bitbuf <<= 1 + if bitbuf & (1 << winsize) != 0 { + /* + Then multiply. + */ + internal_mul(res, res, &M[1]) or_return + redux(res, P, rho) or_return + } + } + } + + if redmode == 0 { + /* + Fixup result if Montgomery reduction is used. + Recall that any value in a Montgomery system is actually multiplied by R mod n. + So we have to reduce one more time to cancel out the factor of R. 
+ */ + redux(res, P, rho) or_return + } + + return nil +} + +/* + hac 14.61, pp608 +*/ +_private_inverse_modulo :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + x, y, u, v, A, B, C, D := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{} + defer internal_destroy(x, y, u, v, A, B, C, D) + + // `b` cannot be negative. + if b.sign == .Negative || internal_is_zero(b) { + return .Invalid_Argument + } + + // init temps. + internal_init_multi(x, y, u, v, A, B, C, D) or_return + + // `x` = `a` % `b`, `y` = `b` + internal_mod(x, a, b) or_return + internal_copy(y, b) or_return + + // 2. [modified] if x,y are both even then return an error! + if internal_is_even(x) && internal_is_even(y) { + return .Invalid_Argument + } + + // 3. u=x, v=y, A=1, B=0, C=0, D=1 + internal_copy(u, x) or_return + internal_copy(v, y) or_return + internal_one(A) or_return + internal_one(D) or_return + + for { + // 4. while `u` is even do: + for internal_is_even(u) { + // 4.1 `u` = `u` / 2 + internal_int_shr1(u, u) or_return + + // 4.2 if `A` or `B` is odd then: + if internal_is_odd(A) || internal_is_odd(B) { + // `A` = (`A`+`y`) / 2, `B` = (`B`-`x`) / 2 + internal_add(A, A, y) or_return + internal_sub(B, B, x) or_return + } + // `A` = `A` / 2, `B` = `B` / 2 + internal_int_shr1(A, A) or_return + internal_int_shr1(B, B) or_return + } + + // 5. while `v` is even do: + for internal_is_even(v) { + // 5.1 `v` = `v` / 2 + internal_int_shr1(v, v) or_return + + // 5.2 if `C` or `D` is odd then: + if internal_is_odd(C) || internal_is_odd(D) { + // `C` = (`C`+`y`) / 2, `D` = (`D`-`x`) / 2 + internal_add(C, C, y) or_return + internal_sub(D, D, x) or_return + } + // `C` = `C` / 2, `D` = `D` / 2 + internal_int_shr1(C, C) or_return + internal_int_shr1(D, D) or_return + } + + // 6. 
if `u` >= `v` then: + if internal_cmp(u, v) != -1 { + // `u` = `u` - `v`, `A` = `A` - `C`, `B` = `B` - `D` + internal_sub(u, u, v) or_return + internal_sub(A, A, C) or_return + internal_sub(B, B, D) or_return + } else { + // v - v - u, C = C - A, D = D - B + internal_sub(v, v, u) or_return + internal_sub(C, C, A) or_return + internal_sub(D, D, B) or_return + } + + // If not zero goto step 4 + if internal_is_zero(u) { + break + } + } + + // Now `a` = `C`, `b` = `D`, `gcd` == `g`*`v` + + // If `v` != `1` then there is no inverse. + if !internal_eq(v, 1) { + return .Invalid_Argument + } + + // If its too low. + for internal_is_negative(C) { + internal_add(C, C, b) or_return + } + + // Too big. + for internal_cmp_mag(C, b) > -1 { + internal_sub(C, C, b) or_return + } + + // `C` is now the inverse. + swap(dest, C) + return +} + +/* + Computes the modular inverse via binary extended Euclidean algorithm, that is `dest` = 1 / `a` mod `b`. + + Based on slow invmod except this is optimized for the case where `b` is odd, + as per HAC Note 14.64 on pp. 610. +*/ +_private_inverse_modulo_odd :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator + x, y, u, v, B, D := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{} + defer internal_destroy(x, y, u, v, B, D) + + sign: Sign + + /* + 2. [modified] `b` must be odd. + */ + if internal_is_even(b) { return .Invalid_Argument } + + /* + Init all our temps. + */ + internal_init_multi(x, y, u, v, B, D) or_return + + /* + `x` == modulus, `y` == value to invert. + */ + internal_copy(x, b) or_return + + /* + We need `y` = `|a|`. + */ + internal_mod(y, a, b) or_return + + /* + If one of `x`, `y` is zero return an error! + */ + if internal_is_zero(x) || internal_is_zero(y) { return .Invalid_Argument } + + /* + 3. `u` = `x`, `v` = `y`, `A` = 1, `B` = 0, `C` = 0, `D` = 1 + */ + internal_copy(u, x) or_return + internal_copy(v, y) or_return + + internal_one(D) or_return + + for { + /* + 4. 
while `u` is even do. + */ + for internal_is_even(u) { + /* + 4.1 `u` = `u` / 2 + */ + internal_int_shr1(u, u) or_return + + /* + 4.2 if `B` is odd then: + */ + if internal_is_odd(B) { + /* + `B` = (`B` - `x`) / 2 + */ + internal_sub(B, B, x) or_return + } + + /* + `B` = `B` / 2 + */ + internal_int_shr1(B, B) or_return + } + + /* + 5. while `v` is even do: + */ + for internal_is_even(v) { + /* + 5.1 `v` = `v` / 2 + */ + internal_int_shr1(v, v) or_return + + /* + 5.2 if `D` is odd then: + */ + if internal_is_odd(D) { + /* + `D` = (`D` - `x`) / 2 + */ + internal_sub(D, D, x) or_return + } + /* + `D` = `D` / 2 + */ + internal_int_shr1(D, D) or_return + } + + /* + 6. if `u` >= `v` then: + */ + if internal_cmp(u, v) != -1 { + /* + `u` = `u` - `v`, `B` = `B` - `D` + */ + internal_sub(u, u, v) or_return + internal_sub(B, B, D) or_return + } else { + /* + `v` - `v` - `u`, `D` = `D` - `B` + */ + internal_sub(v, v, u) or_return + internal_sub(D, D, B) or_return + } + + /* + If not zero goto step 4. + */ + if internal_is_zero(u) { break } + } + + /* + Now `a` = C, `b` = D, gcd == g*v + */ + + /* + if `v` != 1 then there is no inverse + */ + if internal_cmp(v, 1) != 0 { + return .Invalid_Argument + } + + /* + `b` is now the inverse. + */ + sign = a.sign + for internal_int_is_negative(D) { + internal_add(D, D, b) or_return + } + + /* + Too big. + */ + for internal_gte_abs(D, b) { + internal_sub(D, D, b) or_return + } + + swap(dest, D) + dest.sign = sign + return nil +} + + +/* + Returns the log2 of an `Int`. + Assumes `a` not to be `nil` and to have been initialized. + Also assumes `base` is a power of two. +*/ +_private_log_power_of_two :: proc(a: ^Int, base: DIGIT) -> (log: int, err: Error) { + base := base + y: int + for y = 0; base & 1 == 0; { + y += 1 + base >>= 1 + } + log = internal_count_bits(a) + return (log - 1) / y, err +} + +/* + Copies DIGITs from `src` to `dest`. + Assumes `src` and `dest` to not be `nil` and have been initialized. 
*/
_private_copy_digits :: proc(dest, src: ^Int, digits: int, offset := int(0)) -> (err: Error) {
	digits := digits
	/*
		If dest == src, do nothing
	*/
	if dest == src {
		return nil
	}

	/*
		Never copy more than `dest` can hold, nor more than `src` holds past `offset`.
	*/
	digits = min(digits, len(src.digit) - offset, len(dest.digit))

	/*
		Nothing to copy; also avoids indexing into an empty backing array.
	*/
	if digits <= 0 {
		return nil
	}

	mem.copy_non_overlapping(&dest.digit[0], &src.digit[offset], size_of(DIGIT) * digits)
	return nil
}


/*
	Shift left by `digits` * _DIGIT_BITS bits.
	A non-positive `digits` or a zero `quotient` is a no-op.
*/
_private_int_shl_leg :: proc(quotient: ^Int, digits: int, allocator := context.allocator) -> (err: Error) {
	context.allocator = allocator

	if digits <= 0 { return nil }

	/*
		No need to shift a zero.
	*/
	if #force_inline internal_is_zero(quotient) {
		return nil
	}

	/*
		Resize `quotient` to accommodate extra digits.
	*/
	#force_inline internal_grow(quotient, quotient.used + digits) or_return

	/*
		Increment the used by the shift amount then copy upwards.
	*/

	/*
		Much like `_private_int_shr_leg`, this is implemented using a sliding window,
		except the window goes the other way around.
	*/
	#no_bounds_check for x := quotient.used; x > 0; x -= 1 {
		quotient.digit[x+digits-1] = quotient.digit[x-1]
	}

	quotient.used += digits
	mem.zero_slice(quotient.digit[:digits])
	return nil
}

/*
	Shift right by `digits` * _DIGIT_BITS bits.
	A non-positive `digits` is a no-op.
*/
_private_int_shr_leg :: proc(quotient: ^Int, digits: int, allocator := context.allocator) -> (err: Error) {
	context.allocator = allocator

	if digits <= 0 { return nil }

	/*
		If digits > used simply zero and return.
	*/
	if digits > quotient.used { return internal_zero(quotient) }

	/*
		Much like `int_shl_digit`, this is implemented using a sliding window,
		except the window goes the other way around.

		b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
		            /\                   |      ---->
		             \-------------------/      ---->
	*/

	#no_bounds_check for x := 0; x < (quotient.used - digits); x += 1 {
		quotient.digit[x] = quotient.digit[x + digits]
	}
	quotient.used -= digits
	internal_zero_unused(quotient)
	return internal_clamp(quotient)
}

/*
	========================   End of private procedures   =======================

	===============================  Private tables  ===============================

	Tables used by `internal_*` and `_*`.
*/

_private_int_rem_128 := [?]DIGIT{
	0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
	0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
	1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
	1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
	0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
	1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
	1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
	1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
}
#assert(128 * size_of(DIGIT) == size_of(_private_int_rem_128))

_private_int_rem_105 := [?]DIGIT{
	0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
	0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
	0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
	1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
	1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,
	1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
}
#assert(105 * size_of(DIGIT) == size_of(_private_int_rem_105))

_PRIME_TAB_SIZE :: 256
_private_prime_table := [_PRIME_TAB_SIZE]DIGIT{
	0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
	0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
	0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
	0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
	0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
	0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
	0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
	0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,

	0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
	0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
	0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
	0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
	0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
	0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
	0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
	0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,

	0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
	0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
	0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
	0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
	0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
	0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
	0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
	0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,

	0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
	0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
	0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
	0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
	0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
	0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
	0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
	0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653,
}
#assert(_PRIME_TAB_SIZE * size_of(DIGIT) == size_of(_private_prime_table))

when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && size_of(rawptr) == 8) {
	_factorial_table := [35]_WORD{
/* f(00): */                                                   1,
/* f(01): */                                                   1,
/* f(02): */                                                   2,
/* f(03): */                                                   6,
/* f(04): */                                                  24,
/* f(05): */                                                 120,
/* f(06): */                                                 720,
/* f(07): */                                               5_040,
/* f(08): */                                              40_320,
/* f(09): */                                             362_880,
/* f(10): */                                           3_628_800,
/* f(11): */                                          39_916_800,
/* f(12): */                                         479_001_600,
/* f(13): */                                       6_227_020_800,
/* f(14): */                                      87_178_291_200,
/* f(15): */                                   1_307_674_368_000,
/* f(16): */                                  20_922_789_888_000,
/* f(17): */                                 355_687_428_096_000,
/* f(18): */                               6_402_373_705_728_000,
/* f(19): */                             121_645_100_408_832_000,
/* f(20): */                           2_432_902_008_176_640_000,
/* f(21): */                          51_090_942_171_709_440_000,
/* f(22): */                       1_124_000_727_777_607_680_000,
/* f(23): */                      25_852_016_738_884_976_640_000,
/* f(24): */                     620_448_401_733_239_439_360_000,
/* f(25): */                  15_511_210_043_330_985_984_000_000,
/* f(26): */                 403_291_461_126_605_635_584_000_000,
/* f(27): */              10_888_869_450_418_352_160_768_000_000,
/* f(28): */             304_888_344_611_713_860_501_504_000_000,
/* f(29): */           8_841_761_993_739_701_954_543_616_000_000,
/* f(30): */         265_252_859_812_191_058_636_308_480_000_000,
/* f(31): */       8_222_838_654_177_922_817_725_562_880_000_000,
/* f(32): */     263_130_836_933_693_530_167_218_012_160_000_000,
/* f(33): */   8_683_317_618_811_886_495_518_194_401_280_000_000,
/* f(34): */ 295_232_799_039_604_140_847_618_609_643_520_000_000,
	}
} else {
	_factorial_table := [21]_WORD{
/* f(00): */                                                   1,
/* f(01): */                                                   1,
/* f(02): */                                                   2,
/* f(03): */                                                   6,
/* f(04): */                                                  24,
/* f(05): */                                                 120,
/* f(06): */                                                 720,
/* f(07): */                                               5_040,
/* f(08): */                                              40_320,
/* f(09): */                                             362_880,
/* f(10): */                                           3_628_800,
/* f(11): */                                          39_916_800,
/* f(12): */                                         479_001_600,
/* f(13): */                                       6_227_020_800,
/* f(14): */                                      87_178_291_200,
/* f(15): */                                   1_307_674_368_000,
/* f(16): */                                  20_922_789_888_000,
/* f(17): */                                 355_687_428_096_000,
/* f(18): */                               6_402_373_705_728_000,
/* f(19): */                             121_645_100_408_832_000,
/* f(20): */                           2_432_902_008_176_640_000,
	}
}

/*
	=========================  End of private tables  ========================
*/
\ No newline at end of file
diff --git a/core/os/os_haiku.odin b/core/os/os_haiku.odin
new file mode 100644
index 000000000..06052fc42
--- /dev/null
+++ b/core/os/os_haiku.odin
@@ -0,0 +1,435 @@ +package os + +foreign import libc "system:c" + +import "base:runtime" +import "core:c" +import "core:strings" +import "core:sys/haiku" + +Handle :: i32 +Pid :: i32 +File_Time :: i64 +Errno :: i32 + +MAX_PATH :: haiku.PATH_MAX + +ENOSYS :: int(haiku.Errno.POSIX_ERROR_BASE) + 9 + +INVALID_HANDLE :: ~Handle(0) + +ERROR_NONE: Errno: 0 + +stdin: Handle = 0 +stdout: Handle = 1 +stderr: Handle = 2 + +pid_t :: haiku.pid_t +off_t :: haiku.off_t +dev_t :: haiku.dev_t +ino_t :: haiku.ino_t +mode_t :: haiku.mode_t +nlink_t :: haiku.nlink_t +uid_t :: haiku.uid_t +gid_t :: haiku.gid_t +blksize_t :: haiku.blksize_t +blkcnt_t :: haiku.blkcnt_t +time_t :: haiku.time_t + + +Unix_File_Time :: struct { + seconds: time_t, + nanoseconds: c.long, +} + +OS_Stat :: struct { + device_id: dev_t, // device ID that this file resides on + serial: ino_t, // this file's serial inode ID + mode: mode_t, // file mode (rwx for user, group, etc) + nlink: nlink_t, // number of hard links to this file + uid: uid_t, // user ID of the file's owner + gid: gid_t, // group ID of the file's group + size: off_t, // file size, in bytes + rdev: dev_t, // device type (not used) + block_size: blksize_t, // optimal blocksize for I/O + + last_access: Unix_File_Time, // time of last access + modified: Unix_File_Time, // time of last data modification + status_change: Unix_File_Time, // time of last file status change + birthtime: Unix_File_Time, // time of file creation + + type: u32, // attribute/index type + + blocks: blkcnt_t, // blocks allocated for file +} + +/* file access modes for open() */ +O_RDONLY :: 0x0000 /* read only */ +O_WRONLY :: 0x0001 /* write only */ +O_RDWR :: 0x0002 /* read and write */ +O_ACCMODE :: 0x0003 /* mask to get the access modes above */ +O_RWMASK :: O_ACCMODE + +/* flags for open() */ +O_EXCL :: 0x0100 /* exclusive creat */ +O_CREATE :: 0x0200 /* create and open file */ +O_TRUNC :: 0x0400 /* open with truncation */ +O_NOCTTY :: 0x1000 /* don't make tty the 
controlling tty */ +O_NOTRAVERSE :: 0x2000 /* do not traverse leaf link */ + +// File type +S_IFMT :: 0o170000 // Type of file mask +S_IFIFO :: 0o010000 // Named pipe (fifo) +S_IFCHR :: 0o020000 // Character special +S_IFDIR :: 0o040000 // Directory +S_IFBLK :: 0o060000 // Block special +S_IFREG :: 0o100000 // Regular +S_IFLNK :: 0o120000 // Symbolic link +S_IFSOCK :: 0o140000 // Socket +S_ISVTX :: 0o001000 // Save swapped text even after use + +// File mode + // Read, write, execute/search by owner +S_IRWXU :: 0o0700 // RWX mask for owner +S_IRUSR :: 0o0400 // R for owner +S_IWUSR :: 0o0200 // W for owner +S_IXUSR :: 0o0100 // X for owner + + // Read, write, execute/search by group +S_IRWXG :: 0o0070 // RWX mask for group +S_IRGRP :: 0o0040 // R for group +S_IWGRP :: 0o0020 // W for group +S_IXGRP :: 0o0010 // X for group + + // Read, write, execute/search by others +S_IRWXO :: 0o0007 // RWX mask for other +S_IROTH :: 0o0004 // R for other +S_IWOTH :: 0o0002 // W for other +S_IXOTH :: 0o0001 // X for other + +S_ISUID :: 0o4000 // Set user id on execution +S_ISGID :: 0o2000 // Set group id on execution +S_ISTXT :: 0o1000 // Sticky bit + +S_ISLNK :: #force_inline proc(m: u32) -> bool { return (m & S_IFMT) == S_IFLNK } +S_ISREG :: #force_inline proc(m: u32) -> bool { return (m & S_IFMT) == S_IFREG } +S_ISDIR :: #force_inline proc(m: u32) -> bool { return (m & S_IFMT) == S_IFDIR } +S_ISCHR :: #force_inline proc(m: u32) -> bool { return (m & S_IFMT) == S_IFCHR } +S_ISBLK :: #force_inline proc(m: u32) -> bool { return (m & S_IFMT) == S_IFBLK } +S_ISFIFO :: #force_inline proc(m: u32) -> bool { return (m & S_IFMT) == S_IFIFO } +S_ISSOCK :: #force_inline proc(m: u32) -> bool { return (m & S_IFMT) == S_IFSOCK } + + +foreign libc { + @(link_name="_errnop") __error :: proc() -> ^c.int --- + + @(link_name="fork") _unix_fork :: proc() -> pid_t --- + @(link_name="getthrid") _unix_getthrid :: proc() -> int --- + + @(link_name="open") _unix_open :: proc(path: cstring, flags: 
c.int, mode: c.int) -> Handle --- + @(link_name="close") _unix_close :: proc(fd: Handle) -> c.int --- + @(link_name="read") _unix_read :: proc(fd: Handle, buf: rawptr, size: c.size_t) -> c.ssize_t --- + @(link_name="write") _unix_write :: proc(fd: Handle, buf: rawptr, size: c.size_t) -> c.ssize_t --- + @(link_name="lseek") _unix_seek :: proc(fd: Handle, offset: off_t, whence: c.int) -> off_t --- + @(link_name="stat") _unix_stat :: proc(path: cstring, sb: ^OS_Stat) -> c.int --- + @(link_name="fstat") _unix_fstat :: proc(fd: Handle, sb: ^OS_Stat) -> c.int --- + @(link_name="lstat") _unix_lstat :: proc(path: cstring, sb: ^OS_Stat) -> c.int --- + @(link_name="readlink") _unix_readlink :: proc(path: cstring, buf: ^byte, bufsiz: c.size_t) -> c.ssize_t --- + @(link_name="access") _unix_access :: proc(path: cstring, mask: c.int) -> c.int --- + @(link_name="getcwd") _unix_getcwd :: proc(buf: cstring, len: c.size_t) -> cstring --- + @(link_name="chdir") _unix_chdir :: proc(path: cstring) -> c.int --- + @(link_name="rename") _unix_rename :: proc(old, new: cstring) -> c.int --- + @(link_name="unlink") _unix_unlink :: proc(path: cstring) -> c.int --- + @(link_name="rmdir") _unix_rmdir :: proc(path: cstring) -> c.int --- + @(link_name="mkdir") _unix_mkdir :: proc(path: cstring, mode: mode_t) -> c.int --- + + @(link_name="getpagesize") _unix_getpagesize :: proc() -> c.int --- + @(link_name="sysconf") _sysconf :: proc(name: c.int) -> c.long --- + @(link_name="fdopendir") _unix_fdopendir :: proc(fd: Handle) -> Dir --- + @(link_name="closedir") _unix_closedir :: proc(dirp: Dir) -> c.int --- + @(link_name="rewinddir") _unix_rewinddir :: proc(dirp: Dir) --- + @(link_name="readdir_r") _unix_readdir_r :: proc(dirp: Dir, entry: ^Dirent, result: ^^Dirent) -> c.int --- + + @(link_name="malloc") _unix_malloc :: proc(size: c.size_t) -> rawptr --- + @(link_name="calloc") _unix_calloc :: proc(num, size: c.size_t) -> rawptr --- + @(link_name="free") _unix_free :: proc(ptr: rawptr) --- + 
@(link_name="realloc") _unix_realloc :: proc(ptr: rawptr, size: c.size_t) -> rawptr ---
+
+	@(link_name="getenv") _unix_getenv :: proc(cstring) -> cstring ---
+	@(link_name="realpath") _unix_realpath :: proc(path: cstring, resolved_path: rawptr) -> rawptr ---
+
+	@(link_name="exit") _unix_exit :: proc(status: c.int) -> ! ---
+
+	@(link_name="dlopen") _unix_dlopen :: proc(filename: cstring, flags: c.int) -> rawptr ---
+	@(link_name="dlsym") _unix_dlsym :: proc(handle: rawptr, symbol: cstring) -> rawptr ---
+	@(link_name="dlclose") _unix_dlclose :: proc(handle: rawptr) -> c.int ---
+	@(link_name="dlerror") _unix_dlerror :: proc() -> cstring ---
+}
+
+MAXNAMLEN :: haiku.NAME_MAX
+
+// Directory entry record filled in by `_unix_readdir_r`.
+// NOTE(review): `pdef` presumably mirrors the parent-device field of the C `dirent` - confirm against the Haiku headers.
+Dirent :: struct {
+	dev: dev_t,
+	pdef: dev_t,
+	ino: ino_t,
+	pino: ino_t,
+	reclen: u16,
+	name: [MAXNAMLEN + 1]byte, // name
+}
+
+Dir :: distinct rawptr // DIR*
+
+// Reports whether `r` is the path separator `/`.
+is_path_separator :: proc(r: rune) -> bool {
+	return r == '/'
+}
+
+// Returns the calling thread's current errno value, read through `__error()`.
+get_last_error :: proc "contextless" () -> int {
+	return int(__error()^)
+}
+
+// Calls fork(). Returns the pid reported by fork, or -1 together with errno on failure.
+fork :: proc() -> (Pid, Errno) {
+	child := _unix_fork()
+	if child != -1 {
+		return Pid(child), ERROR_NONE
+	}
+	return Pid(-1), Errno(get_last_error())
+}
+
+// Opens `path` with the given flags and mode.
+// The path is copied to a temporary C string for the duration of the call.
+// Returns INVALID_HANDLE and errno on failure.
+open :: proc(path: string, flags: int = O_RDONLY, mode: int = 0) -> (Handle, Errno) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+	c_path := strings.clone_to_cstring(path, context.temp_allocator)
+	fd := _unix_open(c_path, c.int(flags), c.int(mode))
+	if fd != -1 {
+		return fd, ERROR_NONE
+	}
+	return INVALID_HANDLE, Errno(get_last_error())
+}
+
+// Closes `fd`. Returns errno if close() reported -1, ERROR_NONE otherwise.
+close :: proc(fd: Handle) -> Errno {
+	if _unix_close(fd) == -1 {
+		return Errno(get_last_error())
+	}
+	return ERROR_NONE
+}
+
+// In practice a read/write call would probably never read/write these big buffers all at once,
+// which is why the number of bytes is returned and why there are procs that will call this in a
+// loop for you.
+// We set a max of 1GB to keep alignment and to be safe. 
+@(private)
+MAX_RW :: 1 << 30
+
+// Reads up to min(len(data), MAX_RW) bytes from `fd` into `data`.
+// Returns the number of bytes read, or -1 and errno if read() failed.
+// An empty `data` returns (0, ERROR_NONE) without calling read(), matching `write`.
+read :: proc(fd: Handle, data: []byte) -> (int, Errno) {
+	// `&data[0]` below would be an out-of-range index on an empty slice.
+	if len(data) == 0 {
+		return 0, ERROR_NONE
+	}
+
+	to_read    := min(c.size_t(len(data)), MAX_RW)
+	bytes_read := _unix_read(fd, &data[0], to_read)
+	if bytes_read == -1 {
+		return -1, Errno(get_last_error())
+	}
+	return int(bytes_read), ERROR_NONE
+}
+
+// Writes up to min(len(data), MAX_RW) bytes from `data` to `fd`.
+// Returns the number of bytes written, or -1 and errno if write() failed.
+// An empty `data` returns (0, ERROR_NONE) without calling write().
+write :: proc(fd: Handle, data: []byte) -> (int, Errno) {
+	if len(data) == 0 {
+		return 0, ERROR_NONE
+	}
+
+	to_write      := min(c.size_t(len(data)), MAX_RW)
+	bytes_written := _unix_write(fd, &data[0], to_write)
+	if bytes_written == -1 {
+		return -1, Errno(get_last_error())
+	}
+	return int(bytes_written), ERROR_NONE
+}
+
+// Repositions the file offset of `fd` via lseek().
+// Returns the value lseek() reported, or -1 and errno on failure.
+seek :: proc(fd: Handle, offset: i64, whence: int) -> (i64, Errno) {
+	res := _unix_seek(fd, offset, c.int(whence))
+	if res == -1 {
+		return -1, Errno(get_last_error())
+	}
+	return res, ERROR_NONE
+}
+
+// Returns the `size` field reported by fstat() for `fd`, or -1 and the error from `_fstat`.
+file_size :: proc(fd: Handle) -> (i64, Errno) {
+	s, err := _fstat(fd)
+	if err != ERROR_NONE {
+		return -1, err
+	}
+	return s.size, ERROR_NONE
+}
+
+// "Argv" arguments converted to Odin strings
+args := _alloc_command_line_arguments()
+
+// Builds a slice with one Odin string per entry of `runtime.args__`.
+_alloc_command_line_arguments :: proc() -> []string {
+	res := make([]string, len(runtime.args__))
+	for arg, i in runtime.args__ {
+		res[i] = string(arg)
+	}
+	return res
+}
+
+// Calls stat() on `path`.
+// On failure the returned OS_Stat is whatever the uninitialized local holds; only the Errno is meaningful.
+@private
+_stat :: proc(path: string) -> (OS_Stat, Errno) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+	cstr := strings.clone_to_cstring(path, context.temp_allocator)
+
+	// deliberately uninitialized
+	s: OS_Stat = ---
+	res := _unix_stat(cstr, &s)
+	if res == -1 {
+		return s, Errno(get_last_error())
+	}
+	return s, ERROR_NONE
+}
+
+// Same as `_stat`, but calls lstat().
+@private
+_lstat :: proc(path: string) -> (OS_Stat, Errno) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+	cstr := strings.clone_to_cstring(path, context.temp_allocator)
+
+	// deliberately uninitialized
+	s: OS_Stat = ---
+	res := _unix_lstat(cstr, &s)
+	if res == -1 {
+		return s, Errno(get_last_error())
+	}
+	return s, ERROR_NONE
+}
+
+@private
+_fstat :: proc(fd: Handle) -> (OS_Stat, Errno) {
+	// 
deliberately uninitialized
+	s: OS_Stat = ---
+	res := _unix_fstat(fd, &s)
+	if res == -1 {
+		return s, Errno(get_last_error())
+	}
+	return s, ERROR_NONE
+}
+
+// Wraps fdopendir(). Returns a nil Dir and errno when the call yields a nil stream.
+@private
+_fdopendir :: proc(fd: Handle) -> (Dir, Errno) {
+	dirp := _unix_fdopendir(fd)
+	if dirp == cast(Dir)nil {
+		return nil, Errno(get_last_error())
+	}
+	return dirp, ERROR_NONE
+}
+
+// Wraps closedir(). Returns errno when the call reports a non-zero result.
+@private
+_closedir :: proc(dirp: Dir) -> Errno {
+	rc := _unix_closedir(dirp)
+	if rc != 0 {
+		return Errno(get_last_error())
+	}
+	return ERROR_NONE
+}
+
+// Wraps rewinddir().
+@private
+_rewinddir :: proc(dirp: Dir) {
+	_unix_rewinddir(dirp)
+}
+
+// Reads the next directory entry from `dirp` with readdir_r().
+// Returns the entry, an Errno (ERROR_NONE on success), and `end_of_stream = true`
+// when readdir_r succeeded but produced no further entry.
+@private
+_readdir :: proc(dirp: Dir) -> (entry: Dirent, err: Errno, end_of_stream: bool) {
+	result: ^Dirent
+	rc := _unix_readdir_r(dirp, &entry, &result)
+
+	if rc != 0 {
+		// readdir_r reports failure through its return value (an error number);
+		// errno is not guaranteed to be set and could be stale or zero.
+		err = Errno(rc)
+		return
+	}
+	err = ERROR_NONE
+
+	if result == nil {
+		end_of_stream = true
+		return
+	}
+
+	return
+}
+
+// Reads the target of the symbolic link `path`.
+// The result is backed by a buffer allocated with `context.allocator`.
+// Whenever readlink() fills the buffer completely, the buffer is regrown by MAX_PATH
+// and the call is retried, since the target may have been truncated.
+@private
+_readlink :: proc(path: string) -> (string, Errno) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD(ignore = context.temp_allocator == context.allocator)
+	path_cstr := strings.clone_to_cstring(path, context.temp_allocator)
+
+	bufsz : uint = MAX_PATH
+	buf := make([]byte, MAX_PATH)
+	for {
+		rc := _unix_readlink(path_cstr, &(buf[0]), bufsz)
+		if rc == -1 {
+			delete(buf)
+			return "", Errno(get_last_error())
+		} else if rc == int(bufsz) {
+			bufsz += MAX_PATH
+			delete(buf)
+			buf = make([]byte, bufsz)
+		} else {
+			return strings.string_from_ptr(&buf[0], rc), ERROR_NONE
+		}
+	}
+}
+
+// Not implemented on this platform: always returns an empty string and ENOSYS.
+absolute_path_from_handle :: proc(fd: Handle) -> (string, Errno) {
+	return "", Errno(ENOSYS)
+}
+
+absolute_path_from_relative :: proc(rel: string) -> (path: string, err: Errno) {
+	rel := rel
+	if rel == "" {
+		rel = "." 
+	}
+
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD(ignore = context.temp_allocator == context.allocator)
+	c_rel := strings.clone_to_cstring(rel, context.temp_allocator)
+
+	// The buffer returned by realpath() is released with `_unix_free` once the clone below exists.
+	resolved := _unix_realpath(c_rel, nil)
+	if resolved == nil {
+		return "", Errno(get_last_error())
+	}
+	defer _unix_free(resolved)
+
+	path = strings.clone(string(transmute(cstring)resolved))
+	return path, ERROR_NONE
+}
+
+// Calls access() on `path` with `mask`.
+// Returns (true, ERROR_NONE) unless the call reports -1, in which case (false, errno).
+access :: proc(path: string, mask: int) -> (bool, Errno) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+	c_path := strings.clone_to_cstring(path, context.temp_allocator)
+	if _unix_access(c_path, c.int(mask)) == -1 {
+		return false, Errno(get_last_error())
+	}
+	return true, ERROR_NONE
+}
+
+// Looks up the environment variable `key` with getenv().
+// When it is set, returns a copy of its value allocated with `allocator` and `found = true`.
+lookup_env :: proc(key: string, allocator := context.allocator) -> (value: string, found: bool) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD(ignore = context.temp_allocator == allocator)
+	c_key := strings.clone_to_cstring(key, context.temp_allocator)
+	raw := _unix_getenv(c_key)
+	if raw != nil {
+		return strings.clone(string(raw), allocator), true
+	}
+	return "", false
+}
+
+// Like `lookup_env`, but discards the `found` flag; an unset variable yields "".
+get_env :: proc(key: string, allocator := context.allocator) -> (value: string) {
+	found_value, _ := lookup_env(key, allocator)
+	return found_value
+}
+
+// Returns the `cpu_count` field of the structure filled in by `haiku.get_system_info`.
+@(private)
+_processor_core_count :: proc() -> int {
+	sys_info: haiku.system_info
+	haiku.get_system_info(&sys_info)
+	return int(sys_info.cpu_count)
+}
+
+exit :: proc "contextless" (code: int) -> ! 
{ + runtime._cleanup_runtime_contextless() + _unix_exit(i32(code)) +} diff --git a/core/os/stat_unix.odin b/core/os/stat_unix.odin index dae7ab2fb..5e83c0e16 100644 --- a/core/os/stat_unix.odin +++ b/core/os/stat_unix.odin @@ -1,4 +1,4 @@ -//+build linux, darwin, freebsd, openbsd +//+build linux, darwin, freebsd, openbsd, haiku package os import "core:time" diff --git a/core/os/stream.odin b/core/os/stream.odin index d7ce11d26..25f31218c 100644 --- a/core/os/stream.odin +++ b/core/os/stream.odin @@ -32,7 +32,7 @@ _file_stream_proc :: proc(stream_data: rawptr, mode: io.Stream_Mode, p: []byte, } case .Read_At: - when !(ODIN_OS == .FreeBSD || ODIN_OS == .OpenBSD) { + when !(ODIN_OS == .FreeBSD || ODIN_OS == .OpenBSD || ODIN_OS == .Haiku) { n_int, os_err = read_at(fd, p, offset) n = i64(n_int) if n == 0 && os_err == 0 { @@ -46,7 +46,7 @@ _file_stream_proc :: proc(stream_data: rawptr, mode: io.Stream_Mode, p: []byte, err = .EOF } case .Write_At: - when !(ODIN_OS == .FreeBSD || ODIN_OS == .OpenBSD) { + when !(ODIN_OS == .FreeBSD || ODIN_OS == .OpenBSD || ODIN_OS == .Haiku) { n_int, os_err = write_at(fd, p, offset) n = i64(n_int) if n == 0 && os_err == 0 { @@ -60,7 +60,7 @@ _file_stream_proc :: proc(stream_data: rawptr, mode: io.Stream_Mode, p: []byte, case .Destroy: err = .Empty case .Query: - when ODIN_OS == .FreeBSD || ODIN_OS == .OpenBSD { + when ODIN_OS == .FreeBSD || ODIN_OS == .OpenBSD || ODIN_OS == .Haiku { return io.query_utility({.Close, .Flush, .Read, .Write, .Seek, .Size, .Query}) } else { return io.query_utility({.Close, .Flush, .Read, .Read_At, .Write, .Write_At, .Seek, .Size, .Query}) diff --git a/core/sync/futex_haiku.odin b/core/sync/futex_haiku.odin new file mode 100644 index 000000000..1dd719e7a --- /dev/null +++ b/core/sync/futex_haiku.odin @@ -0,0 +1,167 @@ +//+private +package sync + +import "core:c" +import "core:runtime" +import "core:sys/haiku" +import "core:sys/unix" +import "core:time" + +@(private="file") +Wait_Node :: struct { + thread: 
unix.pthread_t,
+	futex: ^Futex,            // futex this node waits on; compared against the signalled futex when waking
+	prev, next: ^Wait_Node,   // links of the circular doubly linked wait list
+}
+// Boolean lock word used by `waitq_lock` / `waitq_unlock`.
+@(private="file")
+atomic_flag :: distinct bool
+// A wait list plus the spin lock guarding it.
+// `list` is the sentinel head: `g_waitq_init` points it at itself.
+@(private="file")
+Wait_Queue :: struct {
+	lock: atomic_flag,
+	list: Wait_Node,
+}
+// Spins until the atomic exchange returns false, i.e. until this caller is the one that set the flag.
+@(private="file")
+waitq_lock :: proc "contextless" (waitq: ^Wait_Queue) {
+	for cast(bool)atomic_exchange_explicit(&waitq.lock, atomic_flag(true), .Acquire) {
+		cpu_relax() // spin...
+	}
+}
+// Clears the lock flag with release ordering.
+@(private="file")
+waitq_unlock :: proc "contextless" (waitq: ^Wait_Queue) {
+	atomic_store_explicit(&waitq.lock, atomic_flag(false), .Release)
+}
+
+// FIXME: This approach may scale badly in the future,
+// possible solution - hash map (leads to deadlocks now).
+@(private="file")
+g_waitq: Wait_Queue
+
+// Makes the sentinel node point at itself so the list starts out empty.
+@(init, private="file")
+g_waitq_init :: proc() {
+	g_waitq = {
+		list = {
+			prev = &g_waitq.list,
+			next = &g_waitq.list,
+		},
+	}
+}
+
+// Returns the wait queue for `f`. Every futex currently shares the single global queue; `f` is ignored.
+@(private="file")
+get_waitq :: #force_inline proc "contextless" (f: ^Futex) -> ^Wait_Queue {
+	_ = f
+	return &g_waitq
+}
+
+// Waits on `f` for a SIGCONT sent by `_futex_signal` / `_futex_broadcast`, provided `f` still holds `expect`.
+// `ok` is only assigned in the branch that actually waits: if the futex value already differs
+// from `expect`, the procedure returns with `ok` still false.
+// NOTE(review): POSIX sigwait() reports failure through its return value rather than errno - confirm
+// that reading `haiku.errno()` afterwards is meaningful here.
+_futex_wait :: proc "contextless" (f: ^Futex, expect: u32) -> (ok: bool) {
+	waitq := get_waitq(f)
+	waitq_lock(waitq)
+	defer waitq_unlock(waitq)
+
+	// The waiter node lives on this stack frame; it is unlinked again below before returning.
+	head   := &waitq.list
+	waiter := Wait_Node{
+		thread = unix.pthread_self(),
+		futex  = f,
+		prev   = head,
+		next   = head.next,
+	}
+
+	// Link the node in directly after the sentinel.
+	waiter.prev.next = &waiter
+	waiter.next.prev = &waiter
+
+	// Block SIGCONT for this thread before waiting on it with sigwait; the previous mask is restored below.
+	old_mask, mask: haiku.sigset_t
+	haiku.sigemptyset(&mask)
+	haiku.sigaddset(&mask, haiku.SIGCONT)
+	unix.pthread_sigmask(haiku.SIG_BLOCK, &mask, &old_mask)
+
+	if u32(atomic_load_explicit(f, .Acquire)) == expect {
+		// Release the queue lock while waiting; the defer re-acquires it when this scope ends.
+		waitq_unlock(waitq)
+		defer waitq_lock(waitq)
+
+		sig: c.int
+		haiku.sigwait(&mask, &sig)
+		errno := haiku.errno()
+		ok = errno == .OK
+	}
+
+	// Unlink the node (the queue lock is held again at this point).
+	waiter.prev.next = waiter.next
+	waiter.next.prev = waiter.prev
+
+	unix.pthread_sigmask(haiku.SIG_SETMASK, &old_mask, nil)
+
+	// FIXME: Add error handling! 
+	return
+}
+
+// Like `_futex_wait`, but waits with sigtimedwait() for at most `duration`.
+// Returns false immediately when `duration <= 0`.
+// `ok` is only assigned in the branch that actually waits, where it is true when errno reads
+// `.OK` or `.EAGAIN` afterwards; if the futex value already differs from `expect`, `ok` stays false.
+// NOTE(review): errno is read unconditionally after sigtimedwait(); its return value is not checked - confirm
+// that a stale errno cannot be observed here, and that treating `.EAGAIN` as success is intended.
+_futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expect: u32, duration: time.Duration) -> (ok: bool) {
+	if duration <= 0 {
+		return false
+	}
+	waitq := get_waitq(f)
+	waitq_lock(waitq)
+	defer waitq_unlock(waitq)
+
+	// The waiter node lives on this stack frame; it is unlinked again below before returning.
+	head   := &waitq.list
+	waiter := Wait_Node{
+		thread = unix.pthread_self(),
+		futex  = f,
+		prev   = head,
+		next   = head.next,
+	}
+
+	// Link the node in directly after the sentinel.
+	waiter.prev.next = &waiter
+	waiter.next.prev = &waiter
+
+	// Block SIGCONT for this thread before waiting on it; the previous mask is restored below.
+	old_mask, mask: haiku.sigset_t
+	haiku.sigemptyset(&mask)
+	haiku.sigaddset(&mask, haiku.SIGCONT)
+	unix.pthread_sigmask(haiku.SIG_BLOCK, &mask, &old_mask)
+
+	if u32(atomic_load_explicit(f, .Acquire)) == expect {
+		// Release the queue lock while waiting; the defer re-acquires it when this scope ends.
+		waitq_unlock(waitq)
+		defer waitq_lock(waitq)
+
+		info: haiku.siginfo_t
+		// Split the duration into whole seconds and the remaining nanoseconds.
+		ts := unix.timespec{
+			tv_sec  = i64(duration / 1e9),
+			tv_nsec = i64(duration % 1e9),
+		}
+		haiku.sigtimedwait(&mask, &info, &ts)
+		errno := haiku.errno()
+		ok = errno == .EAGAIN || errno == .OK
+	}
+
+	// Unlink the node (the queue lock is held again at this point).
+	waiter.prev.next = waiter.next
+	waiter.next.prev = waiter.prev
+
+	unix.pthread_sigmask(haiku.SIG_SETMASK, &old_mask, nil)
+
+	// FIXME: Add error handling! 
+	return
+}
+
+// Wakes at most one waiter on `f`: walks the wait list under the queue lock and sends SIGCONT
+// to the thread of the first node whose `futex` matches, then stops.
+_futex_signal :: proc "contextless" (f: ^Futex) {
+	waitq := get_waitq(f)
+	waitq_lock(waitq)
+	defer waitq_unlock(waitq)
+
+	// The list is circular: iteration ends when it comes back around to the sentinel head.
+	head := &waitq.list
+	for waiter := head.next; waiter != head; waiter = waiter.next {
+		if waiter.futex == f {
+			unix.pthread_kill(waiter.thread, haiku.SIGCONT)
+			break
+		}
+	}
+}
+
+// Wakes every waiter on `f`: same walk as `_futex_signal`, but without stopping at the first match.
+_futex_broadcast :: proc "contextless" (f: ^Futex) {
+	waitq := get_waitq(f)
+	waitq_lock(waitq)
+	defer waitq_unlock(waitq)
+
+	head := &waitq.list
+	for waiter := head.next; waiter != head; waiter = waiter.next {
+		if waiter.futex == f {
+			unix.pthread_kill(waiter.thread, haiku.SIGCONT)
+		}
+	}
+}
diff --git a/core/sync/primitives_haiku.odin b/core/sync/primitives_haiku.odin
new file mode 100644
index 000000000..4b8f6b02d
--- /dev/null
+++ b/core/sync/primitives_haiku.odin
@@ -0,0 +1,8 @@
+//+private
+package sync
+
+import "core:sys/haiku"
+
+_current_thread_id :: proc "contextless" () -> int {
+	return int(haiku.find_thread(nil))
+}
diff --git a/core/sys/haiku/errors.odin b/core/sys/haiku/errors.odin
new file mode 100644
index 000000000..023045001
--- /dev/null
+++ b/core/sys/haiku/errors.odin
@@ -0,0 +1,239 @@
+//+build haiku
+package sys_haiku
+
+import "core:c"
+
+Errno :: enum c.int {
+	// Error baselines
+	GENERAL_ERROR_BASE = min(c.int),
+	OS_ERROR_BASE = GENERAL_ERROR_BASE + 0x1000,
+	APP_ERROR_BASE = GENERAL_ERROR_BASE + 0x2000,
+	INTERFACE_ERROR_BASE = GENERAL_ERROR_BASE + 0x3000,
+	MEDIA_ERROR_BASE = GENERAL_ERROR_BASE + 0x4000,
+	TRANSLATION_ERROR_BASE = GENERAL_ERROR_BASE + 0x4800,
+	MIDI_ERROR_BASE = GENERAL_ERROR_BASE + 0x5000,
+	STORAGE_ERROR_BASE = GENERAL_ERROR_BASE + 0x6000,
+	POSIX_ERROR_BASE = GENERAL_ERROR_BASE + 0x7000,
+	MAIL_ERROR_BASE = GENERAL_ERROR_BASE + 0x8000,
+	PRINT_ERROR_BASE = GENERAL_ERROR_BASE + 0x9000,
+	DEVICE_ERROR_BASE = GENERAL_ERROR_BASE + 0xa000,
+
+	// Developer-defined errors start at (ERRORS_END+1)
+	ERRORS_END = GENERAL_ERROR_BASE + 0xffff,
+
+	// General Errors
+	NO_MEMORY = GENERAL_ERROR_BASE + 
0, + IO_ERROR = GENERAL_ERROR_BASE + 1, + PERMISSION_DENIED = GENERAL_ERROR_BASE + 2, + BAD_INDEX = GENERAL_ERROR_BASE + 3, + BAD_TYPE = GENERAL_ERROR_BASE + 4, + BAD_VALUE = GENERAL_ERROR_BASE + 5, + MISMATCHED_VALUES = GENERAL_ERROR_BASE + 6, + NAME_NOT_FOUND = GENERAL_ERROR_BASE + 7, + NAME_IN_USE = GENERAL_ERROR_BASE + 8, + TIMED_OUT = GENERAL_ERROR_BASE + 9, + INTERRUPTED = GENERAL_ERROR_BASE + 10, + WOULD_BLOCK = GENERAL_ERROR_BASE + 11, + CANCELED = GENERAL_ERROR_BASE + 12, + NO_INIT = GENERAL_ERROR_BASE + 13, + NOT_INITIALIZED = GENERAL_ERROR_BASE + 13, + BUSY = GENERAL_ERROR_BASE + 14, + NOT_ALLOWED = GENERAL_ERROR_BASE + 15, + BAD_DATA = GENERAL_ERROR_BASE + 16, + DONT_DO_THAT = GENERAL_ERROR_BASE + 17, + + ERROR = -1, + OK = 0, + NO_ERROR = 0, + + // Kernel Kit Errors + BAD_SEM_ID = OS_ERROR_BASE + 0, + NO_MORE_SEMS = OS_ERROR_BASE + 1, + BAD_THREAD_ID = OS_ERROR_BASE + 0x100, + NO_MORE_THREADS = OS_ERROR_BASE + 0x101, + BAD_THREAD_STATE = OS_ERROR_BASE + 0x102, + BAD_TEAM_ID = OS_ERROR_BASE + 0x103, + NO_MORE_TEAMS = OS_ERROR_BASE + 0x104, + BAD_PORT_ID = OS_ERROR_BASE + 0x200, + NO_MORE_PORTS = OS_ERROR_BASE + 0x201, + BAD_IMAGE_ID = OS_ERROR_BASE + 0x300, + BAD_ADDRESS = OS_ERROR_BASE + 0x301, + NOT_AN_EXECUTABLE = OS_ERROR_BASE + 0x302, + MISSING_LIBRARY = OS_ERROR_BASE + 0x303, + MISSING_SYMBOL = OS_ERROR_BASE + 0x304, + UNKNOWN_EXECUTABLE = OS_ERROR_BASE + 0x305, + LEGACY_EXECUTABLE = OS_ERROR_BASE + 0x306, + + DEBUGGER_ALREADY_INSTALLED = OS_ERROR_BASE + 0x400, + + // Application Kit Errors + BAD_REPLY = APP_ERROR_BASE + 0, + DUPLICATE_REPLY = APP_ERROR_BASE + 1, + MESSAGE_TO_SELF = APP_ERROR_BASE + 2, + BAD_HANDLER = APP_ERROR_BASE + 3, + ALREADY_RUNNING = APP_ERROR_BASE + 4, + LAUNCH_FAILED = APP_ERROR_BASE + 5, + AMBIGUOUS_APP_LAUNCH = APP_ERROR_BASE + 6, + UNKNOWN_MIME_TYPE = APP_ERROR_BASE + 7, + BAD_SCRIPT_SYNTAX = APP_ERROR_BASE + 8, + LAUNCH_FAILED_NO_RESOLVE_LINK = APP_ERROR_BASE + 9, + LAUNCH_FAILED_EXECUTABLE = APP_ERROR_BASE + 10, + 
LAUNCH_FAILED_APP_NOT_FOUND = APP_ERROR_BASE + 11, + LAUNCH_FAILED_APP_IN_TRASH = APP_ERROR_BASE + 12, + LAUNCH_FAILED_NO_PREFERRED_APP = APP_ERROR_BASE + 13, + LAUNCH_FAILED_FILES_APP_NOT_FOUND = APP_ERROR_BASE + 14, + BAD_MIME_SNIFFER_RULE = APP_ERROR_BASE + 15, + NOT_A_MESSAGE = APP_ERROR_BASE + 16, + SHUTDOWN_CANCELLED = APP_ERROR_BASE + 17, + SHUTTING_DOWN = APP_ERROR_BASE + 18, + + // Storage Kit/File System Errors + FILE_ERROR = STORAGE_ERROR_BASE + 0, + // 1 was B_FILE_NOT_FOUND (deprecated) + FILE_EXISTS = STORAGE_ERROR_BASE + 2, + ENTRY_NOT_FOUND = STORAGE_ERROR_BASE + 3, + NAME_TOO_LONG = STORAGE_ERROR_BASE + 4, + NOT_A_DIRECTORY = STORAGE_ERROR_BASE + 5, + DIRECTORY_NOT_EMPTY = STORAGE_ERROR_BASE + 6, + DEVICE_FULL = STORAGE_ERROR_BASE + 7, + READ_ONLY_DEVICE = STORAGE_ERROR_BASE + 8, + IS_A_DIRECTORY = STORAGE_ERROR_BASE + 9, + NO_MORE_FDS = STORAGE_ERROR_BASE + 10, + CROSS_DEVICE_LINK = STORAGE_ERROR_BASE + 11, + LINK_LIMIT = STORAGE_ERROR_BASE + 12, + BUSTED_PIPE = STORAGE_ERROR_BASE + 13, + UNSUPPORTED = STORAGE_ERROR_BASE + 14, + PARTITION_TOO_SMALL = STORAGE_ERROR_BASE + 15, + PARTIAL_READ = STORAGE_ERROR_BASE + 16, + PARTIAL_WRITE = STORAGE_ERROR_BASE + 17, + + // Some POSIX errors + E2BIG = POSIX_ERROR_BASE + 1, + EFBIG = POSIX_ERROR_BASE + 4, + ENODEV = POSIX_ERROR_BASE + 7, + ERANGE = POSIX_ERROR_BASE + 17, + EOVERFLOW = POSIX_ERROR_BASE + 41, + EOPNOTSUPP = POSIX_ERROR_BASE + 43, + + ENOSYS = POSIX_ERROR_BASE + 9, + EAGAIN = WOULD_BLOCK, + + // New error codes that can be mapped to POSIX errors + TOO_MANY_ARGS_NEG = E2BIG, + FILE_TOO_LARGE_NEG = EFBIG, + DEVICE_NOT_FOUND_NEG = ENODEV, + RESULT_NOT_REPRESENTABLE_NEG = ERANGE, + BUFFER_OVERFLOW_NEG = EOVERFLOW, + NOT_SUPPORTED_NEG = EOPNOTSUPP, + + TOO_MANY_ARGS_POS = -E2BIG, + FILE_TOO_LARGE_POS = -EFBIG, + DEVICE_NOT_FOUND_POS = -ENODEV, + RESULT_NOT_REPRESENTABLE_POS = -ERANGE, + BUFFER_OVERFLOW_POS = -EOVERFLOW, + NOT_SUPPORTED_POS = -EOPNOTSUPP, + + // Media Kit Errors + STREAM_NOT_FOUND = 
MEDIA_ERROR_BASE + 0, + SERVER_NOT_FOUND = MEDIA_ERROR_BASE + 1, + RESOURCE_NOT_FOUND = MEDIA_ERROR_BASE + 2, + RESOURCE_UNAVAILABLE = MEDIA_ERROR_BASE + 3, + BAD_SUBSCRIBER = MEDIA_ERROR_BASE + 4, + SUBSCRIBER_NOT_ENTERED = MEDIA_ERROR_BASE + 5, + BUFFER_NOT_AVAILABLE = MEDIA_ERROR_BASE + 6, + LAST_BUFFER_ERROR = MEDIA_ERROR_BASE + 7, + MEDIA_SYSTEM_FAILURE = MEDIA_ERROR_BASE + 100, + MEDIA_BAD_NODE = MEDIA_ERROR_BASE + 101, + MEDIA_NODE_BUSY = MEDIA_ERROR_BASE + 102, + MEDIA_BAD_FORMAT = MEDIA_ERROR_BASE + 103, + MEDIA_BAD_BUFFER = MEDIA_ERROR_BASE + 104, + MEDIA_TOO_MANY_NODES = MEDIA_ERROR_BASE + 105, + MEDIA_TOO_MANY_BUFFERS = MEDIA_ERROR_BASE + 106, + MEDIA_NODE_ALREADY_EXISTS = MEDIA_ERROR_BASE + 107, + MEDIA_BUFFER_ALREADY_EXISTS = MEDIA_ERROR_BASE + 108, + MEDIA_CANNOT_SEEK = MEDIA_ERROR_BASE + 109, + MEDIA_CANNOT_CHANGE_RUN_MODE = MEDIA_ERROR_BASE + 110, + MEDIA_APP_ALREADY_REGISTERED = MEDIA_ERROR_BASE + 111, + MEDIA_APP_NOT_REGISTERED = MEDIA_ERROR_BASE + 112, + MEDIA_CANNOT_RECLAIM_BUFFERS = MEDIA_ERROR_BASE + 113, + MEDIA_BUFFERS_NOT_RECLAIMED = MEDIA_ERROR_BASE + 114, + MEDIA_TIME_SOURCE_STOPPED = MEDIA_ERROR_BASE + 115, + MEDIA_TIME_SOURCE_BUSY = MEDIA_ERROR_BASE + 116, + MEDIA_BAD_SOURCE = MEDIA_ERROR_BASE + 117, + MEDIA_BAD_DESTINATION = MEDIA_ERROR_BASE + 118, + MEDIA_ALREADY_CONNECTED = MEDIA_ERROR_BASE + 119, + MEDIA_NOT_CONNECTED = MEDIA_ERROR_BASE + 120, + MEDIA_BAD_CLIP_FORMAT = MEDIA_ERROR_BASE + 121, + MEDIA_ADDON_FAILED = MEDIA_ERROR_BASE + 122, + MEDIA_ADDON_DISABLED = MEDIA_ERROR_BASE + 123, + MEDIA_CHANGE_IN_PROGRESS = MEDIA_ERROR_BASE + 124, + MEDIA_STALE_CHANGE_COUNT = MEDIA_ERROR_BASE + 125, + MEDIA_ADDON_RESTRICTED = MEDIA_ERROR_BASE + 126, + MEDIA_NO_HANDLER = MEDIA_ERROR_BASE + 127, + MEDIA_DUPLICATE_FORMAT = MEDIA_ERROR_BASE + 128, + MEDIA_REALTIME_DISABLED = MEDIA_ERROR_BASE + 129, + MEDIA_REALTIME_UNAVAILABLE = MEDIA_ERROR_BASE + 130, + + // Mail Kit Errors + MAIL_NO_DAEMON = MAIL_ERROR_BASE + 0, + MAIL_UNKNOWN_USER = 
MAIL_ERROR_BASE + 1, + MAIL_WRONG_PASSWORD = MAIL_ERROR_BASE + 2, + MAIL_UNKNOWN_HOST = MAIL_ERROR_BASE + 3, + MAIL_ACCESS_ERROR = MAIL_ERROR_BASE + 4, + MAIL_UNKNOWN_FIELD = MAIL_ERROR_BASE + 5, + MAIL_NO_RECIPIENT = MAIL_ERROR_BASE + 6, + MAIL_INVALID_MAIL = MAIL_ERROR_BASE + 7, + + // Printing Errors + NO_PRINT_SERVER = PRINT_ERROR_BASE + 0, + + // Device Kit Errors + DEV_INVALID_IOCTL = DEVICE_ERROR_BASE + 0, + DEV_NO_MEMORY = DEVICE_ERROR_BASE + 1, + DEV_BAD_DRIVE_NUM = DEVICE_ERROR_BASE + 2, + DEV_NO_MEDIA = DEVICE_ERROR_BASE + 3, + DEV_UNREADABLE = DEVICE_ERROR_BASE + 4, + DEV_FORMAT_ERROR = DEVICE_ERROR_BASE + 5, + DEV_TIMEOUT = DEVICE_ERROR_BASE + 6, + DEV_RECALIBRATE_ERROR = DEVICE_ERROR_BASE + 7, + DEV_SEEK_ERROR = DEVICE_ERROR_BASE + 8, + DEV_ID_ERROR = DEVICE_ERROR_BASE + 9, + DEV_READ_ERROR = DEVICE_ERROR_BASE + 10, + DEV_WRITE_ERROR = DEVICE_ERROR_BASE + 11, + DEV_NOT_READY = DEVICE_ERROR_BASE + 12, + DEV_MEDIA_CHANGED = DEVICE_ERROR_BASE + 13, + DEV_MEDIA_CHANGE_REQUESTED = DEVICE_ERROR_BASE + 14, + DEV_RESOURCE_CONFLICT = DEVICE_ERROR_BASE + 15, + DEV_CONFIGURATION_ERROR = DEVICE_ERROR_BASE + 16, + DEV_DISABLED_BY_USER = DEVICE_ERROR_BASE + 17, + DEV_DOOR_OPEN = DEVICE_ERROR_BASE + 18, + DEV_INVALID_PIPE = DEVICE_ERROR_BASE + 19, + DEV_CRC_ERROR = DEVICE_ERROR_BASE + 20, + DEV_STALLED = DEVICE_ERROR_BASE + 21, + DEV_BAD_PID = DEVICE_ERROR_BASE + 22, + DEV_UNEXPECTED_PID = DEVICE_ERROR_BASE + 23, + DEV_DATA_OVERRUN = DEVICE_ERROR_BASE + 24, + DEV_DATA_UNDERRUN = DEVICE_ERROR_BASE + 25, + DEV_FIFO_OVERRUN = DEVICE_ERROR_BASE + 26, + DEV_FIFO_UNDERRUN = DEVICE_ERROR_BASE + 27, + DEV_PENDING = DEVICE_ERROR_BASE + 28, + DEV_MULTIPLE_ERRORS = DEVICE_ERROR_BASE + 29, + DEV_TOO_LATE = DEVICE_ERROR_BASE + 30, + + // Translation Kit Errors + TRANSLATION_BASE_ERROR = TRANSLATION_ERROR_BASE + 0, + NO_TRANSLATOR = TRANSLATION_ERROR_BASE + 1, + ILLEGAL_DATA = TRANSLATION_ERROR_BASE + 2, +} + +errno :: #force_inline proc "contextless" () -> Errno { + return 
Errno(_errnop()^) +} + +foreign import libroot "system:c" +foreign libroot { + _to_positive_error :: proc(error: c.int) -> c.int --- + _to_negative_error :: proc(error: c.int) -> c.int --- + + _errnop :: proc() -> ^c.int --- +} diff --git a/core/sys/haiku/find_directory.odin b/core/sys/haiku/find_directory.odin new file mode 100644 index 000000000..103e677d7 --- /dev/null +++ b/core/sys/haiku/find_directory.odin @@ -0,0 +1,168 @@ +//+build haiku +package sys_haiku + +import "core:c" + +directory_which :: enum c.int { + // Per volume directories + DESKTOP_DIRECTORY = 0, + TRASH_DIRECTORY, + + // System directories + SYSTEM_DIRECTORY = 1000, + SYSTEM_ADDONS_DIRECTORY = 1002, + SYSTEM_BOOT_DIRECTORY, + SYSTEM_FONTS_DIRECTORY, + SYSTEM_LIB_DIRECTORY, + SYSTEM_SERVERS_DIRECTORY, + SYSTEM_APPS_DIRECTORY, + SYSTEM_BIN_DIRECTORY, + SYSTEM_DOCUMENTATION_DIRECTORY = 1010, + SYSTEM_PREFERENCES_DIRECTORY, + SYSTEM_TRANSLATORS_DIRECTORY, + SYSTEM_MEDIA_NODES_DIRECTORY, + SYSTEM_SOUNDS_DIRECTORY, + SYSTEM_DATA_DIRECTORY, + SYSTEM_DEVELOP_DIRECTORY, + SYSTEM_PACKAGES_DIRECTORY, + SYSTEM_HEADERS_DIRECTORY, + SYSTEM_ETC_DIRECTORY = 2008, + SYSTEM_SETTINGS_DIRECTORY = 2010, + SYSTEM_LOG_DIRECTORY = 2012, + SYSTEM_SPOOL_DIRECTORY, + SYSTEM_TEMP_DIRECTORY, + SYSTEM_VAR_DIRECTORY, + SYSTEM_CACHE_DIRECTORY = 2020, + SYSTEM_NONPACKAGED_DIRECTORY = 2023, + SYSTEM_NONPACKAGED_ADDONS_DIRECTORY, + SYSTEM_NONPACKAGED_TRANSLATORS_DIRECTORY, + SYSTEM_NONPACKAGED_MEDIA_NODES_DIRECTORY, + SYSTEM_NONPACKAGED_BIN_DIRECTORY, + SYSTEM_NONPACKAGED_DATA_DIRECTORY, + SYSTEM_NONPACKAGED_FONTS_DIRECTORY, + SYSTEM_NONPACKAGED_SOUNDS_DIRECTORY, + SYSTEM_NONPACKAGED_DOCUMENTATION_DIRECTORY, + SYSTEM_NONPACKAGED_LIB_DIRECTORY, + SYSTEM_NONPACKAGED_HEADERS_DIRECTORY, + SYSTEM_NONPACKAGED_DEVELOP_DIRECTORY, + + // User directories. These are interpreted in the context of the user making the find_directory call. 
+ USER_DIRECTORY = 3000, + USER_CONFIG_DIRECTORY, + USER_ADDONS_DIRECTORY, + USER_BOOT_DIRECTORY, + USER_FONTS_DIRECTORY, + USER_LIB_DIRECTORY, + USER_SETTINGS_DIRECTORY, + USER_DESKBAR_DIRECTORY, + USER_PRINTERS_DIRECTORY, + USER_TRANSLATORS_DIRECTORY, + USER_MEDIA_NODES_DIRECTORY, + USER_SOUNDS_DIRECTORY, + USER_DATA_DIRECTORY, + USER_CACHE_DIRECTORY, + USER_PACKAGES_DIRECTORY, + USER_HEADERS_DIRECTORY, + USER_NONPACKAGED_DIRECTORY, + USER_NONPACKAGED_ADDONS_DIRECTORY, + USER_NONPACKAGED_TRANSLATORS_DIRECTORY, + USER_NONPACKAGED_MEDIA_NODES_DIRECTORY, + USER_NONPACKAGED_BIN_DIRECTORY, + USER_NONPACKAGED_DATA_DIRECTORY, + USER_NONPACKAGED_FONTS_DIRECTORY, + USER_NONPACKAGED_SOUNDS_DIRECTORY, + USER_NONPACKAGED_DOCUMENTATION_DIRECTORY, + USER_NONPACKAGED_LIB_DIRECTORY, + USER_NONPACKAGED_HEADERS_DIRECTORY, + USER_NONPACKAGED_DEVELOP_DIRECTORY, + USER_DEVELOP_DIRECTORY, + USER_DOCUMENTATION_DIRECTORY, + USER_SERVERS_DIRECTORY, + USER_APPS_DIRECTORY, + USER_BIN_DIRECTORY, + USER_PREFERENCES_DIRECTORY, + USER_ETC_DIRECTORY, + USER_LOG_DIRECTORY, + USER_SPOOL_DIRECTORY, + USER_VAR_DIRECTORY, + + // Global directories + APPS_DIRECTORY = 4000, + PREFERENCES_DIRECTORY, + UTILITIES_DIRECTORY, + PACKAGE_LINKS_DIRECTORY, + + // Obsolete: Legacy BeOS definition to be phased out + BEOS_DIRECTORY = 1000, + BEOS_SYSTEM_DIRECTORY, + BEOS_ADDONS_DIRECTORY, + BEOS_BOOT_DIRECTORY, + BEOS_FONTS_DIRECTORY, + BEOS_LIB_DIRECTORY, + BEOS_SERVERS_DIRECTORY, + BEOS_APPS_DIRECTORY, + BEOS_BIN_DIRECTORY, + BEOS_ETC_DIRECTORY, + BEOS_DOCUMENTATION_DIRECTORY, + BEOS_PREFERENCES_DIRECTORY, + BEOS_TRANSLATORS_DIRECTORY, + BEOS_MEDIA_NODES_DIRECTORY, + BEOS_SOUNDS_DIRECTORY, +} + +find_path_flags :: enum c.int { + CREATE_DIRECTORY = 0x0001, + CREATE_PARENT_DIRECTORY = 0x0002, + EXISTING_ONLY = 0x0004, + + // find_paths() only! 
+ SYSTEM_ONLY = 0x0010, + USER_ONLY = 0x0020, +} + +path_base_directory :: enum c.int { + INSTALLATION_LOCATION_DIRECTORY, + ADD_ONS_DIRECTORY, + APPS_DIRECTORY, + BIN_DIRECTORY, + BOOT_DIRECTORY, + CACHE_DIRECTORY, + DATA_DIRECTORY, + DEVELOP_DIRECTORY, + DEVELOP_LIB_DIRECTORY, + DOCUMENTATION_DIRECTORY, + ETC_DIRECTORY, + FONTS_DIRECTORY, + HEADERS_DIRECTORY, + LIB_DIRECTORY, + LOG_DIRECTORY, + MEDIA_NODES_DIRECTORY, + PACKAGES_DIRECTORY, + PREFERENCES_DIRECTORY, + SERVERS_DIRECTORY, + SETTINGS_DIRECTORY, + SOUNDS_DIRECTORY, + SPOOL_DIRECTORY, + TRANSLATORS_DIRECTORY, + VAR_DIRECTORY, + + // find_path() only! + IMAGE_PATH = 1000, + PACKAGE_PATH, +} + +// value that can be used instead of a pointer to a symbol in the program image +APP_IMAGE_SYMBOL :: rawptr(addr_t(0)) +// pointer to a symbol in the callers image (same as B_CURRENT_IMAGE_SYMBOL) +current_image_symbol :: proc() -> rawptr { return rawptr(current_image_symbol) } + +foreign import libroot "system:c" +foreign libroot { + find_directory :: proc(which: directory_which, volume: dev_t, createIt: bool, pathString: [^]c.char, length: i32) -> status_t --- + find_path :: proc(codePointer: rawptr, baseDirectory: path_base_directory, subPath: cstring, pathBuffer: [^]c.char, bufferSize: c.size_t) -> status_t --- + find_path_etc :: proc(codePointer: rawptr, dependency: cstring, architecture: cstring, baseDirectory: path_base_directory, subPath: cstring, flags: find_path_flags, pathBuffer: [^]c.char, bufferSize: c.size_t) -> status_t --- + find_path_for_path :: proc(path: cstring, baseDirectory: path_base_directory, subPath: cstring, pathBuffer: [^]c.char, bufferSize: c.size_t) -> status_t --- + find_path_for_path_etc :: proc(path: cstring, dependency: cstring, architecture: cstring, baseDirectory: path_base_directory, subPath: cstring, flags: find_path_flags, pathBuffer: [^]c.char, bufferSize: c.size_t) -> status_t --- + find_paths :: proc(baseDirectory: path_base_directory, subPath: cstring, _paths: 
^[^][^]c.char, _pathCount: ^c.size_t) -> status_t --- + find_paths_etc :: proc(architecture: cstring, baseDirectory: path_base_directory, subPath: cstring, flags: find_path_flags, _paths: ^[^][^]c.char, _pathCount: ^c.size_t) -> status_t --- +} diff --git a/core/sys/haiku/os.odin b/core/sys/haiku/os.odin new file mode 100644 index 000000000..1e00145eb --- /dev/null +++ b/core/sys/haiku/os.odin @@ -0,0 +1,502 @@ +//+build haiku +package sys_haiku + +import "core:c" +import "core:sys/unix" + +foreign import libroot "system:c" + +PATH_MAX :: 1024 +NAME_MAX :: 256 +MAXPATHLEN :: PATH_MAX + +FILE_NAME_LENGTH :: NAME_MAX +PATH_NAME_LENGTH :: MAXPATHLEN +OS_NAME_LENGTH :: 32 + +// Areas + +area_info :: struct { + area: area_id, + name: [OS_NAME_LENGTH]c.char, + size: c.size_t, + lock: u32, + protection: u32, + team: team_id, + ram_size: u32, + copy_count: u32, + in_count: u32, + out_count: u32, + address: rawptr, +} + +area_locking :: enum u32 { + NO_LOCK = 0, + LAZY_LOCK = 1, + FULL_LOCK = 2, + CONTIGUOUS = 3, + LOMEM = 4, // CONTIGUOUS, < 16 MB physical address + _32_BIT_FULL_LOCK = 5, // FULL_LOCK, < 4 GB physical addresses + _32_BIT_CONTIGUOUS = 6, // CONTIGUOUS, < 4 GB physical address +} + +// for create_area() and clone_area() +address_spec :: enum u32 { + ANY_ADDRESS = 0, + EXACT_ADDRESS = 1, + BASE_ADDRESS = 2, + CLONE_ADDRESS = 3, + ANY_KERNEL_ADDRESS = 4, + // ANY_KERNEL_BLOCK_ADDRESS = 5, + RANDOMIZED_ANY_ADDRESS = 6, + RANDOMIZED_BASE_ADDRESS = 7, +} + +area_protection_flags :: enum u32 { + READ_AREA = 1 << 0, + WRITE_AREA = 1 << 1, + EXECUTE_AREA = 1 << 2, + // "stack" protection is not available on most platforms - it's used + // to only commit memory as needed, and have guard pages at the + // bottom of the stack. 
+ STACK_AREA = 1 << 3, + CLONEABLE_AREA = 1 << 8, +} + +foreign libroot { + create_area :: proc(name: cstring, startAddress: ^rawptr, addressSpec: address_spec, size: c.size_t, lock: area_locking, protection: area_protection_flags) -> area_id --- + clone_area :: proc(name: cstring, destAddress: ^rawptr, addressSpec: address_spec, protection: area_protection_flags, source: area_id) -> area_id --- + find_area :: proc(name: cstring) -> area_id --- + area_for :: proc(address: rawptr) -> area_id --- + delete_area :: proc(id: area_id) -> status_t --- + resize_area :: proc(id: area_id, newSize: c.size_t) -> status_t --- + set_area_protection :: proc(id: area_id, newProtection: area_protection_flags) -> status_t --- + _get_area_info :: proc(id: area_id, areaInfo: ^area_info, size: c.size_t) -> status_t --- + _get_next_area_info :: proc(team: team_id, cookie: ^c.ssize_t, areaInfo: ^area_info, size: c.size_t) -> status_t --- +} + +// Ports + +port_info :: struct { + port: port_id, + team: team_id, + name: [OS_NAME_LENGTH]c.char, + capacity: i32, // queue depth + queue_count: i32, // # msgs waiting to be read + total_count: i32, // total # msgs read so far +} + +port_flags :: enum u32 { + USE_USER_MEMCPY = 0x80000000, + // read the message, but don't remove it; kernel-only; memory must be locked + PEEK_PORT_MESSAGE = 0x100, +} + +foreign libroot { + create_port :: proc(capacity: i32, name: cstring) -> port_id --- + find_port :: proc(name: cstring) -> port_id --- + read_port :: proc(port: port_id, code: ^i32, buffer: rawptr, bufferSize: c.size_t) -> c.ssize_t --- + read_port_etc :: proc(port: port_id, code: ^i32, buffer: rawptr, bufferSize: c.size_t, flags: port_flags, timeout: bigtime_t) -> c.ssize_t --- + write_port :: proc(port: port_id, code: i32, buffer: rawptr, bufferSize: c.size_t) -> status_t --- + write_port_etc :: proc(port: port_id, code: i32, buffer: rawptr, bufferSize: c.size_t, flags: port_flags, timeout: bigtime_t) -> status_t --- + close_port :: proc(port: 
port_id) -> status_t --- + delete_port :: proc(port: port_id) -> status_t --- + port_buffer_size :: proc(port: port_id) -> c.ssize_t --- + port_buffer_size_etc :: proc(port: port_id, flags: port_flags, timeout: bigtime_t) -> c.ssize_t --- + port_count :: proc(port: port_id) -> c.ssize_t --- + set_port_owner :: proc(port: port_id, team: team_id) -> status_t --- + _get_port_info :: proc(port: port_id, portInfo: ^port_info, portInfoSize: c.size_t) -> status_t --- + _get_next_port_info :: proc(team: team_id, cookie: ^i32, portInfo: ^port_info, portInfoSize: c.size_t) -> status_t --- +} + +// Semaphores + +sem_info :: struct { + sem: sem_id, + team: team_id, + name: [OS_NAME_LENGTH]c.char, + count: i32, + latest_holder: thread_id, +} + +semaphore_flags :: enum u32 { + CAN_INTERRUPT = 0x01, // acquisition of the semaphore can be interrupted (system use only) + CHECK_PERMISSION = 0x04, // ownership will be checked (system use only) + KILL_CAN_INTERRUPT = 0x20, // acquisition of the semaphore can be interrupted by SIGKILL[THR], even if not CAN_INTERRUPT (system use only) + + // release_sem_etc() only flags + DO_NOT_RESCHEDULE = 0x02, // thread is not rescheduled + RELEASE_ALL = 0x08, // all waiting threads will be woken up, count will be zeroed + RELEASE_IF_WAITING_ONLY = 0x10, // release count only if there are any threads waiting +} + +foreign libroot { + create_sem :: proc(count: i32, name: cstring) -> sem_id --- + delete_sem :: proc(id: sem_id) -> status_t --- + acquire_sem :: proc(id: sem_id) -> status_t --- + acquire_sem_etc :: proc(id: sem_id, count: i32, flags: semaphore_flags, timeout: bigtime_t) -> status_t --- + release_sem :: proc(id: sem_id) -> status_t --- + release_sem_etc :: proc(id: sem_id, count: i32, flags: semaphore_flags) -> status_t --- + switch_sem :: proc(semToBeReleased: sem_id, id: sem_id) -> status_t --- + switch_sem_etc :: proc(semToBeReleased: sem_id, id: sem_id, count: i32, flags: semaphore_flags, timeout: bigtime_t) -> status_t --- + get_sem_count ::
proc(id: sem_id, threadCount: ^i32) -> status_t --- + set_sem_owner :: proc(id: sem_id, team: team_id) -> status_t --- + _get_sem_info :: proc(id: sem_id, info: ^sem_info, infoSize: c.size_t) -> status_t --- + _get_next_sem_info :: proc(team: team_id, cookie: ^i32, info: ^sem_info, infoSize: c.size_t) -> status_t --- +} + +// Teams + +team_info :: struct { + team: team_id, + thread_count: i32, + image_count: i32, + area_count: i32, + debugger_nub_thread: thread_id, + debugger_nub_port: port_id, + argc: i32, + args: [64]c.char, + uid: uid_t, + gid: gid_t, + + // Haiku R1 extensions + real_uid: uid_t, + real_gid: gid_t, + group_id: pid_t, + session_id: pid_t, + parent: team_id, + name: [OS_NAME_LENGTH]c.char, + start_time: bigtime_t, +} + +CURRENT_TEAM :: 0 +SYSTEM_TEAM :: 1 + +team_usage_info :: struct { + user_time: bigtime_t, + kernel_time: bigtime_t, +} + +team_usage_who :: enum i32 { + // compatible to sys/resource.h RUSAGE_SELF and RUSAGE_CHILDREN + SELF = 0, + CHILDREN = -1, +} + +foreign libroot { + // see also: send_signal() + kill_team :: proc(team: team_id) -> status_t --- + _get_team_info :: proc(id: team_id, info: ^team_info, size: c.size_t) -> status_t --- + _get_next_team_info :: proc(cookie: ^i32, info: ^team_info, size: c.size_t) -> status_t --- + _get_team_usage_info :: proc(id: team_id, who: team_usage_who, info: ^team_usage_info, size: c.size_t) -> status_t --- +} + +// Threads + +thread_state :: enum c.int { + RUNNING = 1, + READY, + RECEIVING, + ASLEEP, + SUSPENDED, + WAITING, +} + +thread_info :: struct { + thread: thread_id, + team: team_id, + name: [OS_NAME_LENGTH]c.char, + state: thread_state, + priority: thread_priority, + sem: sem_id, + user_time: bigtime_t, + kernel_time: bigtime_t, + stack_base: rawptr, + stack_end: rawptr, +} + +thread_priority :: enum i32 { + IDLE_PRIORITY = 0, + LOWEST_ACTIVE_PRIORITY = 1, + LOW_PRIORITY = 5, + NORMAL_PRIORITY = 10, + DISPLAY_PRIORITY = 15, + URGENT_DISPLAY_PRIORITY = 20, + REAL_TIME_DISPLAY_PRIORITY 
= 100, + URGENT_PRIORITY = 110, + REAL_TIME_PRIORITY = 120, +} + +FIRST_REAL_TIME_PRIORITY :: thread_priority.REAL_TIME_DISPLAY_PRIORITY // mirrors B_FIRST_REAL_TIME_PRIORITY in Haiku's OS.h + +// time base for snooze_*(), compatible with the clockid_t constants defined in <time.h> +SYSTEM_TIMEBASE :: 0 + +thread_func :: #type proc "c" (rawptr) -> status_t + +foreign libroot { + spawn_thread :: proc(func: thread_func, name: cstring, priority: thread_priority, data: rawptr) -> thread_id --- + kill_thread :: proc(thread: thread_id) -> status_t --- + resume_thread :: proc(thread: thread_id) -> status_t --- + suspend_thread :: proc(thread: thread_id) -> status_t --- + rename_thread :: proc(thread: thread_id, newName: cstring) -> status_t --- + set_thread_priority :: proc(thread: thread_id, newPriority: thread_priority) -> status_t --- + exit_thread :: proc(status: status_t) --- + wait_for_thread :: proc(thread: thread_id, returnValue: ^status_t) -> status_t --- + // FIXME: Find and define those flags. + wait_for_thread_etc :: proc(id: thread_id, flags: u32, timeout: bigtime_t, _returnCode: ^status_t) -> status_t --- + on_exit_thread :: proc(callback: proc "c" (rawptr), data: rawptr) -> status_t --- + find_thread :: proc(name: cstring) -> thread_id --- + send_data :: proc(thread: thread_id, code: i32, buffer: rawptr, bufferSize: c.size_t) -> status_t --- + receive_data :: proc(sender: ^thread_id, buffer: rawptr, bufferSize: c.size_t) -> i32 --- + has_data :: proc(thread: thread_id) -> bool --- + snooze :: proc(amount: bigtime_t) -> status_t --- + // FIXME: Find and define those flags.
+ snooze_etc :: proc(amount: bigtime_t, timeBase: c.int, flags: u32) -> status_t --- + snooze_until :: proc(time: bigtime_t, timeBase: c.int) -> status_t --- + _get_thread_info :: proc(id: thread_id, info: ^thread_info, size: c.size_t) -> status_t --- + _get_next_thread_info :: proc(team: team_id, cookie: ^i32, info: ^thread_info, size: c.size_t) -> status_t --- + // bridge to the pthread API + get_pthread_thread_id :: proc(thread: pthread_t) -> thread_id --- +} + +// Time + +foreign libroot { + real_time_clock :: proc() -> c.ulong --- + set_real_time_clock :: proc(secsSinceJan1st1970: c.ulong) --- + real_time_clock_usecs :: proc() -> bigtime_t --- + // time since booting in microseconds + system_time :: proc() -> bigtime_t --- + // time since booting in nanoseconds + system_time_nsecs :: proc() -> nanotime_t --- +} + +// Alarm + +alarm_mode :: enum u32 { + ONE_SHOT_ABSOLUTE_ALARM = 1, + ONE_SHOT_RELATIVE_ALARM, + PERIODIC_ALARM, // "when" specifies the period +} + +foreign libroot { + set_alarm :: proc(_when: bigtime_t, mode: alarm_mode) -> bigtime_t --- +} + +// Debugger + +foreign libroot { + debugger :: proc(message: cstring) --- + /* + calling this function with a non-zero value will cause your thread + to receive signals for any exceptional conditions that occur (i.e. + you'll get SIGSEGV for data access exceptions, SIGFPE for floating + point errors, SIGILL for illegal instructions, etc). + + to re-enable the default debugger pass a zero. 
+ */ + disable_debugger :: proc(state: c.int) -> c.int --- +} + +// System information + +cpu_info :: struct { + active_time: bigtime_t, + enabled: bool, + current_frequency: u64, +} + +system_info :: struct { + boot_time: bigtime_t, // time of boot (usecs since 1/1/1970) + + cpu_count: u32, // number of cpus + + max_pages: u64, // total # of accessible pages + used_pages: u64, // # of accessible pages in use + cached_pages: u64, + block_cache_pages: u64, + ignored_pages: u64, // # of ignored/inaccessible pages + + needed_memory: u64, + free_memory: u64, + + max_swap_pages: u64, + free_swap_pages: u64, + + page_faults: u32, // # of page faults + + max_sems: u32, + used_sems: u32, + + max_ports: u32, + used_ports: u32, + + max_threads: u32, + used_threads: u32, + + max_teams: u32, + used_teams: u32, + + kernel_name: [FILE_NAME_LENGTH]c.char, + kernel_build_date: [OS_NAME_LENGTH]c.char, + kernel_build_time: [OS_NAME_LENGTH]c.char, + + kernel_version: i64, + abi: u32, // the system API +} + +topology_level_type :: enum c.int { + UNKNOWN, + ROOT, + SMT, + CORE, + PACKAGE, +} + +cpu_platform :: enum c.int { + UNKNOWN, + x86, + x86_64, + PPC, + PPC_64, + M68K, + ARM, + ARM_64, + ALPHA, + MIPS, + SH, + SPARC, + RISC_V, +} + +cpu_vendor :: enum c.int { + UNKNOWN, + AMD, + CYRIX, + IDT, + INTEL, + NATIONAL_SEMICONDUCTOR, + RISE, + TRANSMETA, + VIA, + IBM, + MOTOROLA, + NEC, + HYGON, + SUN, + FUJITSU, +} + +cpu_topology_node_info :: struct { + id: u32, + type: topology_level_type, + level: u32, + + data: struct #raw_union { + _root: struct { + platform: cpu_platform, + }, + _package: struct { + vendor: cpu_vendor, + cache_line_size: u32 + }, + _core: struct { + model: u32, + default_frequency: u64, + }, + }, +} + +// FIXME: Add cpuid_info when bit fields are ready. 
+ +foreign libroot { + get_system_info :: proc(info: ^system_info) -> status_t --- + _get_cpu_info_etc :: proc(firstCPU: u32, cpuCount: u32, info: ^cpu_info, size: c.size_t) -> status_t --- + get_cpu_topology_info :: proc(topologyInfos: [^]cpu_topology_node_info, topologyInfoCount: ^u32) -> status_t --- + + is_computer_on :: proc() -> i32 --- + is_computer_on_fire :: proc() -> f64 --- +} + +// Signal.h + +SIG_BLOCK :: 1 +SIG_UNBLOCK :: 2 +SIG_SETMASK :: 3 + +/* + * The list of all defined signals: + * + * The numbering of signals for Haiku attempts to maintain + * some consistency with UN*X conventions so that things + * like "kill -9" do what you expect. + */ + +SIGHUP :: 1 // hangup -- tty is gone! +SIGINT :: 2 // interrupt +SIGQUIT :: 3 // `quit' special character typed in tty +SIGILL :: 4 // illegal instruction +SIGCHLD :: 5 // child process exited +SIGABRT :: 6 // abort() called, don't catch +SIGPIPE :: 7 // write to a pipe w/no readers +SIGFPE :: 8 // floating point exception +SIGKILL :: 9 // kill a team (not catchable) +SIGSTOP :: 10 // suspend a thread (not catchable) +SIGSEGV :: 11 // segmentation violation (read: invalid pointer) +SIGCONT :: 12 // continue execution if suspended +SIGTSTP :: 13 // `stop' special character typed in tty +SIGALRM :: 14 // an alarm has gone off (see alarm()) +SIGTERM :: 15 // termination requested +SIGTTIN :: 16 // read of tty from bg process +SIGTTOU :: 17 // write to tty from bg process +SIGUSR1 :: 18 // app defined signal 1 +SIGUSR2 :: 19 // app defined signal 2 +SIGWINCH :: 20 // tty window size changed +SIGKILLTHR :: 21 // be specific: kill just the thread, not team +SIGTRAP :: 22 // Trace/breakpoint trap +SIGPOLL :: 23 // Pollable event +SIGPROF :: 24 // Profiling timer expired +SIGSYS :: 25 // Bad system call +SIGURG :: 26 // High bandwidth data is available at socket +SIGVTALRM :: 27 // Virtual timer expired +SIGXCPU :: 28 // CPU time limit exceeded +SIGXFSZ :: 29 // File size limit exceeded +SIGBUS :: 30 // access to
undefined portion of a memory object + +sigval :: struct #raw_union { + sival_int: c.int, + sival_ptr: rawptr, +} + +siginfo_t :: struct { + si_signo: c.int, // signal number + si_code: c.int, // signal code + si_errno: c.int, // if non zero, an error number associated with this signal + + si_pid: pid_t, // sending process ID + si_uid: uid_t, // real user ID of sending process + si_addr: rawptr, // address of faulting instruction + si_status: c.int, // exit value or signal + si_band: c.long, // band event for SIGPOLL + si_value: sigval, // signal value +} + +foreign libroot { + // signal set (sigset_t) manipulation + sigemptyset :: proc(set: ^sigset_t) -> c.int --- + sigfillset :: proc(set: ^sigset_t) -> c.int --- + sigaddset :: proc(set: ^sigset_t, _signal: c.int) -> c.int --- + sigdelset :: proc(set: ^sigset_t, _signal: c.int) -> c.int --- + sigismember :: proc(set: ^sigset_t, _signal: c.int) -> c.int --- + // querying and waiting for signals + sigpending :: proc(set: ^sigset_t) -> c.int --- + sigsuspend :: proc(mask: ^sigset_t) -> c.int --- + sigpause :: proc(_signal: c.int) -> c.int --- + sigwait :: proc(set: ^sigset_t, _signal: ^c.int) -> c.int --- + sigwaitinfo :: proc(set: ^sigset_t, info: ^siginfo_t) -> c.int --- + sigtimedwait :: proc(set: ^sigset_t, info: ^siginfo_t, timeout: ^unix.timespec) -> c.int --- + + send_signal :: proc(threadID: thread_id, signal: c.uint) -> c.int --- + set_signal_stack :: proc(base: rawptr, size: c.size_t) --- +} diff --git a/core/sys/haiku/types.odin b/core/sys/haiku/types.odin new file mode 100644 index 000000000..0440d5a98 --- /dev/null +++ b/core/sys/haiku/types.odin @@ -0,0 +1,54 @@ +//+build haiku +package sys_haiku + +import "core:c" + +status_t :: i32 +bigtime_t :: i64 +nanotime_t :: i64 +type_code :: u32 +perform_code :: u32 + +phys_addr_t :: uintptr +phys_size_t :: phys_addr_t +generic_addr_t :: uintptr +generic_size_t :: generic_addr_t + +area_id :: i32 +port_id :: i32 +sem_id :: i32 +team_id :: i32 +thread_id :: i32 
+ +blkcnt_t :: i64 +blksize_t :: i32 +fsblkcnt_t :: i64 +fsfilcnt_t :: i64 +off_t :: i64 +ino_t :: i64 +cnt_t :: i32 +dev_t :: i32 +pid_t :: i32 +id_t :: i32 + +uid_t :: u32 +gid_t :: u32 +mode_t :: u32 +umode_t :: u32 +nlink_t :: i32 + +caddr_t :: ^c.char + +addr_t :: phys_addr_t +key_t :: i32 + +clockid_t :: i32 + +time_t :: i64 when ODIN_ARCH == .amd64 || ODIN_ARCH == .arm64 else i32 + +sig_atomic_t :: c.int +sigset_t :: u64 + +image_id :: i32 + +pthread_t :: rawptr diff --git a/core/sys/unix/pthread_haiku.odin b/core/sys/unix/pthread_haiku.odin new file mode 100644 index 000000000..1278f34fe --- /dev/null +++ b/core/sys/unix/pthread_haiku.odin @@ -0,0 +1,71 @@ +package unix + +import "core:c" + +pthread_t :: distinct rawptr +pthread_attr_t :: distinct rawptr +pthread_mutex_t :: distinct rawptr +pthread_mutexattr_t :: distinct rawptr +pthread_cond_t :: distinct rawptr +pthread_condattr_t :: distinct rawptr +pthread_rwlock_t :: distinct rawptr +pthread_rwlockattr_t :: distinct rawptr +pthread_barrier_t :: distinct rawptr +pthread_barrierattr_t :: distinct rawptr +pthread_spinlock_t :: distinct rawptr + +pthread_key_t :: distinct c.int +pthread_once_t :: struct { + state: c.int, + mutex: pthread_mutex_t, +} + +PTHREAD_MUTEX_DEFAULT :: 0 +PTHREAD_MUTEX_NORMAL :: 1 +PTHREAD_MUTEX_ERRORCHECK :: 2 +PTHREAD_MUTEX_RECURSIVE :: 3 + +PTHREAD_DETACHED :: 0x1 +PTHREAD_SCOPE_SYSTEM :: 0x2 +PTHREAD_INHERIT_SCHED :: 0x4 +PTHREAD_NOFLOAT :: 0x8 + +PTHREAD_CREATE_DETACHED :: PTHREAD_DETACHED +PTHREAD_CREATE_JOINABLE :: 0 +PTHREAD_SCOPE_PROCESS :: 0 +PTHREAD_EXPLICIT_SCHED :: 0 + +SCHED_FIFO :: 1 +SCHED_RR :: 2 +SCHED_SPORADIC :: 3 +SCHED_OTHER :: 4 + +sched_param :: struct { + sched_priority: c.int, +} + +sem_t :: distinct rawptr + +PTHREAD_CANCEL_ENABLE :: 0 +PTHREAD_CANCEL_DISABLE :: 1 +PTHREAD_CANCEL_DEFERRED :: 0 +PTHREAD_CANCEL_ASYNCHRONOUS :: 2 + +foreign import libc "system:c" + +@(default_calling_convention="c") +foreign libc { + sem_open :: proc(name: cstring, flags: 
c.int) -> ^sem_t --- + + sem_init :: proc(sem: ^sem_t, pshared: c.int, initial_value: c.uint) -> c.int --- + sem_destroy :: proc(sem: ^sem_t) -> c.int --- + sem_post :: proc(sem: ^sem_t) -> c.int --- + sem_wait :: proc(sem: ^sem_t) -> c.int --- + sem_trywait :: proc(sem: ^sem_t) -> c.int --- + + pthread_yield :: proc() --- + + pthread_setcancelstate :: proc (state: c.int, old_state: ^c.int) -> c.int --- + pthread_setcanceltype :: proc (type: c.int, old_type: ^c.int) -> c.int --- + pthread_cancel :: proc (thread: pthread_t) -> c.int --- +} diff --git a/core/sys/unix/pthread_unix.odin b/core/sys/unix/pthread_unix.odin index 8bf397647..4fe3c8dfa 100644 --- a/core/sys/unix/pthread_unix.odin +++ b/core/sys/unix/pthread_unix.odin @@ -1,4 +1,4 @@ -//+build linux, darwin, freebsd, openbsd +//+build linux, darwin, freebsd, openbsd, haiku package unix foreign import "system:pthread" @@ -16,6 +16,8 @@ foreign pthread { // retval is a pointer to a location to put the return value of the thread proc. 
pthread_join :: proc(t: pthread_t, retval: ^rawptr) -> c.int --- + pthread_kill :: proc(t: pthread_t, sig: c.int) -> c.int --- + pthread_self :: proc() -> pthread_t --- pthread_equal :: proc(a, b: pthread_t) -> b32 --- @@ -31,15 +33,9 @@ foreign pthread { pthread_attr_getschedparam :: proc(attrs: ^pthread_attr_t, param: ^sched_param) -> c.int --- pthread_attr_setschedparam :: proc(attrs: ^pthread_attr_t, param: ^sched_param) -> c.int --- - pthread_attr_getschedpolicy :: proc(t: ^pthread_attr_t, policy: ^c.int) -> c.int --- - pthread_attr_setschedpolicy :: proc(t: ^pthread_attr_t, policy: c.int) -> c.int --- - // states: PTHREAD_CREATE_DETACHED, PTHREAD_CREATE_JOINABLE pthread_attr_setdetachstate :: proc(attrs: ^pthread_attr_t, detach_state: c.int) -> c.int --- - - // scheds: PTHREAD_INHERIT_SCHED, PTHREAD_EXPLICIT_SCHED - pthread_attr_setinheritsched :: proc(attrs: ^pthread_attr_t, sched: c.int) -> c.int --- - + // NOTE(tetra, 2019-11-06): WARNING: Different systems have different alignment requirements. // For maximum usefulness, use the OS's page size. // ALSO VERY MAJOR WARNING: `stack_ptr` must be the LAST byte of the stack on systems @@ -52,8 +48,20 @@ foreign pthread { pthread_attr_setstack :: proc(attrs: ^pthread_attr_t, stack_ptr: rawptr, stack_size: u64) -> c.int --- pthread_attr_getstack :: proc(attrs: ^pthread_attr_t, stack_ptr: ^rawptr, stack_size: ^u64) -> c.int --- - sched_yield :: proc() -> c.int --- + pthread_sigmask :: proc(how: c.int, set: rawptr, oldset: rawptr) -> c.int --- + sched_yield :: proc() -> c.int --- +} + +// NOTE: Unimplemented in Haiku. 
+when ODIN_OS != .Haiku { + foreign pthread { + // scheds: PTHREAD_INHERIT_SCHED, PTHREAD_EXPLICIT_SCHED + pthread_attr_setinheritsched :: proc(attrs: ^pthread_attr_t, sched: c.int) -> c.int --- + + pthread_attr_getschedpolicy :: proc(t: ^pthread_attr_t, policy: ^c.int) -> c.int --- + pthread_attr_setschedpolicy :: proc(t: ^pthread_attr_t, policy: c.int) -> c.int --- + } } @(default_calling_convention="c") diff --git a/core/sys/unix/time_unix.odin b/core/sys/unix/time_unix.odin index 108067dd4..088dc378b 100644 --- a/core/sys/unix/time_unix.odin +++ b/core/sys/unix/time_unix.odin @@ -1,4 +1,4 @@ -//+build linux, darwin, freebsd, openbsd +//+build linux, darwin, freebsd, openbsd, haiku package unix when ODIN_OS == .Darwin { diff --git a/core/thread/thread_unix.odin b/core/thread/thread_unix.odin index 19e421646..c75710873 100644 --- a/core/thread/thread_unix.odin +++ b/core/thread/thread_unix.odin @@ -1,4 +1,4 @@ -// +build linux, darwin, freebsd, openbsd +// +build linux, darwin, freebsd, openbsd, haiku // +private package thread @@ -78,7 +78,9 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread { // NOTE(tetra, 2019-11-01): These only fail if their argument is invalid. assert(unix.pthread_attr_setdetachstate(&attrs, unix.PTHREAD_CREATE_JOINABLE) == 0) - assert(unix.pthread_attr_setinheritsched(&attrs, unix.PTHREAD_EXPLICIT_SCHED) == 0) + when ODIN_OS != .Haiku { + assert(unix.pthread_attr_setinheritsched(&attrs, unix.PTHREAD_EXPLICIT_SCHED) == 0) + } thread := new(Thread) if thread == nil { @@ -88,8 +90,11 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread { // Set thread priority. 
policy: i32 - res := unix.pthread_attr_getschedpolicy(&attrs, &policy) - assert(res == 0) + res: i32 + when ODIN_OS != .Haiku { + res = unix.pthread_attr_getschedpolicy(&attrs, &policy) + assert(res == 0) + } params: unix.sched_param res = unix.pthread_attr_getschedparam(&attrs, &params) assert(res == 0) diff --git a/core/time/time_unix.odin b/core/time/time_unix.odin index ba0d91527..1c46b5994 100644 --- a/core/time/time_unix.odin +++ b/core/time/time_unix.odin @@ -1,5 +1,5 @@ //+private -//+build linux, darwin, freebsd, openbsd +//+build linux, darwin, freebsd, openbsd, haiku package time import "core:sys/unix" diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 0bcb9f298..fdaa971f1 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -18,6 +18,7 @@ enum TargetOsKind : u16 { TargetOs_essence, TargetOs_freebsd, TargetOs_openbsd, + TargetOs_haiku, TargetOs_wasi, TargetOs_js, @@ -78,6 +79,7 @@ gb_global String target_os_names[TargetOs_COUNT] = { str_lit("essence"), str_lit("freebsd"), str_lit("openbsd"), + str_lit("haiku"), str_lit("wasi"), str_lit("js"), @@ -542,6 +544,13 @@ gb_global TargetMetrics target_openbsd_amd64 = { str_lit("e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"), }; +gb_global TargetMetrics target_haiku_amd64 = { + TargetOs_haiku, + TargetArch_amd64, + 8, 8, 8, 16, + str_lit("x86_64-unknown-haiku"), +}; + gb_global TargetMetrics target_essence_amd64 = { TargetOs_essence, TargetArch_amd64, @@ -641,6 +650,7 @@ gb_global NamedTargetMetrics named_targets[] = { { str_lit("freebsd_amd64"), &target_freebsd_amd64 }, { str_lit("openbsd_amd64"), &target_openbsd_amd64 }, + { str_lit("haiku_amd64"), &target_haiku_amd64 }, { str_lit("freestanding_wasm32"), &target_freestanding_wasm32 }, { str_lit("wasi_wasm32"), &target_wasi_wasm32 }, @@ -872,6 +882,58 @@ gb_internal String internal_odin_root_dir(void) { return path; } +#elif defined(GB_SYSTEM_HAIKU) + +#include <FindDirectory.h> + +gb_internal String
path_to_fullpath(gbAllocator a, String s, bool *ok_); + +gb_internal String internal_odin_root_dir(void) { + String path = global_module_path; + isize len, i; + u8 *text; + + if (global_module_path_set) { + return global_module_path; + } + + auto path_buf = array_make(heap_allocator(), 300); + defer (array_free(&path_buf)); + + len = 0; + for (;;) { + u32 sz = path_buf.count; + int res = find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH, nullptr, &path_buf[0], sz); + if(res == B_OK) { + len = sz; + break; + } else { + array_resize(&path_buf, sz + 1); + } + } + + mutex_lock(&string_buffer_mutex); + defer (mutex_unlock(&string_buffer_mutex)); + + text = gb_alloc_array(permanent_allocator(), u8, len + 1); + gb_memmove(text, &path_buf[0], len); + + path = path_to_fullpath(heap_allocator(), make_string(text, len), nullptr); + + for (i = path.len-1; i >= 0; i--) { + u8 c = path[i]; + if (c == '/' || c == '\\') { + break; + } + path.len--; + } + + global_module_path = path; + global_module_path_set = true; + + return path; +} + #elif defined(GB_SYSTEM_OSX) #include @@ -888,6 +950,7 @@ gb_internal String internal_odin_root_dir(void) { } auto path_buf = array_make(heap_allocator(), 300); + defer (array_free(&path_buf)); len = 0; for (;;) { @@ -920,9 +983,6 @@ gb_internal String internal_odin_root_dir(void) { global_module_path = path; global_module_path_set = true; - - // array_free(&path_buf); - return path; } #else @@ -1301,6 +1361,8 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta metrics = &target_freebsd_amd64; #elif defined(GB_SYSTEM_OPENBSD) metrics = &target_openbsd_amd64; + #elif defined(GB_SYSTEM_HAIKU) + metrics = &target_haiku_amd64; #elif defined(GB_CPU_ARM) metrics = &target_linux_arm64; #else @@ -1405,6 +1467,9 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta case TargetOs_openbsd: bc->link_flags = str_lit("-arch x86-64 "); break; + case TargetOs_haiku: + bc->link_flags = str_lit("-arch 
x86-64 "); + break; } } else if (bc->metrics.arch == TargetArch_i386) { switch (bc->metrics.os) { diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index c85fb28d6..e1b1cd693 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -4928,6 +4928,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As case TargetOs_essence: case TargetOs_freebsd: case TargetOs_openbsd: + case TargetOs_haiku: switch (build_context.metrics.arch) { case TargetArch_i386: case TargetArch_amd64: diff --git a/src/checker.cpp b/src/checker.cpp index 5827fc695..72c0ae574 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -1010,6 +1010,7 @@ gb_internal void init_universal(void) { {"Linux", TargetOs_linux}, {"Essence", TargetOs_essence}, {"FreeBSD", TargetOs_freebsd}, + {"Haiku", TargetOs_haiku}, {"OpenBSD", TargetOs_openbsd}, {"WASI", TargetOs_wasi}, {"JS", TargetOs_js}, diff --git a/src/gb/gb.h b/src/gb/gb.h index 93d250f21..702647121 100644 --- a/src/gb/gb.h +++ b/src/gb/gb.h @@ -83,6 +83,10 @@ extern "C" { #ifndef GB_SYSTEM_OPENBSD #define GB_SYSTEM_OPENBSD 1 #endif + #elif defined(__HAIKU__) || defined(__haiku__) + #ifndef GB_SYSTEM_HAIKU + #define GB_SYSTEM_HAIKU 1 + #endif #else #error This UNIX operating system is not supported #endif @@ -206,7 +210,7 @@ extern "C" { #endif #include // NOTE(bill): malloc on linux #include - #if !defined(GB_SYSTEM_OSX) && !defined(__FreeBSD__) && !defined(__OpenBSD__) + #if !defined(GB_SYSTEM_OSX) && !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__HAIKU__) #include #endif #include @@ -247,6 +251,13 @@ extern "C" { #include #define lseek64 lseek #endif + +#if defined(GB_SYSTEM_HAIKU) + #include + #include + #include + #define lseek64 lseek +#endif #if defined(GB_SYSTEM_UNIX) #include @@ -801,6 +812,13 @@ typedef struct gbAffinity { isize thread_count; isize threads_per_core; } gbAffinity; +#elif defined(GB_SYSTEM_HAIKU) +typedef struct gbAffinity { + b32 is_accurate; + isize core_count; + 
isize thread_count; + isize threads_per_core; +} gbAffinity; #else #error TODO(bill): Unknown system #endif @@ -2984,6 +3002,8 @@ gb_inline u32 gb_thread_current_id(void) { __asm__("mov %%fs:0x10,%0" : "=r"(thread_id)); #elif defined(GB_SYSTEM_LINUX) thread_id = gettid(); +#elif defined(GB_SYSTEM_HAIKU) + thread_id = find_thread(NULL); #else #error Unsupported architecture for gb_thread_current_id() #endif @@ -3184,7 +3204,9 @@ b32 gb_affinity_set(gbAffinity *a, isize core, isize thread_index) { //info.affinity_tag = cast(integer_t)index; //result = thread_policy_set(thread, THREAD_AFFINITY_POLICY, cast(thread_policy_t)&info, THREAD_AFFINITY_POLICY_COUNT); +#if !defined(GB_SYSTEM_HAIKU) result = pthread_setaffinity_np(thread, sizeof(cpuset_t), &mn); +#endif return result == 0; } @@ -3236,6 +3258,29 @@ b32 gb_affinity_set(gbAffinity *a, isize core, isize thread_index) { return true; } +isize gb_affinity_thread_count_for_core(gbAffinity *a, isize core) { + GB_ASSERT(0 <= core && core < a->core_count); + return a->threads_per_core; +} +#elif defined(GB_SYSTEM_HAIKU) +#include + +void gb_affinity_init(gbAffinity *a) { + a->core_count = sysconf(_SC_NPROCESSORS_ONLN); + a->threads_per_core = 1; + a->is_accurate = a->core_count > 0; + a->core_count = a->is_accurate ? 
a->core_count : 1; + a->thread_count = a->core_count; +} + +void gb_affinity_destroy(gbAffinity *a) { + gb_unused(a); +} + +b32 gb_affinity_set(gbAffinity *a, isize core, isize thread_index) { + return true; +} + isize gb_affinity_thread_count_for_core(gbAffinity *a, isize core) { GB_ASSERT(0 <= core && core < a->core_count); return a->threads_per_core; @@ -5457,7 +5502,7 @@ gb_inline b32 gb_file_copy(char const *existing_filename, char const *new_filena } } - gb_free(buf); + gb_mfree(buf); close(new_fd); close(existing_fd); diff --git a/src/linker.cpp b/src/linker.cpp index 0144c4aaf..0cdeaf8d9 100644 --- a/src/linker.cpp +++ b/src/linker.cpp @@ -474,8 +474,8 @@ gb_internal i32 linker_stage(LinkerData *gen) { link_settings = gb_string_appendc(link_settings, "-Wl,-fini,'_odin_exit_point' "); } - } else if (build_context.metrics.os != TargetOs_openbsd) { - // OpenBSD defaults to PIE executable. do not pass -no-pie for it. + } else if (build_context.metrics.os != TargetOs_openbsd && build_context.metrics.os != TargetOs_haiku) { + // OpenBSD and Haiku default to PIE executable. do not pass -no-pie for it. link_settings = gb_string_appendc(link_settings, "-no-pie "); } diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index efba19f23..ca4341525 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -2564,8 +2564,8 @@ gb_internal bool lb_generate_code(lbGenerator *gen) { switch (build_context.reloc_mode) { case RelocMode_Default: - if (build_context.metrics.os == TargetOs_openbsd) { - // Always use PIC for OpenBSD: it defaults to PIE + if (build_context.metrics.os == TargetOs_openbsd || build_context.metrics.os == TargetOs_haiku) { + // Always use PIC for OpenBSD and Haiku: they default to PIE reloc_mode = LLVMRelocPIC; } break; diff --git a/src/path.cpp b/src/path.cpp index de80c9def..742bba7f8 100644 --- a/src/path.cpp +++ b/src/path.cpp @@ -1,461 +1,461 @@ -/* - Path handling utilities. 
-*/ -#if !defined(GB_SYSTEM_WINDOWS) -#include -#endif - -gb_internal String remove_extension_from_path(String const &s) { - if (s.len != 0 && s.text[s.len-1] == '.') { - return s; - } - for (isize i = s.len-1; i >= 0; i--) { - if (s[i] == '.') { - return substring(s, 0, i); - } - } - return s; -} - -gb_internal String remove_directory_from_path(String const &s) { - isize len = 0; - for (isize i = s.len-1; i >= 0; i--) { - if (s[i] == '/' || - s[i] == '\\') { - break; - } - len += 1; - } - return substring(s, s.len-len, s.len); -} - - -// NOTE(Mark Naughton): getcwd as String -#if !defined(GB_SYSTEM_WINDOWS) -gb_internal String get_current_directory(void) { - char cwd[256]; - getcwd(cwd, 256); - - return make_string_c(cwd); -} - -#else -gb_internal String get_current_directory(void) { - gbAllocator a = heap_allocator(); - - wchar_t cwd[256]; - GetCurrentDirectoryW(256, cwd); - - String16 wstr = make_string16_c(cwd); - - return string16_to_string(a, wstr); -} -#endif - -gb_internal bool path_is_directory(String path); - -gb_internal String directory_from_path(String const &s) { - if (path_is_directory(s)) { - return s; - } - - isize i = s.len-1; - for (; i >= 0; i--) { - if (s[i] == '/' || - s[i] == '\\') { - break; - } - } - if (i >= 0) { - return substring(s, 0, i); - } - return substring(s, 0, 0); -} - -#if defined(GB_SYSTEM_WINDOWS) - gb_internal bool path_is_directory(String path) { - gbAllocator a = heap_allocator(); - String16 wstr = string_to_string16(a, path); - defer (gb_free(a, wstr.text)); - - i32 attribs = GetFileAttributesW(wstr.text); - if (attribs < 0) return false; - - return (attribs & FILE_ATTRIBUTE_DIRECTORY) != 0; - } - -#else - gb_internal bool path_is_directory(String path) { - gbAllocator a = heap_allocator(); - char *copy = cast(char *)copy_string(a, path).text; - defer (gb_free(a, copy)); - - struct stat s; - if (stat(copy, &s) == 0) { - return (s.st_mode & S_IFDIR) != 0; - } - return false; - } -#endif - - -gb_internal String 
path_to_full_path(gbAllocator a, String path) { - gbAllocator ha = heap_allocator(); - char *path_c = gb_alloc_str_len(ha, cast(char *)path.text, path.len); - defer (gb_free(ha, path_c)); - - char *fullpath = gb_path_get_full_name(a, path_c); - String res = string_trim_whitespace(make_string_c(fullpath)); -#if defined(GB_SYSTEM_WINDOWS) - for (isize i = 0; i < res.len; i++) { - if (res.text[i] == '\\') { - res.text[i] = '/'; - } - } -#endif - return copy_string(a, res); -} - -struct Path { - String basename; - String name; - String ext; -}; - -// NOTE(Jeroen): Naively turns a Path into a string. -gb_internal String path_to_string(gbAllocator a, Path path) { - if (path.basename.len + path.name.len + path.ext.len == 0) { - return make_string(nullptr, 0); - } - - isize len = path.basename.len + 1 + path.name.len + 1; - if (path.ext.len > 0) { - len += path.ext.len + 1; - } - - u8 *str = gb_alloc_array(a, u8, len); - - isize i = 0; - gb_memmove(str+i, path.basename.text, path.basename.len); i += path.basename.len; - - gb_memmove(str+i, "/", 1); i += 1; - - gb_memmove(str+i, path.name.text, path.name.len); i += path.name.len; - if (path.ext.len > 0) { - gb_memmove(str+i, ".", 1); i += 1; - gb_memmove(str+i, path.ext.text, path.ext.len); i += path.ext.len; - } - str[i] = 0; - - String res = make_string(str, i); - res = string_trim_whitespace(res); - return res; -} - -// NOTE(Jeroen): Naively turns a Path into a string, then normalizes it using `path_to_full_path`. -gb_internal String path_to_full_path(gbAllocator a, Path path) { - String temp = path_to_string(heap_allocator(), path); - defer (gb_free(heap_allocator(), temp.text)); - - return path_to_full_path(a, temp); -} - -// NOTE(Jeroen): Takes a path like "odin" or "W:\Odin", turns it into a full path, -// and then breaks it into its components to make a Path. 
-gb_internal Path path_from_string(gbAllocator a, String const &path) { - Path res = {}; - - if (path.len == 0) return res; - - String fullpath = path_to_full_path(a, path); - defer (gb_free(heap_allocator(), fullpath.text)); - - res.basename = directory_from_path(fullpath); - res.basename = copy_string(a, res.basename); - - if (path_is_directory(fullpath)) { - // It's a directory. We don't need to tinker with the name and extension. - // It could have a superfluous trailing `/`. Remove it if so. - if (res.basename.len > 0 && res.basename.text[res.basename.len - 1] == '/') { - res.basename.len--; - } - return res; - } - - // Note(Dragos): Is the copy_string required if it's a substring? - isize name_start = (res.basename.len > 0) ? res.basename.len + 1 : res.basename.len; - res.name = substring(fullpath, name_start, fullpath.len); - res.name = remove_extension_from_path(res.name); - res.name = copy_string(a, res.name); - - res.ext = path_extension(fullpath, false); // false says not to include the dot. - res.ext = copy_string(a, res.ext); - return res; -} - -// NOTE(Jeroen): Takes a path String and returns the last path element. -gb_internal String last_path_element(String const &path) { - isize count = 0; - u8 * start = (u8 *)(&path.text[path.len - 1]); - for (isize length = path.len; length > 0 && path.text[length - 1] != '/'; length--) { - count++; - start--; - } - if (count > 0) { - start++; // Advance past the `/` and return the substring. - String res = make_string(start, count); - return res; - } - // Must be a root path like `/` or `C:/`, return empty String. 
- return STR_LIT(""); -} - -gb_internal bool path_is_directory(Path path) { - String path_string = path_to_full_path(heap_allocator(), path); - defer (gb_free(heap_allocator(), path_string.text)); - - return path_is_directory(path_string); -} - -struct FileInfo { - String name; - String fullpath; - i64 size; - bool is_dir; -}; - -enum ReadDirectoryError { - ReadDirectory_None, - - ReadDirectory_InvalidPath, - ReadDirectory_NotExists, - ReadDirectory_Permission, - ReadDirectory_NotDir, - ReadDirectory_Empty, - ReadDirectory_Unknown, - - ReadDirectory_COUNT, -}; - -gb_internal i64 get_file_size(String path) { - char *c_str = alloc_cstring(heap_allocator(), path); - defer (gb_free(heap_allocator(), c_str)); - - gbFile f = {}; - gbFileError err = gb_file_open(&f, c_str); - defer (gb_file_close(&f)); - if (err != gbFileError_None) { - return -1; - } - return gb_file_size(&f); -} - - -#if defined(GB_SYSTEM_WINDOWS) -gb_internal ReadDirectoryError read_directory(String path, Array *fi) { - GB_ASSERT(fi != nullptr); - - - while (path.len > 0) { - Rune end = path[path.len-1]; - if (end == '/') { - path.len -= 1; - } else if (end == '\\') { - path.len -= 1; - } else { - break; - } - } - - if (path.len == 0) { - return ReadDirectory_InvalidPath; - } - { - char *c_str = alloc_cstring(temporary_allocator(), path); - gbFile f = {}; - gbFileError file_err = gb_file_open(&f, c_str); - defer (gb_file_close(&f)); - - switch (file_err) { - case gbFileError_Invalid: return ReadDirectory_InvalidPath; - case gbFileError_NotExists: return ReadDirectory_NotExists; - // case gbFileError_Permission: return ReadDirectory_Permission; - } - } - - if (!path_is_directory(path)) { - return ReadDirectory_NotDir; - } - - - gbAllocator a = heap_allocator(); - char *new_path = gb_alloc_array(a, char, path.len+3); - defer (gb_free(a, new_path)); - - gb_memmove(new_path, path.text, path.len); - gb_memmove(new_path+path.len, "/*", 2); - new_path[path.len+2] = 0; - - String np = make_string(cast(u8 
*)new_path, path.len+2); - String16 wstr = string_to_string16(a, np); - defer (gb_free(a, wstr.text)); - - WIN32_FIND_DATAW file_data = {}; - HANDLE find_file = FindFirstFileW(wstr.text, &file_data); - if (find_file == INVALID_HANDLE_VALUE) { - return ReadDirectory_Unknown; - } - defer (FindClose(find_file)); - - array_init(fi, a, 0, 100); - - do { - wchar_t *filename_w = file_data.cFileName; - u64 size = cast(u64)file_data.nFileSizeLow; - size |= (cast(u64)file_data.nFileSizeHigh) << 32; - String name = string16_to_string(a, make_string16_c(filename_w)); - if (name == "." || name == "..") { - gb_free(a, name.text); - continue; - } - - String filepath = {}; - filepath.len = path.len+1+name.len; - filepath.text = gb_alloc_array(a, u8, filepath.len+1); - defer (gb_free(a, filepath.text)); - gb_memmove(filepath.text, path.text, path.len); - gb_memmove(filepath.text+path.len, "/", 1); - gb_memmove(filepath.text+path.len+1, name.text, name.len); - - FileInfo info = {}; - info.name = name; - info.fullpath = path_to_full_path(a, filepath); - info.size = cast(i64)size; - info.is_dir = (file_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; - array_add(fi, info); - } while (FindNextFileW(find_file, &file_data)); - - if (fi->count == 0) { - return ReadDirectory_Empty; - } - - return ReadDirectory_None; -} -#elif defined(GB_SYSTEM_LINUX) || defined(GB_SYSTEM_OSX) || defined(GB_SYSTEM_FREEBSD) || defined(GB_SYSTEM_OPENBSD) - -#include - -gb_internal ReadDirectoryError read_directory(String path, Array *fi) { - GB_ASSERT(fi != nullptr); - - gbAllocator a = heap_allocator(); - - char *c_path = alloc_cstring(a, path); - defer (gb_free(a, c_path)); - - DIR *dir = opendir(c_path); - if (!dir) { - switch (errno) { - case ENOENT: - return ReadDirectory_NotExists; - case EACCES: - return ReadDirectory_Permission; - case ENOTDIR: - return ReadDirectory_NotDir; - default: - // ENOMEM: out of memory - // EMFILE: per-process limit on open fds reached - // ENFILE: system-wide limit 
on total open files reached - return ReadDirectory_Unknown; - } - GB_PANIC("unreachable"); - } - - array_init(fi, a, 0, 100); - - for (;;) { - struct dirent *entry = readdir(dir); - if (entry == nullptr) { - break; - } - - String name = make_string_c(entry->d_name); - if (name == "." || name == "..") { - continue; - } - - String filepath = {}; - filepath.len = path.len+1+name.len; - filepath.text = gb_alloc_array(a, u8, filepath.len+1); - defer (gb_free(a, filepath.text)); - gb_memmove(filepath.text, path.text, path.len); - gb_memmove(filepath.text+path.len, "/", 1); - gb_memmove(filepath.text+path.len+1, name.text, name.len); - filepath.text[filepath.len] = 0; - - - struct stat dir_stat = {}; - - if (stat((char *)filepath.text, &dir_stat)) { - continue; - } - - if (S_ISDIR(dir_stat.st_mode)) { - continue; - } - - i64 size = dir_stat.st_size; - - FileInfo info = {}; - info.name = name; - info.fullpath = path_to_full_path(a, filepath); - info.size = size; - array_add(fi, info); - } - - if (fi->count == 0) { - return ReadDirectory_Empty; - } - - return ReadDirectory_None; -} - - -#else -#error Implement read_directory -#endif - -#if !defined(GB_SYSTEM_WINDOWS) -gb_internal bool write_directory(String path) { - char const *pathname = (char *) path.text; - - if (access(pathname, W_OK) < 0) { - return false; - } - - return true; -} -#else -gb_internal bool write_directory(String path) { - String16 wstr = string_to_string16(heap_allocator(), path); - LPCWSTR wdirectory_name = wstr.text; - - HANDLE directory = CreateFileW(wdirectory_name, - GENERIC_WRITE, - 0, - NULL, - OPEN_EXISTING, - FILE_FLAG_BACKUP_SEMANTICS, - NULL); - - if (directory == INVALID_HANDLE_VALUE) { - DWORD error_code = GetLastError(); - if (error_code == ERROR_ACCESS_DENIED) { - return false; - } - } - - CloseHandle(directory); - return true; -} -#endif +/* + Path handling utilities. 
+*/ +#if !defined(GB_SYSTEM_WINDOWS) +#include +#endif + +gb_internal String remove_extension_from_path(String const &s) { + if (s.len != 0 && s.text[s.len-1] == '.') { + return s; + } + for (isize i = s.len-1; i >= 0; i--) { + if (s[i] == '.') { + return substring(s, 0, i); + } + } + return s; +} + +gb_internal String remove_directory_from_path(String const &s) { + isize len = 0; + for (isize i = s.len-1; i >= 0; i--) { + if (s[i] == '/' || + s[i] == '\\') { + break; + } + len += 1; + } + return substring(s, s.len-len, s.len); +} + + +// NOTE(Mark Naughton): getcwd as String +#if !defined(GB_SYSTEM_WINDOWS) +gb_internal String get_current_directory(void) { + char cwd[256]; + getcwd(cwd, 256); + + return make_string_c(cwd); +} + +#else +gb_internal String get_current_directory(void) { + gbAllocator a = heap_allocator(); + + wchar_t cwd[256]; + GetCurrentDirectoryW(256, cwd); + + String16 wstr = make_string16_c(cwd); + + return string16_to_string(a, wstr); +} +#endif + +gb_internal bool path_is_directory(String path); + +gb_internal String directory_from_path(String const &s) { + if (path_is_directory(s)) { + return s; + } + + isize i = s.len-1; + for (; i >= 0; i--) { + if (s[i] == '/' || + s[i] == '\\') { + break; + } + } + if (i >= 0) { + return substring(s, 0, i); + } + return substring(s, 0, 0); +} + +#if defined(GB_SYSTEM_WINDOWS) + gb_internal bool path_is_directory(String path) { + gbAllocator a = heap_allocator(); + String16 wstr = string_to_string16(a, path); + defer (gb_free(a, wstr.text)); + + i32 attribs = GetFileAttributesW(wstr.text); + if (attribs < 0) return false; + + return (attribs & FILE_ATTRIBUTE_DIRECTORY) != 0; + } + +#else + gb_internal bool path_is_directory(String path) { + gbAllocator a = heap_allocator(); + char *copy = cast(char *)copy_string(a, path).text; + defer (gb_free(a, copy)); + + struct stat s; + if (stat(copy, &s) == 0) { + return (s.st_mode & S_IFDIR) != 0; + } + return false; + } +#endif + + +gb_internal String 
path_to_full_path(gbAllocator a, String path) { + gbAllocator ha = heap_allocator(); + char *path_c = gb_alloc_str_len(ha, cast(char *)path.text, path.len); + defer (gb_free(ha, path_c)); + + char *fullpath = gb_path_get_full_name(a, path_c); + String res = string_trim_whitespace(make_string_c(fullpath)); +#if defined(GB_SYSTEM_WINDOWS) + for (isize i = 0; i < res.len; i++) { + if (res.text[i] == '\\') { + res.text[i] = '/'; + } + } +#endif + return copy_string(a, res); +} + +struct Path { + String basename; + String name; + String ext; +}; + +// NOTE(Jeroen): Naively turns a Path into a string. +gb_internal String path_to_string(gbAllocator a, Path path) { + if (path.basename.len + path.name.len + path.ext.len == 0) { + return make_string(nullptr, 0); + } + + isize len = path.basename.len + 1 + path.name.len + 1; + if (path.ext.len > 0) { + len += path.ext.len + 1; + } + + u8 *str = gb_alloc_array(a, u8, len); + + isize i = 0; + gb_memmove(str+i, path.basename.text, path.basename.len); i += path.basename.len; + + gb_memmove(str+i, "/", 1); i += 1; + + gb_memmove(str+i, path.name.text, path.name.len); i += path.name.len; + if (path.ext.len > 0) { + gb_memmove(str+i, ".", 1); i += 1; + gb_memmove(str+i, path.ext.text, path.ext.len); i += path.ext.len; + } + str[i] = 0; + + String res = make_string(str, i); + res = string_trim_whitespace(res); + return res; +} + +// NOTE(Jeroen): Naively turns a Path into a string, then normalizes it using `path_to_full_path`. +gb_internal String path_to_full_path(gbAllocator a, Path path) { + String temp = path_to_string(heap_allocator(), path); + defer (gb_free(heap_allocator(), temp.text)); + + return path_to_full_path(a, temp); +} + +// NOTE(Jeroen): Takes a path like "odin" or "W:\Odin", turns it into a full path, +// and then breaks it into its components to make a Path. 
+gb_internal Path path_from_string(gbAllocator a, String const &path) { + Path res = {}; + + if (path.len == 0) return res; + + String fullpath = path_to_full_path(a, path); + defer (gb_free(heap_allocator(), fullpath.text)); + + res.basename = directory_from_path(fullpath); + res.basename = copy_string(a, res.basename); + + if (path_is_directory(fullpath)) { + // It's a directory. We don't need to tinker with the name and extension. + // It could have a superfluous trailing `/`. Remove it if so. + if (res.basename.len > 0 && res.basename.text[res.basename.len - 1] == '/') { + res.basename.len--; + } + return res; + } + + // Note(Dragos): Is the copy_string required if it's a substring? + isize name_start = (res.basename.len > 0) ? res.basename.len + 1 : res.basename.len; + res.name = substring(fullpath, name_start, fullpath.len); + res.name = remove_extension_from_path(res.name); + res.name = copy_string(a, res.name); + + res.ext = path_extension(fullpath, false); // false says not to include the dot. + res.ext = copy_string(a, res.ext); + return res; +} + +// NOTE(Jeroen): Takes a path String and returns the last path element. +gb_internal String last_path_element(String const &path) { + isize count = 0; + u8 * start = (u8 *)(&path.text[path.len - 1]); + for (isize length = path.len; length > 0 && path.text[length - 1] != '/'; length--) { + count++; + start--; + } + if (count > 0) { + start++; // Advance past the `/` and return the substring. + String res = make_string(start, count); + return res; + } + // Must be a root path like `/` or `C:/`, return empty String. 
+ return STR_LIT(""); +} + +gb_internal bool path_is_directory(Path path) { + String path_string = path_to_full_path(heap_allocator(), path); + defer (gb_free(heap_allocator(), path_string.text)); + + return path_is_directory(path_string); +} + +struct FileInfo { + String name; + String fullpath; + i64 size; + bool is_dir; +}; + +enum ReadDirectoryError { + ReadDirectory_None, + + ReadDirectory_InvalidPath, + ReadDirectory_NotExists, + ReadDirectory_Permission, + ReadDirectory_NotDir, + ReadDirectory_Empty, + ReadDirectory_Unknown, + + ReadDirectory_COUNT, +}; + +gb_internal i64 get_file_size(String path) { + char *c_str = alloc_cstring(heap_allocator(), path); + defer (gb_free(heap_allocator(), c_str)); + + gbFile f = {}; + gbFileError err = gb_file_open(&f, c_str); + defer (gb_file_close(&f)); + if (err != gbFileError_None) { + return -1; + } + return gb_file_size(&f); +} + + +#if defined(GB_SYSTEM_WINDOWS) +gb_internal ReadDirectoryError read_directory(String path, Array *fi) { + GB_ASSERT(fi != nullptr); + + + while (path.len > 0) { + Rune end = path[path.len-1]; + if (end == '/') { + path.len -= 1; + } else if (end == '\\') { + path.len -= 1; + } else { + break; + } + } + + if (path.len == 0) { + return ReadDirectory_InvalidPath; + } + { + char *c_str = alloc_cstring(temporary_allocator(), path); + gbFile f = {}; + gbFileError file_err = gb_file_open(&f, c_str); + defer (gb_file_close(&f)); + + switch (file_err) { + case gbFileError_Invalid: return ReadDirectory_InvalidPath; + case gbFileError_NotExists: return ReadDirectory_NotExists; + // case gbFileError_Permission: return ReadDirectory_Permission; + } + } + + if (!path_is_directory(path)) { + return ReadDirectory_NotDir; + } + + + gbAllocator a = heap_allocator(); + char *new_path = gb_alloc_array(a, char, path.len+3); + defer (gb_free(a, new_path)); + + gb_memmove(new_path, path.text, path.len); + gb_memmove(new_path+path.len, "/*", 2); + new_path[path.len+2] = 0; + + String np = make_string(cast(u8 
*)new_path, path.len+2); + String16 wstr = string_to_string16(a, np); + defer (gb_free(a, wstr.text)); + + WIN32_FIND_DATAW file_data = {}; + HANDLE find_file = FindFirstFileW(wstr.text, &file_data); + if (find_file == INVALID_HANDLE_VALUE) { + return ReadDirectory_Unknown; + } + defer (FindClose(find_file)); + + array_init(fi, a, 0, 100); + + do { + wchar_t *filename_w = file_data.cFileName; + u64 size = cast(u64)file_data.nFileSizeLow; + size |= (cast(u64)file_data.nFileSizeHigh) << 32; + String name = string16_to_string(a, make_string16_c(filename_w)); + if (name == "." || name == "..") { + gb_free(a, name.text); + continue; + } + + String filepath = {}; + filepath.len = path.len+1+name.len; + filepath.text = gb_alloc_array(a, u8, filepath.len+1); + defer (gb_free(a, filepath.text)); + gb_memmove(filepath.text, path.text, path.len); + gb_memmove(filepath.text+path.len, "/", 1); + gb_memmove(filepath.text+path.len+1, name.text, name.len); + + FileInfo info = {}; + info.name = name; + info.fullpath = path_to_full_path(a, filepath); + info.size = cast(i64)size; + info.is_dir = (file_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; + array_add(fi, info); + } while (FindNextFileW(find_file, &file_data)); + + if (fi->count == 0) { + return ReadDirectory_Empty; + } + + return ReadDirectory_None; +} +#elif defined(GB_SYSTEM_LINUX) || defined(GB_SYSTEM_OSX) || defined(GB_SYSTEM_FREEBSD) || defined(GB_SYSTEM_OPENBSD) || defined(GB_SYSTEM_HAIKU) + +#include + +gb_internal ReadDirectoryError read_directory(String path, Array *fi) { + GB_ASSERT(fi != nullptr); + + gbAllocator a = heap_allocator(); + + char *c_path = alloc_cstring(a, path); + defer (gb_free(a, c_path)); + + DIR *dir = opendir(c_path); + if (!dir) { + switch (errno) { + case ENOENT: + return ReadDirectory_NotExists; + case EACCES: + return ReadDirectory_Permission; + case ENOTDIR: + return ReadDirectory_NotDir; + default: + // ENOMEM: out of memory + // EMFILE: per-process limit on open fds reached + // 
ENFILE: system-wide limit on total open files reached + return ReadDirectory_Unknown; + } + GB_PANIC("unreachable"); + } + + array_init(fi, a, 0, 100); + + for (;;) { + struct dirent *entry = readdir(dir); + if (entry == nullptr) { + break; + } + + String name = make_string_c(entry->d_name); + if (name == "." || name == "..") { + continue; + } + + String filepath = {}; + filepath.len = path.len+1+name.len; + filepath.text = gb_alloc_array(a, u8, filepath.len+1); + defer (gb_free(a, filepath.text)); + gb_memmove(filepath.text, path.text, path.len); + gb_memmove(filepath.text+path.len, "/", 1); + gb_memmove(filepath.text+path.len+1, name.text, name.len); + filepath.text[filepath.len] = 0; + + + struct stat dir_stat = {}; + + if (stat((char *)filepath.text, &dir_stat)) { + continue; + } + + if (S_ISDIR(dir_stat.st_mode)) { + continue; + } + + i64 size = dir_stat.st_size; + + FileInfo info = {}; + info.name = name; + info.fullpath = path_to_full_path(a, filepath); + info.size = size; + array_add(fi, info); + } + + if (fi->count == 0) { + return ReadDirectory_Empty; + } + + return ReadDirectory_None; +} + + +#else +#error Implement read_directory +#endif + +#if !defined(GB_SYSTEM_WINDOWS) +gb_internal bool write_directory(String path) { + char const *pathname = (char *) path.text; + + if (access(pathname, W_OK) < 0) { + return false; + } + + return true; +} +#else +gb_internal bool write_directory(String path) { + String16 wstr = string_to_string16(heap_allocator(), path); + LPCWSTR wdirectory_name = wstr.text; + + HANDLE directory = CreateFileW(wdirectory_name, + GENERIC_WRITE, + 0, + NULL, + OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS, + NULL); + + if (directory == INVALID_HANDLE_VALUE) { + DWORD error_code = GetLastError(); + if (error_code == ERROR_ACCESS_DENIED) { + return false; + } + } + + CloseHandle(directory); + return true; +} +#endif diff --git a/src/threading.cpp b/src/threading.cpp index 725b58c89..a469435d2 100644 --- a/src/threading.cpp +++ 
b/src/threading.cpp @@ -492,6 +492,8 @@ gb_internal u32 thread_current_id(void) { __asm__("mov %%fs:0x10,%0" : "=r"(thread_id)); #elif defined(GB_SYSTEM_LINUX) thread_id = gettid(); +#elif defined(GB_SYSTEM_HAIKU) + thread_id = find_thread(NULL); #else #error Unsupported architecture for thread_current_id() #endif @@ -831,8 +833,178 @@ gb_internal void futex_wait(Futex *f, Footex val) { WaitOnAddress(f, (void *)&val, sizeof(val), INFINITE); } while (f->load() == val); } + +#elif defined(GB_SYSTEM_HAIKU) + +// Futex implementation taken from https://tavianator.com/2023/futex.html + +#include +#include + +struct _Spinlock { + std::atomic_flag state; + + void init() { + state.clear(); + } + + void lock() { + while (state.test_and_set(std::memory_order_acquire)) { + #if defined(GB_CPU_X86) + _mm_pause(); + #else + (void)0; // spin... + #endif + } + } + + void unlock() { + state.clear(std::memory_order_release); + } +}; + +struct Futex_Waitq; + +struct Futex_Waiter { + _Spinlock lock; + pthread_t thread; + Futex *futex; + Futex_Waitq *waitq; + Futex_Waiter *prev, *next; +}; + +struct Futex_Waitq { + _Spinlock lock; + Futex_Waiter list; + + void init() { + auto head = &list; + head->prev = head->next = head; + } +}; + +// FIXME: This approach may scale badly in the future, +// possible solution - hash map (leads to deadlocks now). + +Futex_Waitq g_waitq = { + .lock = ATOMIC_FLAG_INIT, + .list = { + .prev = &g_waitq.list, + .next = &g_waitq.list, + }, +}; + +Futex_Waitq *get_waitq(Futex *f) { + // Future hash map method... 
+ return &g_waitq; +} + +void futex_signal(Futex *f) { + auto waitq = get_waitq(f); + + waitq->lock.lock(); + + auto head = &waitq->list; + for (auto waiter = head->next; waiter != head; waiter = waiter->next) { + if (waiter->futex != f) { + continue; + } + waitq->lock.unlock(); + pthread_kill(waiter->thread, SIGCONT); + return; + } + + waitq->lock.unlock(); +} + +void futex_broadcast(Futex *f) { + auto waitq = get_waitq(f); + + waitq->lock.lock(); + + auto head = &waitq->list; + for (auto waiter = head->next; waiter != head; waiter = waiter->next) { + if (waiter->futex != f) { + continue; + } + if (waiter->next == head) { + waitq->lock.unlock(); + pthread_kill(waiter->thread, SIGCONT); + return; + } else { + pthread_kill(waiter->thread, SIGCONT); + } + } + + waitq->lock.unlock(); +} + +void futex_wait(Futex *f, Footex val) { + Futex_Waiter waiter; + waiter.thread = pthread_self(); + waiter.futex = f; + + auto waitq = get_waitq(f); + while (waitq->lock.state.test_and_set(std::memory_order_acquire)) { + if (f->load(std::memory_order_relaxed) != val) { + return; + } + #if defined(GB_CPU_X86) + _mm_pause(); + #else + (void)0; // spin... 
+ #endif + } + + waiter.waitq = waitq; + waiter.lock.init(); + waiter.lock.lock(); + + auto head = &waitq->list; + waiter.prev = head->prev; + waiter.next = head; + waiter.prev->next = &waiter; + waiter.next->prev = &waiter; + + waiter.prev->next = &waiter; + waiter.next->prev = &waiter; + + sigset_t old_mask, mask; + sigemptyset(&mask); + sigaddset(&mask, SIGCONT); + pthread_sigmask(SIG_BLOCK, &mask, &old_mask); + + if (f->load(std::memory_order_relaxed) == val) { + waiter.lock.unlock(); + waitq->lock.unlock(); + + int sig; + sigwait(&mask, &sig); + + waitq->lock.lock(); + waiter.lock.lock(); + + while (waitq != waiter.waitq) { + auto req = waiter.waitq; + waiter.lock.unlock(); + waitq->lock.unlock(); + waitq = req; + waitq->lock.lock(); + waiter.lock.lock(); + } + } + + waiter.prev->next = waiter.next; + waiter.next->prev = waiter.prev; + + pthread_sigmask(SIG_SETMASK, &old_mask, NULL); + + waiter.lock.unlock(); + waitq->lock.unlock(); +} + #endif #if defined(GB_SYSTEM_WINDOWS) #pragma warning(pop) -#endif \ No newline at end of file +#endif diff --git a/src/tilde.cpp b/src/tilde.cpp index 06428f317..4fc7d1c9b 100644 --- a/src/tilde.cpp +++ b/src/tilde.cpp @@ -825,6 +825,7 @@ gb_internal bool cg_generate_code(Checker *c, LinkerData *linker_data) { case TargetOs_essence: case TargetOs_freebsd: case TargetOs_openbsd: + case TargetOs_haiku: debug_format = TB_DEBUGFMT_DWARF; break; } diff --git a/vendor/raylib/raylib.odin b/vendor/raylib/raylib.odin index feb8d05a5..51b43c565 100644 --- a/vendor/raylib/raylib.odin +++ b/vendor/raylib/raylib.odin @@ -86,7 +86,6 @@ import "core:fmt" import "core:mem" import "core:strings" -USE_LINALG :: #config(RAYLIB_USE_LINALG, true) import "core:math/linalg" _ :: linalg @@ -213,39 +212,19 @@ BLANK :: Color{ 0, 0, 0, 0 } // Blank (Transparent) MAGENTA :: Color{ 255, 0, 255, 255 } // Magenta RAYWHITE :: Color{ 245, 245, 245, 255 } // My own White (raylib logo) +// Vector2 type +Vector2 :: linalg.Vector2f32 +// Vector3 type +Vector3 :: 
linalg.Vector3f32 +// Vector4 type +Vector4 :: linalg.Vector4f32 -when USE_LINALG { - // Vector2 type - Vector2 :: linalg.Vector2f32 - // Vector3 type - Vector3 :: linalg.Vector3f32 - // Vector4 type - Vector4 :: linalg.Vector4f32 +// Quaternion type +Quaternion :: linalg.Quaternionf32 - // Quaternion type - Quaternion :: linalg.Quaternionf32 +// Matrix type (OpenGL style 4x4 - right handed, stored column major) +Matrix :: linalg.Matrix4x4f32 - // Matrix type (OpenGL style 4x4 - right handed, column major) - Matrix :: linalg.Matrix4x4f32 -} else { - // Vector2 type - Vector2 :: distinct [2]f32 - // Vector3 type - Vector3 :: distinct [3]f32 - // Vector4 type - Vector4 :: distinct [4]f32 - - // Quaternion type - Quaternion :: distinct quaternion128 - - // Matrix, 4x4 components, column major, OpenGL style, right handed - Matrix :: struct { - m0, m4, m8, m12: f32, // Matrix first row (4 components) - m1, m5, m9, m13: f32, // Matrix second row (4 components) - m2, m6, m10, m14: f32, // Matrix third row (4 components) - m3, m7, m11, m15: f32, // Matrix fourth row (4 components) - } -} // Color, 4 components, R8G8B8A8 (32bit) // diff --git a/vendor/raylib/raymath.odin b/vendor/raylib/raymath.odin index 9770ecfb1..c657152c1 100644 --- a/vendor/raylib/raymath.odin +++ b/vendor/raylib/raymath.odin @@ -85,33 +85,33 @@ Vector2SubtractValue :: proc "c" (v: Vector2, value: f32) -> Vector2 { return v - value } // Calculate vector length -@(require_results, deprecated="Prefer linalg.length(v)") +@(require_results) Vector2Length :: proc "c" (v: Vector2) -> f32 { return linalg.length(v) } // Calculate vector square length -@(require_results, deprecated="Prefer linalg.length2(v)") +@(require_results) Vector2LengthSqr :: proc "c" (v: Vector2) -> f32 { return linalg.length2(v) } // Calculate two vectors dot product -@(require_results, deprecated="Prefer linalg.dot(v1, v2)") +@(require_results) Vector2DotProduct :: proc "c" (v1, v2: Vector2) -> f32 { return linalg.dot(v1, v2) } // 
Calculate distance between two vectors -@(require_results, deprecated="Prefer linalg.distance(v1, v2)") +@(require_results) Vector2Distance :: proc "c" (v1, v2: Vector2) -> f32 { return linalg.distance(v1, v2) } // Calculate square distance between two vectors -@(require_results, deprecated="Prefer linalg.length2(v2-v1)") +@(require_results) Vector2DistanceSqrt :: proc "c" (v1, v2: Vector2) -> f32 { return linalg.length2(v2-v1) } // Calculate angle between two vectors // NOTE: Angle is calculated from origin point (0, 0) -@(require_results, deprecated="Prefer linalg.angle_between(v1, v2)") +@(require_results) Vector2Angle :: proc "c" (v1, v2: Vector2) -> f32 { return linalg.angle_between(v1, v2) } @@ -146,7 +146,7 @@ Vector2Divide :: proc "c" (v1, v2: Vector2) -> Vector2 { return v1 / v2 } // Normalize provided vector -@(require_results, deprecated="Prefer linalg.normalize0(v)") +@(require_results) Vector2Normalize :: proc "c" (v: Vector2) -> Vector2 { return linalg.normalize0(v) } @@ -270,38 +270,38 @@ Vector3SubtractValue :: proc "c" (v: Vector3, value: f32) -> Vector3 { return v - value } // Calculate vector length -@(require_results, deprecated="Prefer linalg.length(v)") +@(require_results) Vector3Length :: proc "c" (v: Vector3) -> f32 { return linalg.length(v) } // Calculate vector square length -@(require_results, deprecated="Prefer linalg.length2(v)") +@(require_results) Vector3LengthSqr :: proc "c" (v: Vector3) -> f32 { return linalg.length2(v) } // Calculate two vectors dot product -@(require_results, deprecated="Prefer linalg.dot(v1, v2)") +@(require_results) Vector3DotProduct :: proc "c" (v1, v2: Vector3) -> f32 { return linalg.dot(v1, v2) } // Calculate two vectors dot product -@(require_results, deprecated="Prefer linalg.cross(v1, v2)") +@(require_results) Vector3CrossProduct :: proc "c" (v1, v2: Vector3) -> Vector3 { return linalg.cross(v1, v2) } // Calculate distance between two vectors -@(require_results, deprecated="Prefer linalg.distance(v1, v2)") 
+@(require_results) Vector3Distance :: proc "c" (v1, v2: Vector3) -> f32 { return linalg.distance(v1, v2) } // Calculate square distance between two vectors -@(require_results, deprecated="Prefer linalg.length2(v2-v1)") +@(require_results) Vector3DistanceSqrt :: proc "c" (v1, v2: Vector3) -> f32 { return linalg.length2(v2-v1) } // Calculate angle between two vectors // NOTE: Angle is calculated from origin point (0, 0) -@(require_results, deprecated="Prefer linalg.angle_between(v1, v2)") +@(require_results) Vector3Angle :: proc "c" (v1, v2: Vector3) -> f32 { return linalg.angle_between(v1, v2) } @@ -336,7 +336,7 @@ Vector3Divide :: proc "c" (v1, v2: Vector3) -> Vector3 { return v1 / v2 } // Normalize provided vector -@(require_results, deprecated="Prefer linalg.normalize0(v)") +@(require_results) Vector3Normalize :: proc "c" (v: Vector3) -> Vector3 { return linalg.normalize0(v) } @@ -364,7 +364,7 @@ Vector3OrthoNormalize :: proc "c" (v1, v2: ^Vector3) { } // Transform a vector by quaternion rotation -@(require_results, deprecated="Prefer linalg.mul(q, v") +@(require_results) Vector3RotateByQuaternion :: proc "c" (v: Vector3, q: Quaternion) -> Vector3 { return linalg.mul(q, v) } @@ -480,12 +480,12 @@ Vector3Equals :: proc "c" (p, q: Vector3) -> bool { } -@(require_results, deprecated="Prefer linalg.min(v1, v2)") +@(require_results) Vector3Min :: proc "c" (v1, v2: Vector3) -> Vector3 { return linalg.min(v1, v2) } -@(require_results, deprecated="Prefer linalg.max(v1, v2)") +@(require_results) Vector3Max :: proc "c" (v1, v2: Vector3) -> Vector3 { return linalg.max(v1, v2) } @@ -539,25 +539,25 @@ Vector3Unproject :: proc "c" (source: Vector3, projection: Matrix, view: Matrix) //---------------------------------------------------------------------------------- // Compute matrix determinant -@(require_results, deprecated="Prefer linalg.determinant(mat)") +@(require_results) MatrixDeterminant :: proc "c" (mat: Matrix) -> f32 { return linalg.determinant(mat) } // Get the 
trace of the matrix (sum of the values along the diagonal) -@(require_results, deprecated="Prefer linalg.trace(mat)") +@(require_results) MatrixTrace :: proc "c" (mat: Matrix) -> f32 { return linalg.trace(mat) } // Transposes provided matrix -@(require_results, deprecated="Prefer linalg.transpose(mat)") +@(require_results) MatrixTranspose :: proc "c" (mat: Matrix) -> Matrix { return linalg.transpose(mat) } // Invert provided matrix -@(require_results, deprecated="Prefer linalg.inverse(mat)") +@(require_results) MatrixInvert :: proc "c" (mat: Matrix) -> Matrix { return linalg.inverse(mat) } @@ -704,7 +704,7 @@ QuaternionLength :: proc "c" (q: Quaternion) -> f32 { return abs(q) } // Normalize provided quaternion -@(require_results, deprecated="Prefer linalg.normalize0(q)") +@(require_results) QuaternionNormalize :: proc "c" (q: Quaternion) -> Quaternion { return linalg.normalize0(q) }