#shared_global_scope;

// Software implementations of the 128-bit integer helper routines that the
// backend may emit calls to (the symbol names are given by each #link_name).

// __multi3 returns the low 128 bits of a*b.
//
// The operands are split into 64-bit words with shifts/casts rather than a
// raw_union overlay, so the result does not depend on target endianness.
// With a = ahi*2^64 + alo and b = bhi*2^64 + blo:
//     a*b mod 2^128 = alo*blo + ((ahi*blo + alo*bhi) mod 2^64) * 2^64
// The same formula is valid for signed operands (two's complement).
__multi3 :: proc(a, b: u128) -> u128 #cc_c #link_name "__multi3" {
	bits_in_dword_2 :: size_of(i64) * 4;   // half the width of a 64-bit word
	lower_mask      :: u64(0xffffffff);    // low 32 bits of a 64-bit word

	alo, ahi := u64(a), u64(a>>64);
	blo, bhi := u64(b), u64(b>>64);

	// Full 64x64 -> 128 product of the low words, built from 32-bit halves so
	// that no intermediate u64 product or sum overflows.
	lo, hi, t: u64;

	lo  = (alo & lower_mask) * (blo & lower_mask);
	t   = lo >> bits_in_dword_2;
	lo &= lower_mask;
	t  += (alo >> bits_in_dword_2) * (blo & lower_mask);
	lo += (t & lower_mask) << bits_in_dword_2;
	hi  = t >> bits_in_dword_2;

	t   = lo >> bits_in_dword_2;
	lo &= lower_mask;
	t  += (blo >> bits_in_dword_2) * (alo & lower_mask);
	lo += (t & lower_mask) << bits_in_dword_2;
	hi += t >> bits_in_dword_2;
	hi += (alo >> bits_in_dword_2) * (blo >> bits_in_dword_2);

	// Cross terms only contribute to the high word; anything above bit 127 is
	// discarded, so wrapping u64 arithmetic is exactly what is wanted here.
	hi += ahi*blo + alo*bhi;

	return (u128(hi) << 64) | u128(lo);
}

// __u128_mod returns the remainder of the unsigned division a / b.
__u128_mod :: proc(a, b: u128) -> u128 #cc_c #link_name "__umodti3" {
	r: u128;
	__u128_quo_mod(a, b, &r);
	return r;
}

// __u128_quo returns the quotient of the unsigned division a / b.
__u128_quo :: proc(a, b: u128) -> u128 #cc_c #link_name "__udivti3" {
	return __u128_quo_mod(a, b, nil);
}

// __i128_mod returns the remainder of the signed division a / b.
__i128_mod :: proc(a, b: i128) -> i128 #cc_c #link_name "__modti3" {
	r: i128;
	__i128_quo_mod(a, b, &r);
	return r;
}

// __i128_quo returns the quotient of the signed division a / b.
__i128_quo :: proc(a, b: i128) -> i128 #cc_c #link_name "__divti3" {
	return __i128_quo_mod(a, b, nil);
}

// __i128_quo_mod performs signed 128-bit division truncating toward zero.
//
// Returns the quotient; when `rem` is not nil the remainder is stored through
// it. The quotient is negative when the operand signs differ and the
// remainder takes the sign of the dividend `a`, so that a == quo*b + rem.
// Magnitudes are computed with wrapping arithmetic, so the most negative
// i128 divided by -1 wraps instead of trapping.
__i128_quo_mod :: proc(a, b: i128, rem: ^i128) -> (quo: i128) #cc_c #link_name "__divmodti4" {
	// Arithmetic shift by 127 yields -1 for negative values and 0 otherwise.
	sign_a: i128 = a >> 127;
	sign_b: i128 = b >> 127;

	// (x ~ s) - s negates x when s == -1 and leaves it unchanged when s == 0.
	abs_a: i128 = (a ~ sign_a) - sign_a;
	abs_b: i128 = (b ~ sign_b) - sign_b;

	// __u128_quo_mod RETURNS the quotient and WRITES the remainder.
	urem: u128;
	uquo := __u128_quo_mod(transmute(u128, abs_a), transmute(u128, abs_b), &urem);

	iquo := transmute(i128, uquo);
	irem := transmute(i128, urem);

	sign_q: i128 = sign_a ~ sign_b; // -1 when exactly one operand is negative
	iquo = (iquo ~ sign_q) - sign_q;
	irem = (irem ~ sign_a) - sign_a;

	if rem != nil {
		rem^ = irem;
	}
	return iquo;
}

// __u128_quo_mod performs unsigned 128-bit division by binary shift-subtract.
//
// Returns the quotient; when `rem` is not nil the remainder is stored through
// it. When b == 0 the remainder (if requested) is set to 0 and a 64-bit
// division by zero is executed.
// NOTE(review): the division by zero looks intentional (it surfaces the
// target's own divide-by-zero behaviour) - confirm before changing it.
__u128_quo_mod :: proc(a, b: u128, rem: ^u128) -> (quo: u128) #cc_c #link_name "__udivmodti4" {
	if b == 0 {
		if rem != nil {
			rem^ = 0;
		}
		alo, blo := u64(a), u64(b);
		return u128(alo/blo);
	}

	r, d, x, q: u128 = a, b, 1, 0;

	// Scale the divisor up (tracking the matching quotient bit in x) until it
	// exceeds the remainder or its top bit is set and it cannot be shifted.
	for r >= d && (d>>127)&1 == 0 {
		x <<= 1;
		d <<= 1;
	}

	// Walk back down, subtracting wherever the scaled divisor still fits.
	for x != 0 {
		if r >= d {
			r -= d;
			q |= x;
		}
		x >>= 1;
		d >>= 1;
	}

	if rem != nil {
		rem^ = r;
	}
	return q;
}

/*
NOTE(review): the half-float helpers below are disabled and are kept verbatim.
Before re-enabling them, check at least the following:
  - `o.u = u32(hu & 0x7fff) << 13);` in __f16_to_f32 has an unbalanced `)`.
  - `hs = u16(hs);` in __f32_to_f16 assigns hs to itself; `fs` (the extracted
    sign bit) is never used, so this presumably should read `u16(fs)`.
  - `ne` is derived from a u32, so `fe - 127 + 15` looks like it wraps for
    small exponents and `ne <= 0` can then only match zero - confirm.

__f16_to_f32 :: proc(f: f16) -> f32 #cc_c #no_inline #link_name "__gnu_h2f_ieee" {
	when true {
		// Source: https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
		FP32 :: raw_union {u: u32, f: f32};

		magic, was_infnan: FP32;
		magic.u = (254-15) << 23;
		was_infnan.u = (127+16) << 23;

		hu := transmute(u16, f);

		o := FP32{};

		o.u = u32(hu & 0x7fff) << 13);
		o.f *= magic.f;
		if o.f >= was_infnan.f {
			o.u |= 255 << 23;
		}
		o.u |= u32(hu & 0x8000) << 16;
		return o.f;
	} else {
		return 0;
	}
}

__f32_to_f16 :: proc(f_: f32) -> f16 #cc_c #no_inline #link_name "__gnu_f2h_ieee" {
	when false {
		// Source: https://gist.github.com/rygorous/2156668
		FP16 :: raw_union {u: u16, f: f16};
		FP32 :: raw_union {u: u32, f: f32};

		f32infty, f16infty, magic: FP32;
		f32infty.u = 255<<23;
		f16infty.u = 31<<23;
		magic.u = 15<<23;

		sign_mask :: u32(0x80000000);
		round_mask :: ~u32(0x0fff);

		f := transmute(FP32, f_);

		o: FP16;
		sign := f.u & sign_mask;
		f.u ~= sign;

		// NOTE all the integer compares in this function can be safely
		// compiled into signed compares since all operands are below
		// 0x80000000. Important if you want fast straight SSE2 code
		// (since there's no unsigned PCMPGTD).

		if f.u >= f32infty.u { // Inf or NaN (all exponent bits set)
			o.u = f.u > f32infty.u ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
		} else { // (De)normalized number or zero
			f.u &= round_mask;
			f.f *= magic.f;
			f.u -= round_mask;
			if f.u > f16infty.u {
				f.u = f16infty.u; // Clamp to signed infinity if overflowed
			}

			o.u = u16(f.u >> 13); // Take the bits!
		}

		o.u |= u16(sign >> 16);
		return o.f;
	} else {
		f := transmute(u32, f_);
		h: u16;
		hs, he, hf: u16;

		fs := (f >> 31) & 1;
		fe := (f >> 23) & 0b1111_1111;
		ff := (f >> 0) & 0b0111_1111_1111_1111_1111_1111;

		add_one := false;

		if (fe == 0) {
			he = 0;
		} else if (fe == 255) {
			he = 31;
			hf = ff != 0 ? 0x200 : 0;
		} else {
			ne := fe - 127 + 15;
			if ne >= 31 {
				he = 31;
			} else if ne <= 0 {
				if (14-ne) <= 24 {
					mant := ff | 0x800000;
					hf = u16(mant >> (14-ne));

					if (mant >> (13-ne)) & 1 != 0 {
						add_one = true;
					}
				}
			} else {
				he = u16(ne);
				hf = u16(ff >> 13);
				if ff&0x1000 != 0 {
					add_one = true;
				}
			}
		}

		hs = u16(hs);
		h |= (he&0b0001_1111)<<10;
		h |= (hf&0b0011_1111_1111);
		if add_one {
			h++;
		}
		h |= (hs&1) << 15;
		return transmute(f16, h);
	}
}

__f64_to_f16 :: proc(f: f64) -> f16 #cc_c #no_inline #link_name "__truncdfhf2" {
	return __f32_to_f16(f32(f));
}

__f16_to_f64 :: proc(f: f16) -> f64 #cc_c #no_inline {
	return f64(__f16_to_f32(f));
}
*/