Files
Odin/core/_soft_numbers.odin
T
2017-08-10 23:46:12 +01:00

227 lines
4.8 KiB
Odin

#shared_global_scope;
__multi3 :: proc(a, b: u128) -> u128 #cc_c #link_name "__multi3" {
bits_in_dword_2 :: size_of(i64) * 4;
lower_mask :: u128(~u64(0) >> bits_in_dword_2);
TWords :: struct #raw_union {
all: u128;
using _: struct {
when ODIN_ENDIAN == "big" {
lo, hi: u64;
} else {
hi, lo: u64;
}
};
};
r: TWords;
t: u64;
r.lo = u64(a & lower_mask) * u64(b & lower_mask);
t = r.lo >> bits_in_dword_2;
r.lo &= u64(lower_mask);
t += u64(a >> bits_in_dword_2) * u64(b & lower_mask);
r.lo += u64(t & u64(lower_mask)) << bits_in_dword_2;
r.hi = t >> bits_in_dword_2;
t = r.lo >> bits_in_dword_2;
r.lo &= u64(lower_mask);
t += u64(b >> bits_in_dword_2) * u64(a & lower_mask);
r.lo += u64(t & u64(lower_mask)) << bits_in_dword_2;
r.hi += t >> bits_in_dword_2;
r.hi += u64(a >> bits_in_dword_2) * u64(b >> bits_in_dword_2);
return r.all;
}
__u128_mod :: proc(a, b: u128) -> u128 #cc_c #link_name "__umodti3" {
r: u128;
__u128_quo_mod(a, b, &r);
return r;
}
__u128_quo :: proc(a, b: u128) -> u128 #cc_c #link_name "__udivti3" {
return __u128_quo_mod(a, b, nil);
}
__i128_mod :: proc(a, b: i128) -> i128 #cc_c #link_name "__modti3" {
r: i128;
__i128_quo_mod(a, b, &r);
return r;
}
__i128_quo :: proc(a, b: i128) -> i128 #cc_c #link_name "__divti3" {
return __i128_quo_mod(a, b, nil);
}
__i128_quo_mod :: proc(a, b: i128, rem: ^i128) -> (quo: i128) #cc_c #link_name "__divmodti4" {
s: i128;
s = b >> 127;
b = (b~s) - s;
s = a >> 127;
b = (a~s) - s;
uquo: u128;
urem := __u128_quo_mod(transmute(u128)a, transmute(u128)b, &uquo);
iquo := transmute(i128)uquo;
irem := transmute(i128)urem;
iquo = (iquo~s) - s;
irem = (irem~s) - s;
if rem != nil do rem^ = irem;
return iquo;
}
__u128_quo_mod :: proc(a, b: u128, rem: ^u128) -> (quo: u128) #cc_c #link_name "__udivmodti4" {
alo := u64(a);
blo := u64(b);
if b == 0 {
if rem != nil do rem^ = 0;
return u128(alo/blo);
}
r, d, x, q: u128 = a, b, 1, 0;
for r >= d && (d>>127)&1 == 0 {
x <<= 1;
d <<= 1;
}
for x != 0 {
if r >= d {
r -= d;
q |= x;
}
x >>= 1;
d >>= 1;
}
if rem != nil do rem^ = r;
return q;
}
/*
__f16_to_f32 :: proc(f: f16) -> f32 #cc_c #no_inline #link_name "__gnu_h2f_ieee" {
when true {
// Source: https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
FP32 :: struct #raw_union {u: u32, f: f32};
magic, was_infnan: FP32;
magic.u = (254-15) << 23;
was_infnan.u = (127+16) << 23;
hu := transmute(u16, f);
o := FP32{};
o.u = u32(hu & 0x7fff) << 13);
o.f *= magic.f;
if o.f >= was_infnan.f {
o.u |= 255 << 23;
}
o.u |= u32(hu & 0x8000) << 16;
return o.f;
} else {
return 0;
}
}
__f32_to_f16 :: proc(f_: f32) -> f16 #cc_c #no_inline #link_name "__gnu_f2h_ieee" {
when false {
// Source: https://gist.github.com/rygorous/2156668
FP16 :: struct #raw_union {u: u16, f: f16};
FP32 :: struct #raw_union {u: u32, f: f32};
f32infty, f16infty, magic: FP32;
f32infty.u = 255<<23;
f16infty.u = 31<<23;
magic.u = 15<<23;
sign_mask :: u32(0x80000000);
round_mask :: ~u32(0x0fff);
f := transmute(FP32, f_);
o: FP16;
sign := f.u & sign_mask;
f.u ~= sign;
// NOTE all the integer compares in this function can be safely
// compiled into signed compares since all operands are below
// 0x80000000. Important if you want fast straight SSE2 code
// (since there's no unsigned PCMPGTD).
if f.u >= f32infty.u { // Inf or NaN (all exponent bits set)
o.u = f.u > f32infty.u ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
} else { // (De)normalized number or zero
f.u &= round_mask;
f.f *= magic.f;
f.u -= round_mask;
if f.u > f16infty.u {
f.u = f16infty.u; // Clamp to signed infinity if overflowed
}
o.u = u16(f.u >> 13); // Take the bits!
}
o.u |= u16(sign >> 16);
return o.f;
} else {
f := transmute(u32, f_);
h: u16;
hs, he, hf: u16;
fs := (f >> 31) & 1;
fe := (f >> 23) & 0b1111_1111;
ff := (f >> 0) & 0b0111_1111_1111_1111_1111_1111;
add_one := false;
if (fe == 0) {
he = 0;
} else if (fe == 255) {
he = 31;
hf = ff != 0 ? 0x200 : 0;
} else {
ne := fe - 127 + 15;
if ne >= 31 {
he = 31;
} else if ne <= 0 {
if (14-ne) <= 24 {
mant := ff | 0x800000;
hf = u16(mant >> (14-ne));
if (mant >> (13-ne)) & 1 != 0 {
add_one = true;
}
}
} else {
he = u16(ne);
hf = u16(ff >> 13);
if ff&0x1000 != 0 {
add_one = true;
}
}
}
hs = u16(hs);
h |= (he&0b0001_1111)<<10;
h |= (hf&0b0011_1111_1111);
if add_one {
h++;
}
h |= (hs&1) << 15;
return transmute(f16, h);
}
}
__f64_to_f16 :: proc(f: f64) -> f16 #cc_c #no_inline #link_name "__truncdfhf2" {
return __f32_to_f16(f32(f));
}
__f16_to_f64 :: proc(f: f16) -> f64 #cc_c #no_inline {
return f64(__f16_to_f32(f));
}
*/