From eb8b0d7a03ab2fb3a066c9135c429b97e7bad346 Mon Sep 17 00:00:00 2001
From: gingerBill
Date: Tue, 16 Nov 2021 14:26:43 +0000
Subject: [PATCH] Add `log1p`, `erf`, `erfc`, `ilogb` `logb` (implemented based of FreeBSD's)

`log2` keeps its `ln(x)/LN2` definition and is not aliased to `logb`:
`logb` yields only the integral binary exponent (logb(3) == 1), not the
base-2 logarithm (log2(3) ~= 1.585).
---
 core/math/math.odin       | 172 +++++++++++++++-
 core/math/math_erf.odin   | 410 ++++++++++++++++++++++++++++++++++++++
 core/math/math_log1p.odin | 198 ++++++++++++++++++
 3 files changed, 776 insertions(+), 4 deletions(-)
 create mode 100644 core/math/math_erf.odin
 create mode 100644 core/math/math_log1p.odin

diff --git a/core/math/math.odin b/core/math/math.odin
index f966ed11f..97fd4bd16 100644
--- a/core/math/math.odin
+++ b/core/math/math.odin
@@ -197,22 +197,24 @@ log :: proc{
 	log_f64, log_f64le, log_f64be,
 }
 
+// NOTE: log2 is the real-valued base-2 logarithm and must not be aliased to `logb`;
+// `logb` only yields the integral binary exponent, e.g. logb(3) == 1 while log2(3) ~= 1.585.
 log2_f16   :: proc "contextless" (x: f16)   -> f16   { return ln(x)/LN2 }
 log2_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(log2_f16(f16(x))) }
 log2_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(log2_f16(f16(x))) }
 
 log2_f32   :: proc "contextless" (x: f32)   -> f32   { return ln(x)/LN2 }
 log2_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(log2_f32(f32(x))) }
 log2_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(log2_f32(f32(x))) }
 
 log2_f64   :: proc "contextless" (x: f64)   -> f64   { return ln(x)/LN2 }
 log2_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(log2_f64(f64(x))) }
 log2_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(log2_f64(f64(x))) }
 log2 :: proc{
 	log2_f16, log2_f16le, log2_f16be,
 	log2_f32, log2_f32le, log2_f32be,
 	log2_f64, log2_f64le, log2_f64be,
 }
 
 log10_f16   :: proc "contextless" (x: f16)   -> f16   { return ln(x)/LN10 }
 log10_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(log10_f16(f16(x))) }
@@ -1394,18 +1396,180 @@ tanh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
 	return (t - 1) / (t + 1)
 }
 
-asinh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
-	return ln(x + sqrt(x*x + 1))
+// asinh returns the inverse hyperbolic sine of `y`, computed in f64 and cast back to T; NaN and Inf inputs are returned as-is.
+asinh :: proc "contextless" (y: $T) -> T where intrinsics.type_is_float(T) {
+	// The original C code, the long comment, and the constants
+	// below are from FreeBSD's /usr/src/lib/msun/src/s_asinh.c
+	// and came with this notice.
+	//
+	// ====================================================
+	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+	//
+	// Developed at SunPro, a Sun Microsystems, Inc. business.
+	// Permission to use, copy, modify, and distribute this
+	// software is freely granted, provided that this notice
+	// is preserved.
+	// ====================================================
+
+	LN2       :: 0h3FE62E42FEFA39EF
+	NEAR_ZERO :: 1.0 / (1 << 28)
+	LARGE     :: 1 << 28
+
+	x := f64(y)
+
+	if is_nan(x) || is_inf(x) {
+		return T(x)
+	}
+	sign := false
+	if x < 0 {
+		x = -x
+		sign = true
+	}
+	temp: f64
+	switch {
+	case x > LARGE:
+		temp = ln(x) + LN2
+	case x > 2:
+		temp = ln(2*x + 1/(sqrt(x*x + 1) + x))
+	case x < NEAR_ZERO:
+		temp = x
+	case:
+		temp = log1p(x + x*x/(1 + sqrt(1 + x*x)))
+	}
+
+	if sign {
+		temp = -temp
+	}
+	return T(temp)
 }
 
-acosh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
-	return ln(x + sqrt(x*x - 1))
+// acosh returns the inverse hyperbolic cosine of `y`, computed in f64 and cast back to T; the result is NaN for y < 1 or NaN.
+acosh :: proc "contextless" (y: $T) -> T where intrinsics.type_is_float(T) {
+	// The original C code, the long comment, and the constants
+	// below are from FreeBSD's /usr/src/lib/msun/src/e_acosh.c
+	// and came with this notice.
+	//
+	// ====================================================
+	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+	//
+	// Developed at SunPro, a Sun Microsystems, Inc. business.
+	// Permission to use, copy, modify, and distribute this
+	// software is freely granted, provided that this notice
+	// is preserved.
+	// ====================================================
+
+	LARGE :: 1<<28
+	LN2 :: 0h3FE62E42FEFA39EF
+	x := f64(y)
+	switch {
+	case x < 1 || is_nan(x):
+		return T(nan_f64())
+	case x == 1:
+		return 0
+	case x >= LARGE:
+		return T(ln(x) + LN2)
+	case x > 2:
+		return T(ln(2*x - 1/(x+sqrt(x*x-1))))
+	}
+	t := x-1
+	return T(log1p(t + sqrt(2*t + t*t)))
 }
 
 atanh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
 	return 0.5*ln((1+x)/(1-x))
 }
 
+// ilogb returns the unbiased binary exponent of `val` as an integer.
+// Special cases: ilogb(0) = min(i32), ilogb(NaN) = ilogb(Inf) = max(i32).
+ilogb_f16 :: proc "contextless" (val: f16) -> int {
+	switch {
+	case val == 0:    return int(min(i32))
+	case is_nan(val): return int(max(i32))
+	case is_inf(val): return int(max(i32))
+	}
+	x, exp := normalize_f16(val)
+	return int(((transmute(u16)x)>>F16_SHIFT)&F16_MASK) - F16_BIAS + exp
+}
+ilogb_f32 :: proc "contextless" (val: f32) -> int {
+	switch {
+	case val == 0:    return int(min(i32))
+	case is_nan(val): return int(max(i32))
+	case is_inf(val): return int(max(i32))
+	}
+	x, exp := normalize_f32(val)
+	return int(((transmute(u32)x)>>F32_SHIFT)&F32_MASK) - F32_BIAS + exp
+}
+ilogb_f64 :: proc "contextless" (val: f64) -> int {
+	switch {
+	case val == 0:    return int(min(i32))
+	case is_nan(val): return int(max(i32))
+	case is_inf(val): return int(max(i32))
+	}
+	x, exp := normalize_f64(val)
+	return int(((transmute(u64)x)>>F64_SHIFT)&F64_MASK) - F64_BIAS + exp
+}
+ilogb_f16le :: proc "contextless" (value: f16le) -> int { return ilogb_f16(f16(value)) }
+ilogb_f16be :: proc "contextless" (value: f16be) -> int { return ilogb_f16(f16(value)) }
+ilogb_f32le :: proc "contextless" (value: f32le) -> int { return ilogb_f32(f32(value)) }
+ilogb_f32be :: proc "contextless" (value: f32be) -> int { return ilogb_f32(f32(value)) }
+ilogb_f64le :: proc "contextless" (value: f64le) -> int { return ilogb_f64(f64(value)) }
+ilogb_f64be :: proc "contextless" (value: f64be) -> int { return ilogb_f64(f64(value)) }
+ilogb :: proc {
+	ilogb_f16,
+	ilogb_f32,
+	ilogb_f64,
+	ilogb_f16le,
+	ilogb_f16be,
+	ilogb_f32le,
+	ilogb_f32be,
+	ilogb_f64le,
+	ilogb_f64be,
+}
+
+// logb returns the binary exponent of `val` (the value of ilogb) as a floating-point number.
+// Special cases: logb(0) = -Inf, logb(Inf) = +Inf, logb(NaN) = NaN. This is NOT log2.
+logb_f16 :: proc "contextless" (val: f16) -> f16 {
+	switch {
+	case val == 0:    return inf_f16(-1)
+	case is_inf(val): return inf_f16(+1)
+	case is_nan(val): return val
+	}
+	return f16(ilogb(val))
+}
+logb_f32 :: proc "contextless" (val: f32) -> f32 {
+	switch {
+	case val == 0:    return inf_f32(-1)
+	case is_inf(val): return inf_f32(+1)
+	case is_nan(val): return val
+	}
+	return f32(ilogb(val))
+}
+logb_f64 :: proc "contextless" (val: f64) -> f64 {
+	switch {
+	case val == 0:    return inf_f64(-1)
+	case is_inf(val): return inf_f64(+1)
+	case is_nan(val): return val
+	}
+	return f64(ilogb(val))
+}
+logb_f16le :: proc "contextless" (value: f16le) -> f16le { return f16le(logb_f16(f16(value))) }
+logb_f16be :: proc "contextless" (value: f16be) -> f16be { return f16be(logb_f16(f16(value))) }
+logb_f32le :: proc "contextless" (value: f32le) -> f32le { return f32le(logb_f32(f32(value))) }
+logb_f32be :: proc "contextless" (value: f32be) -> f32be { return f32be(logb_f32(f32(value))) }
+logb_f64le :: proc "contextless" (value: f64le) -> f64le { return f64le(logb_f64(f64(value))) }
+logb_f64be :: proc "contextless" (value: f64be) -> f64be { return f64be(logb_f64(f64(value))) }
+logb :: proc {
+	logb_f16,
+	logb_f32,
+	logb_f64,
+	logb_f16le,
+	logb_f16be,
+	logb_f32le,
+	logb_f32be,
+	logb_f64le,
+	logb_f64be,
+}
+
 F16_DIG        :: 3
 F16_EPSILON    :: 0.00097656
 F16_GUARD      :: 0
diff --git a/core/math/math_erf.odin b/core/math/math_erf.odin
new file mode 100644
index 000000000..cdade59c5
--- /dev/null
+++ b/core/math/math_erf.odin
@@ -0,0 +1,410 @@
+package math
+
+// The original C code and the long comment below are
+// from FreeBSD's /usr/src/lib/msun/src/s_erf.c and
+// came with this notice.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc.
All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+//
+// double erf(double x)
+// double erfc(double x)
+//                           x
+//                    2      |\
+//     erf(x)  =  ---------  | exp(-t*t)dt
+//                 sqrt(pi) \|
+//                           0
+//
+//     erfc(x) =  1-erf(x)
+//  Note that
+//              erf(-x) = -erf(x)
+//              erfc(-x) = 2 - erfc(x)
+//
+// Method:
+//      1. For |x| in [0, 0.84375]
+//          erf(x)  = x + x*R(x**2)
+//          erfc(x) = 1 - erf(x)           if x in [-.84375,0.25]
+//                  = 0.5 + ((0.5-x)-x*R)  if x in [0.25,0.84375]
+//         where R = P/Q where P is an even poly of degree 8 and
+//         Q is an even poly of degree 10 (both are polynomials in x**2).
+//
+//                      | R - (erf(x)-x)/x | <= 2**-57.90
+//
+//
+//         Remark. The formula is derived by noting
+//          erf(x) = (2/sqrt(pi))*(x - x**3/3 + x**5/10 - x**7/42 + ....)
+//         and that
+//          2/sqrt(pi) = 1.128379167095512573896158903121545171688
+//         is close to one. The interval is chosen because the fix
+//         point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
+//         near 0.6174), and by some experiment, 0.84375 is chosen to
+//         guarantee the error is less than one ulp for erf.
+//
+//      2. For |x| in [0.84375,1.25], let s = |x| - 1, and
+//         c = 0.84506291151 rounded to single (24 bits)
+//              erf(x)  = sign(x) * (c  + P1(s)/Q1(s))
+//              erfc(x) = (1-c)  - P1(s)/Q1(s) if x > 0
+//                        1+(c+P1(s)/Q1(s))    if x < 0
+//              |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
+//         Remark: here we use the taylor series expansion at x=1.
+//              erf(1+s) = erf(1) + s*Poly(s)
+//                       = 0.845.. + P1(s)/Q1(s)
+//         That is, we use rational approximation to approximate
+//                      erf(1+s) - (c = (single)0.84506291151)
+//         Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
+//         where
+//              P1(s) = degree 6 poly in s
+//              Q1(s) = degree 6 poly in s
+//
+//      3.
For x in [1.25,1/0.35(~2.857143)],
+//              erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
+//              erf(x)  = 1 - erfc(x)
+//         where
+//              R1(z) = degree 7 poly in z, (z=1/x**2)
+//              S1(z) = degree 8 poly in z
+//
+//      4. For x in [1/0.35,28]
+//              erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
+//                      = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6 < x < 0
+//                      = 2.0 - tiny            (if x <= -6)
+//              erf(x)  = sign(x)*(1.0 - erfc(x)) if x < 6, else
+//              erf(x)  = sign(x)*(1.0 - tiny)
+//         where
+//              R2(z) = degree 6 poly in z, (z=1/x**2)
+//              S2(z) = degree 7 poly in z
+//
+//      Note1:
+//         To compute exp(-x*x-0.5625+R/S), let s be a single
+//         precision number and s := x; then
+//              -x*x = -s*s + (s-x)*(s+x)
+//              exp(-x*x-0.5625+R/S) =
+//                      exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
+//      Note2:
+//         Here 4 and 5 make use of the asymptotic series
+//                        exp(-x*x)
+//              erfc(x) ~ ---------- * ( 1 + Poly(1/x**2) )
+//                        x*sqrt(pi)
+//         We use rational approximation to approximate
+//              g(s)=f(1/x**2) = log(erfc(x)*x) - x*x + 0.5625
+//         Here is the error bound for R1/S1 and R2/S2
+//              |R1/S1 - f(x)|  < 2**(-62.57)
+//              |R2/S2 - f(x)|  < 2**(-61.52)
+//
+//      5. For inf > x >= 28
+//              erf(x)  = sign(x) *(1 - tiny)  (raise inexact)
+//              erfc(x) = tiny*tiny (raise underflow) if x > 0
+//                      = 2 - tiny if x<0
+//
+//      7. Special case:
+//              erf(0)  = 0, erf(inf)  = 1, erf(-inf) = -1,
+//              erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
+//              erfc/erf(NaN) is NaN
+
+erf :: proc{
+	erf_f16,
+	erf_f16le,
+	erf_f16be,
+	erf_f32,
+	erf_f32le,
+	erf_f32be,
+	erf_f64,
+}
+
+erf_f16   :: proc "contextless" (x: f16)   -> f16   { return f16(erf_f64(f64(x))) }
+erf_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(erf_f64(f64(x))) }
+erf_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(erf_f64(f64(x))) }
+erf_f32   :: proc "contextless" (x: f32)   -> f32   { return f32(erf_f64(f64(x))) }
+erf_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(erf_f64(f64(x))) }
+erf_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(erf_f64(f64(x))) }
+// erf_f64 returns the error function of x; erf(NaN) = NaN, erf(+Inf) = 1, erf(-Inf) = -1.
+erf_f64 :: proc "contextless" (x: f64) -> f64 {
+	erx  :: 0h3FEB0AC160000000 // the constant c of step 2 (0.84506291151 rounded to single)
+	// Coefficients for approximation to erf in [0, 0.84375]
+	efx  :: 0h3FC06EBA8214DB69 // 2/sqrt(pi) - 1
+	efx8 :: 0h3FF06EBA8214DB69 // 8 * efx (same mantissa, exponent + 3)
+	pp0  :: 0h3FC06EBA8214DB68
+	pp1  :: 0hBFD4CD7D691CB913
+	pp2  :: 0hBF9D2A51DBD7194F
+	pp3  :: 0hBF77A291236668E4
+	pp4  :: 0hBEF8EAD6120016AC
+	qq1  :: 0h3FD97779CDDADC09
+	qq2  :: 0h3FB0A54C5536CEBA
+	qq3  :: 0h3F74D022C4D36B0F
+	qq4  :: 0h3F215DC9221C1A10
+	qq5  :: 0hBED09C4342A26120
+	// Coefficients for approximation to erf in [0.84375, 1.25]
+	pa0  :: 0hBF6359B8BEF77538
+	pa1  :: 0h3FDA8D00AD92B34D
+	pa2  :: 0hBFD7D240FBB8C3F1
+	pa3  :: 0h3FD45FCA805120E4
+	pa4  :: 0hBFBC63983D3E28EC
+	pa5  :: 0h3FA22A36599795EB
+	pa6  :: 0hBF61BF380A96073F
+	qa1  :: 0h3FBB3E6618EEE323
+	qa2  :: 0h3FE14AF092EB6F33
+	qa3  :: 0h3FB2635CD99FE9A7
+	qa4  :: 0h3FC02660E763351F
+	qa5  :: 0h3F8BEDC26B51DD1C
+	qa6  :: 0h3F888B545735151D
+	// Coefficients for approximation to erfc in [1.25, 1/0.35]
+	ra0  :: 0hBF843412600D6435
+	ra1  :: 0hBFE63416E4BA7360
+	ra2  :: 0hC0251E0441B0E726
+	ra3  :: 0hC04F300AE4CBA38D
+	ra4  :: 0hC0644CB184282266
+	ra5  :: 0hC067135CEBCCABB2
+	ra6  :: 0hC054526557E4D2F2
+	ra7  :: 0hC023A0EFC69AC25C
+	sa1  :: 0h4033A6B9BD707687
+	sa2  :: 0h4061350C526AE721
+	sa3  :: 0h407B290DD58A1A71
+	sa4  :: 0h40842B1921EC2868
+	sa5  :: 0h407AD02157700314
+	sa6  :: 0h405B28A3EE48AE2C
+	sa7  :: 0h401A47EF8E484A93
+	sa8  :: 0hBFAEEFF2EE749A62
+	// Coefficients for approximation to erfc in [1/.35, 28]
+	rb0  :: 0hBF84341239E86F4A
+	rb1  :: 0hBFE993BA70C285DE
+	rb2  :: 0hC031C209555F995A
+	rb3  :: 0hC064145D43C5ED98
+	rb4  :: 0hC083EC881375F228
+	rb5  :: 0hC09004616A2E5992
+	rb6  :: 0hC07E384E9BDC383F
+	sb1  :: 0h403E568B261D5190
+	sb2  :: 0h40745CAE221B9F0A
+	sb3  :: 0h409802EB189D5118
+	sb4  :: 0h40A8FFB7688C246A
+	sb5  :: 0h40A3F219CEDF3BE6
+	sb6  :: 0h407DA874E79FE763
+	sb7  :: 0hC03670E242712D62
+
+
+	VERY_TINY :: 0h0080000000000000 // below this the scaled form 0.125*(8x + efx8*x) is used
+	SMALL     :: 1.0 / (1 << 28) // 2**-28
+
+	// special cases
+	switch {
+	case is_nan(x):
+		return nan_f64()
+	case is_inf(x, 1):
+		return 1
+	case is_inf(x, -1):
+		return -1
+	}
+	x := x // mutable local copy of the parameter
+	sign := false
+	if x < 0 { // work on |x| and re-apply the sign on return (erf is odd)
+		x = -x
+		sign = true
+	}
+	if x < 0.84375 { // |x| < 0.84375
+		temp: f64
+		if x < SMALL { // |x| < 2**-28
+			if x < VERY_TINY {
+				temp = 0.125 * (8.0*x + efx8*x) // avoid underflow
+			} else {
+				temp = x + efx*x
+			}
+		} else {
+			z := x * x
+			r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4)))
+			s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))))
+			y := r / s
+			temp = x + x*y
+		}
+		if sign {
+			return -temp
+		}
+		return temp
+	}
+	if x < 1.25 { // 0.84375 <= |x| < 1.25
+		s := x - 1
+		P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))))
+		Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))))
+		if sign {
+			return -erx - P/Q
+		}
+		return erx + P/Q
+	}
+	if x >= 6 { // inf > |x| >= 6
+		if sign {
+			return -1
+		}
+		return 1
+	}
+	s := 1 / (x * x)
+	R, S: f64
+	if x < 1/0.35 { // |x| < 1 / 0.35  ~ 2.857143
+		R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))))
+		S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8)))))))
+	} else { // |x| >= 1 / 0.35  ~ 2.857143
+		R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))))
+		S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))))
+	}
+	z := transmute(f64)(0xffffffff00000000 & transmute(u64)x) // pseudo-single (20-bit) precision x
+	r := exp(-z*z-0.5625) * exp((z-x)*(z+x)+R/S) // split as in Note1: -x*x = -z*z + (z-x)*(z+x)
+	if sign {
+		return r/x - 1
+	}
+	return 1 - r/x
+}
+
+
+erfc :: proc{
+	erfc_f16,
+	erfc_f16le,
+	erfc_f16be,
+	erfc_f32,
+	erfc_f32le,
+	erfc_f32be,
+	erfc_f64,
+}
+
+erfc_f16   :: proc "contextless" (x: f16)   -> f16   { return f16(erfc_f64(f64(x))) }
+erfc_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(erfc_f64(f64(x))) }
+erfc_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(erfc_f64(f64(x))) }
+erfc_f32   :: proc "contextless" (x: f32)   -> f32   { return f32(erfc_f64(f64(x))) }
+erfc_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(erfc_f64(f64(x))) }
+erfc_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(erfc_f64(f64(x))) }
+// erfc_f64 returns the complementary error function of x; erfc(NaN) = NaN, erfc(+Inf) = 0, erfc(-Inf) = 2.
+erfc_f64 :: proc "contextless" (x: f64) -> f64 {
+	erx  :: 0h3FEB0AC160000000 // the constant c of step 2 (0.84506291151 rounded to single)
+	// Coefficients for approximation to erf in [0, 0.84375]
+	efx  :: 0h3FC06EBA8214DB69 // 2/sqrt(pi) - 1 (not referenced in this procedure)
+	efx8 :: 0h3FF06EBA8214DB69 // 8 * efx (not referenced in this procedure)
+	pp0  :: 0h3FC06EBA8214DB68
+	pp1  :: 0hBFD4CD7D691CB913
+	pp2  :: 0hBF9D2A51DBD7194F
+	pp3  :: 0hBF77A291236668E4
+	pp4  :: 0hBEF8EAD6120016AC
+	qq1  :: 0h3FD97779CDDADC09
+	qq2  :: 0h3FB0A54C5536CEBA
+	qq3  :: 0h3F74D022C4D36B0F
+	qq4  :: 0h3F215DC9221C1A10
+	qq5  :: 0hBED09C4342A26120
+	// Coefficients for approximation to erf in [0.84375, 1.25]
+	pa0  :: 0hBF6359B8BEF77538
+	pa1  :: 0h3FDA8D00AD92B34D
+	pa2  :: 0hBFD7D240FBB8C3F1
+	pa3  :: 0h3FD45FCA805120E4
+	pa4  :: 0hBFBC63983D3E28EC
+	pa5  :: 0h3FA22A36599795EB
+	pa6  :: 0hBF61BF380A96073F
+	qa1  :: 0h3FBB3E6618EEE323
+	qa2  :: 0h3FE14AF092EB6F33
+	qa3  :: 0h3FB2635CD99FE9A7
+	qa4  :: 0h3FC02660E763351F
+	qa5  :: 0h3F8BEDC26B51DD1C
+	qa6  :: 0h3F888B545735151D
+	// Coefficients for approximation to erfc in [1.25, 1/0.35]
+	ra0  :: 0hBF843412600D6435
+	ra1  :: 0hBFE63416E4BA7360
+	ra2  :: 0hC0251E0441B0E726
+	ra3  :: 0hC04F300AE4CBA38D
+	ra4  :: 0hC0644CB184282266
+	ra5  :: 0hC067135CEBCCABB2
+	ra6  :: 0hC054526557E4D2F2
+	ra7  :: 0hC023A0EFC69AC25C
+	sa1  :: 0h4033A6B9BD707687
+	sa2  :: 0h4061350C526AE721
+	sa3  :: 0h407B290DD58A1A71
+	sa4  :: 0h40842B1921EC2868
+	sa5  :: 0h407AD02157700314
+	sa6  :: 0h405B28A3EE48AE2C
+	sa7  :: 0h401A47EF8E484A93
+	sa8  :: 0hBFAEEFF2EE749A62
+	// Coefficients for approximation to erfc in [1/.35, 28]
+	rb0  :: 0hBF84341239E86F4A
+	rb1  :: 0hBFE993BA70C285DE
+	rb2  :: 0hC031C209555F995A
+	rb3  :: 0hC064145D43C5ED98
+	rb4  :: 0hC083EC881375F228
+	rb5  :: 0hC09004616A2E5992
+	rb6  :: 0hC07E384E9BDC383F
+	sb1  :: 0h403E568B261D5190
+	sb2  :: 0h40745CAE221B9F0A
+	sb3  :: 0h409802EB189D5118
+	sb4  :: 0h40A8FFB7688C246A
+	sb5  :: 0h40A3F219CEDF3BE6
+	sb6  :: 0h407DA874E79FE763
+	sb7  :: 0hC03670E242712D62
+
+	TINY :: 1.0 / (1 << 56) // 2**-56
+	// special cases
+	switch {
+	case is_nan(x):
+		return nan_f64()
+	case is_inf(x, 1):
+		return 0
+	case is_inf(x, -1):
+		return 2
+	}
+	x := x // mutable local copy of the parameter
+	sign := false
+	if x < 0 { // work on |x|; the sign selects the erfc(-x) = 2 - erfc(x) form on return
+		x = -x
+		sign = true
+	}
+	if x < 0.84375 { // |x| < 0.84375
+		temp: f64
+		if x < TINY { // |x| < 2**-56
+			temp = x
+		} else {
+			z := x * x
+			r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4)))
+			s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))))
+			y := r / s
+			if x < 0.25 { // |x| < 1/4
+				temp = x + x*y
+			} else {
+				temp = 0.5 + (x*y + (x - 0.5))
+			}
+		}
+		if sign {
+			return 1 + temp
+		}
+		return 1 - temp
+	}
+	if x < 1.25 { // 0.84375 <= |x| < 1.25
+		s := x - 1
+		P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))))
+		Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))))
+		if sign {
+			return 1 + erx + P/Q
+		}
+		return 1 - erx - P/Q
+
+	}
+	if x < 28 { // |x| < 28
+		s := 1 / (x * x)
+		R, S: f64
+		if x < 1/0.35 { // |x| < 1 / 0.35 ~ 2.857143
+			R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))))
+			S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8)))))))
+		} else { // |x| >= 1 / 0.35 ~ 2.857143
+			if sign && x > 6 {
+				return 2 // x < -6
+			}
+			R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))))
+			S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))))
+		}
+		z := transmute(f64)(0xffffffff00000000 & transmute(u64)x) // pseudo-single (20-bit) precision x
+		r := exp(-z*z-0.5625) * exp((z-x)*(z+x)+R/S) // split as in Note1: -x*x = -z*z + (z-x)*(z+x)
+		if sign {
+			return 2 - r/x
+		}
+		return r / x
+	}
+	if sign {
+		return 2
+	}
+	return 0
+}
\ No newline at end of file
diff --git a/core/math/math_log1p.odin b/core/math/math_log1p.odin
new file mode 100644
index 000000000..07e790666
--- /dev/null
+++ b/core/math/math_log1p.odin
@@ -0,0 +1,198 @@
+package math
+
+// The original C code, the long comment, and the constants
+// below are from FreeBSD's /usr/src/lib/msun/src/s_log1p.c
+// and came with this notice. The Go code (which this Odin port appears
+// to follow - confirm) is a simplified version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+//
+// double log1p(double x)
+//
+// Method :
+//   1. Argument Reduction: find k and f such that
+//                      1+x = 2**k * (1+f),
+//         where  sqrt(2)/2 < 1+f < sqrt(2) .
+//
+//      Note. If k=0, then f=x is exact. However, if k!=0, then f
+//      may not be representable exactly. In that case, a correction
+//      term is needed. Let u=1+x rounded. Let c = (1+x)-u, then
+//      log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
+//      and add back the correction term c/u.
+//      (Note: when x > 2**53, one can simply return log(x))
+//
+//   2. Approximation of log1p(f).
+//      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
+//               = 2s + 2/3 s**3 + 2/5 s**5 + .....,
+//               = 2s + s*R
+//      We use a special Remez algorithm on [0,0.1716] to generate
+//      a polynomial of degree 14 to approximate R. The maximum error
+//      of this polynomial approximation is bounded by 2**-58.45. In
+//      other words,
+//          R(z) ~ Lp1*s**2 + Lp2*s**4 + Lp3*s**6 + Lp4*s**8
+//                 + Lp5*s**10 + Lp6*s**12 + Lp7*s**14
+//      (the values of Lp1 to Lp7 are listed in the program)
+//      and
+//
+//          | Lp1*s**2 + ... + Lp7*s**14 - R(z) | <= 2**-58.45
+//
+//      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
+//      In order to guarantee error in log below 1ulp, we compute log
+//      by
+//              log1p(f) = f - (hfsq - s*(hfsq+R)).
+//
+//   3. Finally, log1p(x) = k*ln2 + log1p(f).
+//                        = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
+//      Here ln2 is split into two floating point numbers:
+//                   ln2_hi + ln2_lo,
+//      where n*ln2_hi is always exact for |n| < 2000.
+//
+// Special cases:
+//      log1p(x) is NaN with signal if x < -1 (including -INF) ;
+//      log1p(+INF) is +INF; log1p(-1) is -INF with signal;
+//      log1p(NaN) is that NaN with no signal.
+//
+// Accuracy:
+//      according to an error analysis, the error is always less than
+//      1 ulp (unit in the last place).
+//
+// Constants:
+// The hexadecimal values are the intended ones for the following
+// constants. The decimal values may be used, provided that the
+// compiler will convert from decimal to binary accurately enough
+// to produce the hexadecimal values shown.
+//
+// Note: Assuming log() returns an accurate answer, the following
+//       algorithm can be used to compute log1p(x) to within a few ULP:
+//
+//              u = 1+x;
+//              if(u==1.0) return x ; else
+//                         return log(u)*(x/(u-1.0));
+//
+//       See HP-15C Advanced Functions Handbook, p.193.
+
+log1p :: proc {
+	log1p_f16,
+	log1p_f32,
+	log1p_f64,
+	log1p_f16le,
+	log1p_f16be,
+	log1p_f32le,
+	log1p_f32be,
+	log1p_f64le,
+	log1p_f64be,
+}
+log1p_f16   :: proc "contextless" (x: f16)   -> f16   { return f16(log1p_f64(f64(x))) }
+log1p_f32   :: proc "contextless" (x: f32)   -> f32   { return f32(log1p_f64(f64(x))) }
+log1p_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(log1p_f64(f64(x))) }
+log1p_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(log1p_f64(f64(x))) }
+log1p_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(log1p_f64(f64(x))) }
+log1p_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(log1p_f64(f64(x))) }
+log1p_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(log1p_f64(f64(x))) }
+log1p_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(log1p_f64(f64(x))) }
+// log1p_f64 returns ln(1+x). Special cases: x < -1 or NaN -> NaN, x == -1 -> -Inf, +Inf -> +Inf.
+log1p_f64 :: proc "contextless" (x: f64) -> f64 {
+	SQRT2_M1      :: 0h3fda827999fcef34 // Sqrt(2)-1
+	SQRT2_HALF_M1 :: 0hbfd2bec333018866 // Sqrt(2)/2-1
+	SMALL         :: 0h3e20000000000000 // 2**-29
+	TINY          :: 1.0 / (1 << 54)    // 2**-54
+	TWO53         :: 1 << 53            // 2**53
+	LN2HI         :: 0h3fe62e42fee00000 // ln2_hi: leading part of ln(2), see step 3 of the method
+	LN2LO         :: 0h3dea39ef35793c76 // ln2_lo: remaining part of ln(2)
+	LP1           :: 0h3FE5555555555593 // LP1..LP7: coefficients of the polynomial R(z), see step 2
+	LP2           :: 0h3FD999999997FA04
+	LP3           :: 0h3FD2492494229359
+	LP4           :: 0h3FCC71C51D8E78AF
+	LP5           :: 0h3FC7466496CB03DE
+	LP6           :: 0h3FC39A09D078C69F
+	LP7           :: 0h3FC2F112DF3E5244
+
+	switch {
+	case x < -1 || is_nan(x):
+		return nan_f64()
+	case x == -1:
+		return inf_f64(-1)
+	case is_inf(x, 1):
+		return inf_f64(+1)
+	}
+	absx := abs(x)
+
+	f: f64
+	iu: u64
+	k := 1 // k != 0 means "argument reduction still required"
+	if absx < SQRT2_M1 { // |x| < Sqrt(2)-1
+		if absx < SMALL { // |x| < 2**-29
+			if absx < TINY { // |x| < 2**-54
+				return x
+			}
+			return x - x*x*0.5
+		}
+		if x > SQRT2_HALF_M1 { // Sqrt(2)/2-1 < x
+			// (Sqrt(2)/2-1) < x < (Sqrt(2)-1)
+			k = 0
+			f = x
+			iu = 1 // any non-zero value, so the `iu == 0` shortcut below is skipped
+		}
+	}
+	c: f64
+	if k != 0 {
+		u: f64
+		if absx < TWO53 { // 1<<53
+			u = 1.0 + x
+			iu = transmute(u64)u
+			k = int((iu >> 52) - 1023) // unbiased exponent of u
+			// correction term
+			if k > 0 {
+				c = 1.0 - (u - x)
+			} else {
+				c = x - (u - 1.0)
+			}
+			c /= u
+		} else {
+			u = x
+			iu = transmute(u64)u
+			k = int((iu >> 52) - 1023) // unbiased exponent of u
+			c = 0
+		}
+		iu &= 0x000fffffffffffff // keep the 52 mantissa bits
+		if iu < 0x0006a09e667f3bcd { // mantissa of Sqrt(2)
+			u = transmute(f64)(iu | 0x3ff0000000000000) // normalize u
+		} else {
+			k += 1
+			u = transmute(f64)(iu | 0x3fe0000000000000) // normalize u/2
+			iu = (0x0010000000000000 - iu) >> 2
+		}
+		f = u - 1.0 // Sqrt(2)/2 < u < Sqrt(2)
+	}
+	hfsq := 0.5 * f * f
+	s, R, z: f64
+	if iu == 0 { // "|f| < 2**-20" in the C original; iu holds all 52 mantissa bits here, so this is f == 0 or |f| <= 3*2**-53
+		if f == 0 {
+			if k == 0 {
+				return 0
+			}
+			c += f64(k) * LN2LO
+			return f64(k)*LN2HI + c
+		}
+		R = hfsq * (1.0 - 0.66666666666666666*f) // avoid division
+		if k == 0 {
+			return f - R
+		}
+		return f64(k)*LN2HI - ((R - (f64(k)*LN2LO + c)) - f)
+	}
+	s = f / (2.0 + f)
+	z = s * s
+	R = z * (LP1 + z*(LP2+z*(LP3+z*(LP4+z*(LP5+z*(LP6+z*LP7))))))
+	if k == 0 {
+		return f - (hfsq - s*(hfsq+R))
+	}
+	return f64(k)*LN2HI - ((hfsq - (s*(hfsq+R) + (f64(k)*LN2LO + c))) - f)
+}