mirror of
https://github.com/Ed94/Odin.git
synced 2026-06-18 20:02:22 -07:00
Add log1p, erf, erfc, ilogb logb (implemented based of FreeBSD's)
This commit is contained in:
+170
-20
@@ -197,22 +197,16 @@ log :: proc{
|
||||
log_f64, log_f64le, log_f64be,
|
||||
}
|
||||
|
||||
// log2 returns the base-2 logarithm of x, computed as ln(x)/LN2.
//
// NOTE: the log2 family must not be aliased to the logb family. logb only
// yields the unbiased binary exponent of its argument as an integral value
// (e.g. 3 for an input of 10), which coincides with log2(x) solely when x is
// an exact power of two.
log2_f16   :: proc "contextless" (x: f16)   -> f16   { return ln(x)/LN2 }
log2_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(log2_f16(f16(x))) }
log2_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(log2_f16(f16(x))) }

log2_f32   :: proc "contextless" (x: f32)   -> f32   { return ln(x)/LN2 }
log2_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(log2_f32(f32(x))) }
log2_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(log2_f32(f32(x))) }

log2_f64   :: proc "contextless" (x: f64)   -> f64   { return ln(x)/LN2 }
log2_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(log2_f64(f64(x))) }
log2_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(log2_f64(f64(x))) }

// Overload set dispatching on the floating-point type of the argument.
log2 :: proc{
	log2_f16, log2_f16le, log2_f16be,
	log2_f32, log2_f32le, log2_f32be,
	log2_f64, log2_f64le, log2_f64be,
}
|
||||
|
||||
// Base-10 logarithm of an f16: the natural logarithm rescaled by LN10.
log10_f16 :: proc "contextless" (x: f16) -> f16 {
	natural := ln(x)
	return natural/LN10
}
// Little-endian f16 variant: converts to native f16, defers to log10_f16,
// and converts the result back.
log10_f16le :: proc "contextless" (x: f16le) -> f16le {
	native := f16(x)
	return f16le(log10_f16(native))
}
|
||||
@@ -1394,18 +1388,174 @@ tanh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
|
||||
return (t - 1) / (t + 1)
|
||||
}
|
||||
|
||||
asinh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
|
||||
return ln(x + sqrt(x*x + 1))
|
||||
// asinh returns the inverse hyperbolic sine of y.
//
// The argument is widened to f64, evaluated there, and converted back to T.
// NaN and +/-Inf are returned unchanged. The function is odd, so the work is
// done on the magnitude and the sign is re-applied at the end.
asinh :: proc "contextless" (y: $T) -> T where intrinsics.type_is_float(T) {
	// The original C code, the long comment, and the constants
	// below are from FreeBSD's /usr/src/lib/msun/src/s_asinh.c
	// and came with this notice.
	//
	// ====================================================
	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
	//
	// Developed at SunPro, a Sun Microsystems, Inc. business.
	// Permission to use, copy, modify, and distribute this
	// software is freely granted, provided that this notice
	// is preserved.
	// ====================================================

	LN2       :: 0h3FE62E42FEFA39EF
	NEAR_ZERO :: 1.0 / (1 << 28) // 2**-28
	LARGE     :: 1 << 28         // 2**28

	v := f64(y)
	if is_nan(v) || is_inf(v) {
		return T(v)
	}

	// Work on |v|; remember whether the result has to be negated.
	negative := v < 0
	if negative {
		v = -v
	}

	result: f64
	if v > LARGE {
		// |v| > 2**28
		result = ln(v) + LN2
	} else if v > 2 {
		// 2**28 >= |v| > 2
		result = ln(2*v + 1/(sqrt(v*v + 1) + v))
	} else if v < NEAR_ZERO {
		// |v| < 2**-28: asinh(v) rounds to v
		result = v
	} else {
		// 2 >= |v| >= 2**-28
		result = log1p(v + v*v/(1 + sqrt(1 + v*v)))
	}

	if negative {
		result = -result
	}
	return T(result)
}
|
||||
|
||||
acosh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
|
||||
return ln(x + sqrt(x*x - 1))
|
||||
// acosh returns the inverse hyperbolic cosine of y.
//
// The argument is widened to f64, evaluated there, and converted back to T.
// Inputs below 1 and NaN produce NaN; an input of exactly 1 produces 0.
acosh :: proc "contextless" (y: $T) -> T where intrinsics.type_is_float(T) {
	// The original C code, the long comment, and the constants
	// below are from FreeBSD's /usr/src/lib/msun/src/e_acosh.c
	// and came with this notice.
	//
	// ====================================================
	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
	//
	// Developed at SunPro, a Sun Microsystems, Inc. business.
	// Permission to use, copy, modify, and distribute this
	// software is freely granted, provided that this notice
	// is preserved.
	// ====================================================

	LN2   :: 0h3FE62E42FEFA39EF
	LARGE :: 1<<28 // 2**28

	v := f64(y)

	// Outside the domain (or NaN).
	if v < 1 || is_nan(v) {
		return T(nan_f64())
	}
	if v == 1 {
		return 0
	}
	// v >= 2**28
	if v >= LARGE {
		return T(ln(v) + LN2)
	}
	// 2**28 > v > 2
	if v > 2 {
		return T(ln(2*v - 1/(v+sqrt(v*v-1))))
	}

	// 2 >= v > 1
	d := v-1
	return T(log1p(d + sqrt(2*d + d*d)))
}
|
||||
|
||||
// atanh returns the inverse hyperbolic tangent of x.
//
// The argument is widened to f64, evaluated there, and converted back to T.
//
// Special cases:
//     atanh(+1)  = +Inf
//     atanh(-1)  = -Inf
//     atanh(x)   = NaN if |x| > 1 or x is NaN
//
// The direct formula 0.5*ln((1+x)/(1-x)) cancels catastrophically near zero
// (for tiny x the quotient rounds to 1 and the result collapses to 0), so the
// evaluation is based on log1p instead:
//     |x| < 2**-28 : atanh(x) = x
//     |x| < 0.5    : atanh(x) = 0.5*log1p(2|x| + 2|x|*|x|/(1-|x|))
//     otherwise    : atanh(x) = 0.5*log1p(2|x|/(1-|x|))
// with the sign restored afterwards (atanh is odd).
atanh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
	// The method follows FreeBSD's /usr/src/lib/msun/src/e_atanh.c,
	// which came with this notice.
	//
	// ====================================================
	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
	//
	// Developed at SunPro, a Sun Microsystems, Inc. business.
	// Permission to use, copy, modify, and distribute this
	// software is freely granted, provided that this notice
	// is preserved.
	// ====================================================

	NEAR_ZERO :: 1.0 / (1 << 28) // 2**-28

	v := f64(x)
	switch {
	case v < -1 || v > 1 || is_nan(v):
		return T(nan_f64())
	case v == 1:
		return T(inf_f64(+1))
	case v == -1:
		return T(inf_f64(-1))
	}

	// Work on |v|; remember whether the result has to be negated.
	negative := v < 0
	if negative {
		v = -v
	}

	temp: f64
	switch {
	case v < NEAR_ZERO:
		// atanh(v) rounds to v; this also keeps the sign of a zero argument.
		temp = v
	case v < 0.5:
		temp = v + v
		temp = 0.5 * log1p(temp + temp*v/(1-v))
	case:
		temp = 0.5 * log1p((v+v)/(1-v))
	}

	if negative {
		temp = -temp
	}
	return T(temp)
}
|
||||
|
||||
// ilogb_f16 returns the binary exponent of val as an integer.
//
// Special cases:
//     ilogb(0)      = min(i32)
//     ilogb(NaN)    = max(i32)
//     ilogb(+/-Inf) = max(i32)
ilogb_f16 :: proc "contextless" (val: f16) -> int {
	if val == 0 {
		return int(min(i32))
	}
	if is_nan(val) || is_inf(val) {
		return int(max(i32))
	}
	// normalize_f16 is defined elsewhere; presumably it rescales subnormal
	// inputs and reports the exponent adjustment that the rescaling introduced.
	norm, adjust := normalize_f16(val)
	raw := transmute(u16)norm
	return int((raw>>F16_SHIFT)&F16_MASK) - F16_BIAS + adjust
}
|
||||
// ilogb_f32 returns the binary exponent of val as an integer.
//
// Special cases:
//     ilogb(0)      = min(i32)
//     ilogb(NaN)    = max(i32)
//     ilogb(+/-Inf) = max(i32)
ilogb_f32 :: proc "contextless" (val: f32) -> int {
	if val == 0 {
		return int(min(i32))
	}
	if is_nan(val) || is_inf(val) {
		return int(max(i32))
	}
	// normalize_f32 is defined elsewhere; presumably it rescales subnormal
	// inputs and reports the exponent adjustment that the rescaling introduced.
	norm, adjust := normalize_f32(val)
	raw := transmute(u32)norm
	return int((raw>>F32_SHIFT)&F32_MASK) - F32_BIAS + adjust
}
|
||||
// ilogb_f64 returns the binary exponent of val as an integer.
//
// Special cases:
//     ilogb(0)      = min(i32)
//     ilogb(NaN)    = max(i32)
//     ilogb(+/-Inf) = max(i32)
ilogb_f64 :: proc "contextless" (val: f64) -> int {
	if val == 0 {
		return int(min(i32))
	}
	if is_nan(val) || is_inf(val) {
		return int(max(i32))
	}
	// normalize_f64 is defined elsewhere; presumably it rescales subnormal
	// inputs and reports the exponent adjustment that the rescaling introduced.
	norm, adjust := normalize_f64(val)
	raw := transmute(u64)norm
	return int((raw>>F64_SHIFT)&F64_MASK) - F64_BIAS + adjust
}
|
||||
// Endian-specific variants: each converts its argument to the native float
// type of the same width and defers to the corresponding ilogb_fNN.
ilogb_f16le :: proc "contextless" (value: f16le) -> int {
	native := f16(value)
	return ilogb_f16(native)
}
ilogb_f16be :: proc "contextless" (value: f16be) -> int {
	native := f16(value)
	return ilogb_f16(native)
}
ilogb_f32le :: proc "contextless" (value: f32le) -> int {
	native := f32(value)
	return ilogb_f32(native)
}
ilogb_f32be :: proc "contextless" (value: f32be) -> int {
	native := f32(value)
	return ilogb_f32(native)
}
ilogb_f64le :: proc "contextless" (value: f64le) -> int {
	native := f64(value)
	return ilogb_f64(native)
}
ilogb_f64be :: proc "contextless" (value: f64be) -> int {
	native := f64(value)
	return ilogb_f64(native)
}

// Overload set dispatching on the floating-point type of the argument.
ilogb :: proc {
	ilogb_f16, ilogb_f32, ilogb_f64,
	ilogb_f16le, ilogb_f16be,
	ilogb_f32le, ilogb_f32be,
	ilogb_f64le, ilogb_f64be,
}
|
||||
|
||||
// logb_f16 returns the binary exponent of val as a floating-point value.
//
// Special cases:
//     logb(0)      = -Inf
//     logb(+/-Inf) = +Inf
//     logb(NaN)    = NaN (the argument itself)
logb_f16 :: proc "contextless" (val: f16) -> f16 {
	if val == 0 {
		return inf_f16(-1)
	}
	if is_inf(val) {
		return inf_f16(+1)
	}
	if is_nan(val) {
		return val
	}
	exponent := ilogb(val)
	return f16(exponent)
}
|
||||
// logb_f32 returns the binary exponent of val as a floating-point value.
//
// Special cases:
//     logb(0)      = -Inf
//     logb(+/-Inf) = +Inf
//     logb(NaN)    = NaN (the argument itself)
logb_f32 :: proc "contextless" (val: f32) -> f32 {
	if val == 0 {
		return inf_f32(-1)
	}
	if is_inf(val) {
		return inf_f32(+1)
	}
	if is_nan(val) {
		return val
	}
	exponent := ilogb(val)
	return f32(exponent)
}
|
||||
// logb_f64 returns the binary exponent of val as a floating-point value.
//
// Special cases:
//     logb(0)      = -Inf
//     logb(+/-Inf) = +Inf
//     logb(NaN)    = NaN (the argument itself)
logb_f64 :: proc "contextless" (val: f64) -> f64 {
	if val == 0 {
		return inf_f64(-1)
	}
	if is_inf(val) {
		return inf_f64(+1)
	}
	if is_nan(val) {
		return val
	}
	exponent := ilogb(val)
	return f64(exponent)
}
|
||||
// Endian-specific variants: each converts its argument to the native float
// type of the same width, defers to the corresponding logb_fNN, and converts
// the result back to the endian-specific type.
logb_f16le :: proc "contextless" (value: f16le) -> f16le {
	native := logb_f16(f16(value))
	return f16le(native)
}
logb_f16be :: proc "contextless" (value: f16be) -> f16be {
	native := logb_f16(f16(value))
	return f16be(native)
}
logb_f32le :: proc "contextless" (value: f32le) -> f32le {
	native := logb_f32(f32(value))
	return f32le(native)
}
logb_f32be :: proc "contextless" (value: f32be) -> f32be {
	native := logb_f32(f32(value))
	return f32be(native)
}
logb_f64le :: proc "contextless" (value: f64le) -> f64le {
	native := logb_f64(f64(value))
	return f64le(native)
}
logb_f64be :: proc "contextless" (value: f64be) -> f64be {
	native := logb_f64(f64(value))
	return f64be(native)
}

// Overload set dispatching on the floating-point type of the argument.
logb :: proc {
	logb_f16, logb_f32, logb_f64,
	logb_f16le, logb_f16be,
	logb_f32le, logb_f32be,
	logb_f64le, logb_f64be,
}
|
||||
|
||||
F16_DIG :: 3
|
||||
F16_EPSILON :: 0.00097656
|
||||
F16_GUARD :: 0
|
||||
|
||||
@@ -0,0 +1,410 @@
|
||||
package math
|
||||
|
||||
// The original C code and the long comment below are
|
||||
// from FreeBSD's /usr/src/lib/msun/src/s_erf.c and
|
||||
// came with this notice.
|
||||
//
|
||||
// ====================================================
|
||||
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
//
|
||||
// Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
// Permission to use, copy, modify, and distribute this
|
||||
// software is freely granted, provided that this notice
|
||||
// is preserved.
|
||||
// ====================================================
|
||||
//
|
||||
//
|
||||
// double erf(double x)
|
||||
// double erfc(double x)
|
||||
// x
|
||||
// 2 |\
|
||||
// erf(x) = --------- | exp(-t*t)dt
|
||||
// sqrt(pi) \|
|
||||
// 0
|
||||
//
|
||||
// erfc(x) = 1-erf(x)
|
||||
// Note that
|
||||
// erf(-x) = -erf(x)
|
||||
// erfc(-x) = 2 - erfc(x)
|
||||
//
|
||||
// Method:
|
||||
// 1. For |x| in [0, 0.84375]
|
||||
// erf(x) = x + x*R(x**2)
|
||||
// erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
|
||||
// = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
|
||||
// where R = P/Q where P is an odd poly of degree 8 and
|
||||
// Q is an odd poly of degree 10.
|
||||
// -57.90
|
||||
// | R - (erf(x)-x)/x | <= 2
|
||||
//
|
||||
//
|
||||
// Remark. The formula is derived by noting
|
||||
// erf(x) = (2/sqrt(pi))*(x - x**3/3 + x**5/10 - x**7/42 + ....)
|
||||
// and that
|
||||
// 2/sqrt(pi) = 1.128379167095512573896158903121545171688
|
||||
// is close to one. The interval is chosen because the fix
|
||||
// point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
|
||||
// near 0.6174), and by some experiment, 0.84375 is chosen to
|
||||
// guarantee the error is less than one ulp for erf.
|
||||
//
|
||||
// 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
|
||||
// c = 0.84506291151 rounded to single (24 bits)
|
||||
// erf(x) = sign(x) * (c + P1(s)/Q1(s))
|
||||
// erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
|
||||
// 1+(c+P1(s)/Q1(s)) if x < 0
|
||||
// |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
|
||||
// Remark: here we use the taylor series expansion at x=1.
|
||||
// erf(1+s) = erf(1) + s*Poly(s)
|
||||
// = 0.845.. + P1(s)/Q1(s)
|
||||
// That is, we use rational approximation to approximate
|
||||
// erf(1+s) - (c = (single)0.84506291151)
|
||||
// Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
|
||||
// where
|
||||
// P1(s) = degree 6 poly in s
|
||||
// Q1(s) = degree 6 poly in s
|
||||
//
|
||||
// 3. For x in [1.25,1/0.35(~2.857143)],
|
||||
// erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
|
||||
// erf(x) = 1 - erfc(x)
|
||||
// where
|
||||
// R1(z) = degree 7 poly in z, (z=1/x**2)
|
||||
// S1(z) = degree 8 poly in z
|
||||
//
|
||||
// 4. For x in [1/0.35,28]
|
||||
// erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
|
||||
// = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
|
||||
// = 2.0 - tiny (if x <= -6)
|
||||
// erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else
|
||||
// erf(x) = sign(x)*(1.0 - tiny)
|
||||
// where
|
||||
// R2(z) = degree 6 poly in z, (z=1/x**2)
|
||||
// S2(z) = degree 7 poly in z
|
||||
//
|
||||
// Note1:
|
||||
// To compute exp(-x*x-0.5625+R/S), let s be a single
|
||||
// precision number and s := x; then
|
||||
// -x*x = -s*s + (s-x)*(s+x)
|
||||
// exp(-x*x-0.5625+R/S) =
|
||||
// exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
|
||||
// Note2:
|
||||
// Here 4 and 5 make use of the asymptotic series
|
||||
// exp(-x*x)
|
||||
// erfc(x) ~ ---------- * ( 1 + Poly(1/x**2) )
|
||||
// x*sqrt(pi)
|
||||
// We use rational approximation to approximate
|
||||
// g(s)=f(1/x**2) = log(erfc(x)*x) - x*x + 0.5625
|
||||
// Here is the error bound for R1/S1 and R2/S2
|
||||
// |R1/S1 - f(x)| < 2**(-62.57)
|
||||
// |R2/S2 - f(x)| < 2**(-61.52)
|
||||
//
|
||||
// 5. For inf > x >= 28
|
||||
// erf(x) = sign(x) *(1 - tiny) (raise inexact)
|
||||
// erfc(x) = tiny*tiny (raise underflow) if x > 0
|
||||
// = 2 - tiny if x<0
|
||||
//
|
||||
// 7. Special case:
|
||||
// erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
|
||||
// erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
|
||||
// erfc/erf(NaN) is NaN
|
||||
|
||||
// erf returns the error function of x.
//
// Overload set dispatching on the floating-point type of the argument. Every
// variant widens its argument to f64, evaluates erf_f64, and converts the
// result back. The f64le/f64be variants are included so that the set covers
// the same nine float types as the other overload sets in this package
// (ilogb, logb, log1p).
erf :: proc{
	erf_f16,
	erf_f16le,
	erf_f16be,
	erf_f32,
	erf_f32le,
	erf_f32be,
	erf_f64,
	erf_f64le,
	erf_f64be,
}

erf_f16   :: proc "contextless" (x: f16)   -> f16   { return f16(erf_f64(f64(x))) }
erf_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(erf_f64(f64(x))) }
erf_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(erf_f64(f64(x))) }
erf_f32   :: proc "contextless" (x: f32)   -> f32   { return f32(erf_f64(f64(x))) }
erf_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(erf_f64(f64(x))) }
erf_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(erf_f64(f64(x))) }
erf_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(erf_f64(f64(x))) }
erf_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(erf_f64(f64(x))) }
|
||||
|
||||
// erf_f64 returns the error function of x.
//
// Special cases:
//     erf(+Inf) =  1
//     erf(-Inf) = -1
//     erf(NaN)  = NaN
//
// erf is odd, so the evaluation works on |x| and re-applies the sign. The
// range of |x| is split into the intervals named next to each coefficient
// table below; each interval uses its own rational approximation.
erf_f64 :: proc "contextless" (x: f64) -> f64 {
	erx  :: 0h3FEB0AC160000000
	// Coefficients for approximation to erf in [0, 0.84375]
	efx  :: 0h3FC06EBA8214DB69
	efx8 :: 0h3FF06EBA8214DB69
	pp0  :: 0h3FC06EBA8214DB68
	pp1  :: 0hBFD4CD7D691CB913
	pp2  :: 0hBF9D2A51DBD7194F
	pp3  :: 0hBF77A291236668E4
	pp4  :: 0hBEF8EAD6120016AC
	qq1  :: 0h3FD97779CDDADC09
	qq2  :: 0h3FB0A54C5536CEBA
	qq3  :: 0h3F74D022C4D36B0F
	qq4  :: 0h3F215DC9221C1A10
	qq5  :: 0hBED09C4342A26120
	// Coefficients for approximation to erf in [0.84375, 1.25]
	pa0  :: 0hBF6359B8BEF77538
	pa1  :: 0h3FDA8D00AD92B34D
	pa2  :: 0hBFD7D240FBB8C3F1
	pa3  :: 0h3FD45FCA805120E4
	pa4  :: 0hBFBC63983D3E28EC
	pa5  :: 0h3FA22A36599795EB
	pa6  :: 0hBF61BF380A96073F
	qa1  :: 0h3FBB3E6618EEE323
	qa2  :: 0h3FE14AF092EB6F33
	qa3  :: 0h3FB2635CD99FE9A7
	qa4  :: 0h3FC02660E763351F
	qa5  :: 0h3F8BEDC26B51DD1C
	qa6  :: 0h3F888B545735151D
	// Coefficients for approximation to erfc in [1.25, 1/0.35]
	ra0  :: 0hBF843412600D6435
	ra1  :: 0hBFE63416E4BA7360
	ra2  :: 0hC0251E0441B0E726
	ra3  :: 0hC04F300AE4CBA38D
	ra4  :: 0hC0644CB184282266
	ra5  :: 0hC067135CEBCCABB2
	ra6  :: 0hC054526557E4D2F2
	ra7  :: 0hC023A0EFC69AC25C
	sa1  :: 0h4033A6B9BD707687
	sa2  :: 0h4061350C526AE721
	sa3  :: 0h407B290DD58A1A71
	sa4  :: 0h40842B1921EC2868
	sa5  :: 0h407AD02157700314
	sa6  :: 0h405B28A3EE48AE2C
	sa7  :: 0h401A47EF8E484A93
	sa8  :: 0hBFAEEFF2EE749A62
	// Coefficients for approximation to erfc in [1/.35, 28]
	rb0  :: 0hBF84341239E86F4A
	rb1  :: 0hBFE993BA70C285DE
	rb2  :: 0hC031C209555F995A
	rb3  :: 0hC064145D43C5ED98
	rb4  :: 0hC083EC881375F228
	rb5  :: 0hC09004616A2E5992
	rb6  :: 0hC07E384E9BDC383F
	sb1  :: 0h403E568B261D5190
	sb2  :: 0h40745CAE221B9F0A
	sb3  :: 0h409802EB189D5118
	sb4  :: 0h40A8FFB7688C246A
	sb5  :: 0h40A3F219CEDF3BE6
	sb6  :: 0h407DA874E79FE763
	sb7  :: 0hC03670E242712D62

	VERY_TINY :: 0h0080000000000000
	SMALL     :: 1.0 / (1 << 28) // 2**-28

	// special cases
	if is_nan(x) {
		return nan_f64()
	}
	if is_inf(x, 1) {
		return 1
	}
	if is_inf(x, -1) {
		return -1
	}

	// Work on the magnitude; `negative` records that the result must be mirrored.
	ax := x
	negative := ax < 0
	if negative {
		ax = -ax
	}

	if ax < 0.84375 { // |x| < 0.84375
		res: f64
		switch {
		case ax < VERY_TINY:
			res = 0.125 * (8.0*ax + efx8*ax) // avoid underflow
		case ax < SMALL: // |x| < 2**-28
			res = ax + efx*ax
		case:
			z := ax * ax
			num := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4)))
			den := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))))
			y := num / den
			res = ax + ax*y
		}
		if negative {
			return -res
		}
		return res
	}

	if ax < 1.25 { // 0.84375 <= |x| < 1.25
		t := ax - 1
		P := pa0 + t*(pa1+t*(pa2+t*(pa3+t*(pa4+t*(pa5+t*pa6)))))
		Q := 1 + t*(qa1+t*(qa2+t*(qa3+t*(qa4+t*(qa5+t*qa6)))))
		if negative {
			return -erx - P/Q
		}
		return erx + P/Q
	}

	if ax >= 6 { // inf > |x| >= 6
		if negative {
			return -1
		}
		return 1
	}

	// 1.25 <= |x| < 6: go through the erfc approximation in w = 1/x**2.
	w := 1 / (ax * ax)
	R, S: f64
	if ax < 1/0.35 { // |x| < 1 / 0.35  ~ 2.857143
		R = ra0 + w*(ra1+w*(ra2+w*(ra3+w*(ra4+w*(ra5+w*(ra6+w*ra7))))))
		S = 1 + w*(sa1+w*(sa2+w*(sa3+w*(sa4+w*(sa5+w*(sa6+w*(sa7+w*sa8)))))))
	} else { // |x| >= 1 / 0.35  ~ 2.857143
		R = rb0 + w*(rb1+w*(rb2+w*(rb3+w*(rb4+w*(rb5+w*rb6)))))
		S = 1 + w*(sb1+w*(sb2+w*(sb3+w*(sb4+w*(sb5+w*(sb6+w*sb7))))))
	}
	// |x| with the low 32 bits of its representation cleared:
	// pseudo-single (20-bit) precision x
	z := transmute(f64)(0xffffffff00000000 & transmute(u64)ax)
	r := exp(-z*z-0.5625) * exp((z-ax)*(z+ax)+R/S)
	if negative {
		return r/ax - 1
	}
	return 1 - r/ax
}
|
||||
|
||||
|
||||
// erfc returns the complementary error function of x.
//
// Overload set dispatching on the floating-point type of the argument. Every
// variant widens its argument to f64, evaluates erfc_f64, and converts the
// result back. The f64le/f64be variants are included so that the set covers
// the same nine float types as the other overload sets in this package
// (ilogb, logb, log1p).
erfc :: proc{
	erfc_f16,
	erfc_f16le,
	erfc_f16be,
	erfc_f32,
	erfc_f32le,
	erfc_f32be,
	erfc_f64,
	erfc_f64le,
	erfc_f64be,
}

erfc_f16   :: proc "contextless" (x: f16)   -> f16   { return f16(erfc_f64(f64(x))) }
erfc_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(erfc_f64(f64(x))) }
erfc_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(erfc_f64(f64(x))) }
erfc_f32   :: proc "contextless" (x: f32)   -> f32   { return f32(erfc_f64(f64(x))) }
erfc_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(erfc_f64(f64(x))) }
erfc_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(erfc_f64(f64(x))) }
erfc_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(erfc_f64(f64(x))) }
erfc_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(erfc_f64(f64(x))) }
|
||||
|
||||
// erfc_f64 returns the complementary error function of x.
//
// Special cases:
//     erfc(+Inf) = 0
//     erfc(-Inf) = 2
//     erfc(NaN)  = NaN
//
// The evaluation works on |x| and uses erfc(-x) = 2 - erfc(x) for negative
// arguments. The range of |x| is split into the intervals named next to each
// coefficient table below; each interval uses its own rational approximation.
erfc_f64 :: proc "contextless" (x: f64) -> f64 {
	erx :: 0h3FEB0AC160000000
	// Coefficients for approximation to erf in [0, 0.84375]
	// (efx and efx8 are only needed by erf_f64 and are not declared here.)
	pp0 :: 0h3FC06EBA8214DB68
	pp1 :: 0hBFD4CD7D691CB913
	pp2 :: 0hBF9D2A51DBD7194F
	pp3 :: 0hBF77A291236668E4
	pp4 :: 0hBEF8EAD6120016AC
	qq1 :: 0h3FD97779CDDADC09
	qq2 :: 0h3FB0A54C5536CEBA
	qq3 :: 0h3F74D022C4D36B0F
	qq4 :: 0h3F215DC9221C1A10
	qq5 :: 0hBED09C4342A26120
	// Coefficients for approximation to erf in [0.84375, 1.25]
	pa0 :: 0hBF6359B8BEF77538
	pa1 :: 0h3FDA8D00AD92B34D
	pa2 :: 0hBFD7D240FBB8C3F1
	pa3 :: 0h3FD45FCA805120E4
	pa4 :: 0hBFBC63983D3E28EC
	pa5 :: 0h3FA22A36599795EB
	pa6 :: 0hBF61BF380A96073F
	qa1 :: 0h3FBB3E6618EEE323
	qa2 :: 0h3FE14AF092EB6F33
	qa3 :: 0h3FB2635CD99FE9A7
	qa4 :: 0h3FC02660E763351F
	qa5 :: 0h3F8BEDC26B51DD1C
	qa6 :: 0h3F888B545735151D
	// Coefficients for approximation to erfc in [1.25, 1/0.35]
	ra0 :: 0hBF843412600D6435
	ra1 :: 0hBFE63416E4BA7360
	ra2 :: 0hC0251E0441B0E726
	ra3 :: 0hC04F300AE4CBA38D
	ra4 :: 0hC0644CB184282266
	ra5 :: 0hC067135CEBCCABB2
	ra6 :: 0hC054526557E4D2F2
	ra7 :: 0hC023A0EFC69AC25C
	sa1 :: 0h4033A6B9BD707687
	sa2 :: 0h4061350C526AE721
	sa3 :: 0h407B290DD58A1A71
	sa4 :: 0h40842B1921EC2868
	sa5 :: 0h407AD02157700314
	sa6 :: 0h405B28A3EE48AE2C
	sa7 :: 0h401A47EF8E484A93
	sa8 :: 0hBFAEEFF2EE749A62
	// Coefficients for approximation to erfc in [1/.35, 28]
	rb0 :: 0hBF84341239E86F4A
	rb1 :: 0hBFE993BA70C285DE
	rb2 :: 0hC031C209555F995A
	rb3 :: 0hC064145D43C5ED98
	rb4 :: 0hC083EC881375F228
	rb5 :: 0hC09004616A2E5992
	rb6 :: 0hC07E384E9BDC383F
	sb1 :: 0h403E568B261D5190
	sb2 :: 0h40745CAE221B9F0A
	sb3 :: 0h409802EB189D5118
	sb4 :: 0h40A8FFB7688C246A
	sb5 :: 0h40A3F219CEDF3BE6
	sb6 :: 0h407DA874E79FE763
	sb7 :: 0hC03670E242712D62

	TINY :: 1.0 / (1 << 56) // 2**-56

	// special cases
	switch {
	case is_nan(x):
		return nan_f64()
	case is_inf(x, 1):
		return 0
	case is_inf(x, -1):
		return 2
	}

	// From here on x holds |x|; sign records whether the input was negative.
	x := x
	sign := false
	if x < 0 {
		x = -x
		sign = true
	}

	if x < 0.84375 { // |x| < 0.84375
		// temp approximates erf(|x|); the result is 1 -/+ temp.
		temp: f64
		if x < TINY { // |x| < 2**-56
			temp = x
		} else {
			z := x * x
			r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4)))
			s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))))
			y := r / s
			if x < 0.25 { // |x| < 1/4
				temp = x + x*y
			} else {
				temp = 0.5 + (x*y + (x - 0.5))
			}
		}
		if sign {
			return 1 + temp
		}
		return 1 - temp
	}

	if x < 1.25 { // 0.84375 <= |x| < 1.25
		s := x - 1
		P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))))
		Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))))
		if sign {
			return 1 + erx + P/Q
		}
		return 1 - erx - P/Q
	}

	if x < 28 { // |x| < 28
		s := 1 / (x * x)
		R, S: f64
		if x < 1/0.35 { // |x| < 1 / 0.35 ~ 2.857143
			R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))))
			S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8)))))))
		} else { // |x| >= 1 / 0.35 ~ 2.857143
			if sign && x > 6 {
				return 2 // x < -6
			}
			R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))))
			S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))))
		}
		// |x| with the low 32 bits of its representation cleared:
		// pseudo-single (20-bit) precision x
		z := transmute(f64)(0xffffffff00000000 & transmute(u64)x)
		r := exp(-z*z-0.5625) * exp((z-x)*(z+x)+R/S)
		if sign {
			return 2 - r/x
		}
		return r / x
	}

	// |x| >= 28
	if sign {
		return 2
	}
	return 0
}
|
||||
@@ -0,0 +1,198 @@
|
||||
package math
|
||||
|
||||
// The original C code, the long comment, and the constants
|
||||
// below are from FreeBSD's /usr/src/lib/msun/src/s_log1p.c
|
||||
// and came with this notice. This code is a simplified
|
||||
// version of the original C.
|
||||
//
|
||||
// ====================================================
|
||||
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
//
|
||||
// Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
// Permission to use, copy, modify, and distribute this
|
||||
// software is freely granted, provided that this notice
|
||||
// is preserved.
|
||||
// ====================================================
|
||||
//
|
||||
//
|
||||
// double log1p(double x)
|
||||
//
|
||||
// Method :
|
||||
// 1. Argument Reduction: find k and f such that
|
||||
// 1+x = 2**k * (1+f),
|
||||
// where sqrt(2)/2 < 1+f < sqrt(2) .
|
||||
//
|
||||
// Note. If k=0, then f=x is exact. However, if k!=0, then f
|
||||
// may not be representable exactly. In that case, a correction
|
||||
// term is needed. Let u=1+x rounded. Let c = (1+x)-u, then
|
||||
// log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
|
||||
// and add back the correction term c/u.
|
||||
// (Note: when x > 2**53, one can simply return log(x))
|
||||
//
|
||||
// 2. Approximation of log1p(f).
|
||||
// Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
|
||||
// = 2s + 2/3 s**3 + 2/5 s**5 + .....,
|
||||
// = 2s + s*R
|
||||
// We use a special Remez algorithm on [0,0.1716] to generate
|
||||
// a polynomial of degree 14 to approximate R The maximum error
|
||||
// of this polynomial approximation is bounded by 2**-58.45. In
|
||||
// other words,
|
||||
// 2 4 6 8 10 12 14
|
||||
// R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s +Lp6*s +Lp7*s
|
||||
// (the values of Lp1 to Lp7 are listed in the program)
|
||||
// and
|
||||
// | 2 14 | -58.45
|
||||
// | Lp1*s +...+Lp7*s - R(z) | <= 2
|
||||
// | |
|
||||
// Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
|
||||
// In order to guarantee error in log below 1ulp, we compute log
|
||||
// by
|
||||
// log1p(f) = f - (hfsq - s*(hfsq+R)).
|
||||
//
|
||||
// 3. Finally, log1p(x) = k*ln2 + log1p(f).
|
||||
// = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
|
||||
// Here ln2 is split into two floating point number:
|
||||
// ln2_hi + ln2_lo,
|
||||
// where n*ln2_hi is always exact for |n| < 2000.
|
||||
//
|
||||
// Special cases:
|
||||
// log1p(x) is NaN with signal if x < -1 (including -INF) ;
|
||||
// log1p(+INF) is +INF; log1p(-1) is -INF with signal;
|
||||
// log1p(NaN) is that NaN with no signal.
|
||||
//
|
||||
// Accuracy:
|
||||
// according to an error analysis, the error is always less than
|
||||
// 1 ulp (unit in the last place).
|
||||
//
|
||||
// Constants:
|
||||
// The hexadecimal values are the intended ones for the following
|
||||
// constants. The decimal values may be used, provided that the
|
||||
// compiler will convert from decimal to binary accurately enough
|
||||
// to produce the hexadecimal values shown.
|
||||
//
|
||||
// Note: Assuming log() return accurate answer, the following
|
||||
// algorithm can be used to compute log1p(x) to within a few ULP:
|
||||
//
|
||||
// u = 1+x;
|
||||
// if(u==1.0) return x ; else
|
||||
// return log(u)*(x/(u-1.0));
|
||||
//
|
||||
// See HP-15C Advanced Functions Handbook, p.193.
|
||||
|
||||
// log1p returns ln(1 + x).
//
// Overload set dispatching on the floating-point type of the argument. Every
// non-f64 variant widens its argument to f64, evaluates log1p_f64, and
// converts the result back to its own type.
log1p :: proc {
	log1p_f16, log1p_f32, log1p_f64,
	log1p_f16le, log1p_f16be,
	log1p_f32le, log1p_f32be,
	log1p_f64le, log1p_f64be,
}

log1p_f16 :: proc "contextless" (x: f16) -> f16 {
	wide := log1p_f64(f64(x))
	return f16(wide)
}
log1p_f32 :: proc "contextless" (x: f32) -> f32 {
	wide := log1p_f64(f64(x))
	return f32(wide)
}
log1p_f16le :: proc "contextless" (x: f16le) -> f16le {
	wide := log1p_f64(f64(x))
	return f16le(wide)
}
log1p_f16be :: proc "contextless" (x: f16be) -> f16be {
	wide := log1p_f64(f64(x))
	return f16be(wide)
}
log1p_f32le :: proc "contextless" (x: f32le) -> f32le {
	wide := log1p_f64(f64(x))
	return f32le(wide)
}
log1p_f32be :: proc "contextless" (x: f32be) -> f32be {
	wide := log1p_f64(f64(x))
	return f32be(wide)
}
log1p_f64le :: proc "contextless" (x: f64le) -> f64le {
	wide := log1p_f64(f64(x))
	return f64le(wide)
}
log1p_f64be :: proc "contextless" (x: f64be) -> f64be {
	wide := log1p_f64(f64(x))
	return f64be(wide)
}
|
||||
|
||||
// log1p_f64 returns ln(1 + x).
//
// Special cases:
//     log1p(x)    = NaN  if x < -1 or x is NaN
//     log1p(-1)   = -Inf
//     log1p(+Inf) = +Inf
//
// For |x| < 2**-54 the argument itself is returned, and for |x| < 2**-29 the
// two-term series x - x*x/2 is used. Otherwise 1+x is reduced to 2**k * (1+f)
// and ln(1+f) is evaluated with the LP1..LP7 polynomial.
log1p_f64 :: proc "contextless" (x: f64) -> f64 {
	SQRT2_M1      :: 0h3fda827999fcef34 // Sqrt(2)-1
	SQRT2_HALF_M1 :: 0hbfd2bec333018866 // Sqrt(2)/2-1
	SMALL         :: 0h3e20000000000000 // 2**-29
	TINY          :: 1.0 / (1 << 54)    // 2**-54
	TWO53         :: 1 << 53            // 2**53
	LN2HI         :: 0h3fe62e42fee00000
	LN2LO         :: 0h3dea39ef35793c76
	LP1           :: 0h3FE5555555555593
	LP2           :: 0h3FD999999997FA04
	LP3           :: 0h3FD2492494229359
	LP4           :: 0h3FCC71C51D8E78AF
	LP5           :: 0h3FC7466496CB03DE
	LP6           :: 0h3FC39A09D078C69F
	LP7           :: 0h3FC2F112DF3E5244

	// special cases
	if x < -1 || is_nan(x) {
		return nan_f64()
	}
	if x == -1 {
		return inf_f64(-1)
	}
	if is_inf(x, 1) {
		return inf_f64(+1)
	}

	mag := abs(x)

	f: f64
	bits: u64
	k := 1
	if mag < SQRT2_M1 { // |x| < Sqrt(2)-1
		if mag < TINY { // |x| < 2**-54
			return x
		}
		if mag < SMALL { // |x| < 2**-29
			return x - x*x*0.5
		}
		if x > SQRT2_HALF_M1 { // Sqrt(2)/2-1 < x
			// (Sqrt(2)/2-1) < x < (Sqrt(2)-1): no reduction needed
			k = 0
			f = x
			bits = 1
		}
	}

	c: f64
	if k != 0 {
		u: f64
		if mag < TWO53 { // 1<<53
			u = 1.0 + x
			bits = transmute(u64)u
			k = int((bits >> 52) - 1023)
			// correction term
			if k > 0 {
				c = 1.0 - (u - x)
			} else {
				c = x - (u - 1.0)
			}
			c /= u
		} else {
			u = x
			bits = transmute(u64)u
			k = int((bits >> 52) - 1023)
			c = 0
		}
		bits &= 0x000fffffffffffff
		if bits < 0x0006a09e667f3bcd { // mantissa of Sqrt(2)
			u = transmute(f64)(bits | 0x3ff0000000000000) // normalize u
		} else {
			k += 1
			u = transmute(f64)(bits | 0x3fe0000000000000) // normalize u/2
			bits = (0x0010000000000000 - bits) >> 2
		}
		f = u - 1.0 // Sqrt(2)/2 < u < Sqrt(2)
	}

	hfsq := 0.5 * f * f
	kf := f64(k)
	R: f64

	if bits == 0 { // |f| < 2**-20
		if f == 0 {
			if k == 0 {
				return 0
			}
			c += kf * LN2LO
			return kf*LN2HI + c
		}
		R = hfsq * (1.0 - 0.66666666666666666*f) // avoid division
		if k == 0 {
			return f - R
		}
		return kf*LN2HI - ((R - (kf*LN2LO + c)) - f)
	}

	s := f / (2.0 + f)
	z := s * s
	R = z * (LP1 + z*(LP2+z*(LP3+z*(LP4+z*(LP5+z*(LP6+z*LP7))))))
	if k == 0 {
		return f - (hfsq - s*(hfsq+R))
	}
	return kf*LN2HI - ((hfsq - (s*(hfsq+R) + (kf*LN2LO + c))) - f)
}
|
||||
Reference in New Issue
Block a user