Add log1p, erf, erfc, ilogb, logb (implementation based on FreeBSD's)

This commit is contained in:
gingerBill
2021-11-16 14:26:43 +00:00
parent 880af47ae7
commit eb8b0d7a03
3 changed files with 778 additions and 20 deletions
+170 -20
View File
@@ -197,22 +197,16 @@ log :: proc{
log_f64, log_f64le, log_f64be,
}
// log2 returns the base-2 logarithm of x, computed as ln(x)/LN2.
//
// NOTE: log2 must NOT be an alias of logb. logb only yields the unbiased
// binary exponent of its argument as an integer-valued float (it is built on
// ilogb), so logb(3) == 1 whereas log2(3) ~= 1.585. logb also performs no
// sign check, so it returns a finite value for negative inputs.
log2_f16   :: proc "contextless" (x: f16)   -> f16   { return ln(x)/LN2 }
log2_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(log2_f16(f16(x))) }
log2_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(log2_f16(f16(x))) }
log2_f32   :: proc "contextless" (x: f32)   -> f32   { return ln(x)/LN2 }
log2_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(log2_f32(f32(x))) }
log2_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(log2_f32(f32(x))) }
log2_f64   :: proc "contextless" (x: f64)   -> f64   { return ln(x)/LN2 }
log2_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(log2_f64(f64(x))) }
log2_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(log2_f64(f64(x))) }
log2 :: proc{
	log2_f16, log2_f16le, log2_f16be,
	log2_f32, log2_f32le, log2_f32be,
	log2_f64, log2_f64le, log2_f64be,
}
// log10_f16 returns the base-10 logarithm of x, computed as ln(x)/LN10.
log10_f16 :: proc "contextless" (x: f16) -> f16 { return ln(x)/LN10 }
// log10_f16le is the little-endian overload: it converts to f16, calls
// log10_f16 and converts the result back to f16le.
log10_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(log10_f16(f16(x))) }
@@ -1394,18 +1388,174 @@ tanh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
return (t - 1) / (t + 1)
}
asinh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
return ln(x + sqrt(x*x + 1))
asinh :: proc "contextless" (y: $T) -> T where intrinsics.type_is_float(T) {
	// The original C code, the long comment, and the constants
	// below are from FreeBSD's /usr/src/lib/msun/src/s_asinh.c
	// and came with this notice.
	//
	// ====================================================
	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
	//
	// Developed at SunPro, a Sun Microsystems, Inc. business.
	// Permission to use, copy, modify, and distribute this
	// software is freely granted, provided that this notice
	// is preserved.
	// ====================================================

	// Inverse hyperbolic sine. All work is done in f64 and the result is
	// converted back to T. NaN and +/-Inf are returned unchanged.
	LN2       :: 0h3FE62E42FEFA39EF
	NEAR_ZERO :: 1.0 / (1 << 28) // 2**-28
	LARGE     :: 1 << 28         // 2**28

	v := f64(y)
	if is_nan(v) || is_inf(v) {
		return T(v)
	}

	// asinh is odd: evaluate on the magnitude and restore the sign at the end.
	negative := v < 0
	a := -v if negative else v

	result: f64
	if a > LARGE {
		// |x| > 2**28
		result = ln(a) + LN2
	} else if a > 2 {
		// 2**28 >= |x| > 2
		result = ln(2*a + 1/(sqrt(a*a + 1) + a))
	} else if a < NEAR_ZERO {
		// |x| < 2**-28: the input itself is used as the result
		result = a
	} else {
		// 2 >= |x| >= 2**-28
		result = log1p(a + a*a/(1 + sqrt(1 + a*a)))
	}

	return T(-result) if negative else T(result)
}
acosh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
return ln(x + sqrt(x*x - 1))
acosh :: proc "contextless" (y: $T) -> T where intrinsics.type_is_float(T) {
	// The original C code, the long comment, and the constants
	// below are from FreeBSD's /usr/src/lib/msun/src/e_acosh.c
	// and came with this notice.
	//
	// ====================================================
	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
	//
	// Developed at SunPro, a Sun Microsystems, Inc. business.
	// Permission to use, copy, modify, and distribute this
	// software is freely granted, provided that this notice
	// is preserved.
	// ====================================================

	// Inverse hyperbolic cosine, evaluated in f64 and converted back to T.
	// Inputs below 1 and NaN produce NaN; acosh(1) is exactly 0.
	LN2   :: 0h3FE62E42FEFA39EF
	LARGE :: 1<<28 // 2**28

	v := f64(y)
	if v < 1 || is_nan(v) {
		return T(nan_f64())
	}
	if v == 1 {
		return 0
	}
	if v >= LARGE {
		// x >= 2**28
		return T(ln(v) + LN2)
	}
	if v > 2 {
		// 2**28 > x > 2
		return T(ln(2*v - 1/(v + sqrt(v*v - 1))))
	}
	// 2 >= x > 1
	d := v - 1
	return T(log1p(d + sqrt(2*d + d*d)))
}
atanh :: proc "contextless" (x: $T) -> T where intrinsics.type_is_float(T) {
	// Inverse hyperbolic tangent via the identity
	//     atanh(x) = ln((1+x)/(1-x)) / 2
	ratio := (1+x)/(1-x)
	return 0.5*ln(ratio)
}
// ilogb_f16 returns the binary exponent of val as an integer.
//
// Special cases:
//   ilogb(0)      = min(i32)
//   ilogb(NaN)    = max(i32)
//   ilogb(+/-Inf) = max(i32)
ilogb_f16 :: proc "contextless" (val: f16) -> int {
	if val == 0 {
		return int(min(i32))
	}
	if is_nan(val) || is_inf(val) {
		return int(max(i32))
	}
	// normalize_f16 hands back a value plus an exponent adjustment
	// (presumably to rescale subnormals - confirm against its definition).
	norm, adjust := normalize_f16(val)
	biased := int(((transmute(u16)norm)>>F16_SHIFT)&F16_MASK)
	return biased - F16_BIAS + adjust
}
// ilogb_f32 returns the binary exponent of val as an integer.
//
// Special cases:
//   ilogb(0)      = min(i32)
//   ilogb(NaN)    = max(i32)
//   ilogb(+/-Inf) = max(i32)
ilogb_f32 :: proc "contextless" (val: f32) -> int {
	if val == 0 {
		return int(min(i32))
	}
	if is_nan(val) || is_inf(val) {
		return int(max(i32))
	}
	// normalize_f32 hands back a value plus an exponent adjustment
	// (presumably to rescale subnormals - confirm against its definition).
	norm, adjust := normalize_f32(val)
	biased := int(((transmute(u32)norm)>>F32_SHIFT)&F32_MASK)
	return biased - F32_BIAS + adjust
}
// ilogb_f64 returns the binary exponent of val as an integer.
//
// Special cases:
//   ilogb(0)      = min(i32)
//   ilogb(NaN)    = max(i32)
//   ilogb(+/-Inf) = max(i32)
ilogb_f64 :: proc "contextless" (val: f64) -> int {
	if val == 0 {
		return int(min(i32))
	}
	if is_nan(val) || is_inf(val) {
		return int(max(i32))
	}
	// normalize_f64 hands back a value plus an exponent adjustment
	// (presumably to rescale subnormals - confirm against its definition).
	norm, adjust := normalize_f64(val)
	biased := int(((transmute(u64)norm)>>F64_SHIFT)&F64_MASK)
	return biased - F64_BIAS + adjust
}
// Endian-specific overloads: each converts its argument to the plain float
// type of the same width and defers to the matching ilogb_fNN procedure.
ilogb_f16le :: proc "contextless" (value: f16le) -> int {
	return ilogb_f16(f16(value))
}
ilogb_f16be :: proc "contextless" (value: f16be) -> int {
	return ilogb_f16(f16(value))
}
ilogb_f32le :: proc "contextless" (value: f32le) -> int {
	return ilogb_f32(f32(value))
}
ilogb_f32be :: proc "contextless" (value: f32be) -> int {
	return ilogb_f32(f32(value))
}
ilogb_f64le :: proc "contextless" (value: f64le) -> int {
	return ilogb_f64(f64(value))
}
ilogb_f64be :: proc "contextless" (value: f64be) -> int {
	return ilogb_f64(f64(value))
}

// ilogb is the procedure group covering every float width and endianness.
ilogb :: proc {
	ilogb_f16,
	ilogb_f32,
	ilogb_f64,
	ilogb_f16le,
	ilogb_f16be,
	ilogb_f32le,
	ilogb_f32be,
	ilogb_f64le,
	ilogb_f64be,
}
// logb_f16 returns the binary exponent of val as a float (see ilogb).
//
// Special cases:
//   logb(0)      = -Inf
//   logb(+/-Inf) = +Inf
//   logb(NaN)    = NaN (the argument is returned)
logb_f16 :: proc "contextless" (val: f16) -> f16 {
	if val == 0 {
		return inf_f16(-1)
	}
	if is_inf(val) {
		return inf_f16(+1)
	}
	if is_nan(val) {
		return val
	}
	return f16(ilogb(val))
}
// logb_f32 returns the binary exponent of val as a float (see ilogb).
//
// Special cases:
//   logb(0)      = -Inf
//   logb(+/-Inf) = +Inf
//   logb(NaN)    = NaN (the argument is returned)
logb_f32 :: proc "contextless" (val: f32) -> f32 {
	if val == 0 {
		return inf_f32(-1)
	}
	if is_inf(val) {
		return inf_f32(+1)
	}
	if is_nan(val) {
		return val
	}
	return f32(ilogb(val))
}
// logb_f64 returns the binary exponent of val as a float (see ilogb).
//
// Special cases:
//   logb(0)      = -Inf
//   logb(+/-Inf) = +Inf
//   logb(NaN)    = NaN (the argument is returned)
logb_f64 :: proc "contextless" (val: f64) -> f64 {
	if val == 0 {
		return inf_f64(-1)
	}
	if is_inf(val) {
		return inf_f64(+1)
	}
	if is_nan(val) {
		return val
	}
	return f64(ilogb(val))
}
// Endian-specific overloads: each converts its argument to the plain float
// type of the same width, calls the matching logb_fNN procedure and converts
// the result back to the endian-specific type.
logb_f16le :: proc "contextless" (value: f16le) -> f16le {
	return f16le(logb_f16(f16(value)))
}
logb_f16be :: proc "contextless" (value: f16be) -> f16be {
	return f16be(logb_f16(f16(value)))
}
logb_f32le :: proc "contextless" (value: f32le) -> f32le {
	return f32le(logb_f32(f32(value)))
}
logb_f32be :: proc "contextless" (value: f32be) -> f32be {
	return f32be(logb_f32(f32(value)))
}
logb_f64le :: proc "contextless" (value: f64le) -> f64le {
	return f64le(logb_f64(f64(value)))
}
logb_f64be :: proc "contextless" (value: f64be) -> f64be {
	return f64be(logb_f64(f64(value)))
}

// logb is the procedure group covering every float width and endianness.
logb :: proc {
	logb_f16,
	logb_f32,
	logb_f64,
	logb_f16le,
	logb_f16be,
	logb_f32le,
	logb_f32be,
	logb_f64le,
	logb_f64be,
}
// NOTE(review): these look like f16 counterparts of the C <float.h> limit
// macros (decimal digits, machine epsilon, guard digits) - confirm.
F16_DIG :: 3
// NOTE(review): 2**-10 is exactly 0.0009765625; this literal appears to be a
// truncated form of that value - confirm whether the truncation is intended.
F16_EPSILON :: 0.00097656
F16_GUARD :: 0
+410
View File
@@ -0,0 +1,410 @@
package math
// The original C code and the long comment below are
// from FreeBSD's /usr/src/lib/msun/src/s_erf.c and
// came with this notice.
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunPro, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
//
//
// double erf(double x)
// double erfc(double x)
//                           x
//                    2      |\
//     erf(x)  =  ---------  | exp(-t*t)dt
//                 sqrt(pi) \|
//                           0
//
// erfc(x) = 1-erf(x)
// Note that
// erf(-x) = -erf(x)
// erfc(-x) = 2 - erfc(x)
//
// Method:
// 1. For |x| in [0, 0.84375]
// erf(x) = x + x*R(x**2)
// erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
// = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
// where R = P/Q where P is an odd poly of degree 8 and
// Q is an odd poly of degree 10.
//     | R - (erf(x)-x)/x | <= 2**-57.90
//
//
// Remark. The formula is derived by noting
// erf(x) = (2/sqrt(pi))*(x - x**3/3 + x**5/10 - x**7/42 + ....)
// and that
// 2/sqrt(pi) = 1.128379167095512573896158903121545171688
// is close to one. The interval is chosen because the fix
// point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
// near 0.6174), and by some experiment, 0.84375 is chosen to
// guarantee the error is less than one ulp for erf.
//
// 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
// c = 0.84506291151 rounded to single (24 bits)
// erf(x) = sign(x) * (c + P1(s)/Q1(s))
// erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
// 1+(c+P1(s)/Q1(s)) if x < 0
// |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
// Remark: here we use the taylor series expansion at x=1.
// erf(1+s) = erf(1) + s*Poly(s)
// = 0.845.. + P1(s)/Q1(s)
// That is, we use rational approximation to approximate
// erf(1+s) - (c = (single)0.84506291151)
// Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
// where
// P1(s) = degree 6 poly in s
// Q1(s) = degree 6 poly in s
//
// 3. For x in [1.25,1/0.35(~2.857143)],
// erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
// erf(x) = 1 - erfc(x)
// where
// R1(z) = degree 7 poly in z, (z=1/x**2)
// S1(z) = degree 8 poly in z
//
// 4. For x in [1/0.35,28]
// erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
// = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
// = 2.0 - tiny (if x <= -6)
// erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else
// erf(x) = sign(x)*(1.0 - tiny)
// where
// R2(z) = degree 6 poly in z, (z=1/x**2)
// S2(z) = degree 7 poly in z
//
// Note1:
// To compute exp(-x*x-0.5625+R/S), let s be a single
// precision number and s := x; then
// -x*x = -s*s + (s-x)*(s+x)
// exp(-x*x-0.5625+R/S) =
// exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
// Note2:
// Here 4 and 5 make use of the asymptotic series
// exp(-x*x)
// erfc(x) ~ ---------- * ( 1 + Poly(1/x**2) )
// x*sqrt(pi)
// We use rational approximation to approximate
// g(s)=f(1/x**2) = log(erfc(x)*x) - x*x + 0.5625
// Here is the error bound for R1/S1 and R2/S2
// |R1/S1 - f(x)| < 2**(-62.57)
// |R2/S2 - f(x)| < 2**(-61.52)
//
// 5. For inf > x >= 28
// erf(x) = sign(x) *(1 - tiny) (raise inexact)
// erfc(x) = tiny*tiny (raise underflow) if x > 0
// = 2 - tiny if x<0
//
// 7. Special case:
// erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
// erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
// erfc/erf(NaN) is NaN
// erf is the procedure group for the error function. Every overload converts
// its argument to f64, evaluates erf_f64 and converts the result back to the
// argument's type.
//
// The f64le/f64be overloads are provided so that the group covers the same
// nine float types as the log1p, ilogb and logb groups.
erf :: proc{
	erf_f16,
	erf_f16le,
	erf_f16be,
	erf_f32,
	erf_f32le,
	erf_f32be,
	erf_f64,
	erf_f64le,
	erf_f64be,
}
erf_f16   :: proc "contextless" (x: f16)   -> f16   { return f16(erf_f64(f64(x))) }
erf_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(erf_f64(f64(x))) }
erf_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(erf_f64(f64(x))) }
erf_f32   :: proc "contextless" (x: f32)   -> f32   { return f32(erf_f64(f64(x))) }
erf_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(erf_f64(f64(x))) }
erf_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(erf_f64(f64(x))) }
erf_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(erf_f64(f64(x))) }
erf_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(erf_f64(f64(x))) }
// erf_f64 returns the error function of x.
//
// Special cases (handled before any arithmetic):
//   erf(NaN)  = NaN
//   erf(+Inf) = 1
//   erf(-Inf) = -1
//
// The sign of x is stripped and re-applied at every return, so the kernel
// only ever evaluates |x|. The magnitude is dispatched on the ranges
// [0, 0.84375), [0.84375, 1.25), [1.25, 6) and [6, inf).
erf_f64 :: proc "contextless" (x: f64) -> f64 {
// Constant term used on [0.84375, 1.25); erf is approximated there as erx + P/Q.
erx :: 0h3FEB0AC160000000
// Coefficients for approximation to erf in [0, 0.84375]
efx :: 0h3FC06EBA8214DB69
efx8 :: 0h3FF06EBA8214DB69
pp0 :: 0h3FC06EBA8214DB68
pp1 :: 0hBFD4CD7D691CB913
pp2 :: 0hBF9D2A51DBD7194F
pp3 :: 0hBF77A291236668E4
pp4 :: 0hBEF8EAD6120016AC
qq1 :: 0h3FD97779CDDADC09
qq2 :: 0h3FB0A54C5536CEBA
qq3 :: 0h3F74D022C4D36B0F
qq4 :: 0h3F215DC9221C1A10
qq5 :: 0hBED09C4342A26120
// Coefficients for approximation to erf in [0.84375, 1.25]
pa0 :: 0hBF6359B8BEF77538
pa1 :: 0h3FDA8D00AD92B34D
pa2 :: 0hBFD7D240FBB8C3F1
pa3 :: 0h3FD45FCA805120E4
pa4 :: 0hBFBC63983D3E28EC
pa5 :: 0h3FA22A36599795EB
pa6 :: 0hBF61BF380A96073F
qa1 :: 0h3FBB3E6618EEE323
qa2 :: 0h3FE14AF092EB6F33
qa3 :: 0h3FB2635CD99FE9A7
qa4 :: 0h3FC02660E763351F
qa5 :: 0h3F8BEDC26B51DD1C
qa6 :: 0h3F888B545735151D
// Coefficients for approximation to erfc in [1.25, 1/0.35]
ra0 :: 0hBF843412600D6435
ra1 :: 0hBFE63416E4BA7360
ra2 :: 0hC0251E0441B0E726
ra3 :: 0hC04F300AE4CBA38D
ra4 :: 0hC0644CB184282266
ra5 :: 0hC067135CEBCCABB2
ra6 :: 0hC054526557E4D2F2
ra7 :: 0hC023A0EFC69AC25C
sa1 :: 0h4033A6B9BD707687
sa2 :: 0h4061350C526AE721
sa3 :: 0h407B290DD58A1A71
sa4 :: 0h40842B1921EC2868
sa5 :: 0h407AD02157700314
sa6 :: 0h405B28A3EE48AE2C
sa7 :: 0h401A47EF8E484A93
sa8 :: 0hBFAEEFF2EE749A62
// Coefficients for approximation to erfc in [1/.35, 28]
rb0 :: 0hBF84341239E86F4A
rb1 :: 0hBFE993BA70C285DE
rb2 :: 0hC031C209555F995A
rb3 :: 0hC064145D43C5ED98
rb4 :: 0hC083EC881375F228
rb5 :: 0hC09004616A2E5992
rb6 :: 0hC07E384E9BDC383F
sb1 :: 0h403E568B261D5190
sb2 :: 0h40745CAE221B9F0A
sb3 :: 0h409802EB189D5118
sb4 :: 0h40A8FFB7688C246A
sb5 :: 0h40A3F219CEDF3BE6
sb6 :: 0h407DA874E79FE763
sb7 :: 0hC03670E242712D62
// Thresholds for the tiny-argument shortcuts in the first range.
VERY_TINY :: 0h0080000000000000
SMALL :: 1.0 / (1 << 28) // 2**-28
// special cases
switch {
case is_nan(x):
return nan_f64()
case is_inf(x, 1):
return 1
case is_inf(x, -1):
return -1
}
// Shadow the parameter with a mutable local so the sign can be stripped.
x := x
sign := false
if x < 0 {
x = -x
sign = true
}
if x < 0.84375 { // |x| < 0.84375
temp: f64
if x < SMALL { // |x| < 2**-28
// Linear approximation x + efx*x; the polynomial is skipped entirely.
if x < VERY_TINY {
temp = 0.125 * (8.0*x + efx8*x) // avoid underflow
} else {
temp = x + efx*x
}
} else {
// Rational approximation in z = x*x: erf(x) = x + x*(r/s).
z := x * x
r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4)))
s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))))
y := r / s
temp = x + x*y
}
if sign {
return -temp
}
return temp
}
if x < 1.25 { // 0.84375 <= |x| < 1.25
// Rational approximation in s = |x| - 1: erf = erx + P/Q.
s := x - 1
P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))))
Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))))
if sign {
return -erx - P/Q
}
return erx + P/Q
}
if x >= 6 { // inf > |x| >= 6
// The result is returned as exactly +/-1 in this range.
if sign {
return -1
}
return 1
}
// 1.25 <= |x| < 6: compute erfc(|x|) = (1/x)*exp(-x*x-0.5625+R/S) and
// return 1 - erfc (with the sign restored).
s := 1 / (x * x)
R, S: f64
if x < 1/0.35 { // |x| < 1 / 0.35 ~ 2.857143
R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))))
S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8)))))))
} else { // |x| >= 1 / 0.35 ~ 2.857143
R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))))
S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))))
}
// z is x with the low 32 bits of its representation cleared. The exponent
// -x*x is then split as -z*z + (z-x)*(z+x) across the two exp calls.
z := transmute(f64)(0xffffffff00000000 & transmute(u64)x) // pseudo-single (20-bit) precision x
r := exp(-z*z-0.5625) * exp((z-x)*(z+x)+R/S)
if sign {
return r/x - 1
}
return 1 - r/x
}
// erfc is the procedure group for the complementary error function. Every
// overload converts its argument to f64, evaluates erfc_f64 and converts the
// result back to the argument's type.
//
// The f64le/f64be overloads are provided so that the group covers the same
// nine float types as the log1p, ilogb and logb groups.
erfc :: proc{
	erfc_f16,
	erfc_f16le,
	erfc_f16be,
	erfc_f32,
	erfc_f32le,
	erfc_f32be,
	erfc_f64,
	erfc_f64le,
	erfc_f64be,
}
erfc_f16   :: proc "contextless" (x: f16)   -> f16   { return f16(erfc_f64(f64(x))) }
erfc_f16le :: proc "contextless" (x: f16le) -> f16le { return f16le(erfc_f64(f64(x))) }
erfc_f16be :: proc "contextless" (x: f16be) -> f16be { return f16be(erfc_f64(f64(x))) }
erfc_f32   :: proc "contextless" (x: f32)   -> f32   { return f32(erfc_f64(f64(x))) }
erfc_f32le :: proc "contextless" (x: f32le) -> f32le { return f32le(erfc_f64(f64(x))) }
erfc_f32be :: proc "contextless" (x: f32be) -> f32be { return f32be(erfc_f64(f64(x))) }
erfc_f64le :: proc "contextless" (x: f64le) -> f64le { return f64le(erfc_f64(f64(x))) }
erfc_f64be :: proc "contextless" (x: f64be) -> f64be { return f64be(erfc_f64(f64(x))) }
// erfc_f64 returns the complementary error function of x.
//
// Special cases (handled before any arithmetic):
//   erfc(NaN)  = NaN
//   erfc(+Inf) = 0
//   erfc(-Inf) = 2
//
// The sign of x is stripped and remembered; each range then returns either
// the value for |x| or its reflection for negative x. The magnitude is
// dispatched on [0, 0.84375), [0.84375, 1.25), [1.25, 28) and [28, inf).
erfc_f64 :: proc "contextless" (x: f64) -> f64 {
// Constant term used on [0.84375, 1.25); erf is approximated there as erx + P/Q.
erx :: 0h3FEB0AC160000000
// Coefficients for approximation to erf in [0, 0.84375]
// (efx and efx8 are declared here but not referenced in this procedure.)
efx :: 0h3FC06EBA8214DB69
efx8 :: 0h3FF06EBA8214DB69
pp0 :: 0h3FC06EBA8214DB68
pp1 :: 0hBFD4CD7D691CB913
pp2 :: 0hBF9D2A51DBD7194F
pp3 :: 0hBF77A291236668E4
pp4 :: 0hBEF8EAD6120016AC
qq1 :: 0h3FD97779CDDADC09
qq2 :: 0h3FB0A54C5536CEBA
qq3 :: 0h3F74D022C4D36B0F
qq4 :: 0h3F215DC9221C1A10
qq5 :: 0hBED09C4342A26120
// Coefficients for approximation to erf in [0.84375, 1.25]
pa0 :: 0hBF6359B8BEF77538
pa1 :: 0h3FDA8D00AD92B34D
pa2 :: 0hBFD7D240FBB8C3F1
pa3 :: 0h3FD45FCA805120E4
pa4 :: 0hBFBC63983D3E28EC
pa5 :: 0h3FA22A36599795EB
pa6 :: 0hBF61BF380A96073F
qa1 :: 0h3FBB3E6618EEE323
qa2 :: 0h3FE14AF092EB6F33
qa3 :: 0h3FB2635CD99FE9A7
qa4 :: 0h3FC02660E763351F
qa5 :: 0h3F8BEDC26B51DD1C
qa6 :: 0h3F888B545735151D
// Coefficients for approximation to erfc in [1.25, 1/0.35]
ra0 :: 0hBF843412600D6435
ra1 :: 0hBFE63416E4BA7360
ra2 :: 0hC0251E0441B0E726
ra3 :: 0hC04F300AE4CBA38D
ra4 :: 0hC0644CB184282266
ra5 :: 0hC067135CEBCCABB2
ra6 :: 0hC054526557E4D2F2
ra7 :: 0hC023A0EFC69AC25C
sa1 :: 0h4033A6B9BD707687
sa2 :: 0h4061350C526AE721
sa3 :: 0h407B290DD58A1A71
sa4 :: 0h40842B1921EC2868
sa5 :: 0h407AD02157700314
sa6 :: 0h405B28A3EE48AE2C
sa7 :: 0h401A47EF8E484A93
sa8 :: 0hBFAEEFF2EE749A62
// Coefficients for approximation to erfc in [1/.35, 28]
rb0 :: 0hBF84341239E86F4A
rb1 :: 0hBFE993BA70C285DE
rb2 :: 0hC031C209555F995A
rb3 :: 0hC064145D43C5ED98
rb4 :: 0hC083EC881375F228
rb5 :: 0hC09004616A2E5992
rb6 :: 0hC07E384E9BDC383F
sb1 :: 0h403E568B261D5190
sb2 :: 0h40745CAE221B9F0A
sb3 :: 0h409802EB189D5118
sb4 :: 0h40A8FFB7688C246A
sb5 :: 0h40A3F219CEDF3BE6
sb6 :: 0h407DA874E79FE763
sb7 :: 0hC03670E242712D62
// Below this magnitude erf(|x|) is taken to be |x| itself.
TINY :: 1.0 / (1 << 56) // 2**-56
// special cases
switch {
case is_nan(x):
return nan_f64()
case is_inf(x, 1):
return 0
case is_inf(x, -1):
return 2
}
// Shadow the parameter with a mutable local so the sign can be stripped.
x := x
sign := false
if x < 0 {
x = -x
sign = true
}
if x < 0.84375 { // |x| < 0.84375
// temp holds erf(|x|); the result is 1 - temp, or 1 + temp for negative x.
temp: f64
if x < TINY { // |x| < 2**-56
temp = x
} else {
z := x * x
r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4)))
s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))))
y := r / s
if x < 0.25 { // |x| < 1/4
temp = x + x*y
} else {
// Same quantity as x + x*y, regrouped around 0.5.
temp = 0.5 + (x*y + (x - 0.5))
}
}
if sign {
return 1 + temp
}
return 1 - temp
}
if x < 1.25 { // 0.84375 <= |x| < 1.25
// Rational approximation in s = |x| - 1: erf(|x|) = erx + P/Q.
s := x - 1
P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))))
Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))))
if sign {
return 1 + erx + P/Q
}
return 1 - erx - P/Q
}
if x < 28 { // |x| < 28
// erfc(|x|) = (1/x)*exp(-x*x-0.5625+R/S), with R/S chosen by sub-range.
s := 1 / (x * x)
R, S: f64
if x < 1/0.35 { // |x| < 1 / 0.35 ~ 2.857143
R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7))))))
S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8)))))))
} else { // |x| >= 1 / 0.35 ~ 2.857143
if sign && x > 6 {
return 2 // x < -6
}
R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6)))))
S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))))
}
// z is x with the low 32 bits of its representation cleared. The exponent
// -x*x is then split as -z*z + (z-x)*(z+x) across the two exp calls.
z := transmute(f64)(0xffffffff00000000 & transmute(u64)x) // pseudo-single (20-bit) precision x
r := exp(-z*z-0.5625) * exp((z-x)*(z+x)+R/S)
if sign {
return 2 - r/x
}
return r / x
}
// |x| >= 28: the result is returned as exactly 2 (negative x) or 0.
if sign {
return 2
}
return 0
}
+198
View File
@@ -0,0 +1,198 @@
package math
// The original C code, the long comment, and the constants
// below are from FreeBSD's /usr/src/lib/msun/src/s_log1p.c
// and came with this notice. The code below is a simplified
// version of the original C.
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunPro, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
//
//
// double log1p(double x)
//
// Method :
// 1. Argument Reduction: find k and f such that
// 1+x = 2**k * (1+f),
// where sqrt(2)/2 < 1+f < sqrt(2) .
//
// Note. If k=0, then f=x is exact. However, if k!=0, then f
// may not be representable exactly. In that case, a correction
// term is needed. Let u=1+x rounded. Let c = (1+x)-u, then
// log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
// and add back the correction term c/u.
// (Note: when x > 2**53, one can simply return log(x))
//
// 2. Approximation of log1p(f).
// Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
// = 2s + 2/3 s**3 + 2/5 s**5 + .....,
// = 2s + s*R
// We use a special Remez algorithm on [0,0.1716] to generate
// a polynomial of degree 14 to approximate R. The maximum error
// of this polynomial approximation is bounded by 2**-58.45. In
// other words,
// R(z) ~ Lp1*s**2 + Lp2*s**4 + Lp3*s**6 + Lp4*s**8 + Lp5*s**10 + Lp6*s**12 + Lp7*s**14
// (the values of Lp1 to Lp7 are listed in the program)
// and
// | Lp1*s**2 + ... + Lp7*s**14 - R(z) | <= 2**-58.45
// Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
// In order to guarantee error in log below 1ulp, we compute log
// by
// log1p(f) = f - (hfsq - s*(hfsq+R)).
//
// 3. Finally, log1p(x) = k*ln2 + log1p(f).
// = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
// Here ln2 is split into two floating point number:
// ln2_hi + ln2_lo,
// where n*ln2_hi is always exact for |n| < 2000.
//
// Special cases:
// log1p(x) is NaN with signal if x < -1 (including -INF) ;
// log1p(+INF) is +INF; log1p(-1) is -INF with signal;
// log1p(NaN) is that NaN with no signal.
//
// Accuracy:
// according to an error analysis, the error is always less than
// 1 ulp (unit in the last place).
//
// Constants:
// The hexadecimal values are the intended ones for the following
// constants. The decimal values may be used, provided that the
// compiler will convert from decimal to binary accurately enough
// to produce the hexadecimal values shown.
//
// Note: Assuming log() return accurate answer, the following
// algorithm can be used to compute log1p(x) to within a few ULP:
//
// u = 1+x;
// if(u==1.0) return x ; else
// return log(u)*(x/(u-1.0));
//
// See HP-15C Advanced Functions Handbook, p.193.
// log1p is the procedure group for ln(1+x). Every non-f64 overload converts
// its argument to f64, evaluates log1p_f64 and converts the result back to
// the argument's type.
log1p :: proc {
	log1p_f16,
	log1p_f32,
	log1p_f64,
	log1p_f16le,
	log1p_f16be,
	log1p_f32le,
	log1p_f32be,
	log1p_f64le,
	log1p_f64be,
}

// Plain (native-endian) narrower widths.
log1p_f16 :: proc "contextless" (x: f16) -> f16 {
	return f16(log1p_f64(f64(x)))
}
log1p_f32 :: proc "contextless" (x: f32) -> f32 {
	return f32(log1p_f64(f64(x)))
}

// Endian-specific variants.
log1p_f16le :: proc "contextless" (x: f16le) -> f16le {
	return f16le(log1p_f64(f64(x)))
}
log1p_f16be :: proc "contextless" (x: f16be) -> f16be {
	return f16be(log1p_f64(f64(x)))
}
log1p_f32le :: proc "contextless" (x: f32le) -> f32le {
	return f32le(log1p_f64(f64(x)))
}
log1p_f32be :: proc "contextless" (x: f32be) -> f32be {
	return f32be(log1p_f64(f64(x)))
}
log1p_f64le :: proc "contextless" (x: f64le) -> f64le {
	return f64le(log1p_f64(f64(x)))
}
log1p_f64be :: proc "contextless" (x: f64be) -> f64be {
	return f64be(log1p_f64(f64(x)))
}
// log1p_f64 returns ln(1+x).
//
// Special cases (handled before any arithmetic):
//   log1p(x < -1) = NaN
//   log1p(NaN)    = NaN
//   log1p(-1)     = -Inf
//   log1p(+Inf)   = +Inf
//
// Outline: write 1+x = 2**k * (1+f), evaluate log1p(f) with a polynomial
// in s = f/(2+f), and add k*ln2 using the split constant LN2HI + LN2LO.
log1p_f64 :: proc "contextless" (x: f64) -> f64 {
SQRT2_M1 :: 0h3fda827999fcef34 // Sqrt(2)-1
SQRT2_HALF_M1 :: 0hbfd2bec333018866 // Sqrt(2)/2-1
SMALL :: 0h3e20000000000000 // 2**-29
TINY :: 1.0 / (1 << 54) // 2**-54
TWO53 :: 1 << 53 // 2**53
// ln2 split into a high and a low part (see step 3 of the method).
LN2HI :: 0h3fe62e42fee00000
LN2LO :: 0h3dea39ef35793c76
// Polynomial coefficients Lp1..Lp7 for R.
LP1 :: 0h3FE5555555555593
LP2 :: 0h3FD999999997FA04
LP3 :: 0h3FD2492494229359
LP4 :: 0h3FCC71C51D8E78AF
LP5 :: 0h3FC7466496CB03DE
LP6 :: 0h3FC39A09D078C69F
LP7 :: 0h3FC2F112DF3E5244
// special cases
switch {
case x < -1 || is_nan(x):
return nan_f64()
case x == -1:
return inf_f64(-1)
case is_inf(x, 1):
return inf_f64(+1)
}
absx := abs(x)
f: f64
iu: u64
// k starts non-zero so the argument reduction below runs unless the
// fast path explicitly sets k = 0.
k := 1
if absx < SQRT2_M1 { // |x| < Sqrt(2)-1
if absx < SMALL { // |x| < 2**-29
if absx < TINY { // |x| < 2**-54
return x
}
// Two-term approximation x - x*x/2.
return x - x*x*0.5
}
if x > SQRT2_HALF_M1 { // Sqrt(2)/2-1 < x
// (Sqrt(2)/2-1) < x < (Sqrt(2)-1)
// No reduction needed: f = x. iu is set non-zero so the
// `iu == 0` shortcut further down is not taken on this path.
k = 0
f = x
iu = 1
}
}
// c is the correction term c/u described in the method note.
c: f64
if k != 0 {
u: f64
if absx < TWO53 { // 1<<53
u = 1.0 + x
iu = transmute(u64)u
// Unbiased binary exponent taken from the bits of u.
k = int((iu >> 52) - 1023)
// correction term
if k > 0 {
c = 1.0 - (u - x)
} else {
c = x - (u - 1.0)
}
c /= u
} else {
// |x| >= 2**53: x itself is used in place of 1+x, with no correction.
u = x
iu = transmute(u64)u
k = int((iu >> 52) - 1023)
c = 0
}
// Keep only the 52 mantissa bits.
iu &= 0x000fffffffffffff
if iu < 0x0006a09e667f3bcd { // mantissa of Sqrt(2)
u = transmute(f64)(iu | 0x3ff0000000000000) // normalize u
} else {
k += 1
u = transmute(f64)(iu | 0x3fe0000000000000) // normalize u/2
iu = (0x0010000000000000 - iu) >> 2
}
f = u - 1.0 // Sqrt(2)/2 < u < Sqrt(2)
}
hfsq := 0.5 * f * f
s, R, z: f64
if iu == 0 { // |f| < 2**-20
if f == 0 {
// log1p(f) is 0 here; only the k*ln2 term and the correction remain.
if k == 0 {
return 0
}
c += f64(k) * LN2LO
return f64(k)*LN2HI + c
}
R = hfsq * (1.0 - 0.66666666666666666*f) // avoid division
if k == 0 {
return f - R
}
return f64(k)*LN2HI - ((R - (f64(k)*LN2LO + c)) - f)
}
// General case: polynomial in z = s*s with s = f/(2+f).
s = f / (2.0 + f)
z = s * s
R = z * (LP1 + z*(LP2+z*(LP3+z*(LP4+z*(LP5+z*(LP6+z*LP7))))))
if k == 0 {
return f - (hfsq - s*(hfsq+R))
}
return f64(k)*LN2HI - ((hfsq - (s*(hfsq+R) + (f64(k)*LN2LO + c))) - f)
}