From 6944e2fc04cd5c0d8913e11df3202196c4344a49 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Sat, 29 May 2021 15:33:11 +0200 Subject: [PATCH 1/5] Add abs_f16 dependency when used. --- src/check_builtin.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 1630b5b7d..ccc234af9 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -1406,6 +1406,7 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 { Type *bt = base_type(operand->type); + if (are_types_identical(bt, t_f16)) add_package_dependency(c, "runtime", "abs_f16"); if (are_types_identical(bt, t_f32)) add_package_dependency(c, "runtime", "abs_f32"); if (are_types_identical(bt, t_f64)) add_package_dependency(c, "runtime", "abs_f64"); if (are_types_identical(bt, t_complex64)) add_package_dependency(c, "runtime", "abs_complex64"); From 55fc2c00c07246dcebe82fffcf145dcea6167c6c Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Sat, 29 May 2021 16:22:47 +0200 Subject: [PATCH 2/5] Add Endian versions of math routines. --- core/math/math.odin | 219 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 191 insertions(+), 28 deletions(-) diff --git a/core/math/math.odin b/core/math/math.odin index 16d40fccb..711a2d086 100644 --- a/core/math/math.odin +++ b/core/math/math.odin @@ -96,36 +96,170 @@ foreign _ { ldexp_f64 :: proc(val: f64, exp: i32) -> f64 ---; } -sqrt :: proc{sqrt_f16, sqrt_f32, sqrt_f64}; -sin :: proc{sin_f16, sin_f32, sin_f64}; -cos :: proc{cos_f16, cos_f32, cos_f64}; -pow :: proc{pow_f16, pow_f32, pow_f64}; -fmuladd :: proc{fmuladd_f16, fmuladd_f32, fmuladd_f64}; -ln :: proc{ln_f16, ln_f32, ln_f64}; -exp :: proc{exp_f16, exp_f32, exp_f64}; +sqrt_f16le :: proc(x: f16le) -> f16le { return #force_inline f16le(sqrt_f16(f16(x))); } +sqrt_f16be :: proc(x: f16be) -> f16be { return #force_inline f16be(sqrt_f16(f16(x))); } +sqrt_f32le :: proc(x: f32le) -> f32le { return #force_inline f32le(sqrt_f32(f32(x))); } +sqrt_f32be :: proc(x: f32be) -> f32be { return #force_inline f32be(sqrt_f32(f32(x))); } +sqrt_f64le :: proc(x: f64le) -> f64le { return #force_inline f64le(sqrt_f64(f64(x))); } +sqrt_f64be :: proc(x: f64be) -> f64be { return #force_inline f64be(sqrt_f64(f64(x))); } +sqrt :: proc{ + sqrt_f16, sqrt_f16le, sqrt_f16be, + sqrt_f32, sqrt_f32le, sqrt_f32be, + sqrt_f64, sqrt_f64le, sqrt_f64be, +}; -ldexp :: proc{ldexp_f16, ldexp_f32, ldexp_f64}; +sin_f16le :: proc(θ: f16le) -> f16le { return #force_inline f16le(sin_f16(f16(θ))); } +sin_f16be :: proc(θ: f16be) -> f16be { return #force_inline f16be(sin_f16(f16(θ))); } +sin_f32le :: proc(θ: f32le) -> f32le { return #force_inline f32le(sin_f32(f32(θ))); } +sin_f32be :: proc(θ: f32be) -> f32be { return #force_inline f32be(sin_f32(f32(θ))); } +sin_f64le :: proc(θ: f64le) -> f64le { return #force_inline f64le(sin_f64(f64(θ))); } +sin_f64be :: proc(θ: f64be) -> f64be { return #force_inline f64be(sin_f64(f64(θ))); } +sin :: proc{ + sin_f16, sin_f16le, sin_f16be, + sin_f32, sin_f32le, sin_f32be, + sin_f64, sin_f64le, sin_f64be, +}; -log_f16 :: proc(x, base: f16) -> f16 { return ln(x) / ln(base); } -log_f32 :: proc(x, base: f32) -> f32 { return ln(x) / ln(base); } -log_f64 :: proc(x, base: f64) -> f64 { return ln(x) / ln(base); } -log :: proc{log_f16, log_f32, log_f64}; +cos_f16le :: proc(θ: f16le) -> f16le { return #force_inline f16le(cos_f16(f16(θ))); } +cos_f16be :: proc(θ: f16be) -> f16be { return #force_inline f16be(cos_f16(f16(θ))); } +cos_f32le :: proc(θ: f32le) -> f32le { return #force_inline f32le(cos_f32(f32(θ))); } +cos_f32be :: proc(θ: f32be) -> f32be { return #force_inline f32be(cos_f32(f32(θ))); } +cos_f64le :: proc(θ: f64le) -> f64le { return #force_inline f64le(cos_f64(f64(θ))); } +cos_f64be :: proc(θ: f64be) -> f64be { return #force_inline f64be(cos_f64(f64(θ))); } +cos :: proc{ + cos_f16, cos_f16le, cos_f16be, + cos_f32, cos_f32le, cos_f32be, + cos_f64, cos_f64le, cos_f64be, +}; -log2_f16 :: proc(x: f16) -> f16 { return ln(x)/LN2; } -log2_f32 :: proc(x: f32) -> f32 { return ln(x)/LN2; } -log2_f64 :: proc(x: f64) -> f64 { return ln(x)/LN2; } -log2 :: proc{log2_f16, log2_f32, log2_f64}; +pow_f16le :: proc(x, power: f16le) -> f16le { return #force_inline f16le(pow_f16(f16(x), f16(power))); } +pow_f16be :: proc(x, power: f16be) -> f16be { return #force_inline f16be(pow_f16(f16(x), f16(power))); } +pow_f32le :: proc(x, power: f32le) -> f32le { return #force_inline f32le(pow_f32(f32(x), f32(power))); } +pow_f32be :: proc(x, power: f32be) -> f32be { return #force_inline f32be(pow_f32(f32(x), f32(power))); } +pow_f64le :: proc(x, power: f64le) -> f64le { return #force_inline f64le(pow_f64(f64(x), f64(power))); } +pow_f64be :: proc(x, power: f64be) -> f64be { return #force_inline f64be(pow_f64(f64(x), f64(power))); } +pow :: proc{ + pow_f16, pow_f16le, pow_f16be, + pow_f32, pow_f32le, pow_f32be, + pow_f64, pow_f64le, pow_f64be, +}; -log10_f16 :: proc(x: f16) -> f16 { return ln(x)/LN10; } -log10_f32 :: proc(x: f32) -> f32 { return ln(x)/LN10; } -log10_f64 :: proc(x: f64) -> f64 { return ln(x)/LN10; } -log10 :: proc{log10_f16, log10_f32, log10_f64}; +fmuladd_f16le :: proc(a, b, c: f16le) -> f16le { return #force_inline f16le(fmuladd_f16(f16(a), f16(b), f16(c))); } +fmuladd_f16be :: proc(a, b, c: f16be) -> f16be { return #force_inline f16be(fmuladd_f16(f16(a), f16(b), f16(c))); } +fmuladd_f32le :: proc(a, b, c: f32le) -> f32le { return #force_inline f32le(fmuladd_f32(f32(a), f32(b), f32(c))); } +fmuladd_f32be :: proc(a, b, c: f32be) -> f32be { return #force_inline f32be(fmuladd_f32(f32(a), f32(b), f32(c))); } +fmuladd_f64le :: proc(a, b, c: f64le) -> f64le { return #force_inline f64le(fmuladd_f64(f64(a), f64(b), f64(c))); } +fmuladd_f64be :: proc(a, b, c: f64be) -> f64be { return #force_inline f64be(fmuladd_f64(f64(a), f64(b), f64(c))); } +fmuladd :: proc{ + fmuladd_f16, fmuladd_f16le, fmuladd_f16be, + fmuladd_f32, fmuladd_f32le, fmuladd_f32be, + fmuladd_f64, fmuladd_f64le, fmuladd_f64be, +}; + +ln_f16le :: proc(x: f16le) -> f16le { return #force_inline f16le(ln_f16(f16(x))); } +ln_f16be :: proc(x: f16be) -> f16be { return #force_inline f16be(ln_f16(f16(x))); } +ln_f32le :: proc(x: f32le) -> f32le { return #force_inline f32le(ln_f32(f32(x))); } +ln_f32be :: proc(x: f32be) -> f32be { return #force_inline f32be(ln_f32(f32(x))); } +ln_f64le :: proc(x: f64le) -> f64le { return #force_inline f64le(ln_f64(f64(x))); } +ln_f64be :: proc(x: f64be) -> f64be { return #force_inline f64be(ln_f64(f64(x))); } +ln :: proc{ + ln_f16, ln_f16le, ln_f16be, + ln_f32, ln_f32le, ln_f32be, + ln_f64, ln_f64le, ln_f64be, +}; + +exp_f16le :: proc(x: f16le) -> f16le { return #force_inline f16le(exp_f16(f16(x))); } +exp_f16be :: proc(x: f16be) -> f16be { return #force_inline f16be(exp_f16(f16(x))); } +exp_f32le :: proc(x: f32le) -> f32le { return #force_inline f32le(exp_f32(f32(x))); } +exp_f32be :: proc(x: f32be) -> f32be { return #force_inline f32be(exp_f32(f32(x))); } +exp_f64le :: proc(x: f64le) -> f64le { return #force_inline f64le(exp_f64(f64(x))); } +exp_f64be :: proc(x: f64be) -> f64be { return #force_inline f64be(exp_f64(f64(x))); } +exp :: proc{ + exp_f16, exp_f16le, exp_f16be, + exp_f32, exp_f32le, exp_f32be, + exp_f64, exp_f64le, exp_f64be, +}; + +ldexp_f16le :: proc(val: f16le, exp: i32) -> f16le { return #force_inline f16le(ldexp_f16(f16(val), exp)); } +ldexp_f16be :: proc(val: f16be, exp: i32) -> f16be { return #force_inline f16be(ldexp_f16(f16(val), exp)); } +ldexp_f32le :: proc(val: f32le, exp: i32) -> f32le { return #force_inline f32le(ldexp_f32(f32(val), exp)); } +ldexp_f32be :: proc(val: f32be, exp: i32) -> f32be { return #force_inline f32be(ldexp_f32(f32(val), exp)); } +ldexp_f64le :: proc(val: f64le, exp: i32) -> f64le { return #force_inline f64le(ldexp_f64(f64(val), exp)); } +ldexp_f64be :: proc(val: f64be, exp: i32) -> f64be { return #force_inline f64be(ldexp_f64(f64(val), exp)); } +ldexp :: proc{ + ldexp_f16, ldexp_f16le, ldexp_f16be, + ldexp_f32, ldexp_f32le, ldexp_f32be, + ldexp_f64, ldexp_f64le, ldexp_f64be, +}; -tan_f16 :: proc(θ: f16) -> f16 { return sin(θ)/cos(θ); } -tan_f32 :: proc(θ: f32) -> f32 { return sin(θ)/cos(θ); } -tan_f64 :: proc(θ: f64) -> f64 { return sin(θ)/cos(θ); } -tan :: proc{tan_f16, tan_f32, tan_f64}; +log_f16 :: proc(x, base: f16) -> f16 { return ln(x) / ln(base); } +log_f16le :: proc(x, base: f16le) -> f16le { return f16le(log_f16(f16(x), f16(base))); } +log_f16be :: proc(x, base: f16be) -> f16be { return f16be(log_f16(f16(x), f16(base))); } + +log_f32 :: proc(x, base: f32) -> f32 { return ln(x) / ln(base); } +log_f32le :: proc(x, base: f32le) -> f32le { return f32le(log_f32(f32(x), f32(base))); } +log_f32be :: proc(x, base: f32be) -> f32be { return f32be(log_f32(f32(x), f32(base))); } + +log_f64 :: proc(x, base: f64) -> f64 { return ln(x) / ln(base); } +log_f64le :: proc(x, base: f64le) -> f64le { return f64le(log_f64(f64(x), f64(base))); } +log_f64be :: proc(x, base: f64be) -> f64be { return f64be(log_f64(f64(x), f64(base))); } +log :: proc{ + log_f16, log_f16le, log_f16be, + log_f32, log_f32le, log_f32be, + log_f64, log_f64le, log_f64be, +}; + +log2_f16 :: proc(x: f16) -> f16 { return ln(x)/LN2; } +log2_f16le :: proc(x: f16le) -> f16le { return f16le(log2_f16(f16(x))); } +log2_f16be :: proc(x: f16be) -> f16be { return f16be(log2_f16(f16(x))); } + +log2_f32 :: proc(x: f32) -> f32 { return ln(x)/LN2; } +log2_f32le :: proc(x: f32le) -> f32le { return f32le(log2_f32(f32(x))); } +log2_f32be :: proc(x: f32be) -> f32be { return f32be(log2_f32(f32(x))); } + +log2_f64 :: proc(x: f64) -> f64 { return ln(x)/LN2; } +log2_f64le :: proc(x: f64le) -> f64le { return f64le(log2_f64(f64(x))); } +log2_f64be :: proc(x: f64be) -> f64be { return f64be(log2_f64(f64(x))); } +log2 :: proc{ + log2_f16, log2_f16le, log2_f16be, + log2_f32, log2_f32le, log2_f32be, + log2_f64, log2_f64le, log2_f64be, +}; + +log10_f16 :: proc(x: f16) -> f16 { return ln(x)/LN10; } +log10_f16le :: proc(x: f16le) -> f16le { return f16le(log10_f16(f16(x))); } +log10_f16be :: proc(x: f16be) -> f16be { return f16be(log10_f16(f16(x))); } + +log10_f32 :: proc(x: f32) -> f32 { return ln(x)/LN10; } +log10_f32le :: proc(x: f32le) -> f32le { return f32le(log10_f32(f32(x))); } +log10_f32be :: proc(x: f32be) -> f32be { return f32be(log10_f32(f32(x))); } + +log10_f64 :: proc(x: f64) -> f64 { return ln(x)/LN10; } +log10_f64le :: proc(x: f64le) -> f64le { return f64le(log10_f64(f64(x))); } +log10_f64be :: proc(x: f64be) -> f64be { return f64be(log10_f64(f64(x))); } +log10 :: proc{ + log10_f16, log10_f16le, log10_f16be, + log10_f32, log10_f32le, log10_f32be, + log10_f64, log10_f64le, log10_f64be, +}; + +tan_f16 :: proc(θ: f16) -> f16 { return sin(θ)/cos(θ); } +tan_f16le :: proc(θ: f16le) -> f16le { return f16le(tan_f16(f16(θ))); } +tan_f16be :: proc(θ: f16be) -> f16be { return f16be(tan_f16(f16(θ))); } + +tan_f32 :: proc(θ: f32) -> f32 { return sin(θ)/cos(θ); } +tan_f32le :: proc(θ: f32le) -> f32le { return f32le(tan_f32(f32(θ))); } +tan_f32be :: proc(θ: f32be) -> f32be { return f32be(tan_f32(f32(θ))); } + +tan_f64 :: proc(θ: f64) -> f64 { return sin(θ)/cos(θ); } +tan_f64le :: proc(θ: f64le) -> f64le { return f64le(tan_f64(f64(θ))); } +tan_f64be :: proc(θ: f64be) -> f64be { return f64be(tan_f64(f64(θ))); } +tan :: proc{ + tan_f16, tan_f16le, tan_f16be, + tan_f32, tan_f32le, tan_f32be, + tan_f64, tan_f64le, tan_f64be, +}; lerp :: proc(a, b: $T, t: $E) -> (x: T) { return a*(1-t) + b*t; } saturate :: proc(a: $T) -> (x: T) { return clamp(a, 0, 1); }; @@ -135,6 +269,14 @@ unlerp_f32 :: proc(a, b, x: f32) -> (t: f32) { return (x-a)/(b-a); } unlerp_f64 :: proc(a, b, x: f64) -> (t: f64) { return (x-a)/(b-a); } unlerp :: proc{unlerp_f16, unlerp_f32, unlerp_f64}; +remap :: proc(old_value, old_min, old_max, new_min, new_max: $T) -> (x: T) where intrinsics.type_is_numeric(T), !intrinsics.type_is_array(T) { + old_range := old_max - old_min; + new_range := new_max - new_min; + if old_range == 0 { + return new_range / 2; + } + return ((old_value - old_min) / old_range) * new_range + new_min; +} wrap :: proc(x, y: $T) -> T where intrinsics.type_is_numeric(T), !intrinsics.type_is_array(T) { tmp := mod(x, y); @@ -709,6 +851,13 @@ is_inf_f16 :: proc(x: f16, sign: int = 0) -> bool { } return class == .Inf || class == .Neg_Inf; } +is_inf_f16le :: proc(x: f16le, sign: int = 0) -> bool { + return #force_inline is_inf_f16(f16(x), sign); +} +is_inf_f16be :: proc(x: f16be, sign: int = 0) -> bool { + return #force_inline is_inf_f16(f16(x), sign); +} + is_inf_f32 :: proc(x: f32, sign: int = 0) -> bool { class := classify(abs(x)); switch { @@ -719,6 +868,13 @@ is_inf_f32 :: proc(x: f32, sign: int = 0) -> bool { } return class == .Inf || class == .Neg_Inf; } +is_inf_f32le :: proc(x: f32le, sign: int = 0) -> bool { + return #force_inline is_inf_f32(f32(x), sign); +} +is_inf_f32be :: proc(x: f32be, sign: int = 0) -> bool { + return #force_inline is_inf_f32(f32(x), sign); +} + is_inf_f64 :: proc(x: f64, sign: int = 0) -> bool { class := classify(abs(x)); switch { @@ -729,8 +885,17 @@ is_inf_f64 :: proc(x: f64, sign: int = 0) -> bool { } return class == .Inf || class == .Neg_Inf; } -is_inf :: proc{is_inf_f16, is_inf_f32, is_inf_f64}; - +is_inf_f64le :: proc(x: f64le, sign: int = 0) -> bool { + return #force_inline is_inf_f64(f64(x), sign); +} +is_inf_f64be :: proc(x: f64be, sign: int = 0) -> bool { + return #force_inline is_inf_f64(f64(x), sign); +} +is_inf :: proc{ + is_inf_f16, is_inf_f16le, is_inf_f16be, + is_inf_f32, is_inf_f32le, is_inf_f32be, + is_inf_f64, is_inf_f64le, is_inf_f64be, +}; inf_f16 :: proc(sign: int) -> f16 { return f16(inf_f16(sign)); @@ -759,8 +924,6 @@ nan_f64 :: proc() -> f64 { return transmute(f64)v; } - - is_power_of_two :: proc(x: int) -> bool { return x > 0 && (x & (x-1)) == 0; } From a0a578c72a968fc4bd39af536bbee2cba9892bfc Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Sat, 29 May 2021 17:21:54 +0200 Subject: [PATCH 3/5] More Endian version of maths procs. --- core/math/math.odin | 182 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 140 insertions(+), 42 deletions(-) diff --git a/core/math/math.odin b/core/math/math.odin index 711a2d086..13505aebd 100644 --- a/core/math/math.odin +++ b/core/math/math.odin @@ -264,10 +264,9 @@ tan :: proc{ lerp :: proc(a, b: $T, t: $E) -> (x: T) { return a*(1-t) + b*t; } saturate :: proc(a: $T) -> (x: T) { return clamp(a, 0, 1); }; -unlerp_f16 :: proc(a, b, x: f16) -> (t: f16) { return (x-a)/(b-a); } -unlerp_f32 :: proc(a, b, x: f32) -> (t: f32) { return (x-a)/(b-a); } -unlerp_f64 :: proc(a, b, x: f64) -> (t: f64) { return (x-a)/(b-a); } -unlerp :: proc{unlerp_f16, unlerp_f32, unlerp_f64}; +unlerp :: proc(a, b, x: $T) -> (t: T) where intrinsics.type_is_float(T), !intrinsics.type_is_array(T) { + return (x-a)/(b-a); +} remap :: proc(old_value, old_min, old_max, new_min, new_max: $T) -> (x: T) where intrinsics.type_is_numeric(T), !intrinsics.type_is_array(T) { old_range := old_max - old_min; @@ -312,57 +311,105 @@ gain :: proc(t, g: $T) -> T where intrinsics.type_is_numeric(T) { } -sign_f16 :: proc(x: f16) -> f16 { return f16(int(0 < x) - int(x < 0)); } -sign_f32 :: proc(x: f32) -> f32 { return f32(int(0 < x) - int(x < 0)); } -sign_f64 :: proc(x: f64) -> f64 { return f64(int(0 < x) - int(x < 0)); } -sign :: proc{sign_f16, sign_f32, sign_f64}; +sign_f16 :: proc(x: f16) -> f16 { return f16(int(0 < x) - int(x < 0)); } +sign_f16le :: proc(x: f16le) -> f16le { return f16le(int(0 < x) - int(x < 0)); } +sign_f16be :: proc(x: f16be) -> f16be { return f16be(int(0 < x) - int(x < 0)); } +sign_f32 :: proc(x: f32) -> f32 { return f32(int(0 < x) - int(x < 0)); } +sign_f32le :: proc(x: f32le) -> f32le { return f32le(int(0 < x) - int(x < 0)); } +sign_f32be :: proc(x: f32be) -> f32be { return f32be(int(0 < x) - int(x < 0)); } +sign_f64 :: proc(x: f64) -> f64 { return f64(int(0 < x) - int(x < 0)); } +sign_f64le :: proc(x: f64le) -> f64le { return f64le(int(0 < x) - int(x < 0)); } +sign_f64be :: proc(x: f64be) -> f64be { return f64be(int(0 < x) - int(x < 0)); } +sign :: proc{ + sign_f16, sign_f16le, sign_f16be, + sign_f32, sign_f32le, sign_f32be, + sign_f64, sign_f64le, sign_f64be, +}; - -sign_bit_f16 :: proc(x: f16) -> bool { +sign_bit_f16 :: proc(x: f16) -> bool { return (transmute(u16)x) & (1<<15) != 0; } -sign_bit_f32 :: proc(x: f32) -> bool { +sign_bit_f16le :: proc(x: f16le) -> bool { return #force_inline sign_bit_f16(f16(x)); } +sign_bit_f16be :: proc(x: f16be) -> bool { return #force_inline sign_bit_f16(f16(x)); } +sign_bit_f32 :: proc(x: f32) -> bool { return (transmute(u32)x) & (1<<31) != 0; } -sign_bit_f64 :: proc(x: f64) -> bool { +sign_bit_f32le :: proc(x: f32le) -> bool { return #force_inline sign_bit_f32(f32(x)); } +sign_bit_f32be :: proc(x: f32be) -> bool { return #force_inline sign_bit_f32(f32(x)); } +sign_bit_f64 :: proc(x: f64) -> bool { return (transmute(u64)x) & (1<<63) != 0; } -sign_bit :: proc{sign_bit_f16, sign_bit_f32, sign_bit_f64}; +sign_bit_f64le :: proc(x: f64le) -> bool { return #force_inline sign_bit_f64(f64(x)); } +sign_bit_f64be :: proc(x: f64be) -> bool { return #force_inline sign_bit_f64(f64(x)); } +sign_bit :: proc{ + sign_bit_f16, sign_bit_f16le, sign_bit_f16be, + sign_bit_f32, sign_bit_f32le, sign_bit_f32be, + sign_bit_f64, sign_bit_f64le, sign_bit_f64be, +}; -copy_sign_f16 :: proc(x, y: f16) -> f16 { +copy_sign_f16 :: proc(x, y: f16) -> f16 { ix := transmute(u16)x; iy := transmute(u16)y; ix &= 0x7fff; ix |= iy & 0x8000; return transmute(f16)ix; } -copy_sign_f32 :: proc(x, y: f32) -> f32 { +copy_sign_f16le :: proc(x, y: f16le) -> f16le { return #force_inline f16le(copy_sign_f16(f16(x), f16(y))); } +copy_sign_f16be :: proc(x, y: f16be) -> f16be { return #force_inline f16be(copy_sign_f16(f16(x), f16(y))); } +copy_sign_f32 :: proc(x, y: f32) -> f32 { ix := transmute(u32)x; iy := transmute(u32)y; ix &= 0x7fff_ffff; ix |= iy & 0x8000_0000; return transmute(f32)ix; } -copy_sign_f64 :: proc(x, y: f64) -> f64 { +copy_sign_f32le :: proc(x, y: f32le) -> f32le { return #force_inline f32le(copy_sign_f32(f32(x), f32(y))); } +copy_sign_f32be :: proc(x, y: f32be) -> f32be { return #force_inline f32be(copy_sign_f32(f32(x), f32(y))); } +copy_sign_f64 :: proc(x, y: f64) -> f64 { ix := transmute(u64)x; iy := transmute(u64)y; ix &= 0x7fff_ffff_ffff_ffff; ix |= iy & 0x8000_0000_0000_0000; return transmute(f64)ix; } -copy_sign :: proc{copy_sign_f16, copy_sign_f32, copy_sign_f64}; +copy_sign_f64le :: proc(x, y: f64le) -> f64le { return #force_inline f64le(copy_sign_f64(f64(x), f64(y))); } +copy_sign_f64be :: proc(x, y: f64be) -> f64be { return #force_inline f64be(copy_sign_f64(f64(x), f64(y))); } +copy_sign :: proc{ + copy_sign_f16, copy_sign_f16le, copy_sign_f16be, + copy_sign_f32, copy_sign_f32le, copy_sign_f32be, + copy_sign_f64, copy_sign_f64le, copy_sign_f64be, +}; +to_radians_f16 :: proc(degrees: f16) -> f16 { return degrees * RAD_PER_DEG; } +to_radians_f16le :: proc(degrees: f16le) -> f16le { return degrees * RAD_PER_DEG; } +to_radians_f16be :: proc(degrees: f16be) -> f16be { return degrees * RAD_PER_DEG; } +to_radians_f32 :: proc(degrees: f32) -> f32 { return degrees * RAD_PER_DEG; } +to_radians_f32le :: proc(degrees: f32le) -> f32le { return degrees * RAD_PER_DEG; } +to_radians_f32be :: proc(degrees: f32be) -> f32be { return degrees * RAD_PER_DEG; } +to_radians_f64 :: proc(degrees: f64) -> f64 { return degrees * RAD_PER_DEG; } +to_radians_f64le :: proc(degrees: f64le) -> f64le { return degrees * RAD_PER_DEG; } +to_radians_f64be :: proc(degrees: f64be) -> f64be { return degrees * RAD_PER_DEG; } +to_degrees_f16 :: proc(radians: f16) -> f16 { return radians * DEG_PER_RAD; } +to_degrees_f16le :: proc(radians: f16le) -> f16le { return radians * DEG_PER_RAD; } +to_degrees_f16be :: proc(radians: f16be) -> f16be { return radians * DEG_PER_RAD; } +to_degrees_f32 :: proc(radians: f32) -> f32 { return radians * DEG_PER_RAD; } +to_degrees_f32le :: proc(radians: f32le) -> f32le { return radians * DEG_PER_RAD; } +to_degrees_f32be :: proc(radians: f32be) -> f32be { return radians * DEG_PER_RAD; } +to_degrees_f64 :: proc(radians: f64) -> f64 { return radians * DEG_PER_RAD; } +to_degrees_f64le :: proc(radians: f64le) -> f64le { return radians * DEG_PER_RAD; } +to_degrees_f64be :: proc(radians: f64be) -> f64be { return radians * DEG_PER_RAD; } +to_radians :: proc{ + to_radians_f16, to_radians_f16le, to_radians_f16be, + to_radians_f32, to_radians_f32le, to_radians_f32be, + to_radians_f64, to_radians_f64le, to_radians_f64be, +}; +to_degrees :: proc{ + to_degrees_f16, to_degrees_f16le, to_degrees_f16be, + to_degrees_f32, to_degrees_f32le, to_degrees_f32be, + to_degrees_f64, to_degrees_f64le, to_degrees_f64be, +}; -to_radians_f16 :: proc(degrees: f16) -> f16 { return degrees * RAD_PER_DEG; } -to_radians_f32 :: proc(degrees: f32) -> f32 { return degrees * RAD_PER_DEG; } -to_radians_f64 :: proc(degrees: f64) -> f64 { return degrees * RAD_PER_DEG; } -to_degrees_f16 :: proc(radians: f16) -> f16 { return radians * DEG_PER_RAD; } -to_degrees_f32 :: proc(radians: f32) -> f32 { return radians * DEG_PER_RAD; } -to_degrees_f64 :: proc(radians: f64) -> f64 { return radians * DEG_PER_RAD; } -to_radians :: proc{to_radians_f16, to_radians_f32, to_radians_f64}; -to_degrees :: proc{to_degrees_f16, to_degrees_f32, to_degrees_f64}; - -trunc_f16 :: proc(x: f16) -> f16 { +trunc_f16 :: proc(x: f16) -> f16 { trunc_internal :: proc(f: f16) -> f16 { mask :: 0x1f; shift :: 16 - 6; @@ -391,8 +438,10 @@ trunc_f16 :: proc(x: f16) -> f16 { } return trunc_internal(x); } +trunc_f16le :: proc(x: f16le) -> f16le { return #force_inline f16le(trunc_f16(f16(x))); } +trunc_f16be :: proc(x: f16be) -> f16be { return #force_inline f16be(trunc_f16(f16(x))); } -trunc_f32 :: proc(x: f32) -> f32 { +trunc_f32 :: proc(x: f32) -> f32 { trunc_internal :: proc(f: f32) -> f32 { mask :: 0xff; shift :: 32 - 9; @@ -421,8 +470,10 @@ trunc_f32 :: proc(x: f32) -> f32 { } return trunc_internal(x); } +trunc_f32le :: proc(x: f32le) -> f32le { return #force_inline f32le(trunc_f32(f32(x))); } +trunc_f32be :: proc(x: f32be) -> f32be { return #force_inline f32be(trunc_f32(f32(x))); } -trunc_f64 :: proc(x: f64) -> f64 { +trunc_f64 :: proc(x: f64) -> f64 { trunc_internal :: proc(f: f64) -> f64 { mask :: 0x7ff; shift :: 64 - 12; @@ -451,27 +502,64 @@ trunc_f64 :: proc(x: f64) -> f64 { } return trunc_internal(x); } +trunc_f64le :: proc(x: f64le) -> f64le { return #force_inline f64le(trunc_f64(f64(x))); } +trunc_f64be :: proc(x: f64be) -> f64be { return #force_inline f64be(trunc_f64(f64(x))); } +trunc :: proc{ + trunc_f16, trunc_f16le, trunc_f16be, + trunc_f32, trunc_f32le, trunc_f32be, + trunc_f64, trunc_f64le, trunc_f64be, +}; -trunc :: proc{trunc_f16, trunc_f32, trunc_f64}; - -round_f16 :: proc(x: f16) -> f16 { +round_f16 :: proc(x: f16) -> f16 { return ceil(x - 0.5) if x < 0 else floor(x + 0.5); } -round_f32 :: proc(x: f32) -> f32 { +round_f16le :: proc(x: f16le) -> f16le { return ceil(x - 0.5) if x < 0 else floor(x + 0.5); } -round_f64 :: proc(x: f64) -> f64 { +round_f16be :: proc(x: f16be) -> f16be { + return ceil(x - 0.5) if x < 0 else floor(x + 0.5); +} + +round_f32 :: proc(x: f32) -> f32 { + return ceil(x - 0.5) if x < 0 else floor(x + 0.5); +} +round_f32le :: proc(x: f32le) -> f32le { + return ceil(x - 0.5) if x < 0 else floor(x + 0.5); +} +round_f32be :: proc(x: f32be) -> f32be { + return ceil(x - 0.5) if x < 0 else floor(x + 0.5); +} +round_f64 :: proc(x: f64) -> f64 { + return ceil(x - 0.5) if x < 0 else floor(x + 0.5); +} +round_f64le :: proc(x: f64le) -> f64le { + return ceil(x - 0.5) if x < 0 else floor(x + 0.5); +} +round_f64be :: proc(x: f64be) -> f64be { return ceil(x - 0.5) if x < 0 else floor(x + 0.5); } round :: proc{round_f16, round_f32, round_f64}; -ceil_f16 :: proc(x: f16) -> f16 { return -floor(-x); } -ceil_f32 :: proc(x: f32) -> f32 { return -floor(-x); } -ceil_f64 :: proc(x: f64) -> f64 { return -floor(-x); } -ceil :: proc{ceil_f16, ceil_f32, ceil_f64}; +ceil_f16 :: proc(x: f16) -> f16 { return -floor(-x); } +ceil_f16le :: proc(x: f16le) -> f16le { return -floor(-x); } +ceil_f16be :: proc(x: f16be) -> f16be { return -floor(-x); } -floor_f16 :: proc(x: f16) -> f16 { +ceil_f32 :: proc(x: f32) -> f32 { return -floor(-x); } +ceil_f32le :: proc(x: f32le) -> f32le { return -floor(-x); } +ceil_f32be :: proc(x: f32be) -> f32be { return -floor(-x); } + +ceil_f64 :: proc(x: f64) -> f64 { return -floor(-x); } +ceil_f64le :: proc(x: f64le) -> f64le { return -floor(-x); } +ceil_f64be :: proc(x: f64be) -> f64be { return -floor(-x); } + +ceil :: proc{ + ceil_f16, ceil_f16le, ceil_f16be, + ceil_f32, ceil_f32le, ceil_f32be, + ceil_f64, ceil_f64le, ceil_f64be, +}; + +floor_f16 :: proc(x: f16) -> f16 { if x == 0 || is_nan(x) || is_inf(x) { return x; } @@ -485,7 +573,9 @@ floor_f16 :: proc(x: f16) -> f16 { d, _ := modf(x); return d; } -floor_f32 :: proc(x: f32) -> f32 { +floor_f16le :: proc(x: f16le) -> f16le { return #force_inline f16le(floor_f16(f16(x))); } +floor_f16be :: proc(x: f16be) -> f16be { return #force_inline f16be(floor_f16(f16(x))); } +floor_f32 :: proc(x: f32) -> f32 { if x == 0 || is_nan(x) || is_inf(x) { return x; } @@ -499,7 +589,9 @@ floor_f32 :: proc(x: f32) -> f32 { d, _ := modf(x); return d; } -floor_f64 :: proc(x: f64) -> f64 { +floor_f32le :: proc(x: f32le) -> f32le { return #force_inline f32le(floor_f32(f32(x))); } +floor_f32be :: proc(x: f32be) -> f32be { return #force_inline f32be(floor_f32(f32(x))); } +floor_f64 :: proc(x: f64) -> f64 { if x == 0 || is_nan(x) || is_inf(x) { return x; } @@ -513,7 +605,13 @@ floor_f64 :: proc(x: f64) -> f64 { d, _ := modf(x); return d; } -floor :: proc{floor_f16, floor_f32, floor_f64}; +floor_f64le :: proc(x: f64le) -> f64le { return #force_inline f64le(floor_f64(f64(x))); } +floor_f64be :: proc(x: f64be) -> f64be { return #force_inline f64be(floor_f64(f64(x))); } +floor :: proc{ + floor_f16, floor_f16le, floor_f16be, + floor_f32, floor_f32le, floor_f32be, + floor_f64, floor_f64le, floor_f64be, +}; floor_div :: proc(x, y: $T) -> T From c05f6b4a31666af0bfb346398b8e7420033a6a4b Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Sat, 29 May 2021 17:52:47 +0200 Subject: [PATCH 4/5] Even more Endian maths. --- core/math/math.odin | 114 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 96 insertions(+), 18 deletions(-) diff --git a/core/math/math.odin b/core/math/math.odin index 13505aebd..c2a08ca09 100644 --- a/core/math/math.odin +++ b/core/math/math.odin @@ -538,7 +538,11 @@ round_f64le :: proc(x: f64le) -> f64le { round_f64be :: proc(x: f64be) -> f64be { return ceil(x - 0.5) if x < 0 else floor(x + 0.5); } -round :: proc{round_f16, round_f32, round_f64}; +round :: proc{ + round_f16, round_f16le, round_f16be, + round_f32, round_f32le, round_f32be, + round_f64, round_f64le, round_f64be, +}; ceil_f16 :: proc(x: f16) -> f16 { return -floor(-x); } @@ -633,7 +637,7 @@ floor_mod :: proc(x, y: $T) -> T return r; } -modf_f16 :: proc(x: f16) -> (int: f16, frac: f16) { +modf_f16 :: proc(x: f16) -> (int: f16, frac: f16) { shift :: 16 - 5 - 1; mask :: 0x1f; bias :: 15; @@ -659,7 +663,15 @@ modf_f16 :: proc(x: f16) -> (int: f16, frac: f16) { frac = x - int; return; } -modf_f32 :: proc(x: f32) -> (int: f32, frac: f32) { +modf_f16le :: proc(x: f16le) -> (int: f16le, frac: f16le) { + i, f := #force_inline modf_f16(f16(x)); + return f16le(i), f16le(f); +} +modf_f16be :: proc(x: f16be) -> (int: f16be, frac: f16be) { + i, f := #force_inline modf_f16(f16(x)); + return f16be(i), f16be(f); +} +modf_f32 :: proc(x: f32) -> (int: f32, frac: f32) { shift :: 32 - 8 - 1; mask :: 0xff; bias :: 127; @@ -685,7 +697,15 @@ modf_f32 :: proc(x: f32) -> (int: f32, frac: f32) { frac = x - int; return; } -modf_f64 :: proc(x: f64) -> (int: f64, frac: f64) { +modf_f32le :: proc(x: f32le) -> (int: f32le, frac: f32le) { + i, f := #force_inline modf_f32(f32(x)); + return f32le(i), f32le(f); +} +modf_f32be :: proc(x: f32be) -> (int: f32be, frac: f32be) { + i, f := #force_inline modf_f32(f32(x)); + return f32be(i), f32be(f); +} +modf_f64 :: proc(x: f64) -> (int: f64, frac: f64) { shift :: 64 - 11 - 1; mask :: 0x7ff; bias :: 1023; @@ -711,10 +731,22 @@ modf_f64 :: proc(x: f64) -> (int: f64, frac: f64) { frac = x - int; return; } -modf :: proc{modf_f16, modf_f32, modf_f64}; +modf_f64le :: proc(x: f64le) -> (int: f64le, frac: f64le) { + i, f := #force_inline modf_f64(f64(x)); + return f64le(i), f64le(f); +} +modf_f64be :: proc(x: f64be) -> (int: f64be, frac: f64be) { + i, f := #force_inline modf_f64(f64(x)); + return f64be(i), f64be(f); +} +modf :: proc{ + modf_f16, modf_f16le, modf_f16be, + modf_f32, modf_f32le, modf_f32be, + modf_f64, modf_f64le, modf_f64be, +}; split_decimal :: modf; -mod_f16 :: proc(x, y: f16) -> (n: f16) { +mod_f16 :: proc(x, y: f16) -> (n: f16) { z := abs(y); n = remainder(abs(x), z); if sign(n) < 0 { @@ -722,7 +754,9 @@ mod_f16 :: proc(x, y: f16) -> (n: f16) { } return copy_sign(n, x); } -mod_f32 :: proc(x, y: f32) -> (n: f32) { +mod_f16le :: proc(x, y: f16le) -> (n: f16le) { return #force_inline f16le(mod_f16(f16(x), f16(y))); } +mod_f16be :: proc(x, y: f16be) -> (n: f16be) { return #force_inline f16be(mod_f16(f16(x), f16(y))); } +mod_f32 :: proc(x, y: f32) -> (n: f32) { z := abs(y); n = remainder(abs(x), z); if sign(n) < 0 { @@ -730,7 +764,9 @@ mod_f32 :: proc(x, y: f32) -> (n: f32) { } return copy_sign(n, x); } -mod_f64 :: proc(x, y: f64) -> (n: f64) { +mod_f32le :: proc(x, y: f32le) -> (n: f32le) { return #force_inline f32le(mod_f32(f32(x), f32(y))); } +mod_f32be :: proc(x, y: f32be) -> (n: f32be) { return #force_inline f32be(mod_f32(f32(x), f32(y))); } +mod_f64 :: proc(x, y: f64) -> (n: f64) { z := abs(y); n = remainder(abs(x), z); if sign(n) < 0 { @@ -738,14 +774,28 @@ mod_f64 :: proc(x, y: f64) -> (n: f64) { } return copy_sign(n, x); } -mod :: proc{mod_f16, mod_f32, mod_f64}; - -remainder_f16 :: proc(x, y: f16) -> f16 { return x - round(x/y) * y; } -remainder_f32 :: proc(x, y: f32) -> f32 { return x - round(x/y) * y; } -remainder_f64 :: proc(x, y: f64) -> f64 { return x - round(x/y) * y; } -remainder :: proc{remainder_f16, remainder_f32, remainder_f64}; - +mod_f64le :: proc(x, y: f64le) -> (n: f64le) { return #force_inline f64le(mod_f64(f64(x), f64(y))); } +mod_f64be :: proc(x, y: f64be) -> (n: f64be) { return #force_inline f64be(mod_f64(f64(x), f64(y))); } +mod :: proc{ + mod_f16, mod_f16le, mod_f16be, + mod_f32, mod_f32le, mod_f32be, + mod_f64, mod_f64le, mod_f64be, +}; +remainder_f16 :: proc(x, y: f16 ) -> f16 { return x - round(x/y) * y; } +remainder_f16le :: proc(x, y: f16le) -> f16le { return x - round(x/y) * y; } +remainder_f16be :: proc(x, y: f16be) -> f16be { return x - round(x/y) * y; } +remainder_f32 :: proc(x, y: f32 ) -> f32 { return x - round(x/y) * y; } +remainder_f32le :: proc(x, y: f32le) -> f32le { return x - round(x/y) * y; } +remainder_f32be :: proc(x, y: f32be) -> f32be { return x - round(x/y) * y; } +remainder_f64 :: proc(x, y: f64 ) -> f64 { return x - round(x/y) * y; } +remainder_f64le :: proc(x, y: f64le) -> f64le { return x - round(x/y) * y; } +remainder_f64be :: proc(x, y: f64be) -> f64be { return x - round(x/y) * y; } +remainder :: proc{ + remainder_f16, remainder_f16le, remainder_f16be, + remainder_f32, remainder_f32le, remainder_f32be, + remainder_f64, remainder_f64le, remainder_f64be, +}; gcd :: proc(x, y: $T) -> T where intrinsics.type_is_ordered_numeric(T) { @@ -762,14 +812,30 @@ lcm :: proc(x, y: $T) -> T return x / gcd(x, y) * y; } -frexp_f16 :: proc(x: f16) -> (significand: f16, exponent: int) { +frexp_f16 :: proc(x: f16) -> (significand: f16, exponent: int) { f, e := frexp_f64(f64(x)); return f16(f), e; } -frexp_f32 :: proc(x: f32) -> (significand: f32, exponent: int) { +frexp_f16le :: proc(x: f16le) -> (significand: f16le, exponent: int) { + f, e := frexp_f64(f64(x)); + return f16le(f), e; +} +frexp_f16be :: proc(x: f16be) -> (significand: f16be, exponent: int) { + f, e := frexp_f64(f64(x)); + return f16be(f), e; +} +frexp_f32 :: proc(x: f32) -> (significand: f32, exponent: int) { f, e := frexp_f64(f64(x)); return f32(f), e; } +frexp_f32le :: proc(x: f32le) -> (significand: f32le, exponent: int) { + f, e := frexp_f64(f64(x)); + return f32le(f), e; +} +frexp_f32be :: proc(x: f32be) -> (significand: f32be, exponent: int) { + f, e := frexp_f64(f64(x)); + return f32be(f), e; +} frexp_f64 :: proc(x: f64) -> (significand: f64, exponent: int) { switch { case x == 0: @@ -790,7 +856,19 @@ frexp_f64 :: proc(x: f64) -> (significand: f64, exponent: int) { } return; } -frexp :: proc{frexp_f16, frexp_f32, frexp_f64}; +frexp_f64le :: proc(x: f64le) -> (significand: f64le, exponent: int) { + f, e := frexp_f64(f64(x)); + return f64le(f), e; +} +frexp_f64be :: proc(x: f64be) -> (significand: f64be, exponent: int) { + f, e := frexp_f64(f64(x)); + return f64be(f), e; +} +frexp :: proc{ + frexp_f16, frexp_f16le, frexp_f16be, + frexp_f32, frexp_f32le, frexp_f32be, + frexp_f64, frexp_f64le, frexp_f64be, +}; From d7dba495fd5703facfb8e4339da161da895dab35 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Sat, 29 May 2021 18:27:43 +0200 Subject: [PATCH 5/5] Last of the Endian float in math.odin. --- core/math/math.odin | 188 ++++++++++++++++++++++++++------------------ 1 file changed, 112 insertions(+), 76 deletions(-) diff --git a/core/math/math.odin b/core/math/math.odin index c2a08ca09..1239d3e61 100644 --- a/core/math/math.odin +++ b/core/math/math.odin @@ -934,7 +934,7 @@ factorial :: proc(n: int) -> int { return table[n]; } -classify_f16 :: proc(x: f16) -> Float_Class { +classify_f16 :: proc(x: f16) -> Float_Class { switch { case x == 0: i := transmute(i16)x; @@ -958,7 +958,9 @@ classify_f16 :: proc(x: f16) -> Float_Class { } return .Normal; } -classify_f32 :: proc(x: f32) -> Float_Class { +classify_f16le :: proc(x: f16le) -> Float_Class { return #force_inline classify_f16(f16(x)); } +classify_f16be :: proc(x: f16be) -> Float_Class { return #force_inline classify_f16(f16(x)); } +classify_f32 :: proc(x: f32) -> Float_Class { switch { case x == 0: i := transmute(i32)x; @@ -982,7 +984,9 @@ classify_f32 :: proc(x: f32) -> Float_Class { } return .Normal; } -classify_f64 :: proc(x: f64) -> Float_Class { +classify_f32le :: proc(x: f32le) -> Float_Class { return #force_inline classify_f32(f32(x)); } +classify_f32be :: proc(x: f32be) -> Float_Class { return #force_inline classify_f32(f32(x)); } +classify_f64 :: proc(x: f64) -> Float_Class { switch { case x == 0: i := transmute(i64)x; @@ -1005,13 +1009,28 @@ classify_f64 :: proc(x: f64) -> Float_Class { } return .Normal; } -classify :: proc{classify_f16, classify_f32, classify_f64}; - -is_nan_f16 :: proc(x: f16) -> bool { return classify(x) == .NaN; } -is_nan_f32 :: proc(x: f32) -> bool { return classify(x) == .NaN; } -is_nan_f64 :: proc(x: f64) -> bool { return classify(x) == .NaN; } -is_nan :: proc{is_nan_f16, is_nan_f32, is_nan_f64}; +classify_f64le :: proc(x: f64le) -> Float_Class { return #force_inline classify_f64(f64(x)); } +classify_f64be :: proc(x: f64be) -> Float_Class { return #force_inline classify_f64(f64(x)); } +classify :: proc{ + classify_f16, classify_f16le, classify_f16be, + classify_f32, classify_f32le, classify_f32be, + classify_f64, classify_f64le, classify_f64be, +}; +is_nan_f16 :: proc(x: f16) -> bool { return classify(x) == .NaN; } +is_nan_f16le :: proc(x: f16le) -> bool { return classify(x) == .NaN; } +is_nan_f16be :: proc(x: f16be) -> bool { return classify(x) == .NaN; } +is_nan_f32 :: proc(x: f32) -> bool { return classify(x) == .NaN; } +is_nan_f32le :: proc(x: f32le) -> bool { return classify(x) == .NaN; } +is_nan_f32be :: proc(x: f32be) -> bool { return classify(x) == .NaN; } +is_nan_f64 :: proc(x: f64) -> bool { return classify(x) == .NaN; } +is_nan_f64le :: proc(x: f64le) -> bool { return classify(x) == .NaN; } +is_nan_f64be :: proc(x: f64be) -> bool { return classify(x) == .NaN; } +is_nan :: proc{ + is_nan_f16, is_nan_f16le, is_nan_f16be, + is_nan_f32, is_nan_f32le, is_nan_f32be, + is_nan_f64, is_nan_f64le, is_nan_f64be, +}; // is_inf reports whether f is an infinity, according to sign. // If sign > 0, is_inf reports whether f is positive infinity. @@ -1073,13 +1092,25 @@ is_inf :: proc{ is_inf_f64, is_inf_f64le, is_inf_f64be, }; -inf_f16 :: proc(sign: int) -> f16 { - return f16(inf_f16(sign)); +inf_f16 :: proc(sign: int) -> f16 { + return f16(inf_f64(sign)); } -inf_f32 :: proc(sign: int) -> f32 { +inf_f16le :: proc(sign: int) -> f16le { + return f16le(inf_f64(sign)); +} +inf_f16be :: proc(sign: int) -> f16be { + return f16be(inf_f64(sign)); +} +inf_f32 :: proc(sign: int) -> f32 { return f32(inf_f64(sign)); } -inf_f64 :: proc(sign: int) -> f64 { +inf_f32le :: proc(sign: int) -> f32le { + return f32le(inf_f64(sign)); +} +inf_f32be :: proc(sign: int) -> f32be { + return f32be(inf_f64(sign)); +} +inf_f64 :: proc(sign: int) -> f64 { v: u64; if sign >= 0 { v = 0x7ff00000_00000000; @@ -1088,17 +1119,41 @@ inf_f64 :: proc(sign: int) -> f64 { } return transmute(f64)v; } +inf_f64le :: proc(sign: int) -> f64le { + return f64le(inf_f64(sign)); +} +inf_f64be :: proc(sign: int) -> f64be { + return f64be(inf_f64(sign)); +} -nan_f16 :: proc() -> f16 { +nan_f16 :: proc() -> f16 { return f16(nan_f64()); } -nan_f32 :: proc() -> f32 { +nan_f16le :: proc() -> f16le { + return f16le(nan_f64()); +} +nan_f16be :: proc() -> f16be { + return f16be(nan_f64()); +} +nan_f32 :: proc() -> f32 { return f32(nan_f64()); } -nan_f64 :: proc() -> f64 { +nan_f32le :: proc() -> f32le { + return f32le(nan_f64()); +} +nan_f32be :: proc() -> f32be { + return f32be(nan_f64()); +} +nan_f64 :: proc() -> f64 { v: u64 = 0x7ff80000_00000001; return transmute(f64)v; } +nan_f64le :: proc() -> f64le { + return f64le(nan_f64()); +} +nan_f64be :: proc() -> f64be { + return f64be(nan_f64()); +} is_power_of_two :: proc(x: int) -> bool { return x > 0 && (x & (x-1)) == 0; @@ -1155,14 +1210,30 @@ cumsum :: proc(dst, src: $T/[]$E) -> T } -atan2_f16 :: proc(y, x: f16) -> f16 { +atan2_f16 :: proc(y, x: f16) -> f16 { // TODO(bill): Better atan2_f16 return f16(atan2_f64(f64(y), f64(x))); } -atan2_f32 :: proc(y, x: f32) -> f32 { +atan2_f16le :: proc(y, x: f16le) -> f16le { + // TODO(bill): Better atan2_f16 + return f16le(atan2_f64(f64(y), f64(x))); +} +atan2_f16be :: proc(y, x: f16be) -> f16be { + // TODO(bill): Better atan2_f16 + return f16be(atan2_f64(f64(y), f64(x))); +} +atan2_f32 :: proc(y, x: f32) -> f32 { // TODO(bill): Better atan2_f32 return f32(atan2_f64(f64(y), f64(x))); } +atan2_f32le :: proc(y, x: f32le) -> f32le { + // TODO(bill): Better atan2_f32 + return f32le(atan2_f64(f64(y), f64(x))); +} +atan2_f32be :: proc(y, x: f32be) -> f32be { + // TODO(bill): Better atan2_f32 + return f32be(atan2_f64(f64(y), f64(x))); +} atan2_f64 :: proc(y, x: f64) -> f64 { // TODO(bill): Faster atan2_f64 if possible @@ -1249,80 +1320,45 @@ atan2_f64 :: proc(y, x: f64) -> f64 { } return q; } +atan2_f64le :: proc(y, x: f64le) -> f64le { + // TODO(bill): Better atan2_f32 + return f64le(atan2_f64(f64(y), f64(x))); +} +atan2_f64be :: proc(y, x: f64be) -> f64be { + // TODO(bill): Better atan2_f32 + return f64be(atan2_f64(f64(y), f64(x))); +} +atan2 :: proc{ + atan2_f16, atan2_f16le, atan2_f16be, + atan2_f32, atan2_f32le, atan2_f32be, + atan2_f64, atan2_f64le, atan2_f64be, +}; -atan2 :: proc{atan2_f16, atan2_f32, atan2_f64}; +atan :: proc(x: $T) -> T where intrinsics.type_is_float(x) { + return atan2(x, 1); +} -atan_f16 :: proc(x: f16) -> f16 { - return atan2_f16(x, 1); +asin :: proc(x: $T) -> T where intrinsics.type_is_float(x) { + return atan2(x, 1 + sqrt(1 - x*x)); } -atan_f32 :: proc(x: f32) -> f32 { - return atan2_f32(x, 1); -} -atan_f64 :: proc(x: f64) -> f64 { - return atan2_f64(x, 1); -} -atan :: proc{atan_f16, atan_f32, atan_f64}; -asin_f16 :: proc(x: f16) -> f16 { - return atan2_f16(x, 1 + sqrt_f16(1 - x*x)); +acos :: proc(x: $T) -> T where intrinsics.type_is_float(x) { + return 2 * atan2(sqrt(1 - x), sqrt(1 + x)); } -asin_f32 :: proc(x: f32) -> f32 { - return atan2_f32(x, 1 + sqrt_f32(1 - x*x)); -} -asin_f64 :: proc(x: f64) -> f64 { - return atan2_f64(x, 1 + sqrt_f64(1 - x*x)); -} -asin :: proc{asin_f16, asin_f32, asin_f64}; -acos_f16 :: proc(x: f16) -> f16 { - return 2 * atan2_f16(sqrt_f16(1 - x), sqrt_f16(1 + x)); -} -acos_f32 :: proc(x: f32) -> f32 { - return 2 * atan2_f32(sqrt_f32(1 - x), sqrt_f32(1 + x)); -} -acos_f64 :: proc(x: f64) -> f64 { - return 2 * atan2_f64(sqrt_f64(1 - x), sqrt_f64(1 + x)); -} -acos :: proc{acos_f16, acos_f32, acos_f64}; - - -sinh_f16 :: proc(x: f16) -> f16 { +sinh :: proc(x: $T) -> T where intrinsics.type_is_float(x) { return (exp(x) - exp(-x))*0.5; } -sinh_f32 :: proc(x: f32) -> f32 { - return (exp(x) - exp(-x))*0.5; -} -sinh_f64 :: proc(x: f64) -> f64 { - return (exp(x) - exp(-x))*0.5; -} -sinh :: proc{sinh_f16, sinh_f32, sinh_f64}; -cosh_f16 :: proc(x: f16) -> f16 { +cosh :: proc(x: $T) -> T where intrinsics.type_is_float(x) { return (exp(x) + exp(-x))*0.5; } -cosh_f32 :: proc(x: f32) -> f32 { - return (exp(x) + exp(-x))*0.5; -} -cosh_f64 :: proc(x: f64) -> f64 { - return (exp(x) + exp(-x))*0.5; -} -cosh :: proc{cosh_f16, cosh_f32, cosh_f64}; -tanh_f16 :: proc(x: f16) -> f16 { +tanh :: proc(x: $T) -> T where intrinsics.type_is_float(x) { t := exp(2*x); return (t - 1) / (t + 1); } -tanh_f32 :: proc(x: f32) -> f32 { - t := exp(2*x); - return (t - 1) / (t + 1); -} -tanh_f64 :: proc(x: f64) -> f64 { - t := exp(2*x); - return (t - 1) / (t + 1); -} -tanh :: proc{tanh_f16, tanh_f32, tanh_f64}; - F16_DIG :: 3; F16_EPSILON :: 0.00097656;