strconv.parse_f64 - accurately parse floats

This commit is contained in:
gingerBill
2022-11-21 13:00:24 +00:00
parent 3dfd53aee0
commit b9ec2de4db
3 changed files with 434 additions and 111 deletions
+83
View File
@@ -9,6 +9,89 @@ Decimal :: struct {
neg, trunc: bool,
}
set :: proc(d: ^Decimal, s: string) -> (ok: bool) {
d^ = {}
if len(s) == 0 {
return
}
i := 0
switch s[i] {
case '+': i += 1
case '-': i += 1; d.neg = true
}
// digits
saw_dot := false
saw_digits := false
for ; i < len(s); i += 1 {
switch {
case s[i] == '_':
// ignore underscores
continue
case s[i] == '.':
if saw_dot {
return
}
saw_dot = true
d.decimal_point = d.count
continue
case '0' <= s[i] && s[i] <= '9':
saw_digits = true
if s[i] == '0' && d.count == 0 {
d.decimal_point -= 1
continue
}
if d.count < len(d.digits) {
d.digits[d.count] = s[i]
d.count += 1
} else if s[i] != '0' {
d.trunc = true
}
continue
}
break
}
if !saw_digits {
return
}
if !saw_dot {
d.decimal_point = d.count
}
lower :: #force_inline proc "contextless" (ch: byte) -> byte { return ('a' - 'A') | ch }
if i < len(s) && lower(s[i]) == 'e' {
i += 1
if i >= len(s) {
return
}
exp_sign := 1
switch s[i] {
case '+': i += 1
case '-': i += 1; exp_sign = -1
}
if i >= len(s) || s[i] < '0' || s[i] > '9' {
return
}
e := 0
for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i += 1 {
if s[i] == '_' {
// ignore underscores
continue
}
if e < 1e4 {
e = e*10 + int(s[i]) - '0'
}
}
d.decimal_point += e * exp_sign
}
return i == len(s)
}
decimal_to_string :: proc(buf: []byte, a: ^Decimal) -> string {
digit_zero :: proc(buf: []byte) -> int {
for _, i in buf {
+86
View File
@@ -284,3 +284,89 @@ round_shortest :: proc(d: ^decimal.Decimal, mant: u64, exp: int, flt: ^Float_Inf
}
}
@(private)
decimal_to_float_bits :: proc(d: ^decimal.Decimal, info: ^Float_Info) -> (b: u64, overflow: bool) {
end :: proc "contextless" (d: ^decimal.Decimal, mant: u64, exp: int, info: ^Float_Info) -> (b: u64) {
bits := mant & (u64(1)<<info.mantbits - 1)
bits |= u64((exp-info.bias) & (1<<info.expbits - 1)) << info.mantbits
if d.neg {
bits |= 1<< info.mantbits << info.expbits
}
return bits
}
set_overflow :: proc "contextless" (mant: ^u64, exp: ^int, info: ^Float_Info) -> bool {
mant^ = 0
exp^ = 1<<info.expbits - 1 + info.bias
return true
}
mant: u64
exp: int
if d.decimal_point == 0 {
mant = 0
exp = info.bias
b = end(d, mant, exp, info)
return
}
if d.decimal_point > 310 {
set_overflow(&mant, &exp, info)
b = end(d, mant, exp, info)
return
} else if d.decimal_point < -330 {
mant = 0
exp = info.bias
b = end(d, mant, exp, info)
return
}
@static power_table := [?]int{1, 3, 6, 9, 13, 16, 19, 23, 26}
exp = 0
for d.decimal_point > 0 {
n := 27 if d.decimal_point >= len(power_table) else power_table[d.decimal_point]
decimal.shift(d, n)
exp += n
}
for d.decimal_point < 0 || d.decimal_point == 0 && d.digits[0] < '5' {
n := 27 if -d.decimal_point >= len(power_table) else power_table[-d.decimal_point]
decimal.shift(d, n)
exp -= n
}
// go from [0.5, 1) to [1, 2)
exp -= 1
if exp < info.bias + 1 {
n := info.bias + 1 - exp
decimal.shift(d, n)
exp += n
}
if (exp-info.bias) >= (1<<info.expbits - 1) {
set_overflow(&mant, &exp, info)
b = end(d, mant, exp, info)
return
}
decimal.shift(d, int(1 + info.mantbits))
mant = decimal.rounded_integer(d)
if mant == 2<<info.mantbits {
mant >>= 1
exp += 1
if (exp-info.bias) >= (1<<info.expbits - 1) {
set_overflow(&mant, &exp, info)
b = end(d, mant, exp, info)
return
}
}
if mant & (1<<info.mantbits) == 0 {
exp = info.bias
}
b = end(d, mant, exp, info)
return
}
+265 -111
View File
@@ -1,6 +1,7 @@
package strconv
import "core:unicode/utf8"
import "decimal"
parse_bool :: proc(s: string, n: ^int = nil) -> (result: bool = false, ok: bool) {
switch s {
@@ -532,6 +533,8 @@ parse_u128_maybe_prefixed :: proc(str: string, n: ^int = nil) -> (value: u128, o
parse_u128 :: proc{parse_u128_maybe_prefixed, parse_u128_of_base}
@(private)
lower :: #force_inline proc "contextless" (ch: byte) -> byte { return ('a' - 'A') | ch }
@@ -566,133 +569,284 @@ parse_f32 :: proc(s: string, n: ^int = nil) -> (value: f32, ok: bool) {
// assert(n == 12.34 && ok);
// ```
parse_f64 :: proc(str: string, n: ^int = nil) -> (value: f64, ok: bool) {
s := str
defer if n != nil { n^ = len(str) - len(s) }
if s == "" {
common_prefix_len_ignore_case :: proc "contextless" (s, prefix: string) -> int {
n := len(prefix)
if n > len(s) {
n = len(s)
}
for i in 0..<n {
c := s[i]
if 'A' <= c && c <= 'Z' {
c += 'a' - 'A'
}
if c != prefix[i] {
return i
}
}
return n
}
check_special :: proc "contextless" (s: string) -> (f: f64, n: int, ok: bool) {
s := s
if len(s) > 0 {
sign := 1
nsign := 0
switch s[0] {
case '+', '-':
if s[0] == '-' {
sign = -1
}
nsign = 1
s = s[1:]
fallthrough
case 'i', 'I':
n := common_prefix_len_ignore_case(s, "infinity")
if 3 < n && n < 8 { // "inf" or "infinity"
n = 3
}
if n == 3 || n == 8 {
f = 0h7ff00000_00000000 if sign == 1 else 0hfff00000_00000000
n = nsign + 3
ok = true
return
}
case 'n', 'N':
if common_prefix_len_ignore_case(s, "nan") == 3 {
f = 0h7ff80000_00000001
n = nsign + 3
ok = true
return
}
}
}
return
}
parse_components :: proc "contextless" (s: string) -> (mantissa: u64, exp: int, neg, trunc, hex: bool, i: int, ok: bool) {
if len(s) == 0 {
return
}
switch s[i] {
case '+': i += 1
case '-': i += 1; neg = true
}
base := u64(10)
MAX_MANT_DIGITS := 19
exp_char := byte('e')
// support stupid 0x1.ABp100 hex floats even if Odin doesn't
if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' {
base = 16
MAX_MANT_DIGITS = 16
i += 2
exp_char = 'p'
hex = true
}
underscores := false
saw_dot, saw_digits := false, false
nd := 0
nd_mant := 0
decimal_point := 0
loop: for ; i < len(s); i += 1 {
switch c := s[i]; true {
case c == '_':
underscores = true
continue loop
case c == '.':
if saw_dot {
break loop
}
saw_dot = true
decimal_point = nd
continue loop
case '0' <= c && c <= '9':
saw_digits = true
if c == '0' && nd == 0 {
decimal_point -= 1
continue loop
}
nd += 1
if nd_mant < MAX_MANT_DIGITS {
mantissa *= base
mantissa += u64(c - '0')
nd_mant += 1
} else if c != '0' {
trunc = true
}
continue loop
case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
saw_digits = true
nd += 1
if nd_mant < MAX_MANT_DIGITS {
MAX_MANT_DIGITS *= 16
MAX_MANT_DIGITS += int(lower(c) - 'a' + 10)
nd_mant += 1
} else {
trunc = true
}
continue loop
}
break loop
}
if !saw_digits {
return
}
if !saw_dot {
decimal_point = nd
}
if base == 16 {
decimal_point *= 4
nd_mant *= 4
}
if i < len(s) && lower(s[i]) == exp_char {
i += 1
if i >= len(s) { return }
exp_sign := 1
switch s[i] {
case '+': i += 1
case '-': i += 1; exp_sign = -1
}
if i >= len(s) || s[i] < '0' || s[i] > '9' {
return
}
e := 0
for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i += 1 {
if s[i] == '_' {
underscores = true
continue
}
if e < 1e5 {
e = e*10 + int(s[i]) - '0'
}
}
decimal_point += e * exp_sign
} else if base == 16 {
return
}
if mantissa != 0 {
exp = decimal_point - nd_mant
}
// TODO(bill): check underscore correctness
ok = true
return
}
i := 0
parse_hex :: proc(s: string, mantissa: u64, exp: int, neg, trunc: bool) -> (f64, bool) {
info := &_f64_info
sign: f64 = 1
seen_sign := true
switch s[i] {
case '-': i += 1; sign = -1
case '+': i += 1
case: seen_sign = false
}
mantissa, exp := mantissa, exp
for ; i < len(s); i += 1 {
r := rune(s[i])
if r == '_' {
continue
MAX_EXP := 1<<info.expbits + info.bias - 2
MIN_EXP := info.bias + 1
exp += int(info.mantbits)
for mantissa != 0 && mantissa >> (info.mantbits+2) == 0 {
mantissa <<= 1
exp -= 1
}
if trunc {
mantissa |= 1
}
v := _digit_value(r)
if v >= 10 {
if r == '.' || r == 'e' || r == 'E' { // Skip parsing NaN and Inf if it's probably a regular float
break
}
if len(s) >= 3 + i {
buf: [4]u8
copy(buf[:], s[i:][:3])
v2 := transmute(u32)buf
v2 &= 0xDFDFDFDF // Knock out lower-case bits
buf = transmute([4]u8)v2
when ODIN_ENDIAN == .Little {
if v2 == 0x464e49 { // "INF"
s = s[3+i:]
value = 0h7ff00000_00000000 if sign == 1 else 0hfff00000_00000000
return value, len(s) == 0
} else if v2 == 0x4e414e { // "NAN"
s = s[3+i:]
return 0h7ff80000_00000001, len(s) == 0
}
} else {
if v2 == 0x494e4600 { // "\0FNI"
s = s[3+i:]
value = 0h7ff00000_00000000 if sign == 1 else 0hfff00000_00000000
return value, len(s) == 0
} else if v2 == 0x4e414e00 { // "\0NAN"
s = s[3+i:]
return 0h7ff80000_00000001, len(s) == 0
}
}
}
break
for mantissa >> (info.mantbits+2) == 0 {
mantissa = mantissa>>1 | mantissa&1
exp += 1
}
value *= 10
value += f64(v)
// denormalize
if mantissa > 1 && exp < MIN_EXP-2 {
mantissa = mantissa>>1 | mantissa&1
exp += 1
}
round := mantissa & 3
mantissa >>= 2
round |= mantissa & 1 // round to even
exp += 2
if round == 3 {
mantissa += 1
if mantissa == 1 << (1 + info.mantbits) {
mantissa >>= 1
exp += 1
}
}
if mantissa>>info.mantbits == 0 {
// zero or denormal
exp = info.bias
}
ok := true
if exp > MAX_EXP {
// infinity or invalid
mantissa = 1<<info.mantbits
exp = MAX_EXP + 1
ok = false
}
bits := mantissa & (1<<info.mantbits - 1)
bits |= u64((exp-info.bias) & (1<<info.expbits - 1)) << info.mantbits
if neg {
bits |= 1 << info.mantbits << info.expbits
}
return transmute(f64)bits, ok
}
if i < len(s) && s[i] == '.' {
pow10: f64 = 10
i += 1
for ; i < len(s); i += 1 {
r := rune(s[i])
if r == '_' {
continue
}
nr: int
defer if n != nil { n^ = nr }
v := _digit_value(r)
if v >= 10 {
break
if value, nr, ok = check_special(str); ok {
return
}
mantissa: u64
exp: int
neg, trunc, hex: bool
mantissa, exp, neg, trunc, hex, nr = parse_components(str) or_return
if hex {
return parse_hex(str, mantissa, exp, neg, trunc)
}
trunc_block: if !trunc {
@static pow10 := [?]f64{
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
1e20, 1e21, 1e22,
}
if mantissa>>_f64_info.mantbits != 0 {
return
}
f := f64(mantissa)
if neg {
f = -f
}
switch {
case exp == 0:
return f, true
case exp > 0 && exp <= 15+22:
if exp > 22 {
f *= pow10[exp-22]
exp = 22
}
value += f64(v)/pow10
pow10 *= 10
if f > 1e15 || f < 1e-15 {
break trunc_block
}
return f * pow10[exp], true
case -22 <= exp && exp < 0:
return f / pow10[-exp], true
}
}
frac := false
scale: f64 = 1
if i < len(s) && (s[i] == 'e' || s[i] == 'E') {
i += 1
if i < len(s) {
switch s[i] {
case '-': i += 1; frac = true
case '+': i += 1
}
exp: u32 = 0
for ; i < len(s); i += 1 {
r := rune(s[i])
if r == '_' {
continue
}
d := u32(_digit_value(r))
if d >= 10 {
break
}
exp = exp * 10 + d
}
if exp > 308 { exp = 308 }
for exp >= 50 { scale *= 1e50; exp -= 50 }
for exp >= 8 { scale *= 1e8; exp -= 8 }
for exp > 0 { scale *= 10; exp -= 1 }
}
}
// If we only consumed a sign, return false
if i == 1 && seen_sign {
return 0, false
}
s = s[i:]
if frac {
value = sign * (value/scale)
} else {
value = sign * (value*scale)
}
ok = len(s) == 0
d: decimal.Decimal
decimal.set(&d, str[:nr])
b, overflow := decimal_to_float_bits(&d, &_f64_info)
value = transmute(f64)b
ok = !overflow
return
}