make float parsing locale independent.

2026-02-26 04:45:08 +00:00 · 2016-05-16 19:53:34 +02:00
parent f4f7edf00f
commit 60e03ee27c
1 changed files with 104 additions and 37 deletions
--- a/lib/system/sysstr.nim
+++ b/lib/system/sysstr.nim
@@ -304,43 +304,37 @@ proc nimFloatToStr(f: float): string {.compilerproc.} =
 proc strtod(buf: cstring, endptr: ptr cstring): float64 {.importc,
  header: "<stdlib.h>", noSideEffect.}

-var decimalPoint: char
-
-proc getDecimalPoint(): char =
-  result = decimalPoint
-  if result == '\0':
-    if strtod("0,5", nil) == 0.5: result = ','
-    else: result = '.'
-    # yes this is threadsafe in practice, spare me:
-    decimalPoint = result
-
 const
  IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
+  powtens =   [ 1e0,   1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,
+                1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+                1e20, 1e21, 1e22]

 proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
                          start = 0): int {.compilerProc.} =
-  # This routine leverages `strtod()` for the non-trivial task of
-  # parsing floating point numbers correctly. Because `strtod()` is
-  # locale-dependent with respect to the radix character, we create
-  # a copy where the decimal point is replaced with the locale's
-  # radix character.
+  # This routine attempt to parse float that can parsed quickly.
+  # ie whose integer part can fit inside a 53bits integer.
+  # their real exponent must also be <= 22. If the float doesn't follow
+  # these restrictions, transform the float into this form:
+  #  INTEGER * 10 ^ exponent and leave the work to standard `strtod()`.
+  # This avoid the problems of decimal character portability.
+  # see: http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
  var
    i = start
    sign = 1.0
-    t: array[500, char] # flaviu says: 325 is the longest reasonable literal
-    ti = 0
-    hasdigits = false
-
-  template addToBuf(c) =
-    if ti < t.high:
-      t[ti] = c; inc(ti)
+    kdigits, fdigits = 0
+    exponent: int
+    integer: uint64
+    fraction: uint64
+    frac_exponent= 0
+    exp_sign = 1
+    first_digit = 0

  # Sign?
  if s[i] == '+' or s[i] == '-':
    if s[i] == '-':
      sign = -1.0
-    t[ti] = s[i]
-    inc(i); inc(ti)
+    inc(i)

  # NaN?
  if s[i] == 'N' or s[i] == 'n':
@@ -360,40 +354,113 @@ proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
          return i+3 - start
    return 0

+  # Skip leading zero
+  while s[i] == '0':
+    inc(i)
+
+  if s[i] in {'0'..'9'}:
+      first_digit = (s[i].ord - '0'.ord)
  # Integer part?
  while s[i] in {'0'..'9'}:
-    hasdigits = true
-    addToBuf(s[i])
+    inc(kdigits)
+    integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
    inc(i);
    while s[i] == '_': inc(i)

  # Fractional part?
  if s[i] == '.':
-    addToBuf(getDecimalPoint())
    inc(i)
+    # if no integer part, Skip leading zeros
+    if kdigits <= 0:
+      while s[i] in {'_','0'}:
+        inc(i)
+        inc(frac_exponent)
+
+    if s[i] in {'0'..'9'}:
+      first_digit = (s[i].ord - '0'.ord)
+    # get fractional part
    while s[i] in {'0'..'9'}:
-      hasdigits = true
-      addToBuf(s[i])
+      inc(fdigits)
+      inc(frac_exponent)
+      integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
      inc(i)
      while s[i] == '_': inc(i)
-  if not hasdigits:
+
+  # if has no digits: return error
+  # test for special case 0.0'f
+  if kdigits + fdigits <= 0 and i == start:
    return 0

-  # Exponent?
  if s[i] in {'e', 'E'}:
-    addToBuf(s[i])
    inc(i)
-    if s[i] in {'+', '-'}:
-      addToBuf(s[i])
+    if s[i] == '+' or s[i] == '-':
+      if s[i] == '-':
+        exp_sign = -1
+
      inc(i)
    if s[i] notin {'0'..'9'}:
      return 0
    while s[i] in {'0'..'9'}:
-      addToBuf(s[i])
+      exponent = exponent * 10 + (ord(s[i]) - ord('0'))
      inc(i)
-      while s[i] == '_': inc(i)
-  number = strtod(t, nil)
+      while s[i] == '_': inc(i) # underscores are allowed and ignored
+
+  var real_exponent = exp_sign*exponent - frac_exponent
+  let exp_negative = real_exponent < 0
+  var abs_exponent = abs(real_exponent)
+
+  # if exponent greater than can be represented: +/- zero or infinity
+  if abs_exponent > 999:
+    if exp_negative:
+      number = 0.0*sign
+    else:
+      number = Inf*sign
+    return i - start
+
+  # if integer is representable in 53 bits:  fast path
+  # max fast path integer is  1<<53 - 1 or  8999999999999999 (16 digits)
+  if kdigits + fdigits <= 16 and first_digit <= 8:
+    # max float power of ten with set bits above the 53th bit is 10^22
+    if abs_exponent <= 22:
+      if exp_negative:
+        number = sign * integer.float / powtens[abs_exponent]
+      else:
+        number = sign * integer.float * powtens[abs_exponent]
+      return i - start
+
+    # if exponent is greater try to fit extra exponent above 22 by multiplying
+    # integer part is there is space left.
+    let slop = 15 - kdigits - fdigits
+    if  abs_exponent <= 22 + slop and not exp_negative:
+      number = sign * integer.float * powtens[slop] * powtens[abs_exponent-slop]
+      return i - start
+
+  # if failed: slow path with strtod.
+  var t: array[500, char] # flaviu says: 325 is the longest reasonable literal
+  var ti = 0
+  let maxlen = t.high - "e+000".len # reserve enough space for exponent
+
  result = i - start
+  i = start
+  # re-parse without error checking, any error should be handled by the code above.
+  while s[i] in {'0'..'9','+','-'}:
+    if ti < maxlen:
+      t[ti] = s[i]; inc(ti)
+    inc(i)
+    while s[i] in {'.', '_'}: # skip underscore and decimal point
+      inc(i)
+
+  # insert exponent
+  t[ti] = 'E'; inc(ti)
+  t[ti] = if exp_negative: '-' else: '+'; inc(ti)
+  inc(ti, 3)
+
+  # insert adjusted exponent
+  t[ti-1] = ('0'.ord + abs_exponent mod 10).char; abs_exponent = abs_exponent div 10
+  t[ti-2] = ('0'.ord + abs_exponent mod 10).char; abs_exponent = abs_exponent div 10
+  t[ti-3] = ('0'.ord + abs_exponent mod 10).char
+
+  number = strtod(t, nil)

 proc nimInt64ToStr(x: int64): string {.compilerRtl.} =
  result = newString(sizeof(x)*4)