mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-30 18:02:05 +00:00
Float literals were not parsed properly when their fractional part exceeded 53 significant bits. This affected in particular math.PI and math.E. Rather than reinventing the wheel, this patch reuses C's strtod() implementation, which already does the heavy lifting with respect to correctness, though some caution is necessary to keep float parsing locale-independent.
419 lines
13 KiB
Nim
419 lines
13 KiB
Nim
#
|
|
#
|
|
# Nimrod's Runtime Library
|
|
# (c) Copyright 2012 Andreas Rumpf
|
|
#
|
|
# See the file "copying.txt", included in this
|
|
# distribution, for details about the copyright.
|
|
#
|
|
|
|
## This module contains helpers for parsing tokens, numbers, identifiers, etc.
|
|
|
|
{.deadCodeElim: on.}
|
|
|
|
{.push debugger:off .} # the user does not want to trace a part
|
|
# of the standard library!
|
|
|
|
include "system/inclrtl"
|
|
|
|
const
  # Character classes used by the parsers below (copied from strutils).
  Whitespace = {' ', '\t', '\v', '\r', '\l', '\f'}
  IdentStartChars = {'a'..'z', 'A'..'Z', '_'}
  IdentChars = IdentStartChars + {'0'..'9'}
|
|
|
|
proc toLower(c: char): char {.inline.} =
  ## Maps an ASCII upper-case letter to its lower-case counterpart; any
  ## other character is returned unchanged.
  if c >= 'A' and c <= 'Z':
    result = chr(ord(c) + (ord('a') - ord('A')))
  else:
    result = c
|
|
|
|
proc parseHex*(s: string, number: var int, start = 0): int {.
  rtl, extern: "npuParseHex", noSideEffect.} =
  ## Parses a hexadecimal number starting at ``s[start]`` and stores its
  ## value in ``number``. An optional ``0x``, ``0X`` or ``#`` prefix is
  ## accepted and underscores between digits are ignored.
  ##
  ## Returns the number of the parsed characters or 0 in case of an error. This
  ## proc is sensitive to the already existing value of ``number`` and will
  ## likely not do what you want unless you make sure ``number`` is zero. You
  ## can use this feature to *chain* calls, though the result int will quickly
  ## overflow. Example:
  ##
  ## .. code-block:: nimrod
  ##   var value = 0
  ##   discard parseHex("0x38", value)
  ##   assert value == 56
  ##   discard parseHex("0x34", value)
  ##   assert value == 56 * 256 + 52
  ##   value = -1
  ##   discard parseHex("0x38", value)
  ##   assert value == -200
  ##
  var i = start
  var foundDigit = false
  # Optional prefix. Every access is guarded so that the string is never
  # indexed at or beyond ``s.len``.
  if i+1 < s.len and s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'):
    inc(i, 2)
  elif i < s.len and s[i] == '#':
    inc(i)
  while i < s.len:
    case s[i]
    of '_': discard
    of '0'..'9':
      number = number shl 4 or (ord(s[i]) - ord('0'))
      foundDigit = true
    of 'a'..'f':
      number = number shl 4 or (ord(s[i]) - ord('a') + 10)
      foundDigit = true
    of 'A'..'F':
      number = number shl 4 or (ord(s[i]) - ord('A') + 10)
      foundDigit = true
    else: break
    inc(i)
  # Without at least one digit the result stays 0 (error).
  if foundDigit: result = i-start
|
|
|
|
proc parseOct*(s: string, number: var int, start = 0): int {.
  rtl, extern: "npuParseOct", noSideEffect.} =
  ## Parses an octal number starting at ``s[start]`` and stores its value in
  ## ``number``. An optional ``0o`` / ``0O`` prefix is accepted and
  ## underscores between digits are ignored. Returns the number of the
  ## parsed characters or 0 in case of an error.
  ##
  ## The digits are shifted into the existing value of ``number``, so
  ## ``number`` should be zero before the call unless chaining is intended.
  var i = start
  var foundDigit = false
  # Guarded accesses: the string is never indexed at or beyond ``s.len``.
  if i+1 < s.len and s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'):
    inc(i, 2)
  while i < s.len:
    case s[i]
    of '_': discard
    of '0'..'7':
      number = number shl 3 or (ord(s[i]) - ord('0'))
      foundDigit = true
    else: break
    inc(i)
  # Without at least one digit the result stays 0 (error).
  if foundDigit: result = i-start
|
|
|
|
proc parseIdent*(s: string, ident: var string, start = 0): int =
  ## Parses an identifier starting at ``s[start]`` and stores it in
  ## ``ident``. Returns the number of the parsed characters or 0 in case of
  ## an error; ``ident`` is left untouched when nothing was parsed.
  var i = start
  # The bounds checks keep the scan inside the string instead of relying on
  # a terminating '\0' being readable at ``s[s.len]``.
  if i < s.len and s[i] in IdentStartChars:
    inc(i)
    while i < s.len and s[i] in IdentChars: inc(i)
    ident = substr(s, start, i-1)
    result = i-start
|
|
|
|
proc parseIdent*(s: string, start = 0): string =
  ## Parses an identifier starting at ``s[start]``.
  ## Returns the parsed identifier or an empty string in case of an error.
  result = ""
  var i = start

  # The bounds checks keep the scan inside the string instead of relying on
  # a terminating '\0' being readable at ``s[s.len]``.
  if i < s.len and s[i] in IdentStartChars:
    inc(i)
    while i < s.len and s[i] in IdentChars: inc(i)

    result = substr(s, start, i-1)
|
|
|
|
proc parseToken*(s: string, token: var string, validChars: set[char],
                 start = 0): int {.inline, deprecated.} =
  ## Parses a token starting at ``s[start]`` and stores it in ``token``.
  ## Returns the number of the parsed characters or 0 in case of an error.
  ## A token consists of the characters in `validChars`.
  ##
  ## **Deprecated since version 0.8.12**: Use ``parseWhile`` instead.
  var i = start
  # Stop at the end of the string explicitly; this also keeps the scan
  # bounded when `validChars` happens to contain '\0'.
  while i < s.len and s[i] in validChars: inc(i)
  result = i-start
  token = substr(s, start, i-1)
|
|
|
|
proc skipWhitespace*(s: string, start = 0): int {.inline.} =
  ## Skips the whitespace starting at ``s[start]``. Returns the number of
  ## skipped characters.
  # The explicit length check avoids indexing the string at ``s.len``.
  while start+result < s.len and s[start+result] in Whitespace: inc(result)
|
|
|
|
proc skip*(s, token: string, start = 0): int {.inline.} =
  ## Skips the `token` starting at ``s[start]``. Returns the length of `token`
  ## or 0 if there was no `token` at ``s[start]``.
  # `result` counts matched characters; the length check on `s` stops the
  # comparison when `s` ends before `token` does.
  while result < token.len and start+result < s.len and
      s[start+result] == token[result]: inc(result)
  # A partial match counts as no match.
  if result != token.len: result = 0
|
|
|
|
proc skipIgnoreCase*(s, token: string, start = 0): int =
  ## Same as `skip` but case is ignored for token matching (characters are
  ## compared after `toLower`).
  # The length check on `s` stops the comparison when `s` ends before
  # `token` does.
  while result < token.len and start+result < s.len and
      toLower(s[start+result]) == toLower(token[result]): inc(result)
  # A partial match counts as no match.
  if result != token.len: result = 0
|
|
|
|
proc skipUntil*(s: string, until: set[char], start = 0): int {.inline.} =
  ## Skips all characters until one char from the set `until` is found
  ## or the end is reached. A '\0' character inside `s` also stops the scan.
  ## Returns number of characters skipped.
  # The length check comes first so the string is never indexed at ``s.len``.
  while start+result < s.len and s[start+result] notin until and
      s[start+result] != '\0': inc(result)
|
|
|
|
proc skipUntil*(s: string, until: char, start = 0): int {.inline.} =
  ## Skips all characters until the char `until` is found
  ## or the end is reached. A '\0' character inside `s` also stops the scan.
  ## Returns number of characters skipped.
  # The length check comes first so the string is never indexed at ``s.len``.
  while start+result < s.len and s[start+result] != until and
      s[start+result] != '\0': inc(result)
|
|
|
|
proc skipWhile*(s: string, toSkip: set[char], start = 0): int {.inline.} =
  ## Skips all characters while one char from the set `toSkip` is found.
  ## The scan also stops at the end of `s` and at a '\0' character.
  ## Returns number of characters skipped.
  # The length check comes first so the string is never indexed at ``s.len``.
  while start+result < s.len and s[start+result] in toSkip and
      s[start+result] != '\0': inc(result)
|
|
|
|
proc parseUntil*(s: string, token: var string, until: set[char],
                 start = 0): int {.inline.} =
  ## Copies into ``token`` the run of characters beginning at ``s[start]``
  ## that are not members of `until`, stopping at the first member or at
  ## the end of `s`. Returns the length of that run (0 if it is empty).
  var stop = start
  while stop < s.len:
    if s[stop] in until: break
    inc(stop)
  token = substr(s, start, stop-1)
  result = stop-start
|
|
|
|
proc parseUntil*(s: string, token: var string, until: char,
                 start = 0): int {.inline.} =
  ## Copies into ``token`` the run of characters beginning at ``s[start]``
  ## up to, but not including, the first `until` character or the end of
  ## `s`. Returns the length of that run (0 if it is empty).
  var stop = start
  while stop < s.len:
    if s[stop] == until: break
    inc(stop)
  token = substr(s, start, stop-1)
  result = stop-start
|
|
|
|
proc parseWhile*(s: string, token: var string, validChars: set[char],
                 start = 0): int {.inline.} =
  ## Parses a token starting at ``s[start]`` and stores it in ``token``.
  ## Returns the number of the parsed characters or 0 in case of an error.
  ## A token consists of the characters in `validChars`.
  var i = start
  # Stop at the end of the string explicitly; this also keeps the scan
  # bounded when `validChars` happens to contain '\0'.
  while i < s.len and s[i] in validChars: inc(i)
  result = i-start
  token = substr(s, start, i-1)
|
|
|
|
proc captureBetween*(s: string, first: char, second = '\0', start = 0): string =
  ## Locates the first occurrence of ``first`` at or after ``start`` and
  ## returns the text that follows it, up to (not including) the next
  ## ``second``. If ``second`` is '\0', ``first`` is used as the closing
  ## character as well.
  let closing = if second == '\0': first else: second
  # Index of the character right after the opening delimiter.
  let bodyStart = start + skipUntil(s, first, start) + 1
  result = ""
  discard parseUntil(s, result, closing, bodyStart)
|
|
|
|
{.push overflowChecks: on.}
# this must be compiled with overflow checking turned on:
proc rawParseInt(s: string, b: var BiggestInt, start = 0): int =
  ## Parses an optionally signed decimal integer starting at ``s[start]``
  ## into `b`. Underscores after a digit are skipped. Returns the number of
  ## characters consumed, or 0 (leaving `b` untouched) if no digit follows
  ## the optional sign. The arithmetic is overflow-checked.
  var
    # The value is accumulated as a negative number and multiplied by
    # `sign` at the end, so `sign` is -1 for non-negative input.
    sign: BiggestInt = -1
    i = start
  # All accesses are guarded so the string is never indexed at ``s.len``.
  if i < s.len:
    if s[i] == '+': inc(i)
    elif s[i] == '-':
      inc(i)
      sign = 1
  if i < s.len and s[i] in {'0'..'9'}:
    b = 0
    while i < s.len and s[i] in {'0'..'9'}:
      b = b * 10 - (ord(s[i]) - ord('0'))
      inc(i)
      # underscores are allowed and ignored
      while i < s.len and s[i] == '_': inc(i)
    b = b * sign
    result = i - start
{.pop.} # overflowChecks
|
|
|
|
proc parseBiggestInt*(s: string, number: var BiggestInt, start = 0): int {.
  rtl, extern: "npuParseBiggestInt", noSideEffect.} =
  ## Parses an integer starting at `start` and stores the value into `number`.
  ## Result is the number of processed chars or 0 if there is no integer.
  ## `EOverflow` is raised if an overflow occurs. `number` is only written
  ## when an integer was actually parsed.
  var res: BiggestInt
  # use 'res' for exception safety (don't write to 'number' in case of an
  # overflow exception or when no integer was found):
  result = rawParseInt(s, res, start)
  if result != 0:
    number = res
|
|
|
|
proc parseInt*(s: string, number: var int, start = 0): int {.
  rtl, extern: "npuParseInt", noSideEffect.} =
  ## Parses an integer starting at `start` and stores the value into `number`.
  ## Result is the number of processed chars or 0 if there is no integer.
  ## `EOverflow` is raised if an overflow occurs. `number` is only written
  ## when an integer was actually parsed.
  var res: BiggestInt
  result = parseBiggestInt(s, res, start)
  # The range check is only needed where `int` is narrower than
  # `BiggestInt`.
  if (sizeof(int) <= 4) and
      ((res < low(int)) or (res > high(int))):
    raise newException(EOverflow, "overflow")
  elif result != 0:
    number = int(res)
|
|
|
|
proc parseBiggestFloat*(s: string, number: var BiggestFloat, start = 0): int {.
  rtl, extern: "npuParseBiggestFloat", noSideEffect.} =
  ## Parses a float starting at `start` and stores the value into `number`.
  ## Result is the number of processed chars or 0 if a parsing error
  ## occurred; `number` is not written in the error case.
  ##
  ## Accepted input: an optional sign followed by either ``nan`` / ``inf``
  ## (any letter case, not followed by an identifier character) or by
  ## decimal digits with an optional fractional part and an optional
  ## exponent. Underscores after a digit are skipped. At least one digit
  ## is required before the exponent, and an exponent marker must be
  ## followed by at least one digit.

  type struct_lconv {.importc: "struct lconv",header:"<locale.h>".} =
    object
      # Unneeded fields have been omitted.
      decimal_point: cstring

  proc localeconv(): ptr struct_lconv {.importc, header: "<locale.h>",
    noSideEffect.}

  proc strtod(buf: cstring, endptr: ptr cstring): float64 {.importc,
    header: "<stdlib.h>", noSideEffect.}

  # Bounds-safe character access: positions at or past the end of `s` read
  # as '\0', so the string itself is never indexed out of range.
  template at(idx: int): char =
    (if idx < s.len: s[idx] else: '\0')

  # This routine leverages `strtod()` for the non-trivial task of
  # parsing floating point numbers correctly. Because `strtod()` is
  # locale-dependent with respect to the radix character, we create
  # a copy where the decimal point is replaced with the locale's
  # radix character.

  var
    i = start
    sign = 1.0
    t = ""          # normalized copy of the literal handed to strtod()
    hasdigits = false

  # Sign?
  if at(i) == '+' or at(i) == '-':
    if at(i) == '-':
      sign = -1.0
    add(t, at(i))
    inc(i)

  # NaN?
  if at(i) == 'N' or at(i) == 'n':
    if at(i+1) == 'A' or at(i+1) == 'a':
      if at(i+2) == 'N' or at(i+2) == 'n':
        if at(i+3) notin IdentChars:
          number = NaN
          return i+3 - start
    return 0

  # Inf?
  if at(i) == 'I' or at(i) == 'i':
    if at(i+1) == 'N' or at(i+1) == 'n':
      if at(i+2) == 'F' or at(i+2) == 'f':
        if at(i+3) notin IdentChars:
          number = Inf*sign
          return i+3 - start
    return 0

  # Integer part?
  while at(i) in {'0'..'9'}:
    hasdigits = true
    add(t, at(i))
    inc(i)
    while at(i) == '_': inc(i)

  # Fractional part?
  if at(i) == '.':
    # Emit the locale's radix character so strtod() recognizes it.
    add(t, localeconv().decimal_point)
    inc(i)
    while at(i) in {'0'..'9'}:
      hasdigits = true
      add(t, at(i))
      inc(i)
      while at(i) == '_': inc(i)
  if not hasdigits:
    return 0

  # Exponent?
  if at(i) in {'e', 'E'}:
    add(t, at(i))
    inc(i)
    if at(i) in {'+', '-'}:
      add(t, at(i))
      inc(i)
    if at(i) notin {'0'..'9'}:
      return 0
    while at(i) in {'0'..'9'}:
      add(t, at(i))
      inc(i)
      while at(i) == '_': inc(i)
  number = strtod(t, nil)
  result = i - start
|
|
|
|
proc parseFloat*(s: string, number: var float, start = 0): int {.
  rtl, extern: "npuParseFloat", noSideEffect.} =
  ## Parses a float starting at `start` and stores the value into `number`.
  ## Result is the number of processed chars or 0 if a parsing error
  ## occurred. `number` is only written when a float was actually parsed.
  var bf: BiggestFloat
  result = parseBiggestFloat(s, bf, start)
  # Do not overwrite the caller's value with the default 0.0 on failure.
  if result != 0:
    number = bf
|
|
|
|
type
  TInterpolatedKind* = enum ## Tells which kind of fragment
                            ## `interpolatedFragments` has yielded. The
                            ## member comments refer to the sample string
                            ## "str$$$var${expr}".
    ikStr,                  ## literal text (``str`` in the sample)
    ikDollar,               ## an escaped ``$`` (written ``$$``)
    ikVar,                  ## a ``$var`` reference (``var`` in the sample)
    ikExpr                  ## a ``${expr}`` block (``expr`` in the sample)
|
|
|
|
iterator interpolatedFragments*(s: string): tuple[kind: TInterpolatedKind,
  value: string] =
  ## Tokenizes the string `s` into substrings for interpolation purposes.
  ##
  ## Raises `EInvalidValue` if a ``${`` is not closed by a matching ``}``
  ## or if a ``$`` is not followed by ``{``, ``$`` or an identifier start
  ## character.
  ##
  ## Example:
  ##
  ## .. code-block:: nimrod
  ##   for k, v in interpolatedFragments("  $this is ${an  example}  $$"):
  ##     echo "(", k, ", \"", v, "\")"
  ##
  ## Results in:
  ##
  ## .. code-block:: nimrod
  ##   (ikStr, "  ")
  ##   (ikVar, "this")
  ##   (ikStr, " is ")
  ##   (ikExpr, "an  example")
  ##   (ikStr, "  ")
  ##   (ikDollar, "$")
  var i = 0
  var kind: TInterpolatedKind
  while true:
    var j = i
    if j < s.len and s[j] == '$':
      # Character after the '$'; '\0' stands in for "end of string" so the
      # string is never indexed out of range.
      let next = if j+1 < s.len: s[j+1] else: '\0'
      if next == '{':
        inc j, 2
        var nesting = 0
        while true:
          let c = if j < s.len: s[j] else: '\0'
          case c
          of '{': inc nesting
          of '}':
            if nesting == 0:
              inc j
              break
            dec nesting
          of '\0':
            raise newException(EInvalidValue,
              "Expected closing '}': " & substr(s, i))
          else: discard
          inc j
        inc i, 2 # skip ${
        kind = ikExpr
      elif next in IdentStartChars:
        inc j, 2
        while j < s.len and s[j] in IdentChars: inc(j)
        inc i # skip $
        kind = ikVar
      elif next == '$':
        inc j, 2
        inc i # skip $
        kind = ikDollar
      else:
        raise newException(EInvalidValue,
          "Unable to parse a varible name at " & substr(s, i))
    else:
      while j < s.len and s[j] != '$': inc j
      kind = ikStr
    if j > i:
      # do not copy the trailing } for ikExpr:
      yield (kind, substr(s, i, j-1-ord(kind == ikExpr)))
    else:
      # Nothing was consumed: the end of the string has been reached.
      break
    i = j
|
|
|
|
when isMainModule:
  # Self-test: print the fragments of a sample string, then check the
  # chaining behaviour of parseHex described in its documentation.
  for kind, fragment in interpolatedFragments(
      "$test{}  $this is ${an{  example}}  "):
    echo "(", kind, ", \"", fragment, "\")"
  var acc = 0
  discard parseHex("0x38", acc)
  assert acc == 56
  # A second call shifts the new digits into the existing value.
  discard parseHex("0x34", acc)
  assert acc == 56 * 256 + 52
  acc = -1
  discard parseHex("0x38", acc)
  assert acc == -200
|
|
|
|
|
|
{.pop.}
|