Add parse bin int, fixes #8018 (#8020)

* clarify `parseHexInt`, `parseOctInt` docstring and exception msgs

* add `parseBinInt` based on `parseutil.parseBin` implementation

Adds a `parseBinInt`, which parses a binary integer string and returns
it as an integer. This is based on the implementation of
`parseutil.parseBin`, removing the unnecessary parts.

* add tests for all `parse(Hex|Oct|Bin)Int` procs

* replace `parse*Int` proc impls by call to parseutil procs

Replaces the `parse(Hex|Oct|Bin)Int` procedure implementation by calls
to the `parseutil` procs, which receive a mutable argument.

The main advantage is that the empty string, as well as a "prefix
only" string such as "0x", now counts as an invalid integer.

Also moves the `parseOctInt` proc further up in the file so that all
`parse` procs are below one another.

* replace `var L` by `let L` in `parse` procs

There's no reason for the usage of `var` here.

* add `maxLen` optional arg for `parseutil.parse(Oct|Bin)`

Plus small change to test cases.

* update changelog about `parse*Int` procs

* fix `rejectParse` template in `tstrutils`

* make sure only `s.len` chars are parsed, if `maxLen+start` > s.len

Fixes a previous bug in `parseHex` (which would now also affect `parseOct`
and `parseBin`) that allowed `start + maxLen` to be larger than the
string's length. This resulted in an out-of-bounds access.

* move `parse*Int` proc change to breaking changes, add double `
This commit is contained in:
Vindaar
2018-06-13 19:32:12 +02:00
committed by Varriount
parent cd65ef0056
commit e80be6173d
4 changed files with 111 additions and 53 deletions

View File

@@ -48,6 +48,10 @@
- For string inputs, ``strutils.isUpperAscii`` and ``strutils.isLowerAscii`` now
require a second mandatory parameter ``skipNonAlpha``.
- The procs ``parseHexInt`` and ``parseOctInt`` now fail on empty strings
and strings containing only valid prefixes, e.g. "0x" for hex integers.
#### Breaking changes in the compiler
- The undocumented ``#? braces`` parsing mode was removed.
@@ -72,6 +76,8 @@
- Added the procs ``math.floorMod`` and ``math.floorDiv`` for floor based integer division.
- Added the procs ``rationals.`div```, ``rationals.`mod```, ``rationals.floorDiv`` and ``rationals.floorMod`` for rationals.
- Added the proc ``math.prod`` for product of elements in openArray.
- Added the proc ``parseBinInt`` to parse a binary integer from a string, which returns the value.
- ``parseOct`` and ``parseBin`` in ``parseutils`` now also support the ``maxLen`` argument, similar to ``parseHex``.
### Library changes
@@ -100,7 +106,6 @@
- Added the parameter ``val`` for the ``CritBitTree[T].incl`` proc.
- The proc ``tgamma`` was renamed to ``gamma``. ``tgamma`` is deprecated.
### Language additions
- Dot calls combined with explicit generic instantiations can now be written

View File

@@ -47,12 +47,14 @@ proc parseHex*(s: string, number: var int, start = 0; maxLen = 0): int {.
## discard parseHex("0x38", value)
## assert value == -200
##
## If 'maxLen==0' the length of the hexadecimal number has no
## upper bound. Not more than ```maxLen`` characters are parsed.
## If ``maxLen == 0`` the length of the hexadecimal number has no upper bound.
## Else no more than ``start + maxLen`` characters are parsed, up to the
## length of the string.
var i = start
var foundDigit = false
let last = if maxLen == 0: s.len else: i+maxLen
if i+1 < last and s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2)
# get last index based on minimum `start + maxLen` or `s.len`
let last = min(s.len, if maxLen == 0: s.len else: i+maxLen)
if i+1 < last and s[i] == '0' and (s[i+1] in {'x', 'X'}): inc(i, 2)
elif i < last and s[i] == '#': inc(i)
while i < last:
case s[i]
@@ -70,14 +72,20 @@ proc parseHex*(s: string, number: var int, start = 0; maxLen = 0): int {.
inc(i)
if foundDigit: result = i-start
proc parseOct*(s: string, number: var int, start = 0): int {.
proc parseOct*(s: string, number: var int, start = 0, maxLen = 0): int {.
rtl, extern: "npuParseOct", noSideEffect.} =
## parses an octal number and stores its value in ``number``. Returns
## Parses an octal number and stores its value in ``number``. Returns
## the number of the parsed characters or 0 in case of an error.
##
## If ``maxLen == 0`` the length of the octal number has no upper bound.
## Else no more than ``start + maxLen`` characters are parsed, up to the
## length of the string.
var i = start
var foundDigit = false
if i+1 < s.len and s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2)
while i < s.len:
# get last index based on minimum `start + maxLen` or `s.len`
let last = min(s.len, if maxLen == 0: s.len else: i+maxLen)
if i+1 < last and s[i] == '0' and (s[i+1] in {'o', 'O'}): inc(i, 2)
while i < last:
case s[i]
of '_': discard
of '0'..'7':
@@ -87,14 +95,20 @@ proc parseOct*(s: string, number: var int, start = 0): int {.
inc(i)
if foundDigit: result = i-start
proc parseBin*(s: string, number: var int, start = 0): int {.
proc parseBin*(s: string, number: var int, start = 0, maxLen = 0): int {.
rtl, extern: "npuParseBin", noSideEffect.} =
## parses an binary number and stores its value in ``number``. Returns
## Parses a binary number and stores its value in ``number``. Returns
## the number of the parsed characters or 0 in case of an error.
##
## If ``maxLen == 0`` the length of the binary number has no upper bound.
## Else no more than ``start + maxLen`` characters are parsed, up to the
## length of the string.
var i = start
var foundDigit = false
if i+1 < s.len and s[i] == '0' and (s[i+1] == 'b' or s[i+1] == 'B'): inc(i, 2)
while i < s.len:
# get last index based on minimum `start + maxLen` or `s.len`
let last = min(s.len, if maxLen == 0: s.len else: i+maxLen)
if i+1 < last and s[i] == '0' and (s[i+1] in {'b', 'B'}): inc(i, 2)
while i < last:
case s[i]
of '_': discard
of '0'..'1':

View File

@@ -844,7 +844,7 @@ proc parseInt*(s: string): int {.noSideEffect, procvar,
## Parses a decimal integer value contained in `s`.
##
## If `s` is not a valid integer, `ValueError` is raised.
var L = parseutils.parseInt(s, result, 0)
let L = parseutils.parseInt(s, result, 0)
if L != s.len or L == 0:
raise newException(ValueError, "invalid integer: " & s)
@@ -853,7 +853,7 @@ proc parseBiggestInt*(s: string): BiggestInt {.noSideEffect, procvar,
## Parses a decimal integer value contained in `s`.
##
## If `s` is not a valid integer, `ValueError` is raised.
var L = parseutils.parseBiggestInt(s, result, 0)
let L = parseutils.parseBiggestInt(s, result, 0)
if L != s.len or L == 0:
raise newException(ValueError, "invalid integer: " & s)
@@ -862,7 +862,7 @@ proc parseUInt*(s: string): uint {.noSideEffect, procvar,
## Parses a decimal unsigned integer value contained in `s`.
##
## If `s` is not a valid integer, `ValueError` is raised.
var L = parseutils.parseUInt(s, result, 0)
let L = parseutils.parseUInt(s, result, 0)
if L != s.len or L == 0:
raise newException(ValueError, "invalid unsigned integer: " & s)
@@ -871,7 +871,7 @@ proc parseBiggestUInt*(s: string): BiggestUInt {.noSideEffect, procvar,
## Parses a decimal unsigned integer value contained in `s`.
##
## If `s` is not a valid integer, `ValueError` is raised.
var L = parseutils.parseBiggestUInt(s, result, 0)
let L = parseutils.parseBiggestUInt(s, result, 0)
if L != s.len or L == 0:
raise newException(ValueError, "invalid unsigned integer: " & s)
@@ -880,33 +880,42 @@ proc parseFloat*(s: string): float {.noSideEffect, procvar,
## Parses a decimal floating point value contained in `s`. If `s` is not
## a valid floating point number, `ValueError` is raised. ``NAN``,
## ``INF``, ``-INF`` are also supported (case insensitive comparison).
var L = parseutils.parseFloat(s, result, 0)
let L = parseutils.parseFloat(s, result, 0)
if L != s.len or L == 0:
raise newException(ValueError, "invalid float: " & s)
proc parseBinInt*(s: string): int {.noSideEffect, procvar,
  rtl, extern: "nsuParseBinInt".} =
  ## Converts the binary integer literal stored in `s` and returns its value.
  ##
  ## `s` may start with one of the optional prefixes ``0b`` or ``0B``.
  ## Underscores within `s` are ignored.
  ##
  ## `ValueError` is raised when `s` is not a valid binary integer, i.e. when
  ## the parser consumed nothing or stopped before the end of `s`.
  let consumed = parseutils.parseBin(s, result, 0)
  # Valid only if at least one character was consumed and nothing is left over.
  let wholeInputParsed = consumed != 0 and consumed == s.len
  if not wholeInputParsed:
    raise newException(ValueError, "invalid binary integer: " & s)
proc parseOctInt*(s: string): int {.noSideEffect,
  rtl, extern: "nsuParseOctInt".} =
  ## Converts the octal integer literal stored in `s` and returns its value.
  ##
  ## `s` may start with one of the optional prefixes ``0o`` or ``0O``.
  ## Underscores within `s` are ignored.
  ##
  ## `ValueError` is raised when `s` is not a valid oct integer, i.e. when
  ## the parser consumed nothing or stopped before the end of `s`.
  let consumed = parseutils.parseOct(s, result, 0)
  # Valid only if at least one character was consumed and nothing is left over.
  let wholeInputParsed = consumed != 0 and consumed == s.len
  if not wholeInputParsed:
    raise newException(ValueError, "invalid oct integer: " & s)
proc parseHexInt*(s: string): int {.noSideEffect, procvar,
rtl, extern: "nsuParseHexInt".} =
## Parses a hexadecimal integer value contained in `s`.
##
## If `s` is not a valid integer, `ValueError` is raised. `s` can have one
## If `s` is not a valid hex integer, `ValueError` is raised. `s` can have one
## of the following optional prefixes: ``0x``, ``0X``, ``#``. Underscores
## within `s` are ignored.
var i = 0
if i+1 < s.len and s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2)
elif i < s.len and s[i] == '#': inc(i)
while i < s.len:
case s[i]
of '_': inc(i)
of '0'..'9':
result = result shl 4 or (ord(s[i]) - ord('0'))
inc(i)
of 'a'..'f':
result = result shl 4 or (ord(s[i]) - ord('a') + 10)
inc(i)
of 'A'..'F':
result = result shl 4 or (ord(s[i]) - ord('A') + 10)
inc(i)
else: raise newException(ValueError, "invalid integer: " & s)
let L = parseutils.parseHex(s, result, 0)
if L != s.len or L == 0:
raise newException(ValueError, "invalid hex integer: " & s)
proc generateHexCharToValueMap(): string =
## Generate a string to map a hex digit to uint value
@@ -1616,23 +1625,6 @@ proc delete*(s: var string, first, last: int) {.noSideEffect,
inc(j)
setLen(s, newLen)
proc parseOctInt*(s: string): int {.noSideEffect,
rtl, extern: "nsuParseOctInt".} =
## Parses an octal integer value contained in `s`.
##
## If `s` is not a valid integer, `ValueError` is raised. `s` can have one
## of the following optional prefixes: ``0o``, ``0O``. Underscores within
## `s` are ignored.
var i = 0
if i+1 < s.len and s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2)
while i < s.len:
case s[i]
of '_': inc(i)
of '0'..'7':
result = result shl 3 or (ord(s[i]) - ord('0'))
inc(i)
else: raise newException(ValueError, "invalid integer: " & s)
proc toOct*(x: BiggestInt, len: Positive): string {.noSideEffect,
rtl, extern: "nsuToOct".} =
## Converts `x` into its octal representation.

View File

@@ -7,6 +7,14 @@ discard """
import
strutils
import macros
template rejectParse(e) =
  ## Evaluates `e` and expects it to raise `ValueError`.
  ##
  ## If `e` completes without raising, an `AssertionError` naming the
  ## offending expression is raised. Any exception other than `ValueError`
  ## raised by `e` propagates unchanged.
  var sawValueError = false
  try:
    discard e
  except ValueError:
    sawValueError = true
  if not sawValueError:
    raise newException(AssertionError,
      "This was supposed to fail: $#!" % astToStr(e))
proc testStrip() =
write(stdout, strip(" ha "))
@@ -148,7 +156,6 @@ proc testDelete =
delete(s, 0, 0)
assert s == "1236789ABCDEFG"
proc testIsAlphaNumeric =
assert isAlphaNumeric("abcdABC1234") == true
assert isAlphaNumeric("a") == true
@@ -203,10 +210,50 @@ proc testCountLines =
assertCountLines("\nabc\n123")
assertCountLines("\nabc\n123\n")
proc testParseInts =
  ## Exercises `parseBinInt`, `parseHexInt` and `parseOctInt`: accepted
  ## spellings (prefixes, mixed case, underscores) must yield the expected
  ## value, malformed inputs must be rejected with `ValueError`.

  # binary: inputs that must parse, paired with their expected value
  for sample in [("0b1111", 15), ("0B1111", 15), ("1111", 15),
                 ("1110", 14), ("1_1_1_1", 15), ("0b1_1_1_1", 15)]:
    assert sample[0].parseBinInt == sample[1]
  # binary: empty, underscore only, prefix only, non-binary digits
  rejectParse "".parseBinInt
  rejectParse "_".parseBinInt
  rejectParse "0b".parseBinInt
  rejectParse "0b1234".parseBinInt

  # hex: inputs that must parse, paired with their expected value
  for sample in [("0x72", 114), ("0X72", 114), ("#72", 114), ("72", 114),
                 ("FF", 255), ("ff", 255), ("fF", 255), ("0x7_2", 114)]:
    assert sample[0].parseHexInt == sample[1]
  # hex: empty, underscore only, prefix only, trailing garbage, no digits
  rejectParse "".parseHexInt
  rejectParse "_".parseHexInt
  rejectParse "0x".parseHexInt
  rejectParse "0xFFG".parseHexInt
  rejectParse "reject".parseHexInt

  # octal: inputs that must parse, paired with their expected value
  for sample in [("0o17", 15), ("0O17", 15), ("17", 15), ("10", 8),
                 ("0o1_0_0", 64)]:
    assert sample[0].parseOctInt == sample[1]
  # octal: empty, underscore only, prefix only, non-octal digits, no digits
  rejectParse "".parseOctInt
  rejectParse "_".parseOctInt
  rejectParse "0o".parseOctInt
  rejectParse "9".parseOctInt
  rejectParse "0o9".parseOctInt
  rejectParse "reject".parseOctInt
testDelete()
testFind()
testRFind()
testCountLines()
testParseInts()
assert(insertSep($1000_000) == "1_000_000")
assert(insertSep($232) == "232")