use hexchar in stdlib (#16290)

2026-02-12 14:23:45 +00:00 · 2020-12-17 06:41:05 -06:00
parent 8cd3655dee
commit e1e069dd6c
7 changed files with 68 additions and 75 deletions
--- a/lib/pure/parsecfg.nim
+++ b/lib/pure/parsecfg.nim
@@ -172,8 +172,8 @@ runnableExamples:
  doAssert dict.getSectionValue(section4, "does_that_mean_anything_special") == "False"
  doAssert dict.getSectionValue(section4, "purpose") == "formatting for readability"

-import
-  strutils, lexbase, streams, tables
+import strutils, lexbase, streams, tables
+import std/private/decode_helpers

 include "system/inclrtl"

@@ -247,20 +247,6 @@ proc getFilename*(c: CfgParser): string {.rtl, extern: "npc$1".} =
  ## Gets the filename of the file that the parser processes.
  result = c.filename

-proc handleHexChar(c: var CfgParser, xi: var int) =
-  case c.buf[c.bufpos]
-  of '0'..'9':
-    xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0'))
-    inc(c.bufpos)
-  of 'a'..'f':
-    xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10)
-    inc(c.bufpos)
-  of 'A'..'F':
-    xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10)
-    inc(c.bufpos)
-  else:
-    discard
-
 proc handleDecChars(c: var CfgParser, xi: var int) =
  while c.buf[c.bufpos] in {'0'..'9'}:
    xi = (xi * 10) + (ord(c.buf[c.bufpos]) - ord('0'))
@@ -305,8 +291,10 @@ proc getEscapedChar(c: var CfgParser, tok: var Token) =
  of 'x', 'X':
    inc(c.bufpos)
    var xi = 0
-    handleHexChar(c, xi)
-    handleHexChar(c, xi)
+    if handleHexChar(c.buf[c.bufpos], xi):
+      inc(c.bufpos)
+      if handleHexChar(c.buf[c.bufpos], xi):
+        inc(c.bufpos)
    add(tok.literal, chr(xi))
  of '0'..'9':
    var xi = 0
--- a/lib/pure/parsejson.nim
+++ b/lib/pure/parsejson.nim
@@ -11,8 +11,8 @@
 ## and exported by the ``json`` standard library
 ## module, but can also be used in its own right.

-import
-  strutils, lexbase, streams, unicode
+import strutils, lexbase, streams, unicode
+import std/private/decode_helpers

 type
  JsonEventKind* = enum ## enumeration of all events that may occur when parsing
@@ -162,18 +162,11 @@ proc errorMsgExpected*(my: JsonParser, e: string): string =
  result = "$1($2, $3) Error: $4" % [
    my.filename, $getLine(my), $getColumn(my), e & " expected"]

-proc handleHexChar(c: char, x: var int): bool =
-  result = true # Success
-  case c
-  of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
-  of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
-  of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
-  else: result = false # error
-
 proc parseEscapedUTF16*(buf: cstring, pos: var int): int =
  result = 0
  #UTF-16 escape is always 4 bytes.
  for _ in 0..3:
+    # each char must be a hex digit: '0' .. '9', 'a' .. 'f', 'A' .. 'F'
    if handleHexChar(buf[pos], result):
      inc(pos)
    else:
--- a/lib/pure/parsesql.nim
+++ b/lib/pure/parsesql.nim
@@ -12,8 +12,8 @@
 ##
 ## Unstable API.

-import
-  strutils, lexbase
+import strutils, lexbase
+import std/private/decode_helpers

 # ------------------- scanner -------------------------------------------------

@@ -72,20 +72,6 @@ proc getColumn(L: SqlLexer): int =
 proc getLine(L: SqlLexer): int =
  result = L.lineNumber

-proc handleHexChar(c: var SqlLexer, xi: var int) =
-  case c.buf[c.bufpos]
-  of '0'..'9':
-    xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0'))
-    inc(c.bufpos)
-  of 'a'..'f':
-    xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10)
-    inc(c.bufpos)
-  of 'A'..'F':
-    xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10)
-    inc(c.bufpos)
-  else:
-    discard
-
 proc handleOctChar(c: var SqlLexer, xi: var int) =
  if c.buf[c.bufpos] in {'0'..'7'}:
    xi = (xi shl 3) or (ord(c.buf[c.bufpos]) - ord('0'))
@@ -130,8 +116,10 @@ proc getEscapedChar(c: var SqlLexer, tok: var Token) =
  of 'x', 'X':
    inc(c.bufpos)
    var xi = 0
-    handleHexChar(c, xi)
-    handleHexChar(c, xi)
+    if handleHexChar(c.buf[c.bufpos], xi):
+      inc(c.bufpos)
+      if handleHexChar(c.buf[c.bufpos], xi):
+        inc(c.bufpos)
    add(tok.literal, chr(xi))
  of '0'..'7':
    var xi = 0
--- a/lib/pure/pegs.nim
+++ b/lib/pure/pegs.nim
@@ -21,6 +21,7 @@ const
  useUnicode = true ## change this to deactivate proper UTF-8 support

 import strutils, macros
+import std/private/decode_helpers

 when useUnicode:
  import unicode
@@ -1466,19 +1467,6 @@ proc errorStr(L: PegLexer, msg: string, line = -1, col = -1): string =
  var col = if col < 0: getColumn(L) else: col
  result = "$1($2, $3) Error: $4" % [L.filename, $line, $col, msg]

-proc handleHexChar(c: var PegLexer, xi: var int) =
-  case c.buf[c.bufpos]
-  of '0'..'9':
-    xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0'))
-    inc(c.bufpos)
-  of 'a'..'f':
-    xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10)
-    inc(c.bufpos)
-  of 'A'..'F':
-    xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10)
-    inc(c.bufpos)
-  else: discard
-
 proc getEscapedChar(c: var PegLexer, tok: var Token) =
  inc(c.bufpos)
  if c.bufpos >= len(c.buf):
@@ -1515,8 +1503,10 @@ proc getEscapedChar(c: var PegLexer, tok: var Token) =
      tok.kind = tkInvalid
      return
    var xi = 0
-    handleHexChar(c, xi)
-    handleHexChar(c, xi)
+    if handleHexChar(c.buf[c.bufpos], xi):
+      inc(c.bufpos)
+      if handleHexChar(c.buf[c.bufpos], xi):
+        inc(c.bufpos)
    if xi == 0: tok.kind = tkInvalid
    else: add(tok.literal, chr(xi))
  of '0'..'9':
--- a/lib/std/private/decode_helpers.nim
+++ b/lib/std/private/decode_helpers.nim
@@ -1,9 +1,25 @@
-proc handleHexChar*(c: char, x: var int, f: var bool) {.inline.} =
+proc handleHexChar*(c: char, x: var int): bool {.inline.} =
+  ## Appends the hexadecimal digit `c` to `x` as its lowest 4 bits.
+  ## Returns `true` if `c` is in `0-9`, `a-f` or `A-F`; otherwise returns
+  ## `false` and leaves `x` unchanged.
+  ## On success the effect is `x = (x shl 4) or v`, with `v` the value of `c`.
+  runnableExamples:
+    var x = 0
+    assert handleHexChar('a', x)
+    assert x == 10
+
+    assert handleHexChar('B', x)
+    assert x == 171 # (10 shl 4) or 11
+
+    assert not handleHexChar('?', x)
+    assert x == 171 # unchanged
+  result = true # assume success; only the `else` branch below clears it
  case c
  of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
  of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
  of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
-  else: f = true
+  else:
+    result = false # not a hex digit: `x` is not modified

 proc decodePercent*(s: openArray[char], i: var int): char =
  ## Converts `%xx` hexadecimal to the character with ordinal number `xx`.
@@ -14,9 +30,6 @@ proc decodePercent*(s: openArray[char], i: var int): char =
  result = '%'
  if i+2 < s.len:
    var x = 0
-    var failed = false
-    handleHexChar(s[i+1], x, failed)
-    handleHexChar(s[i+2], x, failed)
-    if not failed:
+    if handleHexChar(s[i+1], x) and handleHexChar(s[i+2], x):
      result = chr(x)
      inc(i, 2)
--- a/nimsuggest/sexp.nim
+++ b/nimsuggest/sexp.nim
@@ -12,6 +12,8 @@
 import
  hashes, strutils, lexbase, streams, unicode, macros

+import std/private/decode_helpers
+
 type
  SexpEventKind* = enum  ## enumeration of all events that may occur when parsing
    sexpError,           ## an error occurred during parsing
@@ -113,14 +115,6 @@ proc errorMsgExpected*(my: SexpParser, e: string): string =
  ## other error messages
  result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), e & " expected"]

-proc handleHexChar(c: char, x: var int): bool =
-  result = true # Success
-  case c
-  of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
-  of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
-  of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
-  else: result = false # error
-
 proc parseString(my: var SexpParser): TTokKind =
  result = tkString
  var pos = my.bufpos + 1
--- a/tests/stdlib/tdecode_helpers.nim
+++ b/tests/stdlib/tdecode_helpers.nim
@@ -0,0 +1,27 @@
+import std/private/decode_helpers
+
+
+block: # 't' is not a hex digit: '%' is returned and `i` is not advanced
+  var i = 0
+  let c = decodePercent("%t9", i)
+  doAssert (i, c) == (0, '%')
+
+block: # only 2 chars, so `i+2 < s.len` fails: '%' returned, `i` unchanged
+  var i = 0
+  let c = decodePercent("19", i)
+  doAssert (i, c) == (0, '%')
+
+block: # two valid digits decode to the char 0x19 and `i` advances by 2
+  var i = 0
+  let c = decodePercent("%19", i)
+  doAssert (i, c) == (2, '\x19')
+
+block: # uppercase hex digit 'A' is accepted
+  var i = 0
+  let c = decodePercent("%A9", i)
+  doAssert (i, c) == (2, '\xA9')
+
+block: # upper- and lowercase hex digits may be mixed
+  var i = 0
+  let c = decodePercent("%Aa", i)
+  doAssert (i, c) == (2, '\xAA')