refactor cmpIgnoreStyle and cmpIgnoreCase (#16399)

* init * support strutils * more * better * Call len once per string/cstring * Change var to let * Compare ternary on first char * More appropriate param name * fix * better * one test * impl * more efficient * minor Co-authored-by: Clyybber <darkmine956@gmail.com>
2026-07-25 10:01:56 +00:00 · 2020-12-31 04:54:40 -06:00
parent 5984f7a7dd
commit 5fb56a3b2c
8 changed files with 141 additions and 147 deletions
--- a/lib/core/macros.nim
+++ b/lib/core/macros.nim
@@ -1389,25 +1389,10 @@ when defined(nimVmEqIdent):
    ## these nodes will be unwrapped.

 else:
+  from std/private/strimpl import cmpIgnoreStyleImpl
  # this procedure is optimized for native code, it should not be compiled to nimVM bytecode.
  proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} =
-    proc toLower(c: char): char {.inline.} =
-      if c in {'A'..'Z'}: result = chr(ord(c) + (ord('a') - ord('A')))
-      else: result = c
-    var i = 0
-    var j = 0
-    # first char is case sensitive
-    if a[0] != b[0]: return 1
-    while true:
-      while a[i] == '_': inc(i)
-      while b[j] == '_': inc(j) # BUGFIX: typo
-      var aa = toLower(a[i])
-      var bb = toLower(b[j])
-      result = ord(aa) - ord(bb)
-      if result != 0 or aa == '\0': break
-      inc(i)
-      inc(j)
-
+    cmpIgnoreStyleImpl(a, b, true)

  proc eqIdent*(a, b: string): bool = cmpIgnoreStyle(a, b) == 0
    ## Check if two idents are equal.
--- a/lib/core/typeinfo.nim
+++ b/lib/core/typeinfo.nim
@@ -91,6 +91,8 @@ when not defined(gcDestructors):
 else:
  include system/seqs_v2_reimpl

+from std/private/strimpl import cmpIgnoreStyleImpl
+
 when not defined(js):
  template rawType(x: Any): PNimType =
    cast[PNimType](x.rawTypePtr)
@@ -366,36 +368,22 @@ iterator fields*(x: Any): tuple[name: string, any: Any] =
  for name, any in items(ret):
    yield ($name, any)

-proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} =
-  proc toLower(c: char): char {.inline.} =
-    if c in {'A'..'Z'}: result = chr(ord(c) + (ord('a') - ord('A')))
-    else: result = c
-  var i = 0
-  var j = 0
-  if a[0] != b[0]: return 1
-  while true:
-    while a[i] == '_': inc(i)
-    while b[j] == '_': inc(j) # BUGFIX: typo
-    var aa = toLower(a[i])
-    var bb = toLower(b[j])
-    result = ord(aa) - ord(bb)
-    if result != 0 or aa == '\0': break
-    inc(i)
-    inc(j)
+proc cmpNimIdentifier(a, b: cstring): int {.noSideEffect.} = # style-insensitive compare
+  cmpIgnoreStyleImpl(a, b, true) # true = firstCharCaseSensitive: a[0]/b[0] must match exactly

 proc getFieldNode(p: pointer, n: ptr TNimNode,
                  name: cstring): ptr TNimNode =
  case n.kind
  of nkNone: assert(false)
  of nkSlot:
-    if cmpIgnoreStyle(n.name, name) == 0:
+    if cmpNimIdentifier(n.name, name) == 0:
      result = n
  of nkList:
    for i in 0..n.len-1:
      result = getFieldNode(p, n.sons[i], name)
      if result != nil: break
  of nkCase:
-    if cmpIgnoreStyle(n.name, name) == 0:
+    if cmpNimIdentifier(n.name, name) == 0:
      result = n
    else:
      var m = selectBranch(p, n)
@@ -599,7 +587,7 @@ proc getEnumOrdinal*(x: Any, name: string): int =
  var n = typ.node
  var s = n.sons
  for i in 0 .. n.len-1:
-    if cmpIgnoreStyle($s[i].name, name) == 0:
+    if cmpNimIdentifier($s[i].name, name) == 0:
      if ntfEnumHole notin typ.flags:
        return i
      else:
--- a/lib/pure/cstrutils.nim
+++ b/lib/pure/cstrutils.nim
@@ -12,12 +12,8 @@
 ## save allocations.

 include "system/inclrtl"
+import std/private/strimpl

-proc toLowerAscii(c: char): char {.inline.} =
-  if c in {'A'..'Z'}:
-    result = chr(ord(c) + (ord('a') - ord('A')))
-  else:
-    result = c

 when defined(js):
  proc startsWith*(s, prefix: cstring): bool {.noSideEffect,
@@ -25,7 +21,13 @@ when defined(js):

  proc endsWith*(s, suffix: cstring): bool {.noSideEffect,
    importjs: "#.endsWith(#)".}
-  
+
+  proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect.} = # JS-branch variant
+    cmpIgnoreStyleImpl(a, b) # length-based template imported from std/private/strimpl
+
+  proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect.} = # JS-branch variant
+    cmpIgnoreCaseImpl(a, b) # length-based template imported from std/private/strimpl
+
  # JS string has more operations that might warrant its own module:
  # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String
 else:
@@ -57,45 +59,39 @@ else:
      inc(i)
    if suffix[i] == '\0': return true

-proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect,
-  rtl, extern: "csuCmpIgnoreStyle".} =
-  ## Semantically the same as ``cmp(normalize($a), normalize($b))``. It
-  ## is just optimized to not allocate temporary strings.  This should
-  ## NOT be used to compare Nim identifier names. use `macros.eqIdent`
-  ## for that.  Returns:
-  ##
-  ## | 0 if a == b
-  ## | < 0 if a < b
-  ## | > 0 if a > b
-  ## 
-  ## Not supported for JS backend, use `strutils.cmpIgnoreStyle
-  ## <strutils.html#cmpIgnoreStyle%2Cstring%2Cstring>`_ instead.
-  var i = 0
-  var j = 0
-  while true:
-    while a[i] == '_': inc(i)
-    while b[j] == '_': inc(j) # BUGFIX: typo
-    var aa = toLowerAscii(a[i])
-    var bb = toLowerAscii(b[j])
-    result = ord(aa) - ord(bb)
-    if result != 0 or aa == '\0': break
-    inc(i)
-    inc(j)
+  proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect,
+    rtl, extern: "csuCmpIgnoreStyle".} =
+    ## Semantically the same as ``cmp(normalize($a), normalize($b))``. It
+    ## is just optimized to not allocate temporary strings. This should
+    ## NOT be used to compare Nim identifier names. Use `macros.eqIdent`
+    ## for that. Returns:
+    ##
+    ## | 0 if a == b
+    ## | < 0 if a < b
+    ## | > 0 if a > b
+    var i = 0
+    var j = 0
+    while true:
+      while a[i] == '_': inc(i) # underscores never take part in the comparison
+      while b[j] == '_': inc(j) # same skip for `b`, using its own cursor
+      var aa = toLowerAscii(a[i])
+      var bb = toLowerAscii(b[j])
+      result = ord(aa) - ord(bb)
+      if result != 0 or aa == '\0': break # differ, or both hit the '\0' terminator
+      inc(i)
+      inc(j)

-proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect,
-  rtl, extern: "csuCmpIgnoreCase".} =
-  ## Compares two strings in a case insensitive manner. Returns:
-  ##
-  ## | 0 if a == b
-  ## | < 0 if a < b
-  ## | > 0 if a > b
-  ## 
-  ## Not supported for JS backend, use `strutils.cmpIgnoreCase
-  ## <strutils.html#cmpIgnoreCase%2Cstring%2Cstring>`_ instead.
-  var i = 0
-  while true:
-    var aa = toLowerAscii(a[i])
-    var bb = toLowerAscii(b[i])
-    result = ord(aa) - ord(bb)
-    if result != 0 or aa == '\0': break
-    inc(i)
+  proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect,
+    rtl, extern: "csuCmpIgnoreCase".} =
+    ## Compares two strings in a case-insensitive manner. Returns:
+    ##
+    ## | 0 if a == b
+    ## | < 0 if a < b
+    ## | > 0 if a > b
+    var i = 0
+    while true:
+      var aa = toLowerAscii(a[i]) # only 'A'..'Z' are folded
+      var bb = toLowerAscii(b[i])
+      result = ord(aa) - ord(bb)
+      if result != 0 or aa == '\0': break # differ, or both hit the '\0' terminator
+      inc(i)
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -81,6 +81,8 @@ when defined(nimVmExportFixed):

 include "system/inclrtl"
 import std/private/since
+from std/private/strimpl import cmpIgnoreStyleImpl, cmpIgnoreCaseImpl
+

 const
  Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'}
@@ -319,13 +321,7 @@ func cmpIgnoreCase*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreCase".} =
    doAssert cmpIgnoreCase("FooBar", "foobar") == 0
    doAssert cmpIgnoreCase("bar", "Foo") < 0
    doAssert cmpIgnoreCase("Foo5", "foo4") > 0
-  var i = 0
-  var m = min(a.len, b.len)
-  while i < m:
-    result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
-    if result != 0: return
-    inc(i)
-  result = a.len - b.len
+  cmpIgnoreCaseImpl(a, b)

 {.push checks: off, line_trace: off.} # this is a hot-spot in the compiler!
                                      # thus we compile without checks here
@@ -344,25 +340,7 @@ func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} =
  runnableExamples:
    doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0
    doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0
-  var i = 0
-  var j = 0
-  while true:
-    while i < a.len and a[i] == '_': inc i
-    while j < b.len and b[j] == '_': inc j
-    var aa = if i < a.len: toLowerAscii(a[i]) else: '\0'
-    var bb = if j < b.len: toLowerAscii(b[j]) else: '\0'
-    result = ord(aa) - ord(bb)
-    if result != 0: return result
-    # the characters are identical:
-    if i >= a.len:
-      # both cursors at the end:
-      if j >= b.len: return 0
-      # not yet at the end of 'b':
-      return -1
-    elif j >= b.len:
-      return 1
-    inc i
-    inc j
+  cmpIgnoreStyleImpl(a, b)
 {.pop.}

 # --------- Private templates for different split separators -----------
--- a/lib/std/private/strimpl.nim
+++ b/lib/std/private/strimpl.nim
@@ -0,0 +1,53 @@
+func toLowerAscii*(c: char): char {.inline.} = # lowercases 'A'..'Z' only
+  if c in {'A'..'Z'}:
+    result = chr(ord(c) + (ord('a') - ord('A'))) # shift by the 'A' -> 'a' offset
+  else:
+    result = c # every other char is returned unchanged
+
+template firstCharCaseSensitiveImpl(a, b: typed, aLen, bLen: int) =
+  if aLen == 0 or bLen == 0: # an empty operand: order by length alone
+    return aLen - bLen # `return` leaves the routine this template expands into
+  if a[0] != b[0]: return ord(a[0]) - ord(b[0]) # exact, case-sensitive compare
+
+template cmpIgnoreStyleImpl*(a, b: typed, firstCharCaseSensitive: static bool = false) =
+  # a, b are string or cstring; the expansion assigns/returns the caller's `result`
+  let aLen = a.len # lengths are read once, up front
+  let bLen = b.len
+  var i = 0
+  var j = 0
+  when firstCharCaseSensitive: # compile-time switch, no runtime branch
+    firstCharCaseSensitiveImpl(a, b, aLen, bLen) # may return early
+    inc i # reached only when both are non-empty and a[0] == b[0]
+    inc j
+  while true:
+    while i < aLen and a[i] == '_': inc i # underscores are ignored
+    while j < bLen and b[j] == '_': inc j
+    let aa = if i < aLen: toLowerAscii(a[i]) else: '\0' # '\0' stands for "past the end"
+    let bb = if j < bLen: toLowerAscii(b[j]) else: '\0'
+    result = ord(aa) - ord(bb)
+    if result != 0: return result # first differing char decides the order
+    # aa == bb here; that can also mean one or both cursors ran off the end:
+    if i >= aLen:
+      # both cursors at the end, so the operands compare equal:
+      if j >= bLen: return 0
+      # 'a' is exhausted but 'b' still has a (real '\0') char left:
+      return -1
+    elif j >= bLen:
+      return 1 # mirror case: 'b' is exhausted, 'a' is not
+    inc i
+    inc j
+
+template cmpIgnoreCaseImpl*(a, b: typed, firstCharCaseSensitive: static bool = false) =
+  # a, b are string or cstring; the expansion assigns/returns the caller's `result`
+  let aLen = a.len # lengths are read once, up front
+  let bLen = b.len
+  var i = 0
+  when firstCharCaseSensitive: # compile-time switch, no runtime branch
+    firstCharCaseSensitiveImpl(a, b, aLen, bLen) # may return early
+    inc i # reached only when both are non-empty and a[0] == b[0]
+  var m = min(aLen, bLen) # NOTE(review): never reassigned below; could be `let`
+  while i < m: # compare only the common-length prefix
+    result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
+    if result != 0: return # first differing char decides the order
+    inc i
+  result = aLen - bLen # equal prefix: the shorter operand sorts first
--- a/tests/js/tstdlib_various.nim
+++ b/tests/js/tstdlib_various.nim
@@ -29,7 +29,7 @@ Hi Andreas! How do you feel, Rumpf?
 """

 import
-  critbits, cstrutils, sets, strutils, tables, random, algorithm, ropes,
+  critbits, sets, strutils, tables, random, algorithm, ropes,
  lists, htmlgen, xmltree, strtabs


@@ -177,18 +177,3 @@ block txmltree:
    ])
  ])
  doAssert(y.innerText == "foobar")
-
-
-
-block tcstrutils:
-  let s = cstring "abcdef"
-  doAssert s.startsWith("a")
-  doAssert not s.startsWith("b")
-  doAssert s.endsWith("f")
-  doAssert not s.endsWith("a")
-
-  let a = cstring "abracadabra"
-  doAssert a.startsWith("abra")
-  doAssert not a.startsWith("bra")
-  doAssert a.endsWith("abra")
-  doAssert not a.endsWith("dab")
--- a/tests/stdlib/tcstrutils.nim
+++ b/tests/stdlib/tcstrutils.nim
@@ -0,0 +1,30 @@
+discard """
+  targets: "c cpp js"
+"""
+
+import cstrutils
+
+
+block tcstrutils: # cstring overloads from cstrutils
+  let s = cstring "abcdef"
+  doAssert s.startsWith("a") # single-char prefix/suffix
+  doAssert not s.startsWith("b")
+  doAssert s.endsWith("f")
+  doAssert not s.endsWith("a")
+
+  let a = cstring "abracadabra"
+  doAssert a.startsWith("abra") # multi-char prefix/suffix
+  doAssert not a.startsWith("bra")
+  doAssert a.endsWith("abra")
+  doAssert not a.endsWith("dab")
+
+  doAssert cmpIgnoreCase(cstring "FooBar", "foobar") == 0 # case is ignored
+  doAssert cmpIgnoreCase(cstring "bar", "Foo") < 0
+  doAssert cmpIgnoreCase(cstring "Foo5", "foo4") > 0
+
+  doAssert cmpIgnoreStyle(cstring "foo_bar", "FooBar") == 0 # case and '_' ignored
+  doAssert cmpIgnoreStyle(cstring "foo_bar_5", "FooBar4") > 0
+
+  doAssert cmpIgnoreCase(cstring "", cstring "") == 0 # empty-operand edge cases
+  doAssert cmpIgnoreCase(cstring "", cstring "Hello") < 0
+  doAssert cmpIgnoreCase(cstring "wind", cstring "") > 0
--- a/tests/stdlib/tstdlib_various.nim
+++ b/tests/stdlib/tstdlib_various.nim
@@ -38,7 +38,7 @@ true
 """

 import
-  critbits, cstrutils, sets, strutils, tables, random, algorithm, re, ropes,
+  critbits, sets, strutils, tables, random, algorithm, re, ropes,
  segfaults, lists, parsesql, streams, os, htmlgen, xmltree, strtabs


@@ -245,24 +245,3 @@ block txmltree:
    ])
  ])
  doAssert(y.innerText == "foobar")
-
-
-block tcstrutils:
-  let s = cstring "abcdef"
-  doAssert s.startsWith("a")
-  doAssert not s.startsWith("b")
-  doAssert s.endsWith("f")
-  doAssert not s.endsWith("a")
-
-  let a = cstring "abracadabra"
-  doAssert a.startsWith("abra")
-  doAssert not a.startsWith("bra")
-  doAssert a.endsWith("abra")
-  doAssert not a.endsWith("dab")
-
-  doAssert cmpIgnoreCase(cstring "FooBar", "foobar") == 0
-  doAssert cmpIgnoreCase(cstring "bar", "Foo") < 0
-  doAssert cmpIgnoreCase(cstring "Foo5", "foo4") > 0
-
-  doAssert cmpIgnoreStyle(cstring "foo_bar", "FooBar") == 0
-  doAssert cmpIgnoreStyle(cstring "foo_bar_5", "FooBar4") > 0