refactor cmpIgnoreStyle and cmpIgnoreCase (#16399)

* init

* support strutils

* more

* better

* Call len once per string/cstring

* Change var to let

* Compare ternary on first char

* More appropriate param name

* fix

* better

* one test

* impl

* more efficient

* minor

Co-authored-by: Clyybber <darkmine956@gmail.com>
This commit is contained in:
flywind
2020-12-31 04:54:40 -06:00
committed by GitHub
parent 5984f7a7dd
commit 5fb56a3b2c
8 changed files with 141 additions and 147 deletions

View File

@@ -1389,25 +1389,10 @@ when defined(nimVmEqIdent):
## these nodes will be unwrapped.
else:
from std/private/strimpl import cmpIgnoreStyleImpl
# this procedure is optimized for native code, it should not be compiled to nimVM bytecode.
proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} =
proc toLower(c: char): char {.inline.} =
if c in {'A'..'Z'}: result = chr(ord(c) + (ord('a') - ord('A')))
else: result = c
var i = 0
var j = 0
# first char is case sensitive
if a[0] != b[0]: return 1
while true:
while a[i] == '_': inc(i)
while b[j] == '_': inc(j) # BUGFIX: typo
var aa = toLower(a[i])
var bb = toLower(b[j])
result = ord(aa) - ord(bb)
if result != 0 or aa == '\0': break
inc(i)
inc(j)
cmpIgnoreStyleImpl(a, b, true)
proc eqIdent*(a, b: string): bool = cmpIgnoreStyle(a, b) == 0
## Check if two idents are equal.

View File

@@ -91,6 +91,8 @@ when not defined(gcDestructors):
else:
include system/seqs_v2_reimpl
from std/private/strimpl import cmpIgnoreStyleImpl
when not defined(js):
template rawType(x: Any): PNimType =
cast[PNimType](x.rawTypePtr)
@@ -366,36 +368,22 @@ iterator fields*(x: Any): tuple[name: string, any: Any] =
for name, any in items(ret):
yield ($name, any)
proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} =
proc toLower(c: char): char {.inline.} =
if c in {'A'..'Z'}: result = chr(ord(c) + (ord('a') - ord('A')))
else: result = c
var i = 0
var j = 0
if a[0] != b[0]: return 1
while true:
while a[i] == '_': inc(i)
while b[j] == '_': inc(j) # BUGFIX: typo
var aa = toLower(a[i])
var bb = toLower(b[j])
result = ord(aa) - ord(bb)
if result != 0 or aa == '\0': break
inc(i)
inc(j)
proc cmpNimIdentifier(a, b: cstring): int {.noSideEffect.} =
cmpIgnoreStyleImpl(a, b, true)
proc getFieldNode(p: pointer, n: ptr TNimNode,
name: cstring): ptr TNimNode =
case n.kind
of nkNone: assert(false)
of nkSlot:
if cmpIgnoreStyle(n.name, name) == 0:
if cmpNimIdentifier(n.name, name) == 0:
result = n
of nkList:
for i in 0..n.len-1:
result = getFieldNode(p, n.sons[i], name)
if result != nil: break
of nkCase:
if cmpIgnoreStyle(n.name, name) == 0:
if cmpNimIdentifier(n.name, name) == 0:
result = n
else:
var m = selectBranch(p, n)
@@ -599,7 +587,7 @@ proc getEnumOrdinal*(x: Any, name: string): int =
var n = typ.node
var s = n.sons
for i in 0 .. n.len-1:
if cmpIgnoreStyle($s[i].name, name) == 0:
if cmpNimIdentifier($s[i].name, name) == 0:
if ntfEnumHole notin typ.flags:
return i
else:

View File

@@ -12,12 +12,8 @@
## save allocations.
include "system/inclrtl"
import std/private/strimpl
proc toLowerAscii(c: char): char {.inline.} =
if c in {'A'..'Z'}:
result = chr(ord(c) + (ord('a') - ord('A')))
else:
result = c
when defined(js):
proc startsWith*(s, prefix: cstring): bool {.noSideEffect,
@@ -25,7 +21,13 @@ when defined(js):
proc endsWith*(s, suffix: cstring): bool {.noSideEffect,
importjs: "#.endsWith(#)".}
proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect.} =
cmpIgnoreStyleImpl(a, b)
proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect.} =
cmpIgnoreCaseImpl(a, b)
# JS string has more operations that might warrant its own module:
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String
else:
@@ -57,45 +59,39 @@ else:
inc(i)
if suffix[i] == '\0': return true
proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect,
rtl, extern: "csuCmpIgnoreStyle".} =
## Semantically the same as ``cmp(normalize($a), normalize($b))``. It
## is just optimized to not allocate temporary strings. This should
## NOT be used to compare Nim identifier names. use `macros.eqIdent`
## for that. Returns:
##
## | 0 if a == b
## | < 0 if a < b
## | > 0 if a > b
##
## Not supported for JS backend, use `strutils.cmpIgnoreStyle
## <strutils.html#cmpIgnoreStyle%2Cstring%2Cstring>`_ instead.
var i = 0
var j = 0
while true:
while a[i] == '_': inc(i)
while b[j] == '_': inc(j) # BUGFIX: typo
var aa = toLowerAscii(a[i])
var bb = toLowerAscii(b[j])
result = ord(aa) - ord(bb)
if result != 0 or aa == '\0': break
inc(i)
inc(j)
proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect,
rtl, extern: "csuCmpIgnoreStyle".} =
## Semantically the same as ``cmp(normalize($a), normalize($b))``. It
## is just optimized to not allocate temporary strings. This should
## NOT be used to compare Nim identifier names. use `macros.eqIdent`
## for that. Returns:
##
## | 0 if a == b
## | < 0 if a < b
## | > 0 if a > b
var i = 0
var j = 0
while true:
while a[i] == '_': inc(i)
while b[j] == '_': inc(j) # BUGFIX: typo
var aa = toLowerAscii(a[i])
var bb = toLowerAscii(b[j])
result = ord(aa) - ord(bb)
if result != 0 or aa == '\0': break
inc(i)
inc(j)
proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect,
rtl, extern: "csuCmpIgnoreCase".} =
## Compares two strings in a case insensitive manner. Returns:
##
## | 0 if a == b
## | < 0 if a < b
## | > 0 if a > b
##
## Not supported for JS backend, use `strutils.cmpIgnoreCase
## <strutils.html#cmpIgnoreCase%2Cstring%2Cstring>`_ instead.
var i = 0
while true:
var aa = toLowerAscii(a[i])
var bb = toLowerAscii(b[i])
result = ord(aa) - ord(bb)
if result != 0 or aa == '\0': break
inc(i)
proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect,
rtl, extern: "csuCmpIgnoreCase".} =
## Compares two strings in a case insensitive manner. Returns:
##
## | 0 if a == b
## | < 0 if a < b
## | > 0 if a > b
var i = 0
while true:
var aa = toLowerAscii(a[i])
var bb = toLowerAscii(b[i])
result = ord(aa) - ord(bb)
if result != 0 or aa == '\0': break
inc(i)

View File

@@ -81,6 +81,8 @@ when defined(nimVmExportFixed):
include "system/inclrtl"
import std/private/since
from std/private/strimpl import cmpIgnoreStyleImpl, cmpIgnoreCaseImpl
const
Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'}
@@ -319,13 +321,7 @@ func cmpIgnoreCase*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreCase".} =
doAssert cmpIgnoreCase("FooBar", "foobar") == 0
doAssert cmpIgnoreCase("bar", "Foo") < 0
doAssert cmpIgnoreCase("Foo5", "foo4") > 0
var i = 0
var m = min(a.len, b.len)
while i < m:
result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
if result != 0: return
inc(i)
result = a.len - b.len
cmpIgnoreCaseImpl(a, b)
{.push checks: off, line_trace: off.} # this is a hot-spot in the compiler!
# thus we compile without checks here
@@ -344,25 +340,7 @@ func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} =
runnableExamples:
doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0
doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0
var i = 0
var j = 0
while true:
while i < a.len and a[i] == '_': inc i
while j < b.len and b[j] == '_': inc j
var aa = if i < a.len: toLowerAscii(a[i]) else: '\0'
var bb = if j < b.len: toLowerAscii(b[j]) else: '\0'
result = ord(aa) - ord(bb)
if result != 0: return result
# the characters are identical:
if i >= a.len:
# both cursors at the end:
if j >= b.len: return 0
# not yet at the end of 'b':
return -1
elif j >= b.len:
return 1
inc i
inc j
cmpIgnoreStyleImpl(a, b)
{.pop.}
# --------- Private templates for different split separators -----------

View File

@@ -0,0 +1,53 @@
func toLowerAscii*(c: char): char {.inline.} =
if c in {'A'..'Z'}:
result = chr(ord(c) + (ord('a') - ord('A')))
else:
result = c
template firstCharCaseSensitiveImpl(a, b: typed, aLen, bLen: int) =
if aLen == 0 or bLen == 0:
return aLen - bLen
if a[0] != b[0]: return ord(a[0]) - ord(b[0])
template cmpIgnoreStyleImpl*(a, b: typed, firstCharCaseSensitive: static bool = false) =
# a, b are string or cstring
let aLen = a.len
let bLen = b.len
var i = 0
var j = 0
when firstCharCaseSensitive:
firstCharCaseSensitiveImpl(a, b, aLen, bLen)
inc i
inc j
while true:
while i < aLen and a[i] == '_': inc i
while j < bLen and b[j] == '_': inc j
let aa = if i < aLen: toLowerAscii(a[i]) else: '\0'
let bb = if j < bLen: toLowerAscii(b[j]) else: '\0'
result = ord(aa) - ord(bb)
if result != 0: return result
# the characters are identical:
if i >= aLen:
# both cursors at the end:
if j >= bLen: return 0
# not yet at the end of 'b':
return -1
elif j >= bLen:
return 1
inc i
inc j
template cmpIgnoreCaseImpl*(a, b: typed, firstCharCaseSensitive: static bool = false) =
# a, b are string or cstring
let aLen = a.len
let bLen = b.len
var i = 0
when firstCharCaseSensitive:
firstCharCaseSensitiveImpl(a, b, aLen, bLen)
inc i
var m = min(aLen, bLen)
while i < m:
result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
if result != 0: return
inc i
result = aLen - bLen

View File

@@ -29,7 +29,7 @@ Hi Andreas! How do you feel, Rumpf?
"""
import
critbits, cstrutils, sets, strutils, tables, random, algorithm, ropes,
critbits, sets, strutils, tables, random, algorithm, ropes,
lists, htmlgen, xmltree, strtabs
@@ -177,18 +177,3 @@ block txmltree:
])
])
doAssert(y.innerText == "foobar")
block tcstrutils:
let s = cstring "abcdef"
doAssert s.startsWith("a")
doAssert not s.startsWith("b")
doAssert s.endsWith("f")
doAssert not s.endsWith("a")
let a = cstring "abracadabra"
doAssert a.startsWith("abra")
doAssert not a.startsWith("bra")
doAssert a.endsWith("abra")
doAssert not a.endsWith("dab")

View File

@@ -0,0 +1,30 @@
discard """
targets: "c cpp js"
"""
import cstrutils
block tcstrutils:
let s = cstring "abcdef"
doAssert s.startsWith("a")
doAssert not s.startsWith("b")
doAssert s.endsWith("f")
doAssert not s.endsWith("a")
let a = cstring "abracadabra"
doAssert a.startsWith("abra")
doAssert not a.startsWith("bra")
doAssert a.endsWith("abra")
doAssert not a.endsWith("dab")
doAssert cmpIgnoreCase(cstring "FooBar", "foobar") == 0
doAssert cmpIgnoreCase(cstring "bar", "Foo") < 0
doAssert cmpIgnoreCase(cstring "Foo5", "foo4") > 0
doAssert cmpIgnoreStyle(cstring "foo_bar", "FooBar") == 0
doAssert cmpIgnoreStyle(cstring "foo_bar_5", "FooBar4") > 0
doAssert cmpIgnoreCase(cstring "", cstring "") == 0
doAssert cmpIgnoreCase(cstring "", cstring "Hello") < 0
doAssert cmpIgnoreCase(cstring "wind", cstring "") > 0

View File

@@ -38,7 +38,7 @@ true
"""
import
critbits, cstrutils, sets, strutils, tables, random, algorithm, re, ropes,
critbits, sets, strutils, tables, random, algorithm, re, ropes,
segfaults, lists, parsesql, streams, os, htmlgen, xmltree, strtabs
@@ -245,24 +245,3 @@ block txmltree:
])
])
doAssert(y.innerText == "foobar")
block tcstrutils:
let s = cstring "abcdef"
doAssert s.startsWith("a")
doAssert not s.startsWith("b")
doAssert s.endsWith("f")
doAssert not s.endsWith("a")
let a = cstring "abracadabra"
doAssert a.startsWith("abra")
doAssert not a.startsWith("bra")
doAssert a.endsWith("abra")
doAssert not a.endsWith("dab")
doAssert cmpIgnoreCase(cstring "FooBar", "foobar") == 0
doAssert cmpIgnoreCase(cstring "bar", "Foo") < 0
doAssert cmpIgnoreCase(cstring "Foo5", "foo4") > 0
doAssert cmpIgnoreStyle(cstring "foo_bar", "FooBar") == 0
doAssert cmpIgnoreStyle(cstring "foo_bar_5", "FooBar4") > 0