mirror of
https://github.com/nim-lang/Nim.git
synced 2026-02-14 23:33:28 +00:00
refactor cmpIgnoreStyle and cmpIgnoreCase (#16399)
* init * support strutils * more * better * Call len once per string/cstring * Change var to let * Compare ternary on first char * More appropriate param name * fix * better * one test * impl * more efficient * minor Co-authored-by: Clyybber <darkmine956@gmail.com>
This commit is contained in:
@@ -1389,25 +1389,10 @@ when defined(nimVmEqIdent):
|
||||
## these nodes will be unwrapped.
|
||||
|
||||
else:
|
||||
from std/private/strimpl import cmpIgnoreStyleImpl
|
||||
# this procedure is optimized for native code, it should not be compiled to nimVM bytecode.
|
||||
proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} =
|
||||
proc toLower(c: char): char {.inline.} =
|
||||
if c in {'A'..'Z'}: result = chr(ord(c) + (ord('a') - ord('A')))
|
||||
else: result = c
|
||||
var i = 0
|
||||
var j = 0
|
||||
# first char is case sensitive
|
||||
if a[0] != b[0]: return 1
|
||||
while true:
|
||||
while a[i] == '_': inc(i)
|
||||
while b[j] == '_': inc(j) # BUGFIX: typo
|
||||
var aa = toLower(a[i])
|
||||
var bb = toLower(b[j])
|
||||
result = ord(aa) - ord(bb)
|
||||
if result != 0 or aa == '\0': break
|
||||
inc(i)
|
||||
inc(j)
|
||||
|
||||
cmpIgnoreStyleImpl(a, b, true)
|
||||
|
||||
proc eqIdent*(a, b: string): bool = cmpIgnoreStyle(a, b) == 0
|
||||
## Check if two idents are equal.
|
||||
|
||||
@@ -91,6 +91,8 @@ when not defined(gcDestructors):
|
||||
else:
|
||||
include system/seqs_v2_reimpl
|
||||
|
||||
from std/private/strimpl import cmpIgnoreStyleImpl
|
||||
|
||||
when not defined(js):
|
||||
template rawType(x: Any): PNimType =
|
||||
cast[PNimType](x.rawTypePtr)
|
||||
@@ -366,36 +368,22 @@ iterator fields*(x: Any): tuple[name: string, any: Any] =
|
||||
for name, any in items(ret):
|
||||
yield ($name, any)
|
||||
|
||||
proc cmpIgnoreStyle(a, b: cstring): int {.noSideEffect.} =
|
||||
proc toLower(c: char): char {.inline.} =
|
||||
if c in {'A'..'Z'}: result = chr(ord(c) + (ord('a') - ord('A')))
|
||||
else: result = c
|
||||
var i = 0
|
||||
var j = 0
|
||||
if a[0] != b[0]: return 1
|
||||
while true:
|
||||
while a[i] == '_': inc(i)
|
||||
while b[j] == '_': inc(j) # BUGFIX: typo
|
||||
var aa = toLower(a[i])
|
||||
var bb = toLower(b[j])
|
||||
result = ord(aa) - ord(bb)
|
||||
if result != 0 or aa == '\0': break
|
||||
inc(i)
|
||||
inc(j)
|
||||
proc cmpNimIdentifier(a, b: cstring): int {.noSideEffect.} =
|
||||
cmpIgnoreStyleImpl(a, b, true)
|
||||
|
||||
proc getFieldNode(p: pointer, n: ptr TNimNode,
|
||||
name: cstring): ptr TNimNode =
|
||||
case n.kind
|
||||
of nkNone: assert(false)
|
||||
of nkSlot:
|
||||
if cmpIgnoreStyle(n.name, name) == 0:
|
||||
if cmpNimIdentifier(n.name, name) == 0:
|
||||
result = n
|
||||
of nkList:
|
||||
for i in 0..n.len-1:
|
||||
result = getFieldNode(p, n.sons[i], name)
|
||||
if result != nil: break
|
||||
of nkCase:
|
||||
if cmpIgnoreStyle(n.name, name) == 0:
|
||||
if cmpNimIdentifier(n.name, name) == 0:
|
||||
result = n
|
||||
else:
|
||||
var m = selectBranch(p, n)
|
||||
@@ -599,7 +587,7 @@ proc getEnumOrdinal*(x: Any, name: string): int =
|
||||
var n = typ.node
|
||||
var s = n.sons
|
||||
for i in 0 .. n.len-1:
|
||||
if cmpIgnoreStyle($s[i].name, name) == 0:
|
||||
if cmpNimIdentifier($s[i].name, name) == 0:
|
||||
if ntfEnumHole notin typ.flags:
|
||||
return i
|
||||
else:
|
||||
|
||||
@@ -12,12 +12,8 @@
|
||||
## save allocations.
|
||||
|
||||
include "system/inclrtl"
|
||||
import std/private/strimpl
|
||||
|
||||
proc toLowerAscii(c: char): char {.inline.} =
|
||||
if c in {'A'..'Z'}:
|
||||
result = chr(ord(c) + (ord('a') - ord('A')))
|
||||
else:
|
||||
result = c
|
||||
|
||||
when defined(js):
|
||||
proc startsWith*(s, prefix: cstring): bool {.noSideEffect,
|
||||
@@ -25,7 +21,13 @@ when defined(js):
|
||||
|
||||
proc endsWith*(s, suffix: cstring): bool {.noSideEffect,
|
||||
importjs: "#.endsWith(#)".}
|
||||
|
||||
|
||||
proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect.} =
|
||||
cmpIgnoreStyleImpl(a, b)
|
||||
|
||||
proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect.} =
|
||||
cmpIgnoreCaseImpl(a, b)
|
||||
|
||||
# JS string has more operations that might warrant its own module:
|
||||
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String
|
||||
else:
|
||||
@@ -57,45 +59,39 @@ else:
|
||||
inc(i)
|
||||
if suffix[i] == '\0': return true
|
||||
|
||||
proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect,
|
||||
rtl, extern: "csuCmpIgnoreStyle".} =
|
||||
## Semantically the same as ``cmp(normalize($a), normalize($b))``. It
|
||||
## is just optimized to not allocate temporary strings. This should
|
||||
## NOT be used to compare Nim identifier names. use `macros.eqIdent`
|
||||
## for that. Returns:
|
||||
##
|
||||
## | 0 if a == b
|
||||
## | < 0 if a < b
|
||||
## | > 0 if a > b
|
||||
##
|
||||
## Not supported for JS backend, use `strutils.cmpIgnoreStyle
|
||||
## <strutils.html#cmpIgnoreStyle%2Cstring%2Cstring>`_ instead.
|
||||
var i = 0
|
||||
var j = 0
|
||||
while true:
|
||||
while a[i] == '_': inc(i)
|
||||
while b[j] == '_': inc(j) # BUGFIX: typo
|
||||
var aa = toLowerAscii(a[i])
|
||||
var bb = toLowerAscii(b[j])
|
||||
result = ord(aa) - ord(bb)
|
||||
if result != 0 or aa == '\0': break
|
||||
inc(i)
|
||||
inc(j)
|
||||
proc cmpIgnoreStyle*(a, b: cstring): int {.noSideEffect,
|
||||
rtl, extern: "csuCmpIgnoreStyle".} =
|
||||
## Semantically the same as ``cmp(normalize($a), normalize($b))``. It
|
||||
## is just optimized to not allocate temporary strings. This should
|
||||
## NOT be used to compare Nim identifier names. use `macros.eqIdent`
|
||||
## for that. Returns:
|
||||
##
|
||||
## | 0 if a == b
|
||||
## | < 0 if a < b
|
||||
## | > 0 if a > b
|
||||
var i = 0
|
||||
var j = 0
|
||||
while true:
|
||||
while a[i] == '_': inc(i)
|
||||
while b[j] == '_': inc(j) # BUGFIX: typo
|
||||
var aa = toLowerAscii(a[i])
|
||||
var bb = toLowerAscii(b[j])
|
||||
result = ord(aa) - ord(bb)
|
||||
if result != 0 or aa == '\0': break
|
||||
inc(i)
|
||||
inc(j)
|
||||
|
||||
proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect,
|
||||
rtl, extern: "csuCmpIgnoreCase".} =
|
||||
## Compares two strings in a case insensitive manner. Returns:
|
||||
##
|
||||
## | 0 if a == b
|
||||
## | < 0 if a < b
|
||||
## | > 0 if a > b
|
||||
##
|
||||
## Not supported for JS backend, use `strutils.cmpIgnoreCase
|
||||
## <strutils.html#cmpIgnoreCase%2Cstring%2Cstring>`_ instead.
|
||||
var i = 0
|
||||
while true:
|
||||
var aa = toLowerAscii(a[i])
|
||||
var bb = toLowerAscii(b[i])
|
||||
result = ord(aa) - ord(bb)
|
||||
if result != 0 or aa == '\0': break
|
||||
inc(i)
|
||||
proc cmpIgnoreCase*(a, b: cstring): int {.noSideEffect,
|
||||
rtl, extern: "csuCmpIgnoreCase".} =
|
||||
## Compares two strings in a case insensitive manner. Returns:
|
||||
##
|
||||
## | 0 if a == b
|
||||
## | < 0 if a < b
|
||||
## | > 0 if a > b
|
||||
var i = 0
|
||||
while true:
|
||||
var aa = toLowerAscii(a[i])
|
||||
var bb = toLowerAscii(b[i])
|
||||
result = ord(aa) - ord(bb)
|
||||
if result != 0 or aa == '\0': break
|
||||
inc(i)
|
||||
|
||||
@@ -81,6 +81,8 @@ when defined(nimVmExportFixed):
|
||||
|
||||
include "system/inclrtl"
|
||||
import std/private/since
|
||||
from std/private/strimpl import cmpIgnoreStyleImpl, cmpIgnoreCaseImpl
|
||||
|
||||
|
||||
const
|
||||
Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'}
|
||||
@@ -319,13 +321,7 @@ func cmpIgnoreCase*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreCase".} =
|
||||
doAssert cmpIgnoreCase("FooBar", "foobar") == 0
|
||||
doAssert cmpIgnoreCase("bar", "Foo") < 0
|
||||
doAssert cmpIgnoreCase("Foo5", "foo4") > 0
|
||||
var i = 0
|
||||
var m = min(a.len, b.len)
|
||||
while i < m:
|
||||
result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
|
||||
if result != 0: return
|
||||
inc(i)
|
||||
result = a.len - b.len
|
||||
cmpIgnoreCaseImpl(a, b)
|
||||
|
||||
{.push checks: off, line_trace: off.} # this is a hot-spot in the compiler!
|
||||
# thus we compile without checks here
|
||||
@@ -344,25 +340,7 @@ func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} =
|
||||
runnableExamples:
|
||||
doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0
|
||||
doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0
|
||||
var i = 0
|
||||
var j = 0
|
||||
while true:
|
||||
while i < a.len and a[i] == '_': inc i
|
||||
while j < b.len and b[j] == '_': inc j
|
||||
var aa = if i < a.len: toLowerAscii(a[i]) else: '\0'
|
||||
var bb = if j < b.len: toLowerAscii(b[j]) else: '\0'
|
||||
result = ord(aa) - ord(bb)
|
||||
if result != 0: return result
|
||||
# the characters are identical:
|
||||
if i >= a.len:
|
||||
# both cursors at the end:
|
||||
if j >= b.len: return 0
|
||||
# not yet at the end of 'b':
|
||||
return -1
|
||||
elif j >= b.len:
|
||||
return 1
|
||||
inc i
|
||||
inc j
|
||||
cmpIgnoreStyleImpl(a, b)
|
||||
{.pop.}
|
||||
|
||||
# --------- Private templates for different split separators -----------
|
||||
|
||||
53
lib/std/private/strimpl.nim
Normal file
53
lib/std/private/strimpl.nim
Normal file
@@ -0,0 +1,53 @@
|
||||
func toLowerAscii*(c: char): char {.inline.} =
  ## Maps an ASCII uppercase letter (`'A'..'Z'`) to its lowercase
  ## counterpart; every other character is returned unchanged.
  const caseOffset = ord('a') - ord('A')
  result =
    if c in {'A'..'Z'}: chr(ord(c) + caseOffset)
    else: c
|
||||
|
||||
template firstCharCaseSensitiveImpl(a, b: typed, aLen, bLen: int) =
  ## Exact (case-sensitive) comparison of the first characters of `a` and `b`.
  ##
  ## Must be expanded inside a routine returning `int`, because it leaves
  ## that routine through `return`:
  ## * with `aLen - bLen` when either input is empty,
  ## * with the difference of the two first characters when they differ.
  ##
  ## When the first characters are equal nothing happens and control falls
  ## through to the code following the expansion.
  if aLen != 0 and bLen != 0:
    let firstDiff = ord(a[0]) - ord(b[0])
    if firstDiff != 0: return firstDiff
  else:
    return aLen - bLen
|
||||
|
||||
template cmpIgnoreStyleImpl*(a, b: typed, firstCharCaseSensitive: static bool = false) =
  ## Style-insensitive three-way comparison of `a` and `b` (`string` or
  ## `cstring`): underscores are skipped and ASCII letters are compared
  ## case-insensitively via `toLowerAscii`.
  ##
  ## Must be expanded inside a routine returning `int`, because it assigns
  ## `result` and leaves through `return`. The outcome is `0` when the inputs
  ## match, negative when `a` sorts before `b` and positive otherwise.
  ##
  ## With `firstCharCaseSensitive = true` the first characters are compared
  ## exactly by `firstCharCaseSensitiveImpl` before the relaxed comparison
  ## continues from index 1.
  let
    aLen = a.len # taken once; the loop below never calls `len` again
    bLen = b.len
  var
    i = 0
    j = 0
  when firstCharCaseSensitive:
    firstCharCaseSensitiveImpl(a, b, aLen, bLen)
    inc i
    inc j
  while true:
    # Underscores never take part in the comparison.
    while i < aLen and a[i] == '_': inc i
    while j < bLen and b[j] == '_': inc j
    # An exhausted input is represented by '\0'.
    let
      aDone = i >= aLen
      bDone = j >= bLen
      aa = if aDone: '\0' else: toLowerAscii(a[i])
      bb = if bDone: '\0' else: toLowerAscii(b[j])
    result = ord(aa) - ord(bb)
    if result != 0: return result
    # The characters are identical: decide by which input, if any, ran out.
    if aDone:
      return (if bDone: 0 else: -1)
    if bDone:
      return 1
    inc i
    inc j
|
||||
|
||||
template cmpIgnoreCaseImpl*(a, b: typed, firstCharCaseSensitive: static bool = false) =
  ## Case-insensitive (ASCII only, via `toLowerAscii`) three-way comparison
  ## of `a` and `b`, which are `string` or `cstring`.
  ##
  ## Must be expanded inside a routine returning `int`, because it assigns
  ## `result` and may leave through `return`. The outcome is `0` when the
  ## inputs match, negative when `a` sorts before `b` and positive otherwise;
  ## when one input is a prefix of the other the difference of the lengths
  ## decides.
  ##
  ## With `firstCharCaseSensitive = true` the first characters are compared
  ## exactly by `firstCharCaseSensitiveImpl` before the relaxed comparison
  ## continues from index 1.
  let aLen = a.len
  let bLen = b.len
  var i = 0
  when firstCharCaseSensitive:
    firstCharCaseSensitiveImpl(a, b, aLen, bLen)
    inc i
  # Only the common prefix can be compared character by character; the bound
  # is never reassigned, hence `let`.
  let m = min(aLen, bLen)
  while i < m:
    result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
    if result != 0: return
    inc i
  # Common prefix is equal: the shorter input sorts first.
  result = aLen - bLen
|
||||
@@ -29,7 +29,7 @@ Hi Andreas! How do you feel, Rumpf?
|
||||
"""
|
||||
|
||||
import
|
||||
critbits, cstrutils, sets, strutils, tables, random, algorithm, ropes,
|
||||
critbits, sets, strutils, tables, random, algorithm, ropes,
|
||||
lists, htmlgen, xmltree, strtabs
|
||||
|
||||
|
||||
@@ -177,18 +177,3 @@ block txmltree:
|
||||
])
|
||||
])
|
||||
doAssert(y.innerText == "foobar")
|
||||
|
||||
|
||||
|
||||
block tcstrutils:
|
||||
let s = cstring "abcdef"
|
||||
doAssert s.startsWith("a")
|
||||
doAssert not s.startsWith("b")
|
||||
doAssert s.endsWith("f")
|
||||
doAssert not s.endsWith("a")
|
||||
|
||||
let a = cstring "abracadabra"
|
||||
doAssert a.startsWith("abra")
|
||||
doAssert not a.startsWith("bra")
|
||||
doAssert a.endsWith("abra")
|
||||
doAssert not a.endsWith("dab")
|
||||
|
||||
30
tests/stdlib/tcstrutils.nim
Normal file
30
tests/stdlib/tcstrutils.nim
Normal file
@@ -0,0 +1,30 @@
|
||||
discard """
|
||||
targets: "c cpp js"
|
||||
"""
|
||||
|
||||
import cstrutils
|
||||
|
||||
|
||||
block tcstrutils:
  # Single-character prefix and suffix checks.
  let letters = cstring "abcdef"
  doAssert startsWith(letters, "a")
  doAssert not startsWith(letters, "b")
  doAssert endsWith(letters, "f")
  doAssert not endsWith(letters, "a")

  # Multi-character prefix and suffix checks.
  let word = cstring "abracadabra"
  doAssert startsWith(word, "abra")
  doAssert not startsWith(word, "bra")
  doAssert endsWith(word, "abra")
  doAssert not endsWith(word, "dab")

  # Case-insensitive comparison.
  doAssert cmpIgnoreCase(cstring "FooBar", "foobar") == 0
  doAssert cmpIgnoreCase(cstring "bar", "Foo") < 0
  doAssert cmpIgnoreCase(cstring "Foo5", "foo4") > 0

  # Style-insensitive comparison: underscores and letter case are ignored.
  doAssert cmpIgnoreStyle(cstring "foo_bar", "FooBar") == 0
  doAssert cmpIgnoreStyle(cstring "foo_bar_5", "FooBar4") > 0

  # Empty inputs.
  doAssert cmpIgnoreCase(cstring "", cstring "") == 0
  doAssert cmpIgnoreCase(cstring "", cstring "Hello") < 0
  doAssert cmpIgnoreCase(cstring "wind", cstring "") > 0
|
||||
@@ -38,7 +38,7 @@ true
|
||||
"""
|
||||
|
||||
import
|
||||
critbits, cstrutils, sets, strutils, tables, random, algorithm, re, ropes,
|
||||
critbits, sets, strutils, tables, random, algorithm, re, ropes,
|
||||
segfaults, lists, parsesql, streams, os, htmlgen, xmltree, strtabs
|
||||
|
||||
|
||||
@@ -245,24 +245,3 @@ block txmltree:
|
||||
])
|
||||
])
|
||||
doAssert(y.innerText == "foobar")
|
||||
|
||||
|
||||
block tcstrutils:
|
||||
let s = cstring "abcdef"
|
||||
doAssert s.startsWith("a")
|
||||
doAssert not s.startsWith("b")
|
||||
doAssert s.endsWith("f")
|
||||
doAssert not s.endsWith("a")
|
||||
|
||||
let a = cstring "abracadabra"
|
||||
doAssert a.startsWith("abra")
|
||||
doAssert not a.startsWith("bra")
|
||||
doAssert a.endsWith("abra")
|
||||
doAssert not a.endsWith("dab")
|
||||
|
||||
doAssert cmpIgnoreCase(cstring "FooBar", "foobar") == 0
|
||||
doAssert cmpIgnoreCase(cstring "bar", "Foo") < 0
|
||||
doAssert cmpIgnoreCase(cstring "Foo5", "foo4") > 0
|
||||
|
||||
doAssert cmpIgnoreStyle(cstring "foo_bar", "FooBar") == 0
|
||||
doAssert cmpIgnoreStyle(cstring "foo_bar_5", "FooBar4") > 0
|
||||
|
||||
Reference in New Issue
Block a user