mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-28 17:04:41 +00:00
Fix find routines' api to default to last=-1 (#19761)
This changes the default for the `last` parameter of various `find` routines from `0` to `-1`. Previous default prevents limiting the search to the first character. This is a logic error, as full text search was performed for 2 *valid* values of `last`: `0` and `last.high()`. Adds an overload for `initSkipTable` which returns a newly initialized table. This encapsulates every single usage of a `var`-acting original func in this module. Co-authored-by: flywind <xzsflywind@gmail.com>
This commit is contained in:
@@ -1815,6 +1815,7 @@ func initSkipTable*(a: var SkipTable, sub: string) {.rtl,
|
||||
## Initializes table `a` for efficient search of substring `sub`.
|
||||
##
|
||||
## See also:
|
||||
## * `initSkipTable func<#initSkipTable,string>`_
|
||||
## * `find func<#find,SkipTable,string,string,Natural,int>`_
|
||||
# TODO: this should be the `default()` initializer for the type.
|
||||
let m = len(sub)
|
||||
@@ -1823,7 +1824,16 @@ func initSkipTable*(a: var SkipTable, sub: string) {.rtl,
|
||||
for i in 0 ..< m - 1:
|
||||
a[sub[i]] = m - 1 - i
|
||||
|
||||
func find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int {.
|
||||
func initSkipTable*(sub: string): SkipTable {.noinit, rtl,
|
||||
extern: "nsuInitNewSkipTable".} =
|
||||
## Returns a new table initialized for `sub`.
|
||||
##
|
||||
## See also:
|
||||
## * `initSkipTable func<#initSkipTable,SkipTable,string>`_
|
||||
## * `find func<#find,SkipTable,string,string,Natural,int>`_
|
||||
initSkipTable(result, sub)
|
||||
|
||||
func find*(a: SkipTable, s, sub: string, start: Natural = 0, last = -1): int {.
|
||||
rtl, extern: "nsuFindStrA".} =
|
||||
## Searches for `sub` in `s` inside range `start..last` using preprocessed
|
||||
## table `a`. If `last` is unspecified, it defaults to `s.high` (the last
|
||||
@@ -1832,9 +1842,10 @@ func find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int {.
|
||||
## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
|
||||
##
|
||||
## See also:
|
||||
## * `initSkipTable func<#initSkipTable,string>`_
|
||||
## * `initSkipTable func<#initSkipTable,SkipTable,string>`_
|
||||
let
|
||||
last = if last == 0: s.high else: last
|
||||
last = if last < 0: s.high else: last
|
||||
subLast = sub.len - 1
|
||||
|
||||
if subLast == -1:
|
||||
@@ -1844,6 +1855,7 @@ func find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int {.
|
||||
|
||||
# This is an implementation of the Boyer-Moore Horspool algorithms
|
||||
# https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm
|
||||
result = -1
|
||||
var skip = start
|
||||
|
||||
while last - skip >= subLast:
|
||||
@@ -1853,7 +1865,6 @@ func find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int {.
|
||||
return skip
|
||||
dec i
|
||||
inc skip, a[s[skip + subLast]]
|
||||
return -1
|
||||
|
||||
when not (defined(js) or defined(nimdoc) or defined(nimscript)):
|
||||
func c_memchr(cstr: pointer, c: char, n: csize_t): pointer {.
|
||||
@@ -1865,10 +1876,10 @@ when not (defined(js) or defined(nimdoc) or defined(nimscript)):
|
||||
else:
|
||||
const hasCStringBuiltin = false
|
||||
|
||||
func find*(s: string, sub: char, start: Natural = 0, last = 0): int {.rtl,
|
||||
func find*(s: string, sub: char, start: Natural = 0, last = -1): int {.rtl,
|
||||
extern: "nsuFindChar".} =
|
||||
## Searches for `sub` in `s` inside range `start..last` (both ends included).
|
||||
## If `last` is unspecified, it defaults to `s.high` (the last element).
|
||||
## If `last` is unspecified or negative, it defaults to `s.high` (the last element).
|
||||
##
|
||||
## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
|
||||
## Otherwise the index returned is relative to `s[0]`, not `start`.
|
||||
@@ -1877,26 +1888,30 @@ func find*(s: string, sub: char, start: Natural = 0, last = 0): int {.rtl,
|
||||
## See also:
|
||||
## * `rfind func<#rfind,string,char,Natural,int>`_
|
||||
## * `replace func<#replace,string,char,char>`_
|
||||
let last = if last == 0: s.high else: last
|
||||
when nimvm:
|
||||
result = -1
|
||||
let last = if last < 0: s.high else: last
|
||||
|
||||
template findImpl =
|
||||
for i in int(start)..last:
|
||||
if sub == s[i]: return i
|
||||
if s[i] == sub:
|
||||
return i
|
||||
|
||||
when nimvm:
|
||||
findImpl()
|
||||
else:
|
||||
when hasCStringBuiltin:
|
||||
let L = last-start+1
|
||||
if L > 0:
|
||||
let found = c_memchr(s[start].unsafeAddr, sub, cast[csize_t](L))
|
||||
let length = last-start+1
|
||||
if length > 0:
|
||||
let found = c_memchr(s[start].unsafeAddr, sub, cast[csize_t](length))
|
||||
if not found.isNil:
|
||||
return cast[ByteAddress](found) -% cast[ByteAddress](s.cstring)
|
||||
else:
|
||||
for i in int(start)..last:
|
||||
if sub == s[i]: return i
|
||||
return -1
|
||||
findImpl()
|
||||
|
||||
func find*(s: string, chars: set[char], start: Natural = 0, last = 0): int {.
|
||||
func find*(s: string, chars: set[char], start: Natural = 0, last = -1): int {.
|
||||
rtl, extern: "nsuFindCharSet".} =
|
||||
## Searches for `chars` in `s` inside range `start..last` (both ends included).
|
||||
## If `last` is unspecified, it defaults to `s.high` (the last element).
|
||||
## If `last` is unspecified or negative, it defaults to `s.high` (the last element).
|
||||
##
|
||||
## If `s` contains none of the characters in `chars`, -1 is returned.
|
||||
## Otherwise the index returned is relative to `s[0]`, not `start`.
|
||||
@@ -1905,15 +1920,16 @@ func find*(s: string, chars: set[char], start: Natural = 0, last = 0): int {.
|
||||
## See also:
|
||||
## * `rfind func<#rfind,string,set[char],Natural,int>`_
|
||||
## * `multiReplace func<#multiReplace,string,varargs[]>`_
|
||||
let last = if last == 0: s.high else: last
|
||||
result = -1
|
||||
let last = if last < 0: s.high else: last
|
||||
for i in int(start)..last:
|
||||
if s[i] in chars: return i
|
||||
return -1
|
||||
if s[i] in chars:
|
||||
return i
|
||||
|
||||
func find*(s, sub: string, start: Natural = 0, last = 0): int {.rtl,
|
||||
func find*(s, sub: string, start: Natural = 0, last = -1): int {.rtl,
|
||||
extern: "nsuFindStr".} =
|
||||
## Searches for `sub` in `s` inside range `start..last` (both ends included).
|
||||
## If `last` is unspecified, it defaults to `s.high` (the last element).
|
||||
## If `last` is unspecified or negative, it defaults to `s.high` (the last element).
|
||||
##
|
||||
## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
|
||||
## Otherwise the index returned is relative to `s[0]`, not `start`.
|
||||
@@ -1925,28 +1941,23 @@ func find*(s, sub: string, start: Natural = 0, last = 0): int {.rtl,
|
||||
if sub.len > s.len - start: return -1
|
||||
if sub.len == 1: return find(s, sub[0], start, last)
|
||||
|
||||
template useSkipTable {.dirty.} =
|
||||
var a {.noinit.}: SkipTable
|
||||
initSkipTable(a, sub)
|
||||
result = find(a, s, sub, start, last)
|
||||
template useSkipTable =
|
||||
result = find(initSkipTable(sub), s, sub, start, last)
|
||||
|
||||
when not hasCStringBuiltin:
|
||||
when nimvm:
|
||||
useSkipTable()
|
||||
else:
|
||||
when nimvm:
|
||||
useSkipTable()
|
||||
else:
|
||||
when hasCStringBuiltin:
|
||||
if last == 0 and s.len > start:
|
||||
let found = c_strstr(s[start].unsafeAddr, sub)
|
||||
if not found.isNil:
|
||||
result = cast[ByteAddress](found) -% cast[ByteAddress](s.cstring)
|
||||
when hasCStringBuiltin:
|
||||
if last < 0 and start < s.len:
|
||||
let found = c_strstr(s[start].unsafeAddr, sub)
|
||||
result = if not found.isNil:
|
||||
cast[ByteAddress](found) -% cast[ByteAddress](s.cstring)
|
||||
else:
|
||||
result = -1
|
||||
else:
|
||||
useSkipTable()
|
||||
-1
|
||||
else:
|
||||
useSkipTable()
|
||||
else:
|
||||
useSkipTable()
|
||||
|
||||
func rfind*(s: string, sub: char, start: Natural = 0, last = -1): int {.rtl,
|
||||
extern: "nsuRFindChar".} =
|
||||
@@ -2121,8 +2132,7 @@ func replace*(s, sub: string, by = ""): string {.rtl,
|
||||
# copy the rest:
|
||||
add result, substr(s, i)
|
||||
else:
|
||||
var a {.noinit.}: SkipTable
|
||||
initSkipTable(a, sub)
|
||||
var a = initSkipTable(sub)
|
||||
let last = s.high
|
||||
var i = 0
|
||||
while true:
|
||||
@@ -2162,9 +2172,8 @@ func replaceWord*(s, sub: string, by = ""): string {.rtl,
|
||||
## replaced.
|
||||
if sub.len == 0: return s
|
||||
const wordChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\128'..'\255'}
|
||||
var a {.noinit.}: SkipTable
|
||||
result = ""
|
||||
initSkipTable(a, sub)
|
||||
var a = initSkipTable(sub)
|
||||
var i = 0
|
||||
let last = s.high
|
||||
let sublen = sub.len
|
||||
|
||||
@@ -232,18 +232,22 @@ template main() =
|
||||
{.pop.}
|
||||
|
||||
block: # find
|
||||
doAssert "0123456789ABCDEFGH".find('A') == 10
|
||||
doAssert "0123456789ABCDEFGH".find('A', 5) == 10
|
||||
doAssert "0123456789ABCDEFGH".find('A', 5, 10) == 10
|
||||
doAssert "0123456789ABCDEFGH".find('A', 5, 9) == -1
|
||||
doAssert "0123456789ABCDEFGH".find("A") == 10
|
||||
doAssert "0123456789ABCDEFGH".find("A", 5) == 10
|
||||
doAssert "0123456789ABCDEFGH".find("A", 5, 10) == 10
|
||||
doAssert "0123456789ABCDEFGH".find("A", 5, 9) == -1
|
||||
doAssert "0123456789ABCDEFGH".find({'A'..'C'}) == 10
|
||||
doAssert "0123456789ABCDEFGH".find({'A'..'C'}, 5) == 10
|
||||
doAssert "0123456789ABCDEFGH".find({'A'..'C'}, 5, 10) == 10
|
||||
doAssert "0123456789ABCDEFGH".find({'A'..'C'}, 5, 9) == -1
|
||||
const haystack: string = "0123456789ABCDEFGH"
|
||||
doAssert haystack.find('A') == 10
|
||||
doAssert haystack.find('A', 5) == 10
|
||||
doAssert haystack.find('A', 5, 10) == 10
|
||||
doAssert haystack.find('A', 5, 9) == -1
|
||||
doAssert haystack.find("A") == 10
|
||||
doAssert haystack.find("A", 5) == 10
|
||||
doAssert haystack.find("A", 5, 10) == 10
|
||||
doAssert haystack.find("A", 5, 9) == -1
|
||||
doAssert haystack.find({'A'..'C'}) == 10
|
||||
doAssert haystack.find({'A'..'C'}, 5) == 10
|
||||
doAssert haystack.find({'A'..'C'}, 5, 10) == 10
|
||||
doAssert haystack.find({'A'..'C'}, 5, 9) == -1
|
||||
doAssert haystack.find('A', 0, 0) == -1 # search limited to the first char
|
||||
doAssert haystack.find('A', 5, 0) == -1 # last < start
|
||||
doAssert haystack.find('A', 5, 4) == -1 # last < start
|
||||
|
||||
block:
|
||||
const haystack: string = "ABCABABABABCAB"
|
||||
@@ -290,17 +294,17 @@ template main() =
|
||||
|
||||
# when last <= start, searching for non-empty string
|
||||
block:
|
||||
let last: int = -1
|
||||
doAssert "abcd".find("ab", start=0, last=last) == -1
|
||||
doAssert "abcd".find("ab", start=1, last=last) == -1
|
||||
doAssert "abcd".find("bc", start=1, last=last) == -1
|
||||
doAssert "abcd".find("bc", start=2, last=last) == -1
|
||||
block:
|
||||
let last: int = 0
|
||||
let last: int = -1 # searching through whole line
|
||||
doAssert "abcd".find("ab", start=0, last=last) == 0
|
||||
doAssert "abcd".find("ab", start=1, last=last) == -1
|
||||
doAssert "abcd".find("bc", start=1, last=last) == 1
|
||||
doAssert "abcd".find("bc", start=2, last=last) == -1
|
||||
block:
|
||||
let last: int = 0
|
||||
doAssert "abcd".find("ab", start=0, last=last) == -1
|
||||
doAssert "abcd".find("ab", start=1, last=last) == -1
|
||||
doAssert "abcd".find("bc", start=1, last=last) == -1
|
||||
doAssert "abcd".find("bc", start=2, last=last) == -1
|
||||
block:
|
||||
let last: int = 1
|
||||
doAssert "abcd".find("ab", start=0, last=last) == 0
|
||||
|
||||
Reference in New Issue
Block a user