stdlib: substr uses copymem if available, improve docs (#24792)

- `system.substr` now uses `copymem` when available, introducing a small
template for nimvm detection (#12517 #12518)
- Docs are updated to clarify behaviour on out-of-bounds input
- Runnable examples cover more edge cases and do not repeat between
overloads
- Docs now explain the difference between overloads

What bothers me is that `substr*(a: openArray[char]): string`, which was
added by @beef331, is practically an implementation of #14810: it is just
a conversion from `openArray` to `string`, yet it somehow ended up being a
`substr` overload. Its behaviour is totally different from the other
overloads: _the "substringing" is performed by a previous step_ (the
conversion to `openArray`), and the bounds are not checked. I'm not sure
it's a good idea for overloads to differ this much in such subtle ways.

What are the cases that `substr` covers now, that prohibit renaming it
to `toString` (or something like that)?

(cherry picked from commit b82d7e8ba1)
This commit is contained in:
Zoom
2025-03-26 00:06:40 +04:00
committed by narimiran
parent 3a8b7d987b
commit ce67056f80
2 changed files with 85 additions and 31 deletions

View File

@@ -22,6 +22,7 @@ errors.
## Standard library additions and changes
[//]: # "Additions:"
- `setutils.symmetricDifference` along with its operator version
`` setutils.`-+-` `` and in-place version `setutils.toggle` have been added
to more efficiently calculate the symmetric difference of bitsets.
@@ -29,8 +30,11 @@ errors.
Useful for string sanitation. Follows existing multiReplace semantics.
[//]: # "Changes:"
- `std/math` The `^` symbol now supports floating-point as exponent in addition to the Natural type.
- `system.substr` implementation now uses `copyMem` (wrapped C `memcpy`) for copying data when it is available at compile time.
## Language changes
- An experimental option `--experimental:typeBoundOps` has been added that

View File

@@ -2767,41 +2767,89 @@ template once*(body: untyped): untyped =
{.pop.} # warning[GcMem]: off, warning[Uninit]: off
proc substr*(s: openArray[char]): string =
## Copies a slice of `s` into a new string and returns this new
## string.
runnableExamples:
let a = "abcdefgh"
assert a.substr(2, 5) == "cdef"
assert a.substr(2) == "cdefgh"
assert a.substr(5, 99) == "fgh"
result = newString(s.len)
for i, ch in s:
result[i] = ch
template NotJSnotVMnotNims(): static bool = # hack, see: #12517 #12518
when nimvm:
false
else:
notJSnotNims
proc substr*(s: string, first, last: int): string = # A bug with `magic: Slice` requires this to exist this way
## Copies a slice of `s` into a new string and returns this new
## string.
proc substr*(a: openArray[char]): string =
## Returns a new string, copying contents of `a`.
##
## The bounds `first` and `last` denote the indices of
## the first and last characters that shall be copied. If `last`
## is omitted, it is treated as `high(s)`. If `last >= s.len`, `s.len`
## is used instead: This means `substr` can also be used to `cut`:idx:
## or `limit`:idx: a string's length.
## .. warning:: As opposed to other `substr` overloads, no additional input
## validation or clamping is performed!
##
## This proc does not prevent raising an `IndexDefect` when `a` is being
## passed using a `toOpenArray` call with out-of-bounds indexes:
## * `doAssertRaises(IndexDefect): discard "abc".toOpenArray(-9, 9).substr()`
##
## If clamping is required, consider using
## `substr(s: string; first, last: int) <#substr,string,int,int>`_:
## * `doAssert "abc".substr(-9, 9) == "abc"`
runnableExamples:
let a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
assert a.substr() == "abcdefgh"
assert a.toOpenArray(2, 5).substr() == "cdef"
assert a.toOpenArray(2, high(a)).substr() == "cdefgh" # From index 2 to `high(a)`
doAssertRaises(IndexDefect): discard a.toOpenArray(5, 99).substr()
{.cast(noSideEffect).}:
result = newStringUninit(a.len)
when NotJSnotVMnotNims:
if a.len > 0:
copyMem(result[0].addr, a[0].unsafeAddr, a.len)
else:
for i, ch in a:
result[i] = ch
proc substr*(s: string; first, last: int): string = # A bug with `magic: Slice` requires this to exist this way
## Returns a new string containing a substring (slice) of `s`,
## copying characters from index `first` to index `last` inclusive.
##
## Index values are validated and capped:
##
## - Negative `first` is clamped to 0
## - If `last >= s.len`, it is clamped to `high(s)`
## - If `last < first`, returns an empty string
##
## This means `substr` can also be used to `cut`:idx: or `limit`:idx:
## a string's length.
##
## .. note::
## If the index values are guaranteed to be in bounds, consider using the
## non-clamping overload
## `substr(a: openArray[char]) <#substr,openArray[char]>`_
## in performance-critical code.
runnableExamples:
let a = "abcdefgh"
assert a.substr(2, 5) == "cdef"
assert a.substr(2) == "cdefgh"
assert a.substr(5, 99) == "fgh"
let first = max(first, 0)
let L = max(min(last, high(s)) - first + 1, 0)
result = newString(L)
for i in 0 .. L-1:
result[i] = s[i+first]
assert a.substr(2, 5) == "cdef" # Normal substring
# Invalid indexes
assert a.substr(5, 99) == "fgh" # From index 5 to `high(a)`
assert a.substr(42, 99) == "" # `first` out of bounds
assert a.substr(100, 5) == "" # `first > last`
assert a.substr(-1, 2) == "abc" # Negative `first` clamped to 0
let
first = max(first, 0)
last = min(last, high(s))
L = max(last - first + 1, 0)
{.cast(noSideEffect).}:
result = newStringUninit(L)
when NotJSnotVMnotNims:
if L > 0:
copyMem(result[0].addr, s[first].unsafeAddr, L)
else:
for i in 0..<L:
result[i] = s[i + first]
proc substr*(s: string, first = 0): string =
result = substr(s, first, high(s))
## Convenience `substr <#substr,string,int,int>`_ overload that returns
## a substring from `first` to the end of the string.
##
## `first` value is validated and capped:
## - `first >= s.len` returns an empty string
## - Negative `first` is clamped to 0.
runnableExamples:
let a = "abcdefgh"
assert a.substr(2) == "cdefgh" # From index 2 to string end (`high(a)`)
assert a.substr(100) == "" # `first` out of bounds
assert a.substr(-1) == "abcdefgh" # Negative `first` clamped to 0
substr(s, first, high(s))
when defined(nimconfig):
include "system/nimscript"
@@ -2816,8 +2864,10 @@ when not defined(js):
proc toOpenArray*[T](x: seq[T]; first, last: int): openArray[T] {.
magic: "Slice".}
## Allows passing the slice of `x` from the element at `first` to the element
## at `last` to `openArray[T]` parameters without copying it.
## Returns a non-owning slice (a `view`:idx:) of `x` from the element at
## index `first` to `last` inclusive. Allows passing slices without copying,
## as opposed to using the slice operator
## `\`[]\` <#[],openArray[T],HSlice[U: Ordinal,V: Ordinal]>`_.
##
## Example:
## ```nim