mirror of
https://github.com/nim-lang/Nim.git
synced 2026-01-06 13:07:48 +00:00
stdlib: substr uses copymem if available, improve docs (#24792)
- `system.substr` now uses `copymem` when available, introducing a small
template for nimvm detection (#12517 #12518)
- Docs are updated to clarify behaviour on out-of-bounds input
- Runnable examples cover more edge cases and do not repeat between
overloads
- Docs now explain the difference between overloads
What bothers me is that the `substr*(a: openArray[char]): string =`
which was added by @beef331 is practically an implementation of #14810,
which is just a conversion from `openArray` to `string` but somehow it
ended up being a `substr` overload, even though its behaviour is totally
different, _the "substringing" is performed by a previous step_
(conversion to openArray) and the bounds are not checked. I'm not sure
it's that great for overloads to differ in subtle ways so much.
What are the cases that `substr` covers now, that prohibit renaming it
to `toString` (or something like that)?
(cherry picked from commit b82d7e8ba1)
This commit is contained in:
@@ -22,6 +22,7 @@ errors.
|
||||
## Standard library additions and changes
|
||||
|
||||
[//]: # "Additions:"
|
||||
|
||||
- `setutils.symmetricDifference` along with its operator version
|
||||
`` setutils.`-+-` `` and in-place version `setutils.toggle` have been added
|
||||
to more efficiently calculate the symmetric difference of bitsets.
|
||||
@@ -29,8 +30,11 @@ errors.
|
||||
Useful for string sanitation. Follows existing multiReplace semantics.
|
||||
|
||||
[//]: # "Changes:"
|
||||
|
||||
- `std/math` The `^` symbol now supports floating-point as exponent in addition to the Natural type.
|
||||
|
||||
- `system.substr` implementation now uses `copymem` (wrapped C `memcpy`) for copying data, if available at compilation.
|
||||
|
||||
## Language changes
|
||||
|
||||
- An experimental option `--experimental:typeBoundOps` has been added that
|
||||
|
||||
112
lib/system.nim
112
lib/system.nim
@@ -2767,41 +2767,89 @@ template once*(body: untyped): untyped =
|
||||
|
||||
{.pop.} # warning[GcMem]: off, warning[Uninit]: off
|
||||
|
||||
proc substr*(s: openArray[char]): string =
|
||||
## Copies a slice of `s` into a new string and returns this new
|
||||
## string.
|
||||
runnableExamples:
|
||||
let a = "abcdefgh"
|
||||
assert a.substr(2, 5) == "cdef"
|
||||
assert a.substr(2) == "cdefgh"
|
||||
assert a.substr(5, 99) == "fgh"
|
||||
result = newString(s.len)
|
||||
for i, ch in s:
|
||||
result[i] = ch
|
||||
template NotJSnotVMnotNims(): static bool = # hack, see: #12517 #12518
|
||||
when nimvm:
|
||||
false
|
||||
else:
|
||||
notJSnotNims
|
||||
|
||||
proc substr*(s: string, first, last: int): string = # A bug with `magic: Slice` requires this to exist this way
|
||||
## Copies a slice of `s` into a new string and returns this new
|
||||
## string.
|
||||
proc substr*(a: openArray[char]): string =
|
||||
## Returns a new string, copying contents of `a`.
|
||||
##
|
||||
## The bounds `first` and `last` denote the indices of
|
||||
## the first and last characters that shall be copied. If `last`
|
||||
## is omitted, it is treated as `high(s)`. If `last >= s.len`, `s.len`
|
||||
## is used instead: This means `substr` can also be used to `cut`:idx:
|
||||
## or `limit`:idx: a string's length.
|
||||
## .. warning:: As opposed to other `substr` overloads, no additional input
|
||||
## validation and clamping is performed!
|
||||
##
|
||||
## This proc does not prevent raising an `IndexDefect` when `a` is being
|
||||
## passed using a `toOpenArray` call with out-of-bounds indexes:
|
||||
## * `doAssertRaises(IndexDefect): discard "abc".toOpenArray(-9, 9).substr()`
|
||||
##
|
||||
## If clamping is required, consider using
|
||||
## `substr(s: string; first, last: int) <#substr,string,int,int>`_:
|
||||
## * `doAssert "abc".substr(-9, 9) == "abc"`
|
||||
runnableExamples:
|
||||
let a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
|
||||
assert a.substr() == "abcdefgh"
|
||||
assert a.toOpenArray(2, 5).substr() == "cdef"
|
||||
assert a.toOpenArray(2, high(a)).substr() == "cdefgh" # From index 2 to `high(a)`
|
||||
doAssertRaises(IndexDefect): discard a.toOpenArray(5, 99).substr()
|
||||
{.cast(noSideEffect).}:
|
||||
result = newStringUninit(a.len)
|
||||
when NotJSnotVMnotNims:
|
||||
if a.len > 0:
|
||||
copyMem(result[0].addr, a[0].unsafeAddr, a.len)
|
||||
else:
|
||||
for i, ch in a:
|
||||
result[i] = ch
|
||||
|
||||
proc substr*(s: string; first, last: int): string = # A bug with `magic: Slice` requires this to exist this way
|
||||
## Returns a new string containing a substring (slice) of `s`,
|
||||
## copying characters from index `first` to index `last` inclusive.
|
||||
##
|
||||
## Index values are validated and capped:
|
||||
## - Negative `first` is clamped to 0
|
||||
## - If `last >= s.len`, it is clamped to `high(s)`
|
||||
## - If `last < first`, returns an empty string
|
||||
## This means `substr` can also be used to `cut`:idx: or `limit`:idx:
|
||||
## a string's length.
|
||||
##
|
||||
## .. note::
|
||||
## If index values are ensured to be in-bounds, for performance
|
||||
## critical cases consider using a non-clamping overload
|
||||
## `substr(a: openArray[char]) <#substr,openArray[char]>`_
|
||||
runnableExamples:
|
||||
let a = "abcdefgh"
|
||||
assert a.substr(2, 5) == "cdef"
|
||||
assert a.substr(2) == "cdefgh"
|
||||
assert a.substr(5, 99) == "fgh"
|
||||
|
||||
let first = max(first, 0)
|
||||
let L = max(min(last, high(s)) - first + 1, 0)
|
||||
result = newString(L)
|
||||
for i in 0 .. L-1:
|
||||
result[i] = s[i+first]
|
||||
assert a.substr(2, 5) == "cdef" # Normal substring
|
||||
# Invalid indexes
|
||||
assert a.substr(5, 99) == "fgh" # From index 5 to `high(a)`
|
||||
assert a.substr(42, 99) == "" # `first` out of bounds
|
||||
assert a.substr(100, 5) == "" # `first > last`
|
||||
assert a.substr(-1, 2) == "abc" # Negative `first` clamped to 0
|
||||
let
|
||||
first = max(first, 0)
|
||||
last = min(last, high(s))
|
||||
L = max(last - first + 1, 0)
|
||||
{.cast(noSideEffect).}:
|
||||
result = newStringUninit(L)
|
||||
when NotJSnotVMnotNims:
|
||||
if L > 0:
|
||||
copyMem(result[0].addr, s[first].unsafeAddr, L)
|
||||
else:
|
||||
for i in 0..<L:
|
||||
result[i] = s[i + first]
|
||||
|
||||
proc substr*(s: string, first = 0): string =
|
||||
result = substr(s, first, high(s))
|
||||
## Convenience `substr <#substr,string,int,int>`_ overload that returns
|
||||
## a substring from `first` to the end of the string.
|
||||
##
|
||||
## `first` value is validated and capped:
|
||||
## - `first >= s.len` returns an empty string
|
||||
## - Negative `first` is clamped to 0.
|
||||
runnableExamples:
|
||||
let a = "abcdefgh"
|
||||
assert a.substr(2) == "cdefgh" # From index 2 to string end (`high(a)`)
|
||||
assert a.substr(100) == "" # `first` out of bounds
|
||||
assert a.substr(-1) == "abcdefgh" # Negative `first` clamped to 0
|
||||
substr(s, first, high(s))
|
||||
|
||||
when defined(nimconfig):
|
||||
include "system/nimscript"
|
||||
@@ -2816,8 +2864,10 @@ when not defined(js):
|
||||
|
||||
proc toOpenArray*[T](x: seq[T]; first, last: int): openArray[T] {.
|
||||
magic: "Slice".}
|
||||
## Allows passing the slice of `x` from the element at `first` to the element
|
||||
## at `last` to `openArray[T]` parameters without copying it.
|
||||
## Returns a non-owning slice (a `view`:idx:) of `x` from the element at
|
||||
## index `first` to `last` inclusive. Allows passing slices without copying,
|
||||
## as opposed to using the slice operator
|
||||
## `\`[]\` <#[],openArray[T],HSlice[U: Ordinal,V: Ordinal]>`_.
|
||||
##
|
||||
## Example:
|
||||
## ```nim
|
||||
|
||||
Reference in New Issue
Block a user