From ce67056f80b5ee8244c42f8d7a74445caf2f6902 Mon Sep 17 00:00:00 2001 From: Zoom Date: Wed, 26 Mar 2025 00:06:40 +0400 Subject: [PATCH] stdlib: substr uses copymem if available, improve docs (#24792) - `system.substr` now uses `copymem` when available, introducing a small template for nimvm detection (#12517 #12518) - Docs are updated to clarify behaviour on out-of-bounds input - Runnable examples cover more edge cases and do not repeat between overloads - Docs now explain the difference between overloads What bothers me is that the `substr*(a: openArray[char]): string =` which was added by @beef331 is practically an implementation of #14810, which is just a conversion from `openArray` to `string` but somehow it ended up being a `substr` overload, even though its behaviour is totally different, _the "substringing" is performed by a previous step_ (conversion to openArray) and the bounds are not checked. I'm not sure it's that great for overloads to differ in subtle ways so much. What are the cases that `substr` covers now, that prohibit renaming it to `toString` (or something like that)? (cherry picked from commit b82d7e8ba1bf15a24561c97198f8741ffe9f454c) --- changelog.md | 4 ++ lib/system.nim | 112 +++++++++++++++++++++++++++++++++++-------------- 2 files changed, 85 insertions(+), 31 deletions(-) diff --git a/changelog.md b/changelog.md index ad5ab5f0e3..f3cc3fee4d 100644 --- a/changelog.md +++ b/changelog.md @@ -22,6 +22,7 @@ errors. ## Standard library additions and changes [//]: # "Additions:" + - `setutils.symmetricDifference` along with its operator version `` setutils.`-+-` `` and in-place version `setutils.toggle` have been added to more efficiently calculate the symmetric difference of bitsets. @@ -29,8 +30,11 @@ errors. Useful for string sanitation. Follows existing multiReplace semantics. [//]: # "Changes:" + - `std/math` The `^` symbol now supports floating-point as exponent in addition to the Natural type. +- `system.substr` implementation now uses `copymem` (wrapped C `memcpy`) for copying data, if available at compilation. + ## Language changes - An experimental option `--experimental:typeBoundOps` has been added that diff --git a/lib/system.nim b/lib/system.nim index 2b34b94925..ca5e2df89d 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -2767,41 +2767,89 @@ template once*(body: untyped): untyped = {.pop.} # warning[GcMem]: off, warning[Uninit]: off -proc substr*(s: openArray[char]): string = - ## Copies a slice of `s` into a new string and returns this new - ## string. - runnableExamples: - let a = "abcdefgh" - assert a.substr(2, 5) == "cdef" - assert a.substr(2) == "cdefgh" - assert a.substr(5, 99) == "fgh" - result = newString(s.len) - for i, ch in s: - result[i] = ch +template NotJSnotVMnotNims(): static bool = # hack, see: #12517 #12518 + when nimvm: + false + else: + notJSnotNims -proc substr*(s: string, first, last: int): string = # A bug with `magic: Slice` requires this to exist this way - ## Copies a slice of `s` into a new string and returns this new - ## string. +proc substr*(a: openArray[char]): string = + ## Returns a new string, copying contents of `a`. ## - ## The bounds `first` and `last` denote the indices of - ## the first and last characters that shall be copied. If `last` - ## is omitted, it is treated as `high(s)`. If `last >= s.len`, `s.len` - ## is used instead: This means `substr` can also be used to `cut`:idx: - ## or `limit`:idx: a string's length. + ## .. warning:: As opposed to other `substr` overloads, no additional input + ## validation and clamping is performed! + ## + ## This proc does not prevent raising an `IndexDefect` when `a` is being + ## passed using a `toOpenArray` call with out-of-bounds indexes: + ## * `doAssertRaises(IndexDefect): discard "abc".toOpenArray(-9, 9).substr()` + ## + ## If clamping is required, consider using + ## `substr(s: string; first, last: int) <#substr,string,int,int>`_: + ## * `doAssert "abc".substr(-9, 9) == "abc"` + runnableExamples: + let a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] + assert a.substr() == "abcdefgh" + assert a.toOpenArray(2, 5).substr() == "cdef" + assert a.toOpenArray(2, high(a)).substr() == "cdefgh" # From index 2 to `high(a)` + doAssertRaises(IndexDefect): discard a.toOpenArray(5, 99).substr() + {.cast(noSideEffect).}: + result = newStringUninit(a.len) + when NotJSnotVMnotNims: + if a.len > 0: + copyMem(result[0].addr, a[0].unsafeAddr, a.len) + else: + for i, ch in a: + result[i] = ch + +proc substr*(s: string; first, last: int): string = # A bug with `magic: Slice` requires this to exist this way + ## Returns a new string containing a substring (slice) of `s`, + ## copying characters from index `first` to index `last` inclusive. + ## + ## Index values are validated and capped: + ## - Negative `first` is clamped to 0 + ## - If `last >= s.len`, it is clamped to `high(s)` + ## - If `last < first`, returns an empty string + ## This means `substr` can also be used to `cut`:idx: or `limit`:idx: + ## a string's length. + ## + ## .. note:: + ## If index values are ensured to be in-bounds, for performance + ## critical cases consider using a non-clamping overload + ## `substr(a: openArray[char]) <#substr,openArray[char]>`_ runnableExamples: let a = "abcdefgh" - assert a.substr(2, 5) == "cdef" - assert a.substr(2) == "cdefgh" - assert a.substr(5, 99) == "fgh" - - let first = max(first, 0) - let L = max(min(last, high(s)) - first + 1, 0) - result = newString(L) - for i in 0 .. L-1: - result[i] = s[i+first] + assert a.substr(2, 5) == "cdef" # Normal substring + # Invalid indexes + assert a.substr(5, 99) == "fgh" # From index 5 to `high(a)` + assert a.substr(42, 99) == "" # `first` out of bounds + assert a.substr(100, 5) == "" # `first > last` + assert a.substr(-1, 2) == "abc" # Negative `first` clamped to 0 + let + first = max(first, 0) + last = min(last, high(s)) + L = max(last - first + 1, 0) + {.cast(noSideEffect).}: + result = newStringUninit(L) + when NotJSnotVMnotNims: + if L > 0: + copyMem(result[0].addr, s[first].unsafeAddr, L) + else: + for i in 0..`_ overload that returns + ## a substring from `first` to the end of the string. + ## + ## `first` value is validated and capped: + ## - `first >= s.len` returns an empty string + ## - Negative `first` is clamped to 0. + runnableExamples: + let a = "abcdefgh" + assert a.substr(2) == "cdefgh" # From index 2 to string end (`high(a)`) + assert a.substr(100) == "" # `first` out of bounds + assert a.substr(-1) == "abcdefgh" # Negative `first` clamped to 0 + substr(s, first, high(s)) when defined(nimconfig): include "system/nimscript" @@ -2816,8 +2864,10 @@ when not defined(js): proc toOpenArray*[T](x: seq[T]; first, last: int): openArray[T] {. magic: "Slice".} - ## Allows passing the slice of `x` from the element at `first` to the element - ## at `last` to `openArray[T]` parameters without copying it. + ## Returns a non-owning slice (a `view`:idx:) of `x` from the element at + ## index `first` to `last` inclusive. Allows passing slices without copying, + ## as opposed to using the slice operator + ## `\`[]\` <#[],openArray[T],HSlice[U: Ordinal,V: Ordinal]>`_. ## ## Example: ## ```nim