stdlib: substr uses copymem if available, improve docs (#24792)

- `system.substr` now uses `copymem` when available, introducing a small template for nimvm detection (#12517 #12518) - Docs are updated to clarify behaviour on out-of-bounds input - Runnable examples cover more edge cases and do not repeat between overloads - Docs now explain the difference between overloads What bothers me is that the `substr*(a: openArray[char]): string =` which was added by @beef331 is practically an implementation of #14810, which is just a conversion from `openArray` to `string` but somehow it ended up being a `substr` overload, even though its behaviour is totally different, _the "substringing" is performed by a previous step_ (conversion to openArray) and the bounds are not checked. I'm not sure it's that great for overloads to differ in subtle ways so much. What are the cases that `substr` covers now, that prohibit renaming it to `toString` (or something like that)? (cherry picked from commit b82d7e8ba1)
2026-01-06 13:07:48 +00:00 · 2025-03-26 00:06:40 +04:00
parent 3a8b7d987b
commit ce67056f80
2 changed files with 85 additions and 31 deletions
--- a/changelog.md
+++ b/changelog.md
@@ -22,6 +22,7 @@ errors.
 ## Standard library additions and changes

 [//]: # "Additions:"
+
 - `setutils.symmetricDifference` along with its operator version
  `` setutils.`-+-` `` and in-place version `setutils.toggle` have been added
  to more efficiently calculate the symmetric difference of bitsets.
@@ -29,8 +30,11 @@ errors.
 	Useful for string sanitation. Follows existing multiReplace semantics.

 [//]: # "Changes:"
+
 - `std/math` The `^` symbol now supports floating-point as exponent in addition to the Natural type.

+- `system.substr` implementation now uses `copymem` (wrapped C `memcpy`) for copying data, if available at compilation.
+
 ## Language changes

 - An experimental option `--experimental:typeBoundOps` has been added that
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -2767,41 +2767,89 @@ template once*(body: untyped): untyped =

 {.pop.} # warning[GcMem]: off, warning[Uninit]: off

-proc substr*(s: openArray[char]): string =
-  ## Copies a slice of `s` into a new string and returns this new
-  ## string.
-  runnableExamples:
-    let a = "abcdefgh"
-    assert a.substr(2, 5) == "cdef"
-    assert a.substr(2) == "cdefgh"
-    assert a.substr(5, 99) == "fgh"
-  result = newString(s.len)
-  for i, ch in s:
-    result[i] = ch
+template NotJSnotVMnotNims(): static bool = # hack, see: #12517 #12518
+  when nimvm:
+    false
+  else:
+    notJSnotNims

-proc substr*(s: string, first, last: int): string = # A bug with `magic: Slice` requires this to exist this way
-  ## Copies a slice of `s` into a new string and returns this new
-  ## string.
+proc substr*(a: openArray[char]): string =
+  ## Returns a new string, copying contents of `a`.
  ##
-  ## The bounds `first` and `last` denote the indices of
-  ## the first and last characters that shall be copied. If `last`
-  ## is omitted, it is treated as `high(s)`. If `last >= s.len`, `s.len`
-  ## is used instead: This means `substr` can also be used to `cut`:idx:
-  ## or `limit`:idx: a string's length.
+  ## .. warning:: As opposed to other `substr` overloads, no additional input
+  ##    validation and clamping is performed!
+  ##
+  ## This proc does not prevent raising an `IndexDefect` when `a` is being
+  ## passed using a `toOpenArray` call with out-of-bounds indexes:
+  ## * `doAssertRaises(IndexDefect): discard "abc".toOpenArray(-9, 9).substr()`
+  ##
+  ## If clamping is required, consider using
+  ## `substr(s: string; first, last: int) <#substr,string,int,int>`_:
+  ## * `doAssert "abc".substr(-9, 9) == "abc"`
+  runnableExamples:
+    let a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
+    assert a.substr() == "abcdefgh"
+    assert a.toOpenArray(2, 5).substr() == "cdef"
+    assert a.toOpenArray(2, high(a)).substr() == "cdefgh"  # From index 2 to `high(a)`
+    doAssertRaises(IndexDefect): discard a.toOpenArray(5, 99).substr()
+  {.cast(noSideEffect).}:
+    result = newStringUninit(a.len)
+  when NotJSnotVMnotNims:
+    if a.len > 0:
+      copyMem(result[0].addr, a[0].unsafeAddr, a.len)
+  else:
+    for i, ch in a:
+      result[i] = ch
+
+proc substr*(s: string; first, last: int): string = # A bug with `magic: Slice` requires this to exist this way
+  ## Returns a new string containing a substring (slice) of `s`,
+  ## copying characters from index `first` to index `last` inclusive.
+  ##
+  ## Index values are validated and capped:
+  ## - Negative `first` is clamped to 0
+  ## - If `last >= s.len`, it is clamped to `high(s)`
+  ## - If `last < first`, returns an empty string
+  ## This means `substr` can also be used to `cut`:idx: or `limit`:idx:
+  ## a string's length.
+  ##
+  ## .. note::
+  ##   If index values are ensured to be in-bounds, for performance
+  ##   critical cases consider using a non-clamping overload
+  ##   `substr(a: openArray[char]) <#substr,openArray[char]>`_
  runnableExamples:
    let a = "abcdefgh"
-    assert a.substr(2, 5) == "cdef"
-    assert a.substr(2) == "cdefgh"
-    assert a.substr(5, 99) == "fgh"
-
-  let first = max(first, 0)
-  let L = max(min(last, high(s)) - first + 1, 0)
-  result = newString(L)
-  for i in 0 .. L-1:
-    result[i] = s[i+first]
+    assert a.substr(2, 5) == "cdef" # Normal substring
+    # Invalid indexes
+    assert a.substr(5, 99) == "fgh" # From index 5 to `high(a)`
+    assert a.substr(42, 99) == ""   # `first` out of bounds
+    assert a.substr(100, 5) == ""   # `first > last`
+    assert a.substr(-1, 2) == "abc" # Negative `first` clamped to 0
+  let
+    first = max(first, 0)
+    last = min(last, high(s))
+    L = max(last - first + 1, 0)
+  {.cast(noSideEffect).}:
+    result = newStringUninit(L)
+  when NotJSnotVMnotNims:
+    if L > 0:
+      copyMem(result[0].addr, s[first].unsafeAddr, L)
+  else:
+    for i in 0..<L:
+      result[i] = s[i + first]

 proc substr*(s: string, first = 0): string =
-  result = substr(s, first, high(s))
+  ## Convenience `substr <#substr,string,int,int>`_ overload that returns
+  ## a substring from `first` to the end of the string.
+  ##
+  ## `first` value is validated and capped:
+  ## - `first >= s.len` returns an empty string
+  ## - Negative `first` is clamped to 0.
+  runnableExamples:
+    let a = "abcdefgh"
+    assert a.substr(2) == "cdefgh"    # From index 2 to string end (`high(a)`)
+    assert a.substr(100) == ""        # `first` out of bounds
+    assert a.substr(-1) == "abcdefgh" # Negative `first` clamped to 0
+  substr(s, first, high(s))

 when defined(nimconfig):
  include "system/nimscript"
@@ -2816,8 +2864,10 @@ when not defined(js):

 proc toOpenArray*[T](x: seq[T]; first, last: int): openArray[T] {.
  magic: "Slice".}
-  ## Allows passing the slice of `x` from the element at `first` to the element
-  ## at `last` to `openArray[T]` parameters without copying it.
+  ## Returns a non-owning slice (a `view`:idx:) of `x` from the element at
+  ## index `first` to `last` inclusive. Allows passing slices without copying,
+  ## as opposed to using the slice operator
+  ## `\`[]\` <#[],openArray[T],HSlice[U: Ordinal,V: Ordinal]>`_.
  ##
  ## Example:
  ##   ```nim