Feat: stdlib: adds system.string.setLenUninit (#24836)

Adds `system.setLenUninit` for the `string` type. Allows setting length
without initializing new memory on growth.

- Required for a follow-up to #15951
- Accompanies #22767 (ref #19727) but for strings
- Expands `stdlib/tstring` with tests for `setLen` and `setLenUninit`

---------

Co-authored-by: Andreas Rumpf <araq4k@proton.me>
This commit is contained in:
Zoom
2026-04-14 19:24:26 +04:00
committed by GitHub
parent e81f5b5890
commit 4dbc382906
6 changed files with 241 additions and 26 deletions

View File

@@ -60,6 +60,7 @@ errors.
- `copyDirWithPermissions` to recursively preserve attributes
- `system.setLenUninit` now supports refc, JS and VM backends.
- `system.setLenUninit` for the `string` type. Allows setting length without initializing new memory on growth.
- `std/parseopt` now supports multiple parser modes via a `CliMode` enum.
Modes include `Nim` (default, fully compatible) and two new experimental modes:

View File

@@ -2418,6 +2418,33 @@ when notJSnotNims and hasAlloc:
when not defined(nimV2):
include "system/repr"
func setLenUninit*(s: var string, newlen: Natural) {.nodestroy.} =
  ## Sets the length of string `s` to `newlen`.
  ##
  ## On growth the new slots are not initialized, so they must be written
  ## before they are read.
  ## If `newlen` is smaller than the current length, `s` is truncated.
  ##
  ## At compile time (`nimvm`), when `notJSnotNims` is false, or when
  ## `hasAlloc` is false, this simply forwards to `setLen`.
  # Clamp once and pass the clamped value to every backend, so the native
  # implementations never receive a negative length when range checks are off.
  let n = max(newlen, 0)
  when nimvm:
    s.setLen(n)
  else:
    when notJSnotNims:
      when defined(nimSeqsV2):
        {.noSideEffect.}:
          # The runtime procs operate on the in-memory string representation,
          # so reinterpret the address of `s` accordingly.
          let str = unsafeAddr s
          when defined(nimsso):
            setLengthStrV3Uninit(cast[ptr SmallString](str)[], n)
          else:
            setLengthStrV2Uninit(cast[ptr NimStringV2](str)[], n)
      else:
        {.noSideEffect.}:
          when hasAlloc:
            setLengthStrUninit(s, n)
          else:
            s.setLen(n)
    else: s.setLen(n)
when notJSnotNims and hasThreadSupport and hostOS != "standalone":
when not defined(nimPreviewSlimSystem):
include "system/channels_builtin"

View File

@@ -158,6 +158,26 @@ proc setLengthStrV2(s: var NimStringV2, newLen: int) {.compilerRtl.} =
s.p.data[newLen] = '\0'
s.len = newLen
proc setLengthStrV2Uninit(s: var NimStringV2, newLen: int) =
  ## Sets the length of `s` to `newLen` without zeroing the bytes gained on
  ## growth. For any non-zero `newLen` the terminating `'\0'` is written at
  ## index `newLen`; for `newLen == 0` only the length field is reset and the
  ## buffer is kept.
  if newLen == 0:
    discard "do not free the buffer here, pattern 's.setLen 0' is common for avoiding allocations"
  else:
    if isLiteral(s):
      # `s` reports a literal payload: switch to a freshly allocated payload
      # and carry over only the prefix that survives the resize.
      let oldP = s.p
      s.p = allocPayload(newLen)
      s.p.cap = newLen
      if s.len > 0:
        copyMem(unsafeAddr s.p.data[0], unsafeAddr oldP.data[0], min(s.len, newLen))
    elif newLen > s.len:
      let oldCap = s.p.cap and not strlitFlag
      if newLen > oldCap:
        let newCap = max(newLen, resize(oldCap))
        # Use the non-zeroing realloc: zero-filling the new tail would
        # initialize exactly the memory this proc promises to leave untouched.
        s.p = reallocPayload(s.p, newCap)
        s.p.cap = newCap
    # Single terminator write covering the literal, grow and shrink paths.
    s.p.data[newLen] = '\0'
  s.len = newLen
proc nimAsgnStrV2(a: var NimStringV2, b: NimStringV2) {.compilerRtl.} =
if a.p == b.p and a.len == b.len: return
if isLiteral(b):

View File

@@ -496,12 +496,16 @@ proc mnewString(len: int): SmallString {.compilerproc.} =
result.more = p
setSSLen(result, HeapSlen)
proc setLengthStrV2(s: var SmallString; newLen: int) {.compilerRtl.} =
## Sets the length of s to newLen, zeroing new bytes on growth.
proc setLengthStr(s: var SmallString; newLen: int; zeroing: bool) =
# Shared implementation for setLengthStrV2 (zeroing) and setLengthStrV3Uninit
# Difference between the two modes:
# - inline/medium -> long growth: alloc0 (zeroing) vs alloc (uninit)
# - long -> long growth: zeroMem the new tail (zeroing) or skip it (uninit)
let slen = ssLen(s)
let curLen = if slen > PayloadSize: s.more.fullLen else: slen
if newLen == curLen: return
if newLen <= 0:
# Pattern 's.setLen 0' is common for avoiding allocations; do NOT free the buffer.
if slen > PayloadSize:
if slen == HeapSlen and s.more.rc == 1:
s.more.fullLen = 0
@@ -517,7 +521,11 @@ proc setLengthStrV2(s: var SmallString; newLen: int) {.compilerRtl.} =
if newLen <= PayloadSize:
let inl = inlinePtr(s)
if newLen > curLen:
zeroMem(addr inl[curLen], newLen - curLen)
# Grow within inline/medium
# Bytes above newLen already zero by the SWAR invariant,
# so setSSLen is sufficient.
if zeroing:
zeroMem(addr inl[curLen], newLen - curLen)
inl[newLen] = '\0'
setSSLen(s, newLen)
else:
@@ -542,18 +550,23 @@ proc setLengthStrV2(s: var SmallString; newLen: int) {.compilerRtl.} =
else:
# grow into long
let newCap = resize(newLen)
let p = cast[ptr LongString](alloc0(LongStringDataOffset + newCap + 1))
let p = if zeroing:
# bytes [curLen..newLen] and p.data[newLen] zeroed by alloc0
cast[ptr LongString](alloc0(LongStringDataOffset + newCap + 1))
else:
let p = cast[ptr LongString](alloc(LongStringDataOffset + newCap + 1))
p.data[newLen] = '\0'
p
p.rc = 1
p.fullLen = newLen
p.capImpl = newCap
copyMem(addr p.data[0], inlinePtr(s), curLen)
# bytes [curLen..newLen] zeroed by alloc0; p.data[newLen] = '\0' by alloc0
s.more = p
setSSLen(s, HeapSlen)
else:
# currently long
if newLen <= PayloadSize:
# shrink back to inline
# shrink back to inline/medium
let old = s.more
let inl = inlinePtr(s)
copyMem(inl, addr old.data[0], newLen)
@@ -574,11 +587,19 @@ proc setLengthStrV2(s: var SmallString; newLen: int) {.compilerRtl.} =
else:
setSSLen(s, newLen)
else:
ensureUniqueLong(s, curLen, newLen)
# long -> long
ensureUniqueLong(s, curLen, newLen) # sets fullLen = newLen
if newLen > curLen:
zeroMem(addr s.more.data[curLen], newLen - curLen)
s.more.data[newLen] = '\0'
s.more.fullLen = newLen
proc setLengthStrV2(s: var SmallString; newLen: int) {.compilerRtl.} =
  ## Resizes `s` to `newLen`; bytes gained on growth are zeroed.
  ## Thin wrapper over the shared `setLengthStr` with zeroing enabled.
  s.setLengthStr(newLen, zeroing = true)
proc setLengthStrV3Uninit(s: var SmallString; newLen: int) {.compilerRtl.} =
  ## Resizes `s` to `newLen`; bytes gained on growth are NOT zeroed.
  ## Thin wrapper over the shared `setLengthStr` with zeroing disabled.
  s.setLengthStr(newLen, zeroing = false)
proc nimAsgnStrV2(a: var SmallString; b: SmallString) {.compilerRtl, inline.} =
if ssLen(b) <= PayloadSize:

View File

@@ -244,6 +244,31 @@ proc setLengthStr(s: NimString, newLen: int): NimString {.compilerRtl.} =
result.len = n
result.data[n] = '\0'
proc setLengthStrUninit(s: var string, newlen: Natural) {.nodestroy.} =
  ## Sets the `s` length to `newlen` without zeroing memory on growth.
  ## Terminating zero for cstring compatibility is set whenever the length
  ## changes.
  let n = max(newlen, 0)
  let old = cast[NimString](s)
  if old == nil:
    # Nothing allocated yet: only allocate when a non-empty string is requested.
    if n == 0: return
    let fresh = rawNewStringNoInit(n)
    fresh.data[n] = '\0'
    fresh.len = n
    s = cast[string](fresh)
  elif n > old.space:
    # Does not fit: allocate a larger uninitialized buffer and copy the old
    # contents over.
    let fresh = rawNewStringNoInit(max(resize(old.space), n))
    # Copy via the raw payload rather than `s[0]`, which would index out of
    # bounds when the current string is empty.
    copyMem(addr fresh.data[0], addr old.data[0], old.len)
    fresh.data[n] = '\0'
    fresh.len = n
    s = cast[string](fresh)
  elif n != old.len:
    # Fits in the existing buffer. This covers both shrinking and growing
    # within the spare capacity; only the terminator and length are updated.
    old.data[n] = '\0'
    old.len = n
# ----------------- sequences ----------------------------------------------
proc incrSeq(seq: PGenericSeq, elemSize, elemAlign: int): PGenericSeq {.compilerproc.} =

View File

@@ -1,20 +1,24 @@
discard """
matrix: "--mm:refc; --mm:orc"
targets: "c cpp js"
matrix: "--backend:c --mm:refc; --backend:c --mm:orc; --backend:c --mm:orc -d:nimsso; --backend:cpp --mm:refc; --backend:cpp --mm:orc; --backend:js --mm:refc; --backend:js --mm:orc"
"""
from std/sequtils import toSeq, map
from std/sugar import `=>`
import std/assertions
const hasNativeSso = defined(nimsso) and
(defined(gcArc) or defined(gcAtomicArc) or defined(gcOrc) or defined(gcYrc))
proc tester[T](x: T) =
  ## Regression helper (called from the `bug #7816` block in `main`).
  ## Builds five empty `seq[int]` values with `toSeq`/`map` and a `=>` lambda
  ## from inside a generic proc, then checks their string form.
  ## `x` is not read by the body.
  let test = toSeq(0..4).map(i => newSeq[int]())
  doAssert $test == "@[@[], @[], @[], @[], @[]]"
func reverse*(a: string): string =
result = a
for i in 0 ..< a.len div 2:
swap(result[i], result[^(i + 1)])
when not hasNativeSso:
func reverse*(a: string): string =
  ## Returns a copy of `a` with its characters in reverse order.
  result = a
  # Walk two cursors towards each other, swapping as they go.
  var lo = 0
  var hi = result.len - 1
  while lo < hi:
    swap(result[lo], result[hi])
    inc lo
    dec hi
proc main() =
block: # ..
@@ -94,31 +98,148 @@ proc main() =
block: # bug #7816
tester(1)
block: # bug #14497, reverse
doAssert reverse("hello") == "olleh"
when not hasNativeSso:
block: # bug #14497, reverse
doAssert reverse("hello") == "olleh"
block: # len, high
var a = "ab\0cd"
var b = a.cstring
doAssert a.len == 5
block: # bug #16405
when defined(js):
when nimvm: doAssert b.len == 2
else: doAssert b.len == 5
else: doAssert b.len == 2
doAssert a.high == a.len - 1
doAssert b.high == b.len - 1
when not (hasNativeSso and defined(cpp)):
let b = a.cstring
block: # bug #16405
when defined(js):
when nimvm: doAssert b.len == 2
else: doAssert b.len == 5
else: doAssert b.len == 2
doAssert b.high == b.len - 1
doAssert "".len == 0
doAssert "".high == -1
doAssert "".cstring.len == 0
doAssert "".cstring.high == -1
when not (hasNativeSso and defined(cpp)):
doAssert "".cstring.len == 0
doAssert "".cstring.high == -1
block: # bug #16674
var c: cstring = nil
doAssert c.len == 0
doAssert c.high == -1
block: # setLen, setLenUninit
when hasNativeSso:
  # Local layout constants and raw accessors used by the SSO checks.
  # NOTE(review): these presumably mirror the runtime's small-string layout
  # (inline sizes and the long-string data offset) — confirm they stay in
  # sync with the implementation.
  const
    alwaysAvail = sizeof(uint) - 1
    payloadSize = sizeof(uint) + sizeof(pointer) - 2
    longStringDataOffset = 3 * sizeof(int)
  template rawSlenOf(s: string): int =
    ## Reads the first byte of the string object as an integer.
    int(cast[ptr byte](unsafeAddr s)[])
  template inlineDataOf(s: string): ptr UncheckedArray[char] =
    ## Views the bytes starting one byte past the string object's address.
    cast[ptr UncheckedArray[char]](cast[uint](unsafeAddr s) + 1'u)
  template longDataOf(s: string): ptr UncheckedArray[char] =
    ## Reinterprets the string object as `(bytes: uint, more: pointer)` and
    ## views the bytes located `longStringDataOffset` past `more`.
    let ssPtr = cast[ptr tuple[bytes: uint, more: pointer]](unsafeAddr s)
    cast[ptr UncheckedArray[char]](
      cast[uint](ssPtr.more) + uint(longStringDataOffset))
proc checkStrInternals(s: string; expectedLen: int) =
  ## Asserts that `s` has length `expectedLen` and, outside the VM, that the
  ## underlying buffer carries a `'\0'` terminator at index `expectedLen`.
  ## With native SSO it additionally checks the raw length byte and that the
  ## inline bytes after the terminator (up to `alwaysAvail`) are zero.
  doAssert s.len == expectedLen, "expected " & $expectedLen & ", got " & $s.len
  when nimvm:
    discard
  else:
    when hasNativeSso and not defined(js) and not defined(nimscript):
      # SSO
      let rawSlen = rawSlenOf(s)
      if rawSlen > payloadSize:
        # Long representation: the length byte holds the heap marker value.
        doAssert rawSlen == 255
        let data = longDataOf(s)
        doAssert data[expectedLen] == '\0'
      else:
        doAssert rawSlen == expectedLen
        let data = inlineDataOf(s)
        doAssert data[expectedLen] == '\0'
        if expectedLen < alwaysAvail:
          for i in expectedLen + 1 ..< alwaysAvail:
            doAssert data[i] == '\0'
    elif not defined(js):
      # string V2 / refc. `defined` only tests `-d:` symbols, so guarding on a
      # type name would never enable this branch; exclude JS explicitly.
      let cs = s.cstring
      let arr = cast[ptr UncheckedArray[char]](unsafeAddr cs[0])
      doAssert arr[expectedLen] == '\0'
proc makeStr(n: int): string =
  ## Builds a string of `n` characters cycling through `'a'..'z'`.
  result = newStringOfCap(n)
  var idx = 0
  while idx < n:
    let offset = idx mod 26
    result.add char(ord('a') + offset)
    inc idx
proc checkSetLenUninit(oldLen, newLen: int; cmpAfter = -1) =
  ## Resizes a generated string of `oldLen` characters to `newLen` with
  ## `setLenUninit` and verifies its internals and the surviving prefix.
  ## When `cmpAfter >= 0`, also requires the result to compare less than
  ## `makeStr(cmpAfter)`.
  var subject = makeStr(oldLen)
  let keep = min(oldLen, newLen)
  let expectedPrefix = makeStr(keep)
  setLenUninit(subject, newLen)
  checkStrInternals(subject, newLen)
  doAssert subject[0..<keep] == expectedPrefix
  if newLen <= oldLen:
    # On shrink (or no-op) the whole string must equal the kept prefix.
    doAssert subject == expectedPrefix
  if cmpAfter >= 0:
    doAssert subject < makeStr(cmpAfter)
const numbers = "1234567890"
block setLen:
# Trim to zero and grow past the old end. Must keep the prefix and zero the tail.
var s = numbers
s.setLen(0)
s.checkStrInternals(0)
doAssert s == ""
s = numbers
s.setLen(numbers.len + 1)
s.checkStrInternals(numbers.len + 1)
doAssert s[0..numbers.high] == numbers
doAssert s[numbers.len] == '\0'
block setLenUninit:
# Shared baseline for both SSO and V2: noop, shrink, grow.
checkSetLenUninit(numbers.len, numbers.len)
checkSetLenUninit(numbers.len, 5)
checkSetLenUninit(numbers.len, 11)
when hasNativeSso:
const
shortLen = alwaysAvail
medLen = payloadSize
longLen = payloadSize + 8
# Staying short and verify short-compare padding after shrink.
checkSetLenUninit(shortLen, shortLen - 1, shortLen)
checkSetLenUninit(shortLen - 2, shortLen - 1)
checkSetLenUninit(shortLen, 0)
# Cross the short/medium boundary in both directions.
checkSetLenUninit(medLen, medLen - 1)
checkSetLenUninit(medLen, alwaysAvail - 1, alwaysAvail)
checkSetLenUninit(alwaysAvail, medLen)
# Cross the inline/long boundary in both directions and cover long growth.
checkSetLenUninit(longLen, longLen - 2)
checkSetLenUninit(longLen, medLen - 1)
checkSetLenUninit(longLen, alwaysAvail - 1, alwaysAvail)
checkSetLenUninit(medLen, longLen)
checkSetLenUninit(longLen, longLen + 10)
checkSetLenUninit(longLen, 0)
when not defined(js) and not defined(nimscript):
# shared long strings must not mutate the original when grown
let src = makeStr(longLen)
var orig = src
var copy = orig
copy.setLenUninit(longLen + 4)
copy.checkStrInternals(longLen + 4)
doAssert orig == src
doAssert copy[0..<longLen] == src
static: main()
main()