Files
Nim/lib/system/sysstr.nim
Zoom cbe8ce59ed fix string setLenUninit growth without realloc for refc (#25767)
`setLenUninit(string)` was broken on the legacy refc backend when
growing within existing spare capacity.

`setLengthStrUninit` in `lib/system/sysstr.nim` only updated len when it
had to reallocate or when shrinking.

If oldLen < newLen <= capacity, it returned early without finalizing:

```nim
var s = newStringOfCap(10)
s.add("abc")
s.setLenUninit(6)
doAssert s.len == 6 # used to fail, len stayed 3
```

This escaped `tests/stdlib/tstring.nim` because the testing routine
`checkSetLenUninit` mostly resizes strings created at **exact**
length/capacity, so growth usually took the reallocating branch.

The new regression test covers the missing edge case.

So sorry for catching this only on the day of the stable release! In my
defense, the original PR hung in limbo for quite a while and it didn't
spend enough time in devel after the merge.
2026-04-25 12:27:13 +02:00

402 lines
14 KiB
Nim

#
#
# Nim's Runtime Library
# (c) Copyright 2012 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
# string & sequence handling procedures needed by the code generator
# strings are dynamically resized, have a length field
# and are zero-terminated, so they can be casted to C
# strings easily
# we don't use refcounts because that's a behaviour
# the programmer may not want
{.push raises: [], gcsafe.}
proc dataPointer(a: PGenericSeq, elemAlign: int): pointer =
  ## Returns the address `align(GenericSeqSize, elemAlign)` bytes past the
  ## start of `a`, i.e. where the payload begins after the header.
  let headerSize = align(GenericSeqSize, elemAlign)
  result = cast[pointer](cast[int](a) +% headerSize)
proc dataPointer(a: PGenericSeq, elemAlign, elemSize, index: int): pointer =
  ## Returns the address of element `index` in `a`: the header offset
  ## `align(GenericSeqSize, elemAlign)` plus `index * elemSize` bytes.
  let headerSize = align(GenericSeqSize, elemAlign)
  let elemOffset = index *% elemSize
  result = cast[pointer](cast[int](a) +% headerSize +% elemOffset)
proc resize(old: int): int {.inline.} =
  ## Growth policy: computes the next capacity step from the capacity `old`.
  ## - non-positive `old` yields 4
  ## - below 65536 the capacity doubles
  ## - from 65536 on it grows by a factor of 3/2
  result =
    if old <= 0: 4
    elif old < 65536: old * 2
    else: old + old div 2 # for large arrays * 3/2 is better
# String allocation back ends. Both templates cast a freshly obtained block of
# `size` bytes to `NimString`; the allocator is picked at compile time from
# what the selected memory manager declares/defines.
when declared(allocAtomic):
  template allocStr(size: untyped): untyped =
    cast[NimString](allocAtomic(size))

  template allocStrNoInit(size: untyped): untyped =
    cast[NimString](boehmAllocAtomic(size))
elif defined(gcRegions):
  template allocStr(size: untyped): untyped =
    cast[NimString](newStr(addr strDesc, size, true))

  template allocStrNoInit(size: untyped): untyped =
    cast[NimString](newStr(addr strDesc, size, false))
else:
  template allocStr(size: untyped): untyped =
    cast[NimString](newObj(addr strDesc, size))

  template allocStrNoInit(size: untyped): untyped =
    cast[NimString](newObjNoInit(addr strDesc, size))
proc rawNewStringNoInit(space: int): NimString =
  ## Allocates a NimString with room for `max(space, 7)` characters plus the
  ## terminator and stores that capacity in `reserved`.
  ## .. warning:: `len` and the terminating null-byte are not set!
  let room = max(space, 7)
  let fresh = allocStrNoInit(sizeof(TGenericSeq) + room + 1)
  fresh.reserved = room
  when defined(gogc):
    fresh.elemSize = 1
  result = fresh
proc rawNewString(space: int): NimString {.compilerproc.} =
  ## Allocates an empty NimString with capacity for `space` characters.
  ## The payload is *not* zeroed, but every required field is initialized:
  ## - `reserved` (by `rawNewStringNoInit`)
  ## - `len` (0)
  ## - terminating null-byte at index 0
  result = rawNewStringNoInit(space)
  result.data[0] = '\0'
  result.len = 0
proc mnewString(len: int): NimString {.compilerproc.} =
  ## Allocates a NimString of length `len` whose first `len + 1` bytes
  ## (content and terminating null-byte) are zeroed.
  ## `reserved` is set by `rawNewStringNoInit`, `len` is set here.
  result = rawNewStringNoInit(len)
  # A single zeroMem covers both the content and the terminator.
  zeroMem(addr result.data[0], len + 1)
  result.len = len
proc copyStrLast(s: NimString, start, last: int): NimString {.compilerproc.} =
  ## Returns a newly allocated copy of `s[start..last]`, or `nil` if `s` is nil.
  ## A negative `start` is treated as 0 and `last` is capped at `s.len-1`.
  ## An empty or inverted range yields an empty (zero-length, terminated)
  ## string.
  # This is not used by most recent versions of the compiler anymore, but
  # required for bootstrapping purposes.
  if s == nil: return nil
  let first = max(start, 0)
  # Clamp to zero: when `first` lies past `last` (or past the end of `s`) the
  # raw difference is negative, which must never reach `len`, `copyMem` or the
  # index used for the terminator.
  let count = max(min(last, s.len-1) - first + 1, 0)
  result = rawNewStringNoInit(count)
  result.len = count
  if count > 0:
    copyMem(addr(result.data), addr(s.data[first]), count)
  result.data[count] = '\0'
proc copyStr(s: NimString, start: int): NimString {.compilerproc.} =
  ## Returns a copy of `s` from index `start` through its last character,
  ## or `nil` when `s` is nil.
  # This is not used by most recent versions of the compiler anymore, but
  # required for bootstrapping purposes.
  if s != nil:
    result = copyStrLast(s, start, s.len-1)
  else:
    result = nil
proc nimToCStringConv(s: NimString): cstring {.compilerproc, nonReloadable, inline.} =
  ## Converts `s` to a `cstring`. Nil and empty strings map to the literal
  ## `cstring""`; otherwise the address of the string's own data is returned
  ## (no copy is made).
  if s != nil and s.len != 0:
    result = cast[cstring](addr s.data)
  else:
    result = cstring""
proc toNimStr(str: cstring, len: int): NimString {.compilerproc.} =
  ## Builds a new NimString from the first `len` bytes at `str` and appends
  ## the terminating null-byte.
  let fresh = rawNewStringNoInit(len)
  copyMem(addr(fresh.data), str, len)
  fresh.data[len] = '\0'
  fresh.len = len
  result = fresh
proc toOwnedCopy(src: NimString): NimString {.inline, raises: [].} =
  ## Returns a freshly allocated duplicate of `src`.
  ## Expects `src` to be not nil and initialized (len and terminating zero set)
  let n = src.len
  result = rawNewStringNoInit(n)
  # `n + 1` so that the source's terminating zero is carried over as well.
  copyMem(addr(result.data), addr(src.data), n + 1)
  result.len = n
proc cstrToNimstr(str: cstring): NimString {.compilerRtl.} =
  ## Converts a `cstring` to a new NimString using `str.len` as the length;
  ## a nil `str` yields a nil NimString.
  if str != nil:
    result = toNimStr(str, str.len)
  else:
    result = NimString(nil)
proc copyString(src: NimString): NimString {.compilerRtl.} =
  ## Copies `src`. Nil stays nil; a string whose `reserved` carries
  ## `seqShallowFlag` is returned as-is; anything else is duplicated via
  ## `toOwnedCopy`.
  ## Expects `src` to be initialized (len and terminating zero set)
  if src == nil:
    return nil
  if (src.reserved and seqShallowFlag) != 0:
    return src
  result = toOwnedCopy(src)
  sysAssert((seqShallowFlag and result.reserved) == 0, "copyString")
  when defined(nimShallowStrings):
    # The copy of a string literal is marked shallow instead of literal.
    if (src.reserved and strlitFlag) != 0:
      result.reserved = (result.reserved and not strlitFlag) or seqShallowFlag
proc copyStringRC1(src: NimString): NimString {.compilerRtl.} =
  ## Variant of `copyString` that allocates through `newObjRC1` when that is
  ## declared (and `gcRegions` is not defined). Nil stays nil; a string flagged
  ## with `seqShallowFlag` is returned as-is, with `incRef` applied to its cell
  ## when `incRef` is declared.
  if src == nil:
    return nil
  if (src.reserved and seqShallowFlag) != 0:
    result = src
    when declared(incRef):
      incRef(usrToCell(result))
    return
  when declared(newObjRC1) and not defined(gcRegions):
    # Same minimum capacity of 7 as `rawNewStringNoInit` uses.
    let room = max(src.len, 7)
    result = cast[NimString](newObjRC1(addr(strDesc),
                                       sizeof(TGenericSeq) + room + 1))
    result.reserved = room
    when defined(gogc):
      result.elemSize = 1
    result.len = src.len
    # `+ 1` copies the terminating zero along with the content.
    copyMem(addr(result.data), addr(src.data), src.len + 1)
  else:
    result = toOwnedCopy(src)
  sysAssert((seqShallowFlag and result.reserved) == 0, "copyStringRC1")
  when defined(nimShallowStrings):
    if (src.reserved and strlitFlag) != 0:
      result.reserved = (result.reserved and not strlitFlag) or seqShallowFlag
proc copyDeepString(src: NimString): NimString {.inline, raises: [].} =
  ## Always duplicates a non-nil `src` via `toOwnedCopy` (no shallow-flag
  ## shortcut); a nil `src` yields nil.
  if src == nil:
    return nil
  result = toOwnedCopy(src)
# The following resize- and append- routines should be used like following:
# <Nim code>
# s &= "Hello " & name & ", how do you feel?"
#
# <generated C code>
# {
# s = resizeString(s, 6 + name->len + 17);
# appendString(s, strLit1);
# appendString(s, strLit2);
# appendString(s, strLit3);
# }
#
# <Nim code>
# s = "Hello " & name & ", how do you feel?"
#
# <generated C code>
# {
# string tmp0;
# tmp0 = rawNewString(6 + name->len + 17);
# appendString(tmp0, strLit1);
# appendString(tmp0, strLit2);
# appendString(tmp0, strLit3);
# s = tmp0;
# }
#
# <Nim code>
# s = ""
#
# <generated C code>
# s = rawNewString(0);
proc resizeString(dest: NimString, addlen: int): NimString {.compilerRtl.} =
  ## Makes sure there is room to append up to `addlen` more bytes to `dest`
  ## and returns the string to append to (either `dest` itself or a larger
  ## copy). A nil `dest` yields a new empty string with that capacity.
  ## .. warning:: Does not update `len`!
  if dest == nil:
    return rawNewString(addlen)
  let needed = dest.len + addlen
  if needed > dest.space:
    # slow path; growth strategy: next `resize` step, or exactly `needed`
    # when that step would still be too small
    result = rawNewStringNoInit(max(resize(dest.space), needed))
    result.len = dest.len
    # `needed > space` implies `addlen` is non-zero; the terminating null is
    # copied anyway
    copyMem(addr(result.data), addr(dest.data), dest.len + 1)
  else:
    result = dest
proc appendChar(dest: NimString, c: char) {.compilerproc, inline.} =
  ## Writes `c` at the current end of `dest`, re-terminates the string and
  ## bumps `len` by one. Does not check or grow the capacity.
  let at = dest.len
  dest.data[at] = c
  dest.data[at + 1] = '\0'
  dest.len = at + 1
proc addChar(s: NimString, c: char): NimString =
  ## Appends `c` to `s` and returns the string holding the result: `s` itself
  ## when it has spare capacity, otherwise a larger copy (or a new string when
  ## `s` is nil).
  # is compilerproc! used in `ccgexprs.nim`
  if s == nil:
    result = rawNewStringNoInit(1)
    result.len = 0
  elif s.len >= s.space: # len.inc would overflow (`>` just in case)
    result = rawNewStringNoInit(resize(s.space))
    result.len = s.len
    # The terminator is not copied; `appendChar` writes a fresh one below.
    copyMem(addr(result.data), addr(s.data), s.len)
  else:
    result = s
  result.appendChar(c)
proc appendString(dest, src: NimString) {.compilerproc, inline.} =
  ## Copies `src` (including its terminating zero) to the end of `dest` and
  ## adds `src.len` to `dest.len`. A nil `src` is a no-op.
  ## Raw, does not prepare `dest` space for copying
  if src == nil:
    return
  copyMem(addr(dest.data[dest.len]), addr(src.data), src.len + 1)
  inc(dest.len, src.len)
proc setLengthStr(s: NimString, newLen: int): NimString {.compilerRtl.} =
  ## Sets the `s` length to `newLen` zeroing memory on growth.
  ## Terminating zero at `s[newLen]` for cstring compatibility is set
  ## on any length change, including `newLen == 0`.
  ## Negative `newLen` is bound to zero.
  ##
  ## Returns the string to use afterwards: `s` itself when `newLen` fits its
  ## capacity, otherwise a larger copy. A nil `s` stays nil for length 0 and
  ## becomes a new zeroed string otherwise.
  let n = max(newLen, 0)
  if s == nil: # early return check
    return if n == 0: s else: mnewString(n) # sets everything required
  let oldLen = s.len
  if n <= s.space:
    result = s # len and null-byte still need updating
  else:
    let sp = max(resize(s.space), n)
    result = rawNewStringNoInit(sp) # len and null-byte not set
    copyMem(addr(result.data), addr(s.data), oldLen)
  # Zero the newly exposed bytes on *every* growth, reallocating or not, and
  # never on shrink (where `n - oldLen` would be negative).
  if n > oldLen:
    zeroMem(addr result.data[oldLen], n - oldLen)
  result.len = n
  result.data[n] = '\0'
proc setLengthStrUninit(s: var string, newlen: Natural) {.nodestroy.} =
  ## Sets the `s` length to `newlen` without zeroing memory on growth.
  ## Terminating zero for cstring compatibility is set.
  ##
  ## Three cases update the string: a nil string growing to a non-zero length
  ## (fresh allocation), growth beyond the capacity (reallocation plus copy of
  ## the old content), and any other length change (in place). An unchanged
  ## length, or a nil string with `newlen == 0`, is a no-op.
  let wanted = max(newlen, 0)
  let current = cast[NimString](s)
  if current == nil:
    if wanted != 0:
      let fresh = rawNewStringNoInit(wanted)
      fresh.data[wanted] = '\0'
      fresh.len = wanted
      s = cast[string](fresh)
  elif wanted > current.space:
    let fresh = rawNewStringNoInit(max(resize(current.space), wanted))
    # Only the old content is carried over; the new tail stays uninitialized.
    copyMem(addr fresh.data[0], unsafeAddr s[0], s.len)
    fresh.data[wanted] = '\0'
    fresh.len = wanted
    s = cast[string](fresh)
  elif wanted != s.len:
    # Shrinking, or growing within the spare capacity: finalize in place.
    current.data[wanted] = '\0'
    current.len = wanted
# ----------------- sequences ----------------------------------------------
proc incrSeq(seq: PGenericSeq, elemSize, elemAlign: int): PGenericSeq {.compilerproc.} =
  ## Increments the length of `seq` by one, growing the object through
  ## `growObj` first when it is full, and returns the seq to use afterwards.
  # this is needed for supporting ``add``;
  #
  #  add(seq, x)  generates:
  #  seq = incrSeq(seq, sizeof(x));
  #  seq[seq->len-1] = x;
  result = seq
  if result.len >= result.space:
    let newCap = resize(result.space)
    let newSize = align(GenericSeqSize, elemAlign) + elemSize * newCap
    result = cast[PGenericSeq](growObj(result, newSize))
    result.reserved = newCap
  inc(result.len)
proc incrSeqV3(s: PGenericSeq, typ: PNimType): PGenericSeq {.compilerproc.} =
  ## Returns a seq with room for at least one more element; `len` is left
  ## unchanged (0 for a nil `s`). A full `s` is replaced by a larger seq that
  ## takes over its elements.
  if s == nil:
    result = cast[PGenericSeq](newSeq(typ, 1))
    result.len = 0
    return
  if s.len < s.space:
    return s
  result = cast[PGenericSeq](newSeq(typ, resize(s.space)))
  result.len = s.len
  let baseAlign = typ.base.align
  copyMem(dataPointer(result, baseAlign), dataPointer(s, baseAlign),
          s.len * typ.base.size)
  # since we steal the content from 's', it's crucial to set s's len to 0.
  s.len = 0
proc newSeqUninitRaw(typ: PNimType; len: int): pointer {.inline.} =
  ## Creates a sequence payload with capacity and length `len` without
  ## forcing zero-initialization for `ntfNoRefs` element types.
  let payload = nimNewSeqOfCap(typ, len)
  cast[PGenericSeq](payload).len = len
  result = payload
proc extendCapacityRaw(src: PGenericSeq; typ: PNimType;
                       elemSize, elemAlign, newLen: int;
                       doInit: static bool): PGenericSeq {.inline.} =
  ## Reallocs `src` to fit `newLen` elements without any checks.
  ## Capacity always increases to at least next `resize` step.
  ## `doInit` selects `newSeq` (true) or `newSeqUninitRaw` (false) for the
  ## new allocation. The elements of `src` are moved over and `src.len` is
  ## reset to 0.
  let newCap = max(resize(src.space), newLen)
  result =
    when doInit: cast[PGenericSeq](newSeq(typ, newCap))
    else: cast[PGenericSeq](newSeqUninitRaw(typ, newCap))
  let liveBytes = src.len * elemSize
  copyMem(dataPointer(result, elemAlign), dataPointer(src, elemAlign), liveBytes)
  # since we steal the content from 's', it's crucial to set s's len to 0.
  src.len = 0
proc truncateRaw(src: PGenericSeq; baseFlags: set[TNimTypeFlag]; isTrivial: bool;
                 elemSize, elemAlign, newLen: int): PGenericSeq {.inline.} =
  ## Truncates `src` to `newLen` without any checks and returns `src`.
  ## Elements in `newLen ..< src.len` are dec-ref'ed (on the GCs that need it
  ## and when the element type may hold refs) and, unless `isTrivial`, zeroed.
  ## Does not set `src.len`
  # sysAssert src.space > newlen
  # sysAssert newLen < src.len
  result = src
  # we need to decref here, otherwise the GC leaks!
  when not (defined(boehmGC) or defined(nogc) or defined(gcMarkAndSweep) or
            defined(gogc) or defined(gcRegions)):
    if ntfNoRefs notin baseFlags:
      for i in countup(newLen, result.len - 1):
        forAllChildrenAux(dataPointer(result, elemAlign, elemSize, i),
                          extGetCellType(result).base, waZctDecRef)
  # XXX: zeroing out the memory can still result in crashes if a wiped-out
  # cell is aliased by another pointer (ie proc parameter or a let variable).
  # This is a tough problem, because even if we don't zeroMem here, in the
  # presence of user defined destructors, the user will expect the cell to be
  # "destroyed" thus creating the same problem. We can destroy the cell in the
  # finalizer of the sequence, but this makes destruction non-deterministic.
  if isTrivial: # optimization for trivial types
    return
  let tailBytes = (result.len -% newLen) *% elemSize
  zeroMem(dataPointer(result, elemAlign, elemSize, newLen), tailBytes)
template setLengthSeqImpl(s: PGenericSeq, typ: PNimType, newLen: int; isTrivial: bool;
                          doInit: static bool) =
  ## Shared body of the seq `setLen` runtime procs; it assigns to and returns
  ## through the `result` of the proc it is expanded in.
  ## - nil `s`: returns `s` for length 0, otherwise a new seq of `newLen`
  ## - `newLen` above capacity: reallocates via `extendCapacityRaw`
  ## - `newLen` below `len`: truncates via `truncateRaw`
  ## - otherwise: keeps `s`, zeroing the added elements when `doInit`
  if s == nil:
    if newLen == 0:
      return s
    when doInit:
      return cast[PGenericSeq](newSeq(typ, newLen)) # newSeq zeroes!
    else:
      return cast[PGenericSeq](newSeqUninitRaw(typ, newLen))
  else:
    let elemSize = typ.base.size
    let elemAlign = typ.base.align
    if newLen > s.space:
      result = s.extendCapacityRaw(typ, elemSize, elemAlign, newLen, doInit)
    elif newLen < s.len:
      result = s.truncateRaw(typ.base.flags, isTrivial, elemSize, elemAlign, newLen)
    else:
      when doInit:
        zeroMem(dataPointer(s, elemAlign, elemSize, s.len), (newLen-%s.len) *% elemSize)
      result = s
    result.len = newLen
proc setLengthSeqUninit(s: PGenericSeq; typ: PNimType; newLen: int;
                        isTrivial: bool): PGenericSeq {.compilerRtl.} =
  ## Sets the length of `s` to `newLen` through `setLengthSeqImpl` with
  ## `doInit = false` and returns the seq to use afterwards.
  sysAssert(typ.kind == tySequence, "setLengthSeqUninit: type is not a seq")
  setLengthSeqImpl(s, typ, newLen, isTrivial, false)
proc setLengthSeqV2(s: PGenericSeq, typ: PNimType, newLen: int,
                    isTrivial: bool): PGenericSeq {.compilerRtl.} =
  ## Sets the length of `s` to `newLen` through `setLengthSeqImpl` with
  ## `doInit = true` and returns the seq to use afterwards.
  sysAssert(typ.kind == tySequence, "setLengthSeqV2: type is not a seq")
  setLengthSeqImpl(s, typ, newLen, isTrivial, true)
func capacity*(self: string): int {.inline.} =
  ## Returns the current capacity of the string.
  ## A string without an allocated payload reports 0.
  # See https://github.com/nim-lang/RFCs/issues/460
  runnableExamples:
    var str = newStringOfCap(cap = 42)
    str.add "Nim"
    assert str.capacity == 42

  let payload = cast[NimString](self)
  if payload == nil:
    result = 0
  else:
    result = payload.space
func capacity*[T](self: seq[T]): int {.inline.} =
  ## Returns the current capacity of the seq.
  ## A seq without an allocated payload reports 0.
  # See https://github.com/nim-lang/RFCs/issues/460
  runnableExamples:
    var lst = newSeqOfCap[string](cap = 42)
    lst.add "Nim"
    assert lst.capacity == 42

  let payload = cast[PGenericSeq](self)
  if payload == nil:
    result = 0
  else:
    result = payload.space
{.pop.}