mirror of
https://github.com/nim-lang/Nim.git
synced 2026-06-06 20:04:18 +00:00
Feat: stdlib: adds system.string.setLenUninit (#24836)
Adds `system.setLenUninit` for the `string` type. Allows setting length without initializing new memory on growth. - Required for a follow-up to #15951 - Accompanies #22767 (ref #19727) but for strings - Expands `stdlib/tstring` with tests for `setLen` and `setLenUninit` --------- Co-authored-by: Andreas Rumpf <araq4k@proton.me>
This commit is contained in:
@@ -60,6 +60,7 @@ errors.
|
||||
- `copyDirWithPermissions` to recursively preserve attributes
|
||||
|
||||
- `system.setLenUninit` now supports refc, JS and VM backends.
|
||||
- `system.setLenUninit` for the `string` type. Allows setting length without initializing new memory on growth.
|
||||
|
||||
- `std/parseopt` now supports multiple parser modes via a `CliMode` enum.
|
||||
Modes include `Nim` (default, fully compatible) and two new experimental modes:
|
||||
|
||||
@@ -2418,6 +2418,33 @@ when notJSnotNims and hasAlloc:
|
||||
when not defined(nimV2):
|
||||
include "system/repr"
|
||||
|
||||
func setLenUninit*(s: var string, newlen: Natural) {.nodestroy.} =
  ## Sets the length of string `s` to `newlen`.
  ## New slots will not be initialized.
  ##
  ## If the current length is greater than the new length,
  ## `s` will be truncated.
  # Clamp once and hand the clamped value to every backend below, so that
  # all paths agree on the length they are asked to set.
  let n = max(newlen, 0)
  when nimvm:
    # Compile-time evaluation goes through the regular `setLen`.
    s.setLen(n)
  else:
    when notJSnotNims:
      when defined(nimSeqsV2):
        {.noSideEffect.}:
          # The runtime helpers take the raw string representation, so the
          # address of `s` is reinterpreted as the matching layout.
          let str = unsafeAddr s
          when defined(nimsso):
            setLengthStrV3Uninit(cast[ptr SmallString](str)[], n)
          else:
            setLengthStrV2Uninit(cast[ptr NimStringV2](str)[], n)
      else:
        {.noSideEffect.}:
          when hasAlloc:
            setLengthStrUninit(s, n)
          else:
            s.setLen(n)
    else: s.setLen(n)
|
||||
|
||||
|
||||
when notJSnotNims and hasThreadSupport and hostOS != "standalone":
|
||||
when not defined(nimPreviewSlimSystem):
|
||||
include "system/channels_builtin"
|
||||
|
||||
@@ -158,6 +158,26 @@ proc setLengthStrV2(s: var NimStringV2, newLen: int) {.compilerRtl.} =
|
||||
s.p.data[newLen] = '\0'
|
||||
s.len = newLen
|
||||
|
||||
proc setLengthStrV2Uninit(s: var NimStringV2, newLen: int) =
  ## Sets the length of `s` to `newLen` without clearing the bytes gained
  ## by growth. For every non-zero `newLen` the byte at index `newLen` is
  ## set to `'\0'`.
  if newLen != 0:
    if isLiteral(s):
      # The existing payload is not reused here: a new one of exactly
      # `newLen` is allocated and the part of the old contents that still
      # fits is copied over.
      let previous = s.p
      s.p = allocPayload(newLen)
      s.p.cap = newLen
      if s.len > 0:
        copyMem(unsafeAddr s.p.data[0], unsafeAddr previous.data[0],
                min(s.len, newLen))
    elif newLen > s.len:
      let oldCap = s.p.cap and not strlitFlag
      if newLen > oldCap:
        let newCap = max(newLen, resize(oldCap))
        # NOTE(review): judging by its name `reallocPayload0` zero-fills
        # the added capacity, which this "uninit" variant does not need;
        # confirm whether a non-zeroing realloc helper can be used instead.
        s.p = reallocPayload0(s.p, oldCap, newCap)
        s.p.cap = newCap
    # One terminator store shared by the fresh-payload, grow and shrink paths.
    s.p.data[newLen] = '\0'
  # For `newLen == 0` the buffer is deliberately kept: the pattern
  # 's.setLen 0' is common for avoiding allocations.
  s.len = newLen
|
||||
|
||||
proc nimAsgnStrV2(a: var NimStringV2, b: NimStringV2) {.compilerRtl.} =
|
||||
if a.p == b.p and a.len == b.len: return
|
||||
if isLiteral(b):
|
||||
|
||||
@@ -496,12 +496,16 @@ proc mnewString(len: int): SmallString {.compilerproc.} =
|
||||
result.more = p
|
||||
setSSLen(result, HeapSlen)
|
||||
|
||||
proc setLengthStrV2(s: var SmallString; newLen: int) {.compilerRtl.} =
|
||||
## Sets the length of s to newLen, zeroing new bytes on growth.
|
||||
proc setLengthStr(s: var SmallString; newLen: int; zeroing: bool) =
|
||||
# Shared implementation for setLengthStrV2 (zeroing) and setLengthStrV3Uninit
|
||||
# Difference between the two modes:
|
||||
# - inline/medium -> long growth: alloc0 (zeroing) vs alloc (uninit)
|
||||
# - long -> long growth: zeroMem the new tail (zeroing) or skip it (uninit)
|
||||
let slen = ssLen(s)
|
||||
let curLen = if slen > PayloadSize: s.more.fullLen else: slen
|
||||
if newLen == curLen: return
|
||||
if newLen <= 0:
|
||||
# Pattern 's.setLen 0' is common for avoiding allocations; do NOT free the buffer.
|
||||
if slen > PayloadSize:
|
||||
if slen == HeapSlen and s.more.rc == 1:
|
||||
s.more.fullLen = 0
|
||||
@@ -517,7 +521,11 @@ proc setLengthStrV2(s: var SmallString; newLen: int) {.compilerRtl.} =
|
||||
if newLen <= PayloadSize:
|
||||
let inl = inlinePtr(s)
|
||||
if newLen > curLen:
|
||||
zeroMem(addr inl[curLen], newLen - curLen)
|
||||
# Grow within inline/medium
|
||||
# Bytes above newLen already zero by the SWAR invariant,
|
||||
# so setSSLen is sufficient.
|
||||
if zeroing:
|
||||
zeroMem(addr inl[curLen], newLen - curLen)
|
||||
inl[newLen] = '\0'
|
||||
setSSLen(s, newLen)
|
||||
else:
|
||||
@@ -542,18 +550,23 @@ proc setLengthStrV2(s: var SmallString; newLen: int) {.compilerRtl.} =
|
||||
else:
|
||||
# grow into long
|
||||
let newCap = resize(newLen)
|
||||
let p = cast[ptr LongString](alloc0(LongStringDataOffset + newCap + 1))
|
||||
let p = if zeroing:
|
||||
# bytes [curLen..newLen] and p.data[newLen] zeroed by alloc0
|
||||
cast[ptr LongString](alloc0(LongStringDataOffset + newCap + 1))
|
||||
else:
|
||||
let p = cast[ptr LongString](alloc(LongStringDataOffset + newCap + 1))
|
||||
p.data[newLen] = '\0'
|
||||
p
|
||||
p.rc = 1
|
||||
p.fullLen = newLen
|
||||
p.capImpl = newCap
|
||||
copyMem(addr p.data[0], inlinePtr(s), curLen)
|
||||
# bytes [curLen..newLen] zeroed by alloc0; p.data[newLen] = '\0' by alloc0
|
||||
s.more = p
|
||||
setSSLen(s, HeapSlen)
|
||||
else:
|
||||
# currently long
|
||||
if newLen <= PayloadSize:
|
||||
# shrink back to inline
|
||||
# shrink back to inline/medium
|
||||
let old = s.more
|
||||
let inl = inlinePtr(s)
|
||||
copyMem(inl, addr old.data[0], newLen)
|
||||
@@ -574,11 +587,19 @@ proc setLengthStrV2(s: var SmallString; newLen: int) {.compilerRtl.} =
|
||||
else:
|
||||
setSSLen(s, newLen)
|
||||
else:
|
||||
ensureUniqueLong(s, curLen, newLen)
|
||||
# long -> long
|
||||
ensureUniqueLong(s, curLen, newLen) # sets fullLen = newLen
|
||||
if newLen > curLen:
|
||||
zeroMem(addr s.more.data[curLen], newLen - curLen)
|
||||
s.more.data[newLen] = '\0'
|
||||
s.more.fullLen = newLen
|
||||
|
||||
proc setLengthStrV2(s: var SmallString; newLen: int) {.compilerRtl.} =
|
||||
## Sets the length of `s` to `newLen`, zeroing new bytes on growth.
|
||||
setLengthStr(s, newLen, zeroing = true)
|
||||
|
||||
proc setLengthStrV3Uninit(s: var SmallString; newLen: int) {.compilerRtl.} =
|
||||
## Sets the length of `s` to `newLen`, NOT zeroing new bytes on growth.
|
||||
setLengthStr(s, newLen, zeroing = false)
|
||||
|
||||
proc nimAsgnStrV2(a: var SmallString; b: SmallString) {.compilerRtl, inline.} =
|
||||
if ssLen(b) <= PayloadSize:
|
||||
|
||||
@@ -244,6 +244,31 @@ proc setLengthStr(s: NimString, newLen: int): NimString {.compilerRtl.} =
|
||||
result.len = n
|
||||
result.data[n] = '\0'
|
||||
|
||||
proc setLengthStrUninit(s: var string, newlen: Natural) {.nodestroy.} =
  ## Sets the `s` length to `newlen` without zeroing memory on growth.
  ## Terminating zero for cstring compatibility is set.
  var str = cast[NimString](s)
  let n = max(newlen, 0)
  if str == nil:
    # No buffer yet: stay nil for length 0, otherwise allocate one.
    if n == 0: return
    str = rawNewStringNoInit(n)
    str.data[n] = '\0'
    str.len = n
    s = cast[string](str)
  elif n > str.space:
    # Does not fit: move the current contents into a larger buffer.
    let old = str
    str = rawNewStringNoInit(max(resize(old.space), n))
    # Copy through the raw payload and skip the copy for an empty source,
    # so an empty `s` is never indexed.
    if old.len > 0:
      copyMem(addr str.data[0], unsafeAddr old.data[0], old.len)
    str.data[n] = '\0'
    str.len = n
    s = cast[string](str)
  elif n != str.len:
    # Shrink, or grow within the existing capacity: the buffer is reused and
    # only the terminator and the length change. (Growth that fits the
    # capacity previously returned without updating the length.)
    str.data[n] = '\0'
    str.len = n
|
||||
|
||||
# ----------------- sequences ----------------------------------------------
|
||||
|
||||
proc incrSeq(seq: PGenericSeq, elemSize, elemAlign: int): PGenericSeq {.compilerproc.} =
|
||||
|
||||
@@ -1,20 +1,24 @@
|
||||
discard """
|
||||
matrix: "--mm:refc; --mm:orc"
|
||||
targets: "c cpp js"
|
||||
matrix: "--backend:c --mm:refc; --backend:c --mm:orc; --backend:c --mm:orc -d:nimsso; --backend:cpp --mm:refc; --backend:cpp --mm:orc; --backend:js --mm:refc; --backend:js --mm:orc"
|
||||
"""
|
||||
|
||||
from std/sequtils import toSeq, map
|
||||
from std/sugar import `=>`
|
||||
import std/assertions
|
||||
|
||||
const hasNativeSso = defined(nimsso) and
|
||||
(defined(gcArc) or defined(gcAtomicArc) or defined(gcOrc) or defined(gcYrc))
|
||||
|
||||
proc tester[T](x: T) =
|
||||
let test = toSeq(0..4).map(i => newSeq[int]())
|
||||
doAssert $test == "@[@[], @[], @[], @[], @[]]"
|
||||
|
||||
func reverse*(a: string): string =
|
||||
result = a
|
||||
for i in 0 ..< a.len div 2:
|
||||
swap(result[i], result[^(i + 1)])
|
||||
when not hasNativeSso:
|
||||
func reverse*(a: string): string =
|
||||
result = a
|
||||
for i in 0 ..< a.len div 2:
|
||||
let j = result.len - i - 1
|
||||
swap(result[i], result[j])
|
||||
|
||||
proc main() =
|
||||
block: # ..
|
||||
@@ -94,31 +98,148 @@ proc main() =
|
||||
block: # bug #7816
|
||||
tester(1)
|
||||
|
||||
block: # bug #14497, reverse
|
||||
doAssert reverse("hello") == "olleh"
|
||||
when not hasNativeSso:
|
||||
block: # bug #14497, reverse
|
||||
doAssert reverse("hello") == "olleh"
|
||||
|
||||
block: # len, high
|
||||
var a = "ab\0cd"
|
||||
var b = a.cstring
|
||||
doAssert a.len == 5
|
||||
block: # bug #16405
|
||||
when defined(js):
|
||||
when nimvm: doAssert b.len == 2
|
||||
else: doAssert b.len == 5
|
||||
else: doAssert b.len == 2
|
||||
|
||||
doAssert a.high == a.len - 1
|
||||
doAssert b.high == b.len - 1
|
||||
|
||||
when not (hasNativeSso and defined(cpp)):
|
||||
let b = a.cstring
|
||||
block: # bug #16405
|
||||
when defined(js):
|
||||
when nimvm: doAssert b.len == 2
|
||||
else: doAssert b.len == 5
|
||||
else: doAssert b.len == 2
|
||||
doAssert b.high == b.len - 1
|
||||
|
||||
doAssert "".len == 0
|
||||
doAssert "".high == -1
|
||||
doAssert "".cstring.len == 0
|
||||
doAssert "".cstring.high == -1
|
||||
when not (hasNativeSso and defined(cpp)):
|
||||
doAssert "".cstring.len == 0
|
||||
doAssert "".cstring.high == -1
|
||||
|
||||
block: # bug #16674
|
||||
var c: cstring = nil
|
||||
doAssert c.len == 0
|
||||
doAssert c.high == -1
|
||||
|
||||
block: # setLen, setLenUninit
|
||||
when hasNativeSso:
|
||||
const
|
||||
alwaysAvail = sizeof(uint) - 1
|
||||
payloadSize = sizeof(uint) + sizeof(pointer) - 2
|
||||
longStringDataOffset = 3 * sizeof(int)
|
||||
|
||||
template rawSlenOf(s: string): int =
|
||||
int(cast[ptr byte](unsafeAddr s)[])
|
||||
|
||||
template inlineDataOf(s: string): ptr UncheckedArray[char] =
|
||||
cast[ptr UncheckedArray[char]](cast[uint](unsafeAddr s) + 1'u)
|
||||
|
||||
template longDataOf(s: string): ptr UncheckedArray[char] =
|
||||
let ssPtr = cast[ptr tuple[bytes: uint, more: pointer]](unsafeAddr s)
|
||||
cast[ptr UncheckedArray[char]](
|
||||
cast[uint](ssPtr.more) + uint(longStringDataOffset))
|
||||
|
||||
proc checkStrInternals(s: string; expectedLen: int) =
  ## Asserts that `s` has length `expectedLen` and, when not running in the
  ## VM, that the underlying buffer holds `'\0'` at index `expectedLen`.
  doAssert s.len == expectedLen, "expected " & $expectedLen & ", got " & $s.len
  when nimvm:
    discard
  else:
    when hasNativeSso and not defined(js) and not defined(nimscript):
      # SSO
      let rawSlen = rawSlenOf(s)
      if rawSlen > payloadSize:
        # Long form: the first byte reads 255 and the characters are
        # reached through `longDataOf`.
        doAssert rawSlen == 255
        let data = longDataOf(s)
        doAssert data[expectedLen] == '\0'
      else:
        # Inline form: the first byte is the length itself.
        doAssert rawSlen == expectedLen
        let data = inlineDataOf(s)
        doAssert data[expectedLen] == '\0'
        if expectedLen < alwaysAvail:
          # Bytes after the terminator, up to `alwaysAvail`, must be zero too.
          for i in expectedLen + 1 ..< alwaysAvail:
            doAssert data[i] == '\0'
    elif not defined(js) and not defined(nimscript):
      # Non-SSO native strings. `defined(UncheckedArray)` only tests for a
      # `-d:` symbol, so that guard never enabled this branch; use the same
      # backend guard as the SSO branch instead.
      let cs = s.cstring
      let arr = cast[ptr UncheckedArray[char]](unsafeAddr cs[0])
      doAssert arr[expectedLen] == '\0'
|
||||
|
||||
proc makeStr(n: int): string =
  ## Returns a string of `n` characters cycling through 'a'..'z', built by
  ## appending to a buffer requested with capacity `n`.
  result = newStringOfCap(n)
  var letter = 0
  while result.len < n:
    result.add chr(ord('a') + letter)
    letter = (letter + 1) mod 26
|
||||
|
||||
proc checkSetLenUninit(oldLen, newLen: int; cmpAfter = -1) =
  ## Resizes a generated string of `oldLen` characters to `newLen` with
  ## `setLenUninit` and checks its internals and the surviving prefix.
  ## When `cmpAfter >= 0`, the result must also compare less than
  ## `makeStr(cmpAfter)`.
  let keep = min(oldLen, newLen)
  let expected = makeStr(keep)
  var subject = makeStr(oldLen)
  subject.setLenUninit(newLen)
  checkStrInternals(subject, newLen)
  # The first `keep` characters must survive the resize.
  doAssert subject[0..<keep] == expected
  if oldLen >= newLen:
    # No growth: nothing beyond the kept prefix may remain.
    doAssert subject == expected
  if cmpAfter >= 0:
    doAssert subject < makeStr(cmpAfter)
|
||||
|
||||
const numbers = "1234567890"
|
||||
block setLen:
|
||||
# Trim to zero and grow past the old end. Must keep the prefix and zero the tail.
|
||||
var s = numbers
|
||||
s.setLen(0)
|
||||
s.checkStrInternals(0)
|
||||
doAssert s == ""
|
||||
|
||||
s = numbers
|
||||
s.setLen(numbers.len + 1)
|
||||
s.checkStrInternals(numbers.len + 1)
|
||||
doAssert s[0..numbers.high] == numbers
|
||||
doAssert s[numbers.len] == '\0'
|
||||
|
||||
block setLenUninit:
|
||||
# Shared baseline for both SSO and V2: noop, shrink, grow.
|
||||
checkSetLenUninit(numbers.len, numbers.len)
|
||||
checkSetLenUninit(numbers.len, 5)
|
||||
checkSetLenUninit(numbers.len, 11)
|
||||
|
||||
when hasNativeSso:
|
||||
const
|
||||
shortLen = alwaysAvail
|
||||
medLen = payloadSize
|
||||
longLen = payloadSize + 8
|
||||
|
||||
# Stay short and verify the short-compare padding after a shrink.
|
||||
checkSetLenUninit(shortLen, shortLen - 1, shortLen)
|
||||
checkSetLenUninit(shortLen - 2, shortLen - 1)
|
||||
checkSetLenUninit(shortLen, 0)
|
||||
|
||||
# Cross the short/medium boundary in both directions.
|
||||
checkSetLenUninit(medLen, medLen - 1)
|
||||
checkSetLenUninit(medLen, alwaysAvail - 1, alwaysAvail)
|
||||
checkSetLenUninit(alwaysAvail, medLen)
|
||||
|
||||
# Cross the inline/long boundary in both directions and cover long growth.
|
||||
checkSetLenUninit(longLen, longLen - 2)
|
||||
checkSetLenUninit(longLen, medLen - 1)
|
||||
checkSetLenUninit(longLen, alwaysAvail - 1, alwaysAvail)
|
||||
checkSetLenUninit(medLen, longLen)
|
||||
checkSetLenUninit(longLen, longLen + 10)
|
||||
checkSetLenUninit(longLen, 0)
|
||||
|
||||
when not defined(js) and not defined(nimscript):
|
||||
# shared long strings must not mutate the original when grown
|
||||
let src = makeStr(longLen)
|
||||
var orig = src
|
||||
var copy = orig
|
||||
copy.setLenUninit(longLen + 4)
|
||||
copy.checkStrInternals(longLen + 4)
|
||||
doAssert orig == src
|
||||
doAssert copy[0..<longLen] == src
|
||||
|
||||
static: main()
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user