Files
Nim/lib/system/seqs_v2.nim
ringabout 854c1f15ba fixes #25687; optimizes seq assignment for orc (#25689)
fixes #25687

This pull request introduces an optimization for sequence (`seq`)
assignments and copies in the Nim compiler, enabling bulk memory copying
for sequences whose element types are trivially copyable (i.e., no GC
references or destructors). This can significantly improve performance
for such types by avoiding per-element loops.

Key changes:

### Compiler code generation improvements

* Added the `elemSupportsCopyMem` function in
`compiler/liftdestructors.nim` to detect if a sequence's element type is
trivially copyable (no GC refs, no destructors).
* Updated the `fillSeqOp` procedure to use a new `genBulkCopySeq` code
path for eligible element types, generating a call to
`nimCopySeqPayload` for efficient bulk copying. Fallback to the
element-wise loop remains for non-trivial types.
[[1]](diffhunk://#diff-456118dde9a4e21f1b351fd72504d62fc16e9c30354dbb9a3efcb95a29067863R665-R670)
[[2]](diffhunk://#diff-456118dde9a4e21f1b351fd72504d62fc16e9c30354dbb9a3efcb95a29067863R623-R655)

### Runtime support

* Introduced the `nimCopySeqPayload` procedure in
`lib/system/seqs_v2.nim`, which performs the actual bulk memory copy of
sequence data using `copyMem`. This is only used for types that are safe
for such an operation.

These changes collectively improve the efficiency of sequence operations
for simple types, while maintaining correctness for complex types.


### Benchmark results for the original micro-benchmark:
refc: 3.52s user 0.02s system 99% cpu 3.538 total
orc (after change): 3.46s user 0.01s system 99% cpu 3.476 total

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-04-02 11:46:49 +02:00

324 lines
11 KiB
Nim

#
#
# Nim's Runtime Library
# (c) Copyright 2017 Nim contributors
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
# import std/typetraits
# strs.nim has already imported the allocator helpers for us.
when defined(gcYrc):
  include rwlocks
  include threadids

  const
    NumLockStripes = 64  # power of two so a thread id can be masked onto a stripe

  type
    YrcLockState = enum
      ## Per-thread lock ownership state; ordered so that `<` comparisons can
      ## test "holds less than the collector lock" (see yrcCollectorLock).
      HasNoLock
      HasMutatorLock
      HasCollectorLock
      Collecting
    AlignedRwLock = object
      ## One RwLock per cache line. {.align: 64.} causes the compiler to round
      ## the struct size up to 64 bytes, so consecutive array elements never
      ## share a cache line (sizeof(RwLock) = 56 on Linux x86_64 → 8 byte pad).
      lock {.align: 64.}: RwLock

  var
    gYrcLocks: array[NumLockStripes, AlignedRwLock]  # global stripe array shared by all threads
  var
    lockState {.threadvar.}: YrcLockState  # what this thread currently holds

  proc getYrcStripe(): int {.inline.} =
    ## Map this thread to one of the NumLockStripes RwLock stripes.
    ## getThreadId() is already cached thread-locally in threadids.nim.
    getThreadId() and (NumLockStripes - 1)

  proc acquireMutatorLock() {.compilerRtl, inl.} =
    ## Takes the shared (read) side of this thread's stripe, unless a lock
    ## is already held (mutator or collector) — the acquire is not recursive.
    if lockState == HasNoLock:
      acquireRead gYrcLocks[getYrcStripe()].lock
      lockState = HasMutatorLock

  proc releaseMutatorLock() {.compilerRtl, inl.} =
    ## Inverse of acquireMutatorLock; a no-op when this thread does not
    ## currently hold the mutator (read) lock.
    if lockState == HasMutatorLock:
      lockState = HasNoLock
      releaseRead gYrcLocks[getYrcStripe()].lock

  template yrcMutatorLock*(t: typedesc; body: untyped) =
    ## Runs `body` under the mutator lock, but only when values of type `t`
    ## can participate in reference cycles; acyclic types skip the locking
    ## entirely at compile time.
    {.noSideEffect.}:
      when canFormCycles(t):
        acquireMutatorLock()
    try:
      body
    finally:
      {.noSideEffect.}:
        when canFormCycles(t):
          releaseMutatorLock()

  template yrcMutatorLockUntyped(body: untyped) =
    ## Like yrcMutatorLock but without a type to inspect: always locks.
    {.noSideEffect.}:
      acquireMutatorLock()
    try:
      body
    finally:
      {.noSideEffect.}:
        releaseMutatorLock()

  template yrcCollectorLock(body: untyped) =
    ## Runs `body` with exclusive (write) access to every stripe. A held
    ## mutator lock is dropped first so the write acquisition cannot
    ## deadlock against our own read lock.
    if lockState == HasMutatorLock: releaseMutatorLock()
    let prevState = lockState
    let hadToAcquire = prevState < HasCollectorLock
    if hadToAcquire:
      # Acquire all stripes in ascending order — the only thread ever holding
      # multiple write locks is the collector, so there is no lock-order cycle.
      for yrcI in 0..<NumLockStripes:
        acquireWrite(gYrcLocks[yrcI].lock)
      lockState = HasCollectorLock
    try:
      body
    finally:
      if hadToAcquire:
        for yrcI in 0..<NumLockStripes:
          releaseWrite(gYrcLocks[yrcI].lock)
        lockState = prevState
else:
  # Without gcYrc the lock templates collapse to plain code so the rest of
  # this module can use them unconditionally at zero cost.
  template yrcMutatorLock*(t: typedesc; body: untyped) =
    body
  template yrcMutatorLockUntyped(body: untyped) =
    body
# Some optimizations here may be not to empty-seq-initialize some symbols, then StrictNotNil complains.
{.push warning[StrictNotNil]: off.} # See https://github.com/nim-lang/Nim/issues/21401
## Default seq implementation used by Nim's core.
type
  NimSeqPayloadBase = object
    ## Type-erased prefix of every seq payload: just the capacity field.
    cap: int
  NimSeqPayload[T] = object
    ## Heap payload: capacity (possibly with strlitFlag set for literals)
    ## followed by the elements themselves.
    cap: int
    data: UncheckedArray[T]
  NimSeqV2*[T] = object # \
    # if you change this implementation, also change seqs_v2_reimpl.nim!
    len: int
    p: ptr NimSeqPayload[T]
  NimRawSeq = object
    ## Untyped view of a seq header, for type-erased runtime helpers.
    len: int
    p: pointer

const nimSeqVersion {.core.} = 2
# XXX make code memory safe for overflows in '*'
proc newSeqPayload(cap, elemSize, elemAlign: int): pointer {.compilerRtl, raises: [].} =
  ## Allocates a zero-initialized payload able to hold `cap` elements of
  ## `elemSize` bytes, aligned to `elemAlign`. Returns nil when `cap <= 0`.
  # Type-erased because Nim does not support generic compilerProcs;
  # calls are expected to be inlined anyway.
  if cap <= 0:
    result = nil
  else:
    let headerBytes = align(sizeof(NimSeqPayloadBase), elemAlign)
    var payload = cast[ptr NimSeqPayloadBase](alignedAlloc0(headerBytes + cap * elemSize, elemAlign))
    payload.cap = cap
    result = payload
proc newSeqPayloadUninit(cap, elemSize, elemAlign: int): pointer {.compilerRtl, raises: [].} =
  ## Like newSeqPayload but leaves the element storage uninitialized.
  ## Used in `newSeqOfCap()`. Returns nil when `cap <= 0`.
  if cap <= 0:
    result = nil
  else:
    let headerBytes = align(sizeof(NimSeqPayloadBase), elemAlign)
    var payload = cast[ptr NimSeqPayloadBase](alignedAlloc(headerBytes + cap * elemSize, elemAlign))
    payload.cap = cap
    result = payload
proc prepareSeqAdd(len: int; p: pointer; addlen, elemSize, elemAlign: int): pointer {.
    noSideEffect, tags: [], raises: [], compilerRtl.} =
  ## Ensures the payload `p` (currently holding `len` elements) has room for
  ## `addlen` more, reallocating if needed, and zero-initializes the new
  ## slots. Returns the (possibly new) payload pointer.
  {.noSideEffect.}:
    let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
    if addlen <= 0:
      result = p
    elif p == nil:
      result = newSeqPayload(len+addlen, elemSize, elemAlign)
    else:
      # Note: this means we cannot support things that have internal pointers as
      # they get reallocated here. This needs to be documented clearly.
      var p = cast[ptr NimSeqPayloadBase](p)
      let oldCap = p.cap and not strlitFlag
      let newCap = max(resize(oldCap), len+addlen)
      var q: ptr NimSeqPayloadBase
      if (p.cap and strlitFlag) == strlitFlag:
        # Payload backs a string/seq literal: it must not be freed or
        # realloc'ed, so copy the existing elements into a fresh allocation.
        q = cast[ptr NimSeqPayloadBase](alignedAlloc(headerSize + elemSize * newCap, elemAlign))
        copyMem(q +! headerSize, p +! headerSize, len * elemSize)
      else:
        let oldSize = headerSize + elemSize * oldCap
        let newSize = headerSize + elemSize * newCap
        q = cast[ptr NimSeqPayloadBase](alignedRealloc(p, oldSize, newSize, elemAlign))
      # Zero only the newly requested slots (both branches above leave them
      # uninitialized).
      zeroMem(q +! headerSize +! len * elemSize, addlen * elemSize)
      q.cap = newCap
      result = q
proc zeroNewElements(len: int; q: pointer; addlen, elemSize, elemAlign: int) {.
    noSideEffect, tags: [], raises: [], compilerRtl.} =
  ## Zero-initializes the `addlen` elements that follow the first `len`
  ## elements in payload `q`.
  {.noSideEffect.}:
    # Skip the aligned header, then the `len` existing elements, to reach
    # the region that must be cleared.
    let payloadOffset = align(sizeof(NimSeqPayloadBase), elemAlign)
    zeroMem(q +! payloadOffset +! (len * elemSize), addlen * elemSize)
proc prepareSeqAddUninit(len: int; p: pointer; addlen, elemSize, elemAlign: int): pointer {.
    noSideEffect, tags: [], raises: [], compilerRtl.} =
  ## Like prepareSeqAdd but leaves the `addlen` new slots uninitialized;
  ## callers are expected to write every new slot themselves.
  {.noSideEffect.}:
    let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign)
    if addlen <= 0:
      result = p
    elif p == nil:
      result = newSeqPayloadUninit(len+addlen, elemSize, elemAlign)
    else:
      # Note: this means we cannot support things that have internal pointers as
      # they get reallocated here. This needs to be documented clearly.
      var p = cast[ptr NimSeqPayloadBase](p)
      let oldCap = p.cap and not strlitFlag
      let newCap = max(resize(oldCap), len+addlen)
      if (p.cap and strlitFlag) == strlitFlag:
        # Literal-backed payload: never realloc'ed; copy into a fresh block.
        var q = cast[ptr NimSeqPayloadBase](alignedAlloc(headerSize + elemSize * newCap, elemAlign))
        copyMem(q +! headerSize, p +! headerSize, len * elemSize)
        q.cap = newCap
        result = q
      else:
        let oldSize = headerSize + elemSize * oldCap
        let newSize = headerSize + elemSize * newCap
        var q = cast[ptr NimSeqPayloadBase](alignedRealloc(p, oldSize, newSize, elemAlign))
        q.cap = newCap
        result = q
proc shrink*[T](x: var seq[T]; newLen: Natural) {.tags: [], raises: [], noSideEffect.} =
  ## Truncates `x` to `newLen` elements, destroying the dropped elements
  ## when `T` has non-trivial destruction. Capacity is left untouched.
  when nimvm:
    # At compile time the raw pointer manipulation below is unavailable;
    # fall back to the plain setLen.
    {.cast(tags: []).}:
      setLen(x, newLen)
  else:
    #sysAssert newLen <= x.len, "invalid newLen parameter for 'shrink'"
    yrcMutatorLock(T):
      when not supportsCopyMem(T):
        # Destroy dropped elements back-to-front so partial failure leaves
        # a consistent prefix.
        for i in countdown(x.len - 1, newLen):
          reset x[i]
      # XXX This is wrong for const seqs that were moved into 'x'!
      {.noSideEffect.}:
        cast[ptr NimSeqV2[T]](addr x).len = newLen
proc grow*[T](x: var seq[T]; newLen: Natural; value: T) {.nodestroy.} =
  ## Extends `x` to `newLen` elements, filling every new slot with a copy
  ## of `value`. A no-op when `newLen <= x.len`.
  let oldLen = x.len
  #sysAssert newLen >= x.len, "invalid newLen parameter for 'grow'"
  if newLen <= oldLen: return
  yrcMutatorLock(T):
    var xu = cast[ptr NimSeqV2[T]](addr x)
    if xu.p == nil or (xu.p.cap and not strlitFlag) < newLen:
      xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newLen - oldLen, sizeof(T), alignof(T)))
    xu.len = newLen
    for i in oldLen .. newLen-1:
      when (NimMajor, NimMinor, NimPatch) >= (2, 3, 1):
        # Newer compilers expose `=dup`, which copies into uninitialized
        # storage directly.
        xu.p.data[i] = `=dup`(value)
      else:
        # Older hook protocol: mark the slot moved-from, then copy into it.
        wasMoved(xu.p.data[i])
        `=copy`(xu.p.data[i], value)
proc add*[T](x: var seq[T]; y: sink T) {.magic: "AppendSeqElem", noSideEffect, nodestroy.} =
  ## Generic proc for adding a data item `y` to a container `x`.
  ##
  ## For containers that have an order, `add` means *append*. New generic
  ## containers should also call their adding proc `add` for consistency.
  ## Generic code becomes much easier to write if the Nim naming scheme is
  ## respected.
  {.cast(noSideEffect).}:
    yrcMutatorLock(T):
      let oldLen = x.len
      var xu = cast[ptr NimSeqV2[T]](addr x)
      if xu.p == nil or (xu.p.cap and not strlitFlag) < oldLen+1:
        xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, 1, sizeof(T), alignof(T)))
      xu.len = oldLen+1
      # .nodestroy means `xu.p.data[oldLen] = value` is compiled into a
      # copyMem(). This is fine as we know by construction that
      # in `xu.p.data[oldLen]` there is nothing to destroy.
      # We also save the `wasMoved + destroy` pair for the sink parameter.
      xu.p.data[oldLen] = y
proc setLen[T](s: var seq[T], newlen: Natural) {.nodestroy.} =
  ## Resizes `s` to `newlen` elements: truncates via shrink when shrinking,
  ## otherwise grows and default-initializes every new slot.
  {.noSideEffect.}:
    if newlen < s.len:
      shrink(s, newlen)
    else:
      yrcMutatorLock(T):
        let oldLen = s.len
        if newlen <= oldLen: return
        var xu = cast[ptr NimSeqV2[T]](addr s)
        if xu.p == nil or (xu.p.cap and not strlitFlag) < newlen:
          xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
        xu.len = newlen
        # prepareSeqAddUninit left the new slots uninitialized; fill them.
        for i in oldLen..<newlen:
          xu.p.data[i] = default(T)
proc newSeq[T](s: var seq[T], len: Natural) =
  ## Reinitializes `s` as a seq of `len` default-initialized elements:
  ## first destroys/truncates the old contents, then grows to `len`.
  shrink(s, 0)
  setLen(s, len)
proc sameSeqPayload(x: pointer, y: pointer): bool {.compilerRtl, inl.} =
  ## True when the two seq headers reference the very same payload block.
  let a = cast[ptr NimRawSeq](x)
  let b = cast[ptr NimRawSeq](y)
  result = a[].p == b[].p
proc nimCopySeqPayload(dest: pointer, src: pointer, elemSize: int, elemAlign: int) {.compilerRtl, inl.} =
  ## Bulk-copies the payload data from src seq to dest seq using copyMem.
  ## Only valid for trivially copyable element types (no GC refs, no destructors).
  ## Caller must have already ensured dest has the correct length and capacity
  ## (e.g. via setLen).
  let srcSeq = cast[ptr NimRawSeq](src)
  if srcSeq.len > 0:
    let destSeq = cast[ptr NimRawSeq](dest)
    # Both payloads start with the same aligned header; skip it on each side.
    let payloadOffset = align(sizeof(NimSeqPayloadBase), elemAlign)
    copyMem(destSeq.p +! payloadOffset, srcSeq.p +! payloadOffset, srcSeq.len * elemSize)
func capacity*[T](self: seq[T]): int {.inline.} =
  ## Returns the current capacity of the seq.
  # See https://github.com/nim-lang/RFCs/issues/460
  runnableExamples:
    var lst = newSeqOfCap[string](cap = 42)
    lst.add "Nim"
    assert lst.capacity == 42
  let raw = cast[ptr NimSeqV2[T]](unsafeAddr self)
  if raw.p == nil:
    result = 0
  else:
    # Mask off strlitFlag, which may be OR'ed into cap for literal payloads.
    result = raw.p.cap and not strlitFlag
func setLenUninit[T](s: var seq[T], newlen: Natural) {.nodestroy.} =
  ## Sets the length of seq `s` to `newlen`. `T` may be any sequence type.
  ## New slots will not be initialized.
  ##
  ## If the current length is greater than the new length,
  ## `s` will be truncated.
  ## ```nim
  ## var x = @[10, 20]
  ## x.setLenUninit(5)
  ## x[4] = 50
  ## assert x[4] == 50
  ## x.setLenUninit(1)
  ## assert x == @[10]
  ## ```
  {.noSideEffect.}:
    if newlen < s.len:
      shrink(s, newlen)
    else:
      yrcMutatorLock(T):
        let oldLen = s.len
        if newlen <= oldLen: return
        var xu = cast[ptr NimSeqV2[T]](addr s)
        if xu.p == nil or (xu.p.cap and not strlitFlag) < newlen:
          xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
        # Unlike setLen, the new slots are deliberately left uninitialized;
        # the caller promises to write them before reading.
        xu.len = newlen
{.pop.} # See https://github.com/nim-lang/Nim/issues/21401