mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-19 05:50:30 +00:00
fixes #25687

This pull request introduces an optimization for sequence (`seq`) assignments and copies in the Nim compiler, enabling bulk memory copying for sequences whose element types are trivially copyable (i.e., no GC references or destructors). This can significantly improve performance for such types by avoiding per-element loops.

Key changes:

### Compiler code generation improvements

* Added the `supportsCopyMem` function, used by `compiler/liftdestructors.nim`, to detect if a sequence's element type is trivially copyable (no GC refs, no destructors).
* Updated the `fillSeqOp` procedure to use a new `genBulkCopySeq` code path for eligible element types, generating a call to `nimCopySeqPayload` for efficient bulk copying. Fallback to the element-wise loop remains for non-trivial types. [[1]](diffhunk://#diff-456118dde9a4e21f1b351fd72504d62fc16e9c30354dbb9a3efcb95a29067863R665-R670) [[2]](diffhunk://#diff-456118dde9a4e21f1b351fd72504d62fc16e9c30354dbb9a3efcb95a29067863R623-R655)

### Runtime support

* Introduced the `nimCopySeqPayload` procedure in `lib/system/seqs_v2.nim`, which performs the actual bulk memory copy of sequence data using `copyMem`. This is only used for types that are safe for such an operation.

These changes collectively improve the efficiency of sequence operations for simple types, while maintaining correctness for complex types.

### Benchmarked the original micro-benchmark:

* refc: 3.52s user 0.02s system 99% cpu 3.538 total
* orc (after change): 3.46s user 0.01s system 99% cpu 3.476 total

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -620,11 +620,34 @@ proc checkSelfAssignment(c: var TLiftCtx; t: PType; body, x, y: PNode) =
|
||||
cond.typ = getSysType(c.g, c.info, tyBool)
|
||||
body.add genIf(c, cond, newTreeI(nkReturnStmt, c.info, newNodeI(nkEmpty, c.info)))
|
||||
|
||||
proc genBulkCopySeq(c: var TLiftCtx; t: PType; body, x, y: PNode) =
  ## Appends to `body` the call
  ## `nimCopySeqPayload(addr x, addr y, sizeof(T), alignof(T))`, where `T`
  ## is the element type of the seq type `t`, so that the seq data is copied
  ## in one bulk operation instead of element by element.
  ##
  ## Reports a local error and leaves `body` untouched when the
  ## `nimCopySeqPayload` compilerproc cannot be found.
  let elem = t.elementType
  let copyProc = magicsys.getCompilerProc(c.g, "nimCopySeqPayload")
  if copyProc == nil:
    localError(c.g.config, c.info, "system module needs: nimCopySeqPayload")
    return

  # `sizeof(T)` / `alignof(T)` are emitted as builtin calls whose result
  # type is set to the system `int` type.
  let elemSizeNode = genBuiltin(c, mSizeOf, "sizeof",
                                newNodeIT(nkType, c.info, elem))
  elemSizeNode.typ = getSysType(c.g, c.info, tyInt)
  let elemAlignNode = genBuiltin(c, mAlignOf, "alignof",
                                 newNodeIT(nkType, c.info, elem))
  elemAlignNode.typ = getSysType(c.g, c.info, tyInt)

  # The call node is created with `newNodeI` so it carries `c.info`.
  let copyCall = newNodeI(nkCall, c.info)
  copyCall.add newSymNode(copyProc)

  # Both seqs are handed over by address: `x` first, then `y`.
  let firstAddr = newTreeIT(nkAddr, c.info,
                            makePtrType(c.fn, x.typ, c.idgen), x)
  copyCall.add firstAddr
  let secondAddr = newTreeIT(nkAddr, c.info,
                             makePtrType(c.fn, y.typ, c.idgen), y)
  copyCall.add secondAddr

  copyCall.add elemSizeNode
  copyCall.add elemAlignNode
  copyCall.typ = copyProc.typ.returnType
  body.add copyCall
|
||||
|
||||
proc fillSeqOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
|
||||
case c.kind
|
||||
of attachedDup:
|
||||
body.add setLenSeqCall(c, t, x, y)
|
||||
forallElements(c, t, body, x, y)
|
||||
if supportsCopyMem(t.elementType):
|
||||
genBulkCopySeq(c, t, body, x, y)
|
||||
else:
|
||||
forallElements(c, t, body, x, y)
|
||||
of attachedAsgn, attachedDeepCopy:
|
||||
# we generate:
|
||||
# if x.p == y.p:
|
||||
@@ -633,9 +656,13 @@ proc fillSeqOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
|
||||
# var i = 0
|
||||
# while i < y.len: dest[i] = y[i]; inc(i)
|
||||
# This is usually more efficient than a destroy/create pair.
|
||||
# For trivially copyable types, use bulk copyMem instead of element loop.
|
||||
checkSelfAssignment(c, t, body, x, y)
|
||||
body.add setLenSeqCall(c, t, x, y)
|
||||
forallElements(c, t, body, x, y)
|
||||
if supportsCopyMem(t.elementType):
|
||||
genBulkCopySeq(c, t, body, x, y)
|
||||
else:
|
||||
forallElements(c, t, body, x, y)
|
||||
of attachedSink:
|
||||
let moveCall = genBuiltin(c, mMove, "move", x)
|
||||
moveCall.add y
|
||||
|
||||
@@ -232,10 +232,7 @@ proc evalTypeTrait(c: PContext; traitCall: PNode, operand: PType, context: PSym)
|
||||
of "stripGenericParams":
|
||||
result = uninstantiate(operand).toNode(traitCall.info)
|
||||
of "supportsCopyMem":
|
||||
let t = operand.skipTypes({tyVar, tyLent, tyGenericInst, tyAlias, tySink, tyInferred})
|
||||
let complexObj = containsGarbageCollectedRef(t) or
|
||||
hasDestructor(t)
|
||||
result = newIntNodeT(toInt128(ord(not complexObj)), traitCall, c.idgen, c.graph)
|
||||
result = newIntNodeT(toInt128(ord(supportsCopyMem(operand))), traitCall, c.idgen, c.graph)
|
||||
of "canFormCycles":
|
||||
result = newIntNodeT(toInt128(ord(types.canFormAcycle(c.graph, operand))), traitCall, c.idgen, c.graph)
|
||||
of "hasDefaultValue":
|
||||
|
||||
@@ -1779,3 +1779,7 @@ proc reduceToBase*(f: PType): PType =
|
||||
result = f.elementType
|
||||
else:
|
||||
result = f
|
||||
|
||||
proc supportsCopyMem*(t: PType): bool =
  ## Returns true when `t`, after skipping `var`, `lent`, generic-instance,
  ## alias, `sink` and inferred wrappers, neither contains a garbage
  ## collected reference nor has a destructor.
  let stripped = t.skipTypes(
    {tyVar, tyLent, tyGenericInst, tyAlias, tySink, tyInferred})
  not (containsGarbageCollectedRef(stripped) or hasDestructor(stripped))
|
||||
|
||||
@@ -272,6 +272,16 @@ proc newSeq[T](s: var seq[T], len: Natural) =
|
||||
proc sameSeqPayload(x: pointer, y: pointer): bool {.compilerRtl, inl.} =
  ## Reinterprets `x` and `y` as pointers to `NimRawSeq` and returns true
  ## when both seqs refer to the same payload pointer `p`.
  let lhs = cast[ptr NimRawSeq](x)
  let rhs = cast[ptr NimRawSeq](y)
  result = lhs[].p == rhs[].p
|
||||
|
||||
proc nimCopySeqPayload(dest: pointer, src: pointer, elemSize: int, elemAlign: int) {.compilerRtl, inl.} =
  ## Copies the element data of the `src` seq into the `dest` seq with a
  ## single `copyMem`.
  ##
  ## `dest` and `src` are reinterpreted as pointers to `NimRawSeq`;
  ## `elemSize` and `elemAlign` are the size and alignment of one element.
  ##
  ## Only valid for trivially copyable element types (no GC refs, no
  ## destructors). The caller must already have given `dest` the correct
  ## length and capacity (e.g. via setLen). Does nothing when `src` has no
  ## elements.
  let srcSeq = cast[ptr NimRawSeq](src)
  let dstSeq = cast[ptr NimRawSeq](dest)
  if srcSeq.len <= 0:
    return
  # The element data starts after the payload header, whose size is
  # aligned to the element alignment.
  let dataOffset = align(sizeof(NimSeqPayloadBase), elemAlign)
  copyMem(dstSeq.p +! dataOffset, srcSeq.p +! dataOffset,
          srcSeq.len * elemSize)
|
||||
|
||||
func capacity*[T](self: seq[T]): int {.inline.} =
|
||||
## Returns the current capacity of the seq.
|
||||
|
||||
Reference in New Issue
Block a user