From 854c1f15bada3055fb041cc7ba96378c32d34667 Mon Sep 17 00:00:00 2001 From: ringabout <43030857+ringabout@users.noreply.github.com> Date: Thu, 2 Apr 2026 17:46:49 +0800 Subject: [PATCH] fixes #25687; optimizes seq assignment for orc (#25689) fixes #25687 This pull request introduces an optimization for sequence (`seq`) assignments and copies in the Nim compiler, enabling bulk memory copying for sequences whose element types are trivially copyable (i.e., no GC references or destructors). This can significantly improve performance for such types by avoiding per-element loops. Key changes: ### Compiler code generation improvements * Added the `supportsCopyMem` function in `compiler/types.nim` to detect if a type is trivially copyable (no GC refs, no destructors); `fillSeqOp` applies it to the sequence's element type, and the `supportsCopyMem` type trait in `compiler/semmagic.nim` now calls it instead of duplicating the check. * Updated the `fillSeqOp` procedure to use a new `genBulkCopySeq` code path for eligible element types, generating a call to `nimCopySeqPayload` for efficient bulk copying. Fallback to the element-wise loop remains for non-trivial types. [[1]](diffhunk://#diff-456118dde9a4e21f1b351fd72504d62fc16e9c30354dbb9a3efcb95a29067863R665-R670) [[2]](diffhunk://#diff-456118dde9a4e21f1b351fd72504d62fc16e9c30354dbb9a3efcb95a29067863R623-R655) ### Runtime support * Introduced the `nimCopySeqPayload` procedure in `lib/system/seqs_v2.nim`, which performs the actual bulk memory copy of sequence data using `copyMem`. This is only used for types that are safe for such an operation. These changes collectively improve the efficiency of sequence operations for simple types, while maintaining correctness for complex types. 
### Benchmarked the original micro-benchmark: refc: 3.52s user 0.02s system 99% cpu 3.538 total orc (after change): 3.46s user 0.01s system 99% cpu 3.476 total --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- compiler/liftdestructors.nim | 31 +++++++++++++++++++++++++++++-- compiler/semmagic.nim | 5 +---- compiler/types.nim | 4 ++++ lib/system/seqs_v2.nim | 10 ++++++++++ 4 files changed, 44 insertions(+), 6 deletions(-) diff --git a/compiler/liftdestructors.nim b/compiler/liftdestructors.nim index e05b14d460..a2f3c94cde 100644 --- a/compiler/liftdestructors.nim +++ b/compiler/liftdestructors.nim @@ -620,11 +620,34 @@ proc checkSelfAssignment(c: var TLiftCtx; t: PType; body, x, y: PNode) = cond.typ = getSysType(c.g, c.info, tyBool) body.add genIf(c, cond, newTreeI(nkReturnStmt, c.info, newNodeI(nkEmpty, c.info))) +proc genBulkCopySeq(c: var TLiftCtx; t: PType; body, x, y: PNode) = + ## Generates a call to nimCopySeqPayload for bulk memcpy of seq data. + let elemType = t.elementType + let sym = magicsys.getCompilerProc(c.g, "nimCopySeqPayload") + if sym == nil: + localError(c.g.config, c.info, "system module needs: nimCopySeqPayload") + return + var sizeOf = genBuiltin(c, mSizeOf, "sizeof", newNodeIT(nkType, c.info, elemType)) + sizeOf.typ = getSysType(c.g, c.info, tyInt) + var alignOf = genBuiltin(c, mAlignOf, "alignof", newNodeIT(nkType, c.info, elemType)) + alignOf.typ = getSysType(c.g, c.info, tyInt) + let call = newNodeI(nkCall, c.info) + call.add newSymNode(sym) + call.add newTreeIT(nkAddr, c.info, makePtrType(c.fn, x.typ, c.idgen), x) + call.add newTreeIT(nkAddr, c.info, makePtrType(c.fn, y.typ, c.idgen), y) + call.add sizeOf + call.add alignOf + call.typ = sym.typ.returnType + body.add call + proc fillSeqOp(c: var TLiftCtx; t: PType; body, x, y: PNode) = case c.kind of attachedDup: body.add setLenSeqCall(c, t, x, y) - forallElements(c, t, body, x, y) + if supportsCopyMem(t.elementType): + genBulkCopySeq(c, t, body, x, y) + 
else: + forallElements(c, t, body, x, y) of attachedAsgn, attachedDeepCopy: # we generate: # if x.p == y.p: @@ -633,9 +656,13 @@ proc fillSeqOp(c: var TLiftCtx; t: PType; body, x, y: PNode) = # var i = 0 # while i < y.len: dest[i] = y[i]; inc(i) # This is usually more efficient than a destroy/create pair. + # For trivially copyable types, use bulk copyMem instead of element loop. checkSelfAssignment(c, t, body, x, y) body.add setLenSeqCall(c, t, x, y) - forallElements(c, t, body, x, y) + if supportsCopyMem(t.elementType): + genBulkCopySeq(c, t, body, x, y) + else: + forallElements(c, t, body, x, y) of attachedSink: let moveCall = genBuiltin(c, mMove, "move", x) moveCall.add y diff --git a/compiler/semmagic.nim b/compiler/semmagic.nim index 87e085d4fd..5e4388d3c6 100644 --- a/compiler/semmagic.nim +++ b/compiler/semmagic.nim @@ -232,10 +232,7 @@ proc evalTypeTrait(c: PContext; traitCall: PNode, operand: PType, context: PSym) of "stripGenericParams": result = uninstantiate(operand).toNode(traitCall.info) of "supportsCopyMem": - let t = operand.skipTypes({tyVar, tyLent, tyGenericInst, tyAlias, tySink, tyInferred}) - let complexObj = containsGarbageCollectedRef(t) or - hasDestructor(t) - result = newIntNodeT(toInt128(ord(not complexObj)), traitCall, c.idgen, c.graph) + result = newIntNodeT(toInt128(ord(supportsCopyMem(operand))), traitCall, c.idgen, c.graph) of "canFormCycles": result = newIntNodeT(toInt128(ord(types.canFormAcycle(c.graph, operand))), traitCall, c.idgen, c.graph) of "hasDefaultValue": diff --git a/compiler/types.nim b/compiler/types.nim index e18f97ff36..62831624b9 100644 --- a/compiler/types.nim +++ b/compiler/types.nim @@ -1779,3 +1779,7 @@ proc reduceToBase*(f: PType): PType = result = f.elementType else: result = f + +proc supportsCopyMem*(t: PType): bool = + let t = t.skipTypes({tyVar, tyLent, tyGenericInst, tyAlias, tySink, tyInferred}) + result = not containsGarbageCollectedRef(t) and not hasDestructor(t) diff --git a/lib/system/seqs_v2.nim 
b/lib/system/seqs_v2.nim index 154b443460..fefb6e914c 100644 --- a/lib/system/seqs_v2.nim +++ b/lib/system/seqs_v2.nim @@ -272,6 +272,16 @@ proc newSeq[T](s: var seq[T], len: Natural) = proc sameSeqPayload(x: pointer, y: pointer): bool {.compilerRtl, inl.} = result = cast[ptr NimRawSeq](x)[].p == cast[ptr NimRawSeq](y)[].p +proc nimCopySeqPayload(dest: pointer, src: pointer, elemSize: int, elemAlign: int) {.compilerRtl, inl.} = + ## Bulk-copies the payload data from src seq to dest seq using copyMem. + ## Only valid for trivially copyable element types (no GC refs, no destructors). + ## Caller must have already ensured dest has the correct length and capacity + ## (e.g. via setLen). + let d = cast[ptr NimRawSeq](dest) + let s = cast[ptr NimRawSeq](src) + if s.len > 0: + let headerSize = align(sizeof(NimSeqPayloadBase), elemAlign) + copyMem(d.p +! headerSize, s.p +! headerSize, s.len * elemSize) func capacity*[T](self: seq[T]): int {.inline.} = ## Returns the current capacity of the seq.