yrc progress (#25534)

This commit is contained in:
Andreas Rumpf
2026-02-23 13:39:55 +01:00
committed by GitHub
parent df42ebc5e6
commit 6badeb1b4d
11 changed files with 665 additions and 235 deletions

View File

@@ -1286,7 +1286,15 @@ proc produceSym(g: ModuleGraph; c: PContext; typ: PType; kind: TTypeAttachedOp;
tk = tyNone # no special casing for strings and seqs
case tk
of tySequence:
let needsYrcLock = g.config.selectedGC == gcYrc and
kind in {attachedDestructor, attachedSink, attachedAsgn, attachedDeepCopy, attachedDup} and
types.canFormAcycle(g, skipped.elementType)
# YRC: topology-changing seq ops must hold the mutator (read) lock
if needsYrcLock:
result.ast[bodyPos].add callCodegenProc(g, "acquireMutatorLock", info)
fillSeqOp(a, typ, result.ast[bodyPos], d, src)
if needsYrcLock:
result.ast[bodyPos].add callCodegenProc(g, "releaseMutatorLock", info)
of tyString:
fillStrOp(a, typ, result.ast[bodyPos], d, src)
else:

View File

@@ -236,6 +236,8 @@ proc evalTypeTrait(c: PContext; traitCall: PNode, operand: PType, context: PSym)
let complexObj = containsGarbageCollectedRef(t) or
hasDestructor(t)
result = newIntNodeT(toInt128(ord(not complexObj)), traitCall, c.idgen, c.graph)
of "canFormCycles":
result = newIntNodeT(toInt128(ord(types.canFormAcycle(c.graph, operand))), traitCall, c.idgen, c.graph)
of "hasDefaultValue":
result = newIntNodeT(toInt128(ord(not operand.requiresInit)), traitCall, c.idgen, c.graph)
of "isNamedTuple":

View File

@@ -96,6 +96,9 @@ proc supportsCopyMem*(t: typedesc): bool {.magic: "TypeTrait".}
##
## Other languages name a type like these `blob`:idx:.
proc canFormCycles*(t: typedesc): bool {.magic: "TypeTrait".}
## Returns true if `t` can form cycles.
proc hasDefaultValue*(t: typedesc): bool {.magic: "TypeTrait".} =
## Returns true if `t` has a valid default value.
runnableExamples:

View File

@@ -627,7 +627,7 @@ proc newSeq*[T](s: var seq[T], len: Natural) {.magic: "NewSeq", noSideEffect.}
## #inputStrings[3] = "out of bounds"
## ```
proc newSeq*[T](len = 0.Natural): seq[T] =
proc newSeq*[T](len = 0.Natural): seq[T] {.noSideEffect.} =
## Creates a new sequence of type `seq[T]` with length `len`.
##
## Note that the sequence will be filled with zeroed entries.
@@ -1459,6 +1459,7 @@ proc isNil*[T: proc | iterator {.closure.}](x: T): bool {.noSideEffect, magic: "
## `== nil`.
proc supportsCopyMem(t: typedesc): bool {.magic: "TypeTrait".}
proc canFormCycles(t: typedesc): bool {.magic: "TypeTrait".}
when defined(nimHasTopDownInference):
# magic used for seq type inference

143
lib/system/rwlocks.nim Normal file
View File

@@ -0,0 +1,143 @@
#
#
#            Nim's Runtime Library
#        (c) Copyright 2026 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

# Read-write lock (RwLock) for lib/system.
# Used by YRC and by traceable containers that perform topology-changing ops.
# POSIX: pthread_rwlock_* ; Windows: SRWLOCK (slim reader/writer).

{.push stackTrace: off.}

when defined(windows):
  # SRWLOCK is pointer-sized; use single pointer for ABI compatibility
  type
    RwLock* {.importc: "SRWLOCK", header: "<synchapi.h>", final, pure, byref.} = object
      p: pointer

  proc initializeSRWLock(L: var RwLock) {.importc: "InitializeSRWLock",
    header: "<synchapi.h>".}
  proc acquireSRWLockShared(L: var RwLock) {.importc: "AcquireSRWLockShared",
    header: "<synchapi.h>".}
  proc releaseSRWLockShared(L: var RwLock) {.importc: "ReleaseSRWLockShared",
    header: "<synchapi.h>".}
  proc acquireSRWLockExclusive(L: var RwLock) {.importc: "AcquireSRWLockExclusive",
    header: "<synchapi.h>".}
  proc releaseSRWLockExclusive(L: var RwLock) {.importc: "ReleaseSRWLockExclusive",
    header: "<synchapi.h>".}

  proc initRwLock*(L: var RwLock) {.inline.} =
    ## Initializes `L`. SRW locks need no explicit destruction.
    initializeSRWLock(L)
  proc deinitRwLock*(L: var RwLock) {.inline.} =
    ## No-op on Windows: an SRWLOCK owns no OS resources.
    discard
  proc acquireRead*(L: var RwLock) {.inline.} =
    ## Acquires `L` in shared (reader) mode; multiple readers may hold it.
    acquireSRWLockShared(L)
  proc releaseRead*(L: var RwLock) {.inline.} =
    ## Releases a shared (reader) acquisition of `L`.
    releaseSRWLockShared(L)
  proc acquireWrite*(L: var RwLock) {.inline.} =
    ## Acquires `L` in exclusive (writer) mode.
    acquireSRWLockExclusive(L)
  proc releaseWrite*(L: var RwLock) {.inline.} =
    ## Releases an exclusive (writer) acquisition of `L`.
    releaseSRWLockExclusive(L)

elif defined(genode):
  {.error: "RwLock is not implemented for Genode".}

else:
  # POSIX: pthread_rwlock_*
  type
    SysRwLockObj {.importc: "pthread_rwlock_t", pure, final,
      header: """#include <sys/types.h>
#include <pthread.h>""", byref.} = object
      when defined(linux) and defined(amd64):
        # Mirror glibc's 56-byte pthread_rwlock_t layout for sizeof purposes.
        abi: array[56 div sizeof(clong), clong]

  proc pthread_rwlock_init(rwlock: var SysRwLockObj, attr: pointer): cint {.
    importc: "pthread_rwlock_init", header: "<pthread.h>", noSideEffect.}
  proc pthread_rwlock_destroy(rwlock: var SysRwLockObj): cint {.
    importc: "pthread_rwlock_destroy", header: "<pthread.h>", noSideEffect.}
  proc pthread_rwlock_rdlock(rwlock: var SysRwLockObj): cint {.
    importc: "pthread_rwlock_rdlock", header: "<pthread.h>", noSideEffect.}
  proc pthread_rwlock_wrlock(rwlock: var SysRwLockObj): cint {.
    importc: "pthread_rwlock_wrlock", header: "<pthread.h>", noSideEffect.}
  proc pthread_rwlock_unlock(rwlock: var SysRwLockObj): cint {.
    importc: "pthread_rwlock_unlock", header: "<pthread.h>", noSideEffect.}

  when defined(linux):
    # PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP: once a writer is waiting,
    # new readers block. Prevents continuous mutator read-locks from starving
    # the collector's write-lock acquisition (glibc default is PREFER_READER).
    type
      SysRwLockAttr {.importc: "pthread_rwlockattr_t", pure, final,
        header: "<pthread.h>".} = object
    # BUGFIX: import the constant from <pthread.h> instead of hard-coding it.
    # glibc defines PREFER_READER_NP = 0, PREFER_WRITER_NP = 1,
    # PREFER_WRITER_NONRECURSIVE_NP = 2. The previous hard-coded cint(3) was
    # not a valid kind, so pthread_rwlockattr_setkind_np returned EINVAL
    # (swallowed by `discard`) and the lock silently kept the
    # reader-preferring default, defeating the anti-starvation intent.
    var PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP {.
      importc: "PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP",
      header: "<pthread.h>".}: cint
    proc pthread_rwlockattr_init(attr: ptr SysRwLockAttr): cint {.
      importc: "pthread_rwlockattr_init", header: "<pthread.h>".}
    proc pthread_rwlockattr_destroy(attr: ptr SysRwLockAttr): cint {.
      importc: "pthread_rwlockattr_destroy", header: "<pthread.h>".}
    proc pthread_rwlockattr_setkind_np(attr: ptr SysRwLockAttr; pref: cint): cint {.
      importc: "pthread_rwlockattr_setkind_np", header: "<pthread.h>".}

  when defined(ios):
    # iOS: keep the pthread_rwlock_t behind a stable malloc'ed pointer so the
    # initialized lock is never moved in memory.
    type RwLock* = ptr SysRwLockObj

    proc initRwLock*(L: var RwLock) =
      ## Allocates and initializes the lock with default attributes.
      when not declared(c_malloc):
        proc c_malloc(size: csize_t): pointer {.importc: "malloc", header: "<stdlib.h>".}
        proc c_free(p: pointer) {.importc: "free", header: "<stdlib.h>".}
      L = cast[RwLock](c_malloc(csize_t(sizeof(SysRwLockObj))))
      discard pthread_rwlock_init(L[], nil)
    proc deinitRwLock*(L: var RwLock) =
      ## Destroys and frees the lock; safe to call on a nil handle.
      if L != nil:
        discard pthread_rwlock_destroy(L[])
        when not declared(c_free):
          proc c_free(p: pointer) {.importc: "free", header: "<stdlib.h>".}
        c_free(L)
        L = nil
    proc acquireRead*(L: var RwLock) =
      ## Acquires `L` in shared (reader) mode.
      discard pthread_rwlock_rdlock(L[])
    proc releaseRead*(L: var RwLock) =
      ## Releases a shared (reader) acquisition of `L`.
      discard pthread_rwlock_unlock(L[])
    proc acquireWrite*(L: var RwLock) =
      ## Acquires `L` in exclusive (writer) mode.
      discard pthread_rwlock_wrlock(L[])
    proc releaseWrite*(L: var RwLock) =
      ## Releases an exclusive (writer) acquisition of `L`.
      discard pthread_rwlock_unlock(L[])
  else:
    type RwLock* = SysRwLockObj

    proc initRwLock*(L: var RwLock) =
      ## Initializes the lock. On Linux, requests writer-preferring behavior
      ## so a waiting writer (the collector) is not starved by readers.
      when defined(linux):
        var attr: SysRwLockAttr
        discard pthread_rwlockattr_init(addr attr)
        discard pthread_rwlockattr_setkind_np(addr attr,
          PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP)
        discard pthread_rwlock_init(L, addr attr)
        discard pthread_rwlockattr_destroy(addr attr)
      else:
        discard pthread_rwlock_init(L, nil)
    proc deinitRwLock*(L: var RwLock) =
      ## Destroys the lock.
      discard pthread_rwlock_destroy(L)
    proc acquireRead*(L: var RwLock) =
      ## Acquires `L` in shared (reader) mode.
      discard pthread_rwlock_rdlock(L)
    proc releaseRead*(L: var RwLock) =
      ## Releases a shared (reader) acquisition of `L`.
      discard pthread_rwlock_unlock(L)
    proc acquireWrite*(L: var RwLock) =
      ## Acquires `L` in exclusive (writer) mode.
      discard pthread_rwlock_wrlock(L)
    proc releaseWrite*(L: var RwLock) =
      ## Releases an exclusive (writer) acquisition of `L`.
      discard pthread_rwlock_unlock(L)

template withReadLock*(L: var RwLock, body: untyped) =
  ## Runs `body` while holding `L` in shared (reader) mode;
  ## released in a `finally` even if `body` raises.
  acquireRead(L)
  try:
    body
  finally:
    releaseRead(L)

template withWriteLock*(L: var RwLock, body: untyped) =
  ## Runs `body` while holding `L` in exclusive (writer) mode;
  ## released in a `finally` even if `body` raises.
  acquireWrite(L)
  try:
    body
  finally:
    releaseWrite(L)

{.pop.}

View File

@@ -11,6 +11,90 @@
# import std/typetraits
# strs already imported allocateds for us.
# YRC mutator/collector lock infrastructure: mutators take a striped read
# lock for topology-changing ops; the collector takes every stripe's write
# lock so the heap graph is frozen during mark/scan/collect.
when defined(gcYrc):
include rwlocks
include threadids
const
# Number of RwLock stripes. Must stay a power of two: getYrcStripe masks
# the thread id with (NumLockStripes - 1).
NumLockStripes = 64
type
# Per-thread lock state, tracked in the `lockState` threadvar below so
# acquire/release are idempotent and the collector can detect re-entry.
YrcLockState = enum
HasNoLock
HasMutatorLock
HasCollectorLock
Collecting
AlignedRwLock = object
## One RwLock per cache line. {.align: 64.} causes the compiler to round
## the struct size up to 64 bytes, so consecutive array elements never
## share a cache line (sizeof(RwLock) = 56 on Linux x86_64 → 8 byte pad).
lock {.align: 64.}: RwLock
var
# Global stripe array shared by all threads; each mutator read-locks only
# its own stripe, the collector write-locks all of them.
gYrcLocks: array[NumLockStripes, AlignedRwLock]
var
# Per-thread state machine; zero-initialized to HasNoLock for new threads.
lockState {.threadvar.}: YrcLockState
proc getYrcStripe(): int {.inline.} =
## Map this thread to one of the NumLockStripes RwLock stripes.
## getThreadId() is already cached thread-locally in threadids.nim.
getThreadId() and (NumLockStripes - 1)
# Idempotent: only acquires when this thread holds nothing, so nested
# mutator sections take the stripe read lock exactly once.
proc acquireMutatorLock() {.compilerRtl, inl.} =
if lockState == HasNoLock:
acquireRead gYrcLocks[getYrcStripe()].lock
lockState = HasMutatorLock
# Idempotent counterpart: releases only if this thread holds the mutator
# lock (a no-op inside a collector section, where lockState differs).
proc releaseMutatorLock() {.compilerRtl, inl.} =
if lockState == HasMutatorLock:
lockState = HasNoLock
releaseRead gYrcLocks[getYrcStripe()].lock
# Takes the mutator read lock only when T's values can participate in
# cycles (canFormCycles is the TypeTrait magic); otherwise compiles away.
template yrcMutatorLock*(t: typedesc; body: untyped) =
{.noSideEffect.}:
when canFormCycles(t):
acquireMutatorLock()
try:
body
finally:
{.noSideEffect.}:
when canFormCycles(t):
releaseMutatorLock()
# Unconditional variant for call sites with no static type information.
template yrcMutatorLockUntyped(body: untyped) =
{.noSideEffect.}:
acquireMutatorLock()
try:
body
finally:
{.noSideEffect.}:
releaseMutatorLock()
# Collector side: drops this thread's own read lock first (a thread cannot
# upgrade read → write on the same RwLock), then write-locks every stripe.
# prevState/hadToAcquire make the template re-entrant: a nested call while
# already HasCollectorLock/Collecting acquires nothing and restores the
# previous state on exit.
template yrcCollectorLock(body: untyped) =
if lockState == HasMutatorLock: releaseMutatorLock()
let prevState = lockState
let hadToAcquire = prevState < HasCollectorLock
if hadToAcquire:
# Acquire all stripes in ascending order — the only thread ever holding
# multiple write locks is the collector, so there is no lock-order cycle.
for yrcI in 0..<NumLockStripes:
acquireWrite(gYrcLocks[yrcI].lock)
lockState = HasCollectorLock
try:
body
finally:
if hadToAcquire:
for yrcI in 0..<NumLockStripes:
releaseWrite(gYrcLocks[yrcI].lock)
lockState = prevState
else:
# Non-YRC GCs: the lock templates are transparent no-ops.
template yrcMutatorLock*(t: typedesc; body: untyped) =
body
template yrcMutatorLockUntyped(body: untyped) =
body
# Some optimizations here may be not to empty-seq-initialize some symbols, then StrictNotNil complains.
{.push warning[StrictNotNil]: off.} # See https://github.com/nim-lang/Nim/issues/21401
@@ -116,33 +200,35 @@ proc prepareSeqAddUninit(len: int; p: pointer; addlen, elemSize, elemAlign: int)
q.cap = newCap
result = q
proc shrink*[T](x: var seq[T]; newLen: Natural) {.tags: [], raises: [].} =
proc shrink*[T](x: var seq[T]; newLen: Natural) {.tags: [], raises: [], noSideEffect.} =
when nimvm:
{.cast(tags: []).}:
setLen(x, newLen)
else:
#sysAssert newLen <= x.len, "invalid newLen parameter for 'shrink'"
when not supportsCopyMem(T):
for i in countdown(x.len - 1, newLen):
reset x[i]
# XXX This is wrong for const seqs that were moved into 'x'!
{.noSideEffect.}:
cast[ptr NimSeqV2[T]](addr x).len = newLen
yrcMutatorLock(T):
when not supportsCopyMem(T):
for i in countdown(x.len - 1, newLen):
reset x[i]
# XXX This is wrong for const seqs that were moved into 'x'!
{.noSideEffect.}:
cast[ptr NimSeqV2[T]](addr x).len = newLen
proc grow*[T](x: var seq[T]; newLen: Natural; value: T) {.nodestroy.} =
let oldLen = x.len
#sysAssert newLen >= x.len, "invalid newLen parameter for 'grow'"
if newLen <= oldLen: return
var xu = cast[ptr NimSeqV2[T]](addr x)
if xu.p == nil or (xu.p.cap and not strlitFlag) < newLen:
xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newLen - oldLen, sizeof(T), alignof(T)))
xu.len = newLen
for i in oldLen .. newLen-1:
when (NimMajor, NimMinor, NimPatch) >= (2, 3, 1):
xu.p.data[i] = `=dup`(value)
else:
wasMoved(xu.p.data[i])
`=copy`(xu.p.data[i], value)
yrcMutatorLock(T):
var xu = cast[ptr NimSeqV2[T]](addr x)
if xu.p == nil or (xu.p.cap and not strlitFlag) < newLen:
xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newLen - oldLen, sizeof(T), alignof(T)))
xu.len = newLen
for i in oldLen .. newLen-1:
when (NimMajor, NimMinor, NimPatch) >= (2, 3, 1):
xu.p.data[i] = `=dup`(value)
else:
wasMoved(xu.p.data[i])
`=copy`(xu.p.data[i], value)
proc add*[T](x: var seq[T]; y: sink T) {.magic: "AppendSeqElem", noSideEffect, nodestroy.} =
## Generic proc for adding a data item `y` to a container `x`.
@@ -152,30 +238,32 @@ proc add*[T](x: var seq[T]; y: sink T) {.magic: "AppendSeqElem", noSideEffect, n
## Generic code becomes much easier to write if the Nim naming scheme is
## respected.
{.cast(noSideEffect).}:
let oldLen = x.len
var xu = cast[ptr NimSeqV2[T]](addr x)
if xu.p == nil or (xu.p.cap and not strlitFlag) < oldLen+1:
xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, 1, sizeof(T), alignof(T)))
xu.len = oldLen+1
# .nodestroy means `xu.p.data[oldLen] = value` is compiled into a
# copyMem(). This is fine as know by construction that
# in `xu.p.data[oldLen]` there is nothing to destroy.
# We also save the `wasMoved + destroy` pair for the sink parameter.
xu.p.data[oldLen] = y
yrcMutatorLock(T):
let oldLen = x.len
var xu = cast[ptr NimSeqV2[T]](addr x)
if xu.p == nil or (xu.p.cap and not strlitFlag) < oldLen+1:
xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, 1, sizeof(T), alignof(T)))
xu.len = oldLen+1
# .nodestroy means `xu.p.data[oldLen] = value` is compiled into a
# copyMem(). This is fine as know by construction that
# in `xu.p.data[oldLen]` there is nothing to destroy.
# We also save the `wasMoved + destroy` pair for the sink parameter.
xu.p.data[oldLen] = y
proc setLen[T](s: var seq[T], newlen: Natural) {.nodestroy.} =
{.noSideEffect.}:
if newlen < s.len:
shrink(s, newlen)
else:
let oldLen = s.len
if newlen <= oldLen: return
var xu = cast[ptr NimSeqV2[T]](addr s)
if xu.p == nil or (xu.p.cap and not strlitFlag) < newlen:
xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
xu.len = newlen
for i in oldLen..<newlen:
xu.p.data[i] = default(T)
yrcMutatorLock(T):
let oldLen = s.len
if newlen <= oldLen: return
var xu = cast[ptr NimSeqV2[T]](addr s)
if xu.p == nil or (xu.p.cap and not strlitFlag) < newlen:
xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
xu.len = newlen
for i in oldLen..<newlen:
xu.p.data[i] = default(T)
proc newSeq[T](s: var seq[T], len: Natural) =
shrink(s, 0)
@@ -214,11 +302,12 @@ func setLenUninit[T](s: var seq[T], newlen: Natural) {.nodestroy.} =
if newlen < s.len:
shrink(s, newlen)
else:
let oldLen = s.len
if newlen <= oldLen: return
var xu = cast[ptr NimSeqV2[T]](addr s)
if xu.p == nil or (xu.p.cap and not strlitFlag) < newlen:
xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
xu.len = newlen
yrcMutatorLock(T):
let oldLen = s.len
if newlen <= oldLen: return
var xu = cast[ptr NimSeqV2[T]](addr s)
if xu.p == nil or (xu.p.cap and not strlitFlag) < newlen:
xu.p = cast[typeof(xu.p)](prepareSeqAddUninit(oldLen, xu.p, newlen - oldLen, sizeof(T), alignof(T)))
xu.len = newlen
{.pop.} # See https://github.com/nim-lang/Nim/issues/21401

View File

@@ -18,7 +18,8 @@ type
template frees(s: NimSeqV2Reimpl) =
if s.p != nil and (s.p.cap and strlitFlag) != strlitFlag:
when compileOption("threads"):
deallocShared(s.p)
else:
dealloc(s.p)
yrcMutatorLockUntyped:
when compileOption("threads"):
deallocShared(s.p)
else:
dealloc(s.p)

View File

@@ -1,30 +1,29 @@
#
# YRC: Thread-safe ORC (concurrent cycle collector).
# Same API as orc.nim but with striped queues and global lock for merge/collect.
# Same API as orc.nim but with the global mutator/collector RWLock for safety.
# Destructors for refs run at collection time, not immediately on last decRef.
# See yrc_proof.lean for a Lean 4 proof of safety and deadlock freedom.
#
# ## Key Invariant: Topology vs. Reference Counts
# ## Locking Protocol
#
# Only `obj.field = x` can change the topology of the heap graph (heap-to-heap
# edges). Local variable assignments (`var local = someRef`) affect reference
# counts but never create heap-to-heap edges and thus cannot create cycles.
# ALL topology-changing operations — heap-field writes (`nimAsgnYrc`,
# `nimSinkYrc`) and seq mutations that resize internal buffers — hold the
# global mutator read lock (`gYrcGlobalLock` via `acquireMutatorLock`).
# Multiple mutators may hold this read lock simultaneously.
#
# The actual pointer write in `obj.field = x` happens immediately and lock-free —
# the graph topology is always up-to-date in memory. Only the RC adjustments are
# deferred: increments and decrements are buffered into per-stripe queues
# (`toInc`, `toDec`) protected by fine-grained per-stripe locks.
# The cycle collector acquires the exclusive write lock for the entire
# mark/scan/collect phase. This means the heap topology is *completely
# frozen* during collection: no `nimAsgnYrc` or seq operation can mutate
# any pointer field while the three passes run. This gives the Bacon
# algorithm the stable subgraph it requires without full write barriers.
#
# When `collectCycles` runs it takes the global lock, drains all stripe buffers
# via `mergePendingRoots`, and then traces the physical pointer graph (via
# `traceImpl`) to detect cycles. This is sound because `trace` follows the actual
# pointer values in memory — which are always current — and uses the reconciled
# RCs only to identify candidate roots and confirm garbage.
#
# In summary: the physical pointer graph is always consistent (writes are
# immediate); only the reference counts are eventually consistent (writes are
# buffered). The per-stripe locks are cheap; the expensive global lock is only
# needed when interpreting the RCs during collection.
# Consequence for incRef in `nimAsgnYrc`:
# Because the collector is blocked, the incRef can be a direct atomic
# increment on the RefHeader (`increment head(src)`) rather than going
# through the `toInc` stripe queue. The collector will see the updated
# RC immediately when it next acquires the write lock. Only decrements
# (`yrcDec`) still use the `toDec` stripe queue so that objects whose RC
# might reach zero are handled by the collector's cycle-detection logic.
#
# ## Why No Write Barrier Is Needed
#
@@ -35,40 +34,19 @@
# while A still points to it. Traditional concurrent collectors need write
# barriers to prevent this.
#
# This problem structurally cannot arise in YRC because the cycle collector only
# frees *closed cycles* — subgraphs where every reference to every member comes
# from within the group, with zero external references. To execute `A.field = B`
# the mutator must hold a reference to A, which means A has an external reference
# (from the stack) that is not a heap-to-heap edge. During trial deletion
# (`markGray`) only internal edges are subtracted from RCs, so A's external
# reference survives, `scan` finds A's RC >= 0, calls `scanBlack`, and rescues A
# and everything reachable from it — including B. In short: the mutator can only
# modify objects it can reach, but the cycle collector only frees objects nothing
# external can reach. The two conditions are mutually exclusive.
# This problem structurally cannot arise in YRC for two reasons:
#
#[
The problem described in Bacon01 is: during markGray/scan, a mutator concurrently
does X.field = Z (was X→Y), changing the physical graph while the collector is tracing
it. The collector might see stale or new edges. The reasons this is still safe:
Stale edges cancel with unbuffered decrements: If the collector sees old edge X→Y
(mutator already wrote X→Z and buffered dec(Y)), the phantom trial deletion and the
unbuffered dec cancel — Y's effective RC is correct.
scanBlack rescues via current physical edges: If X has external refs (merged RC reflects
the mutator's access), scanBlack(X) re-traces X and follows the current physical edge X→Z,
incrementing Z's RC and marking it black. Z survives.
rcSum==edges fast path is conservative: Any discrepancy between physical graph and merged
state (stale or new edges) causes rcSum != edges, falling back to the slow path which
rescues anything with RC >= 0.
Unreachable cycles are truly unreachable: The mutator can only reach objects through chains
rooted in merged references. If a cycle has zero external refs at merge time, no mutator
can reach it.
]#
# 1. The mutator lock freezes the topology during all three passes, so no
# concurrent field write can race with markGray/scan/collectWhite.
#
# 2. Even without the lock, the cycle collector only frees *closed cycles* —
# subgraphs where every reference to every member comes from within the
# group, with zero external references. To execute `A.field = B` the
# mutator must hold a reference to A (external ref), which `scan` would
# rescue. The two conditions are mutually exclusive.
#
# In practice reason (1) makes reason (2) a belt-and-suspenders safety
# argument rather than the primary mechanism.
{.push raises: [].}
@@ -93,12 +71,49 @@ type
TraceProc = proc (p, env: pointer) {.nimcall, gcsafe, raises: [].}
DisposeProc = proc (p: pointer) {.nimcall, gcsafe, raises: [].}
template color(c): untyped = c.rc and colorMask
template setColor(c, col) =
when col == colBlack:
c.rc = c.rc and not colorMask
else:
c.rc = c.rc and not colorMask or col
when defined(nimYrcAtomicIncs):
template color(c): untyped = atomicLoadN(addr c.rc, ATOMIC_ACQUIRE) and colorMask
template setColor(c, col) =
block:
var expected = atomicLoadN(addr c.rc, ATOMIC_RELAXED)
while true:
let desired = (expected and not colorMask) or col
if atomicCompareExchangeN(addr c.rc, addr expected, desired, true,
ATOMIC_ACQ_REL, ATOMIC_RELAXED):
break
template loadRc(c): int = atomicLoadN(addr c.rc, ATOMIC_ACQUIRE)
template trialDec(c) =
discard atomicFetchAdd(addr c.rc, -rcIncrement, ATOMIC_ACQ_REL)
template trialInc(c) =
discard atomicFetchAdd(addr c.rc, rcIncrement, ATOMIC_ACQ_REL)
template rcClearFlag(c, flag) =
block:
var expected = atomicLoadN(addr c.rc, ATOMIC_RELAXED)
while true:
let desired = expected and not flag
if atomicCompareExchangeN(addr c.rc, addr expected, desired, true,
ATOMIC_ACQ_REL, ATOMIC_RELAXED):
break
template rcSetFlag(c, flag) =
block:
var expected = atomicLoadN(addr c.rc, ATOMIC_RELAXED)
while true:
let desired = expected or flag
if atomicCompareExchangeN(addr c.rc, addr expected, desired, true,
ATOMIC_ACQ_REL, ATOMIC_RELAXED):
break
else:
template color(c): untyped = c.rc and colorMask
template setColor(c, col) =
when col == colBlack:
c.rc = c.rc and not colorMask
else:
c.rc = c.rc and not colorMask or col
template loadRc(c): int = c.rc
template trialDec(c) = c.rc = c.rc -% rcIncrement
template trialInc(c) = c.rc = c.rc +% rcIncrement
template rcClearFlag(c, flag) = c.rc = c.rc and not flag
template rcSetFlag(c, flag) = c.rc = c.rc or flag
const
optimizedOrc = false
@@ -118,8 +133,6 @@ proc trace(s: Cell; desc: PNimTypeV2; j: var GcEnv) {.inline.} =
var p = s +! sizeof(RefHeader)
cast[TraceProc](desc.traceImpl)(p, addr(j))
include threadids
type
Stripe = object
when not defined(yrcAtomics):
@@ -137,7 +150,6 @@ type
## Invoked while holding the global lock; must not call back into YRC.
var
gYrcGlobalLock: Lock
roots: CellSeq[Cell] # merged roots, used under global lock
stripes: array[NumStripes, Stripe]
rootsThreshold: int = 128
@@ -182,13 +194,15 @@ proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
let h = head(p)
when optimizedOrc:
if cyclic: h.rc = h.rc or maybeCycle
when defined(yrcAtomics):
when defined(nimYrcAtomicIncs):
discard atomicFetchAdd(addr h.rc, rcIncrement, ATOMIC_ACQ_REL)
elif defined(yrcAtomics):
let s = getStripeIdx()
let slot = atomicFetchAdd(addr stripes[s].toIncLen, 1, ATOMIC_ACQ_REL)
if slot < QueueSize:
atomicStoreN(addr stripes[s].toInc[slot], h, ATOMIC_RELEASE)
else:
withLock gYrcGlobalLock:
yrcCollectorLock:
h.rc = h.rc +% rcIncrement
for i in 0..<NumStripes:
let len = atomicExchangeN(addr stripes[i].toIncLen, 0, ATOMIC_ACQUIRE)
@@ -206,7 +220,7 @@ proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
else:
overflow = true
if overflow:
withLock gYrcGlobalLock:
yrcCollectorLock:
for i in 0..<NumStripes:
withLock stripes[i].lockInc:
for j in 0..<stripes[i].toIncLen:
@@ -221,23 +235,25 @@ proc mergePendingRoots() =
# we don't need to set color to black on incRef because collection runs
# under the global lock, so no concurrent mutations happen during collection.
for i in 0..<NumStripes:
when defined(yrcAtomics):
let incLen = atomicExchangeN(addr stripes[i].toIncLen, 0, ATOMIC_ACQUIRE)
for j in 0..<min(incLen, QueueSize):
let x = atomicLoadN(addr stripes[i].toInc[j], ATOMIC_ACQUIRE)
x.rc = x.rc +% rcIncrement
else:
withLock stripes[i].lockInc:
for j in 0..<stripes[i].toIncLen:
let x = stripes[i].toInc[j]
when not defined(nimYrcAtomicIncs):
# Inc buffers only exist when increfs are buffered (not atomic)
when defined(yrcAtomics):
let incLen = atomicExchangeN(addr stripes[i].toIncLen, 0, ATOMIC_ACQUIRE)
for j in 0..<min(incLen, QueueSize):
let x = atomicLoadN(addr stripes[i].toInc[j], ATOMIC_ACQUIRE)
x.rc = x.rc +% rcIncrement
stripes[i].toIncLen = 0
else:
withLock stripes[i].lockInc:
for j in 0..<stripes[i].toIncLen:
let x = stripes[i].toInc[j]
x.rc = x.rc +% rcIncrement
stripes[i].toIncLen = 0
withLock stripes[i].lockDec:
for j in 0..<stripes[i].toDecLen:
let (c, desc) = stripes[i].toDec[j]
c.rc = c.rc -% rcIncrement
if (c.rc and inRootsFlag) == 0:
c.rc = c.rc or inRootsFlag
trialDec(c)
if (loadRc(c) and inRootsFlag) == 0:
rcSetFlag(c, inRootsFlag)
if roots.d == nil: init(roots)
add(roots, c, desc)
stripes[i].toDecLen = 0
@@ -257,8 +273,8 @@ when logOrc or orcLeakDetector:
proc free(s: Cell; desc: PNimTypeV2) {.inline.} =
when traceCollector:
cprintf("[From ] %p rc %ld color %ld\n", s, s.rc shr rcShift, s.color)
if (s.rc and inRootsFlag) == 0:
cprintf("[From ] %p rc %ld color %ld\n", s, loadRc(s) shr rcShift, s.color)
if (loadRc(s) and inRootsFlag) == 0:
let p = s +! sizeof(RefHeader)
when logOrc: writeCell("free", s, desc)
if desc.destructor != nil:
@@ -291,7 +307,7 @@ proc scanBlack(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
while j.traceStack.len > until:
let (entry, desc) = j.traceStack.pop()
let t = head entry[]
t.rc = t.rc +% rcIncrement
trialInc(t)
if t.color != colBlack:
t.setColor colBlack
trace(t, desc, j)
@@ -301,23 +317,23 @@ proc markGray(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
if s.color != colGray:
s.setColor colGray
j.touched = j.touched +% 1
j.rcSum = j.rcSum +% (s.rc shr rcShift) +% 1
j.rcSum = j.rcSum +% (loadRc(s) shr rcShift) +% 1
orcAssert(j.traceStack.len == 0, "markGray: trace stack not empty")
trace(s, desc, j)
while j.traceStack.len > 0:
let (entry, desc) = j.traceStack.pop()
let t = head entry[]
t.rc = t.rc -% rcIncrement
trialDec(t)
j.edges = j.edges +% 1
if t.color != colGray:
t.setColor colGray
j.touched = j.touched +% 1
j.rcSum = j.rcSum +% (t.rc shr rcShift) +% 2
j.rcSum = j.rcSum +% (loadRc(t) shr rcShift) +% 2
trace(t, desc, j)
proc scan(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
if s.color == colGray:
if (s.rc shr rcShift) >= 0:
if (loadRc(s) shr rcShift) >= 0:
scanBlack(s, desc, j)
else:
orcAssert(j.traceStack.len == 0, "scan: trace stack not empty")
@@ -327,14 +343,14 @@ proc scan(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
let (entry, desc) = j.traceStack.pop()
let t = head entry[]
if t.color == colGray:
if (t.rc shr rcShift) >= 0:
if (loadRc(t) shr rcShift) >= 0:
scanBlack(t, desc, j)
else:
t.setColor(colWhite)
trace(t, desc, j)
proc collectColor(s: Cell; desc: PNimTypeV2; col: int; j: var GcEnv) =
if s.color == col and (s.rc and inRootsFlag) == 0:
if s.color == col and (loadRc(s) and inRootsFlag) == 0:
orcAssert(j.traceStack.len == 0, "collectWhite: trace stack not empty")
s.setColor(colBlack)
j.toFree.add(s, desc)
@@ -343,7 +359,7 @@ proc collectColor(s: Cell; desc: PNimTypeV2; col: int; j: var GcEnv) =
let (entry, desc) = j.traceStack.pop()
let t = head entry[]
entry[] = nil
if t.color == col and (t.rc and inRootsFlag) == 0:
if t.color == col and (loadRc(t) and inRootsFlag) == 0:
j.toFree.add(t, desc)
t.setColor(colBlack)
trace(t, desc, j)
@@ -351,6 +367,9 @@ proc collectColor(s: Cell; desc: PNimTypeV2; col: int; j: var GcEnv) =
proc collectCyclesBacon(j: var GcEnv; lowMark: int) =
# YRC defers all destruction to collection time - process ALL roots through Bacon's algorithm
# This is different from ORC which handles immediate garbage (rc == 0) directly
if lockState == Collecting:
return
lockState = Collecting
let last = roots.len -% 1
when logOrc:
for i in countdown(last, lowMark):
@@ -374,13 +393,10 @@ proc collectCyclesBacon(j: var GcEnv; lowMark: int) =
init j.toFree
for i in 0 ..< roots.len:
let s = roots.d[i][0]
s.rc = s.rc and not inRootsFlag
rcClearFlag(s, inRootsFlag)
collectColor(s, roots.d[i][1], colToCollect, j)
# Clear roots before freeing to prevent nested collectCycles() from accessing freed cells
when not defined(nimStressOrc):
let oldThreshold = rootsThreshold
rootsThreshold = high(int)
roots.len = 0
# Free all collected objects
@@ -390,8 +406,6 @@ proc collectCyclesBacon(j: var GcEnv; lowMark: int) =
when orcLeakDetector:
writeCell("CYCLIC OBJECT FREED", s, j.toFree.d[i][1])
free(s, j.toFree.d[i][1])
when not defined(nimStressOrc):
rootsThreshold = oldThreshold
j.freed = j.freed +% j.toFree.len
deinit j.toFree
@@ -401,7 +415,7 @@ when defined(nimOrcStats):
proc collectCycles() =
when logOrc:
cfprintf(cstderr, "[collectCycles] begin\n")
withLock gYrcGlobalLock:
yrcCollectorLock:
mergePendingRoots()
if roots.len >= RootsThreshold and mayRunCycleCollect():
var j: GcEnv
@@ -436,9 +450,9 @@ when defined(nimOrcStats):
result = OrcStats(freedCyclicObjects: freedCyclicObjects)
proc GC_runOrc* =
withLock gYrcGlobalLock:
yrcCollectorLock:
mergePendingRoots()
if mayRunCycleCollect():
if roots.len > 0 and mayRunCycleCollect():
var j: GcEnv
init j.traceStack
collectCyclesBacon(j, 0)
@@ -455,12 +469,12 @@ proc GC_disableOrc*() =
rootsThreshold = high(int)
proc GC_prepareOrc*(): int {.inline.} =
withLock gYrcGlobalLock:
yrcCollectorLock:
mergePendingRoots()
result = roots.len
proc GC_partialCollect*(limit: int) =
withLock gYrcGlobalLock:
yrcCollectorLock:
mergePendingRoots()
if roots.len > limit and mayRunCycleCollect():
var j: GcEnv
@@ -536,22 +550,24 @@ proc yrcDec(tmp: pointer; desc: PNimTypeV2) {.inline.} =
proc nimAsgnYrc(dest: ptr pointer; src: pointer; desc: PNimTypeV2) {.compilerRtl.} =
## YRC write barrier for ref copy assignment.
## Atomically stores src into dest, then buffers RC adjustments.
## Freeing is always done by the cycle collector, never inline.
## Holds the mutator read lock for the entire operation so the collector
## cannot run between the incRef and decRef, closing the stale-decRef
## bug. Direct atomic incRef replaces the toInc stripe queue: the
## collector is blocked, so the RC update is immediately visible and correct.
acquireMutatorLock()
if src != nil: increment head(src) # direct atomic: no toInc queue needed
let tmp = dest[]
atomicStoreN(dest, src, ATOMIC_RELEASE)
if src != nil:
nimIncRefCyclic(src, true)
if tmp != nil:
yrcDec(tmp, desc)
dest[] = src
if tmp != nil: yrcDec(tmp, desc) # still deferred via toDec for cycle detection
releaseMutatorLock()
proc nimSinkYrc(dest: ptr pointer; src: pointer; desc: PNimTypeV2) {.compilerRtl.} =
## YRC write barrier for ref sink (move). No incRef on source.
## Freeing is always done by the cycle collector, never inline.
acquireMutatorLock()
let tmp = dest[]
atomicStoreN(dest, src, ATOMIC_RELEASE)
if tmp != nil:
yrcDec(tmp, desc)
dest[] = src
if tmp != nil: yrcDec(tmp, desc)
releaseMutatorLock()
proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} =
when optimizedOrc:
@@ -559,10 +575,12 @@ proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} =
let h = head(p)
h.rc = h.rc or maybeCycle
# Initialize locks at module load
initLock(gYrcGlobalLock)
# Initialize locks at module load.
# RwLock stripes live in seqs_v2 (gYrcLocks); NumLockStripes is exported from there.
for i in 0..<NumLockStripes:
initRwLock(gYrcLocks[i].lock)
for i in 0..<NumStripes:
when not defined(yrcAtomics):
when not defined(yrcAtomics) and not defined(nimYrcAtomicIncs):
initLock(stripes[i].lockInc)
initLock(stripes[i].lockDec)

View File

@@ -42,6 +42,32 @@
\* - Mutator must hold stack ref to modify object (external ref)
\* - scanBlack follows current physical edges (rescues newly written objects)
\* - Only objects unreachable from any stack root are freed
\*
\* ## Seq Payload Race and RWLock Fix
\*
\* Value types like seq[T] (where T can form cycles) have internal heap
\* allocations (data arrays / "payloads") that are freed by value-type
\* hooks (=sink, =destroy), NOT by the cycle collector. This creates a race:
\*
\* 1. Object O has a seq field with payload P containing refs
\* 2. Collector starts tracing O -- reads payload pointer P
\* 3. Mutator does O.seq = newSeq -- frees P (value-type destructor)
\* 4. Collector dereferences P -- use-after-free!
\*
\* Fix: Change the global YRC lock to a read-write lock (RWLock).
\* - Collector acquires the WRITE lock (exclusive access during tracing)
\* - Seq mutations (assign, setLen, add, etc.) acquire the READ lock
\* - Multiple seq mutations can proceed concurrently (read lock is shared)
\* - But seq mutations block while the collector traces (write lock is exclusive)
\*
\* This prevents the race: the mutator cannot free a payload while the
\* collector is tracing it, because acquiring the read lock requires
\* the write lock to be unheld.
\*
\* Deadlock avoidance: If a seq operation triggers collectCycles() via stripe
\* overflow while already holding the read lock, it must NOT attempt to
\* acquire the write lock. Instead, it should drain the overflow buffers
\* without running the full collection cycle.
EXTENDS Naturals, Integers, Sequences, FiniteSets, TLC
@@ -53,10 +79,14 @@ ASSUME IsFiniteSet(Objects)
ASSUME IsFiniteSet(Threads)
ASSUME IsFiniteSet(ObjTypes)
\* Seq payload identifiers (models heap-allocated data arrays of seq[T])
CONSTANTS SeqPayloads
ASSUME IsFiniteSet(SeqPayloads)
\* NULL constant (represents "no thread" for locks and "no payload" for seq data)
\* We use a sentinel value that's guaranteed not to be in Threads, Objects, or SeqPayloads
NULL == "NULL" \* String literal that won't conflict with Threads/Objects
ASSUME NULL \notin Threads /\ NULL \notin Objects
ASSUME NULL \notin Threads /\ NULL \notin Objects /\ NULL \notin SeqPayloads
\* Helper functions
\* Note: GetStripeIdx is not used, GetStripe is used instead
@@ -90,15 +120,29 @@ VARIABLES
\* Per-stripe locks
lockInc, \* lockInc[stripe] = thread holding increment lock (or NULL)
lockDec, \* lockDec[stripe] = thread holding decrement lock (or NULL)
\* Global lock
globalLock, \* thread holding global lock (or NULL)
\* Global lock (now the WRITE side of the RWLock)
globalLock, \* thread holding write lock (or NULL)
\* Merged roots array (used during collection)
mergedRoots, \* sequence of (object, type) pairs
\* Collection state
collecting, \* TRUE if collection is in progress
gcEnv, \* GC environment: {touched, edges, rcSum, toFree, ...}
\* Pending operations (for modeling atomicity)
pendingWrites \* set of pending write barrier operations
pendingWrites, \* set of pending write barrier operations
\* --- Seq payload race modeling ---
\* Seq payloads: models the heap-allocated data arrays of seq[T] fields
seqData, \* [Objects -> SeqPayloads \cup {NULL}] -- current payload for obj's seq
payloadAlive, \* [SeqPayloads -> BOOLEAN] -- is this payload's memory valid?
\* RWLock read side: set of threads holding the read lock.
\* Seq mutations (assign, add, setLen, etc.) acquire the read lock.
\* The collector (write lock holder) gets exclusive access.
rwLockReaders, \* SUBSET Threads -- threads currently holding the read lock
\* Collector's in-progress seq trace: the payload pointer read during tracing.
\* Between reading the pointer and accessing the data, the payload could be freed.
collectorPayload \* SeqPayloads \cup {NULL} -- payload being traced by collector
\* Convenience tuple for seq-related variables (used in UNCHANGED clauses)
seqVars == <<seqData, payloadAlive, rwLockReaders, collectorPayload>>
\* Type invariants
TypeOK ==
@@ -117,6 +161,11 @@ TypeOK ==
/\ mergedRoots \in Seq([obj: Objects, desc: ObjTypes])
/\ collecting \in BOOLEAN
/\ pendingWrites \in SUBSET ([thread: Threads, dest: Objects, old: Objects \cup {NULL}, src: Objects \cup {NULL}, phase: {"store", "inc", "dec"}])
\* Seq payload types
/\ seqData \in [Objects -> SeqPayloads \cup {NULL}]
/\ payloadAlive \in [SeqPayloads -> BOOLEAN]
/\ rwLockReaders \in SUBSET Threads
/\ collectorPayload \in SeqPayloads \cup {NULL}
\* Helper: internal reference count (heap-to-heap edges)
InternalRC(obj) ==
@@ -163,7 +212,7 @@ MutatorWriteAtomicStore(thread, destObj, destField, oldVal, newVal, desc) ==
IF x = newVal /\ newVal # NULL
THEN TRUE
ELSE FALSE]]
/\ UNCHANGED <<roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
/\ UNCHANGED <<roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
\* ============================================================================
\* Phase 2: RC Buffering (if space available)
@@ -193,7 +242,7 @@ WriteBarrier(thread, destObj, destField, oldVal, newVal, desc) ==
/\ toDec' = IF oldVal # NULL /\ toDecLen[stripe] < QueueSize
THEN [toDec EXCEPT ![stripe] = Append(toDec[stripe], [obj |-> oldVal, desc |-> desc])]
ELSE toDec
/\ UNCHANGED <<edges, roots, rc, color, inRoots, mergedRoots, lockInc, lockDec, globalLock, collecting, gcEnv, pendingWrites>>
/\ UNCHANGED <<edges, roots, rc, color, inRoots, mergedRoots, lockInc, lockDec, globalLock, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
\* ============================================================================
\* Phase 3: Overflow Handling (separate actions that can block)
@@ -215,7 +264,7 @@ MutatorWriteMergeInc(thread) ==
externalRC == Cardinality({t \in Threads : roots[t][x]})
IN internalRC + externalRC]
/\ globalLock' = NULL \* Release lock after merge
/\ UNCHANGED <<edges, roots, color, inRoots, toDecLen, toDec, lockInc, lockDec, mergedRoots, collecting, gcEnv, pendingWrites>>
/\ UNCHANGED <<edges, roots, color, inRoots, toDecLen, toDec, lockInc, lockDec, mergedRoots, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
\* Handle decrement overflow: merge ALL buffers when lock is available
\* This calls collectCycles() which merges both increment and decrement buffers
@@ -257,7 +306,7 @@ MutatorWriteMergeDec(thread) ==
/\ toDecLen' = [s \in 0..(NumStripes-1) |-> 0]
/\ toDec' = [s \in 0..(NumStripes-1) |-> <<>>]
/\ globalLock' = NULL \* Lock acquired, merge done, lock released (entire withLock block is atomic)
/\ UNCHANGED <<edges, roots, color, lockInc, lockDec, collecting, gcEnv, pendingWrites>>
/\ UNCHANGED <<edges, roots, color, lockInc, lockDec, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
\* ============================================================================
\* Merge Operation: mergePendingRoots
@@ -311,7 +360,7 @@ MergePendingRoots ==
/\ toInc' = [s \in 0..(NumStripes-1) |-> <<>>]
/\ toDecLen' = [s \in 0..(NumStripes-1) |-> 0]
/\ toDec' = [s \in 0..(NumStripes-1) |-> <<>>]
/\ UNCHANGED <<edges, roots, color, lockInc, lockDec, globalLock, collecting, gcEnv, pendingWrites>>
/\ UNCHANGED <<edges, roots, color, lockInc, lockDec, globalLock, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
\* ============================================================================
\* Trial Deletion: markGray
@@ -365,7 +414,7 @@ MarkGray(obj, desc) ==
\* For roots, the RC includes external refs which survive trial deletion.
rc' = [x \in Objects |->
IF x \in allReachable THEN rc[x] - internalEdgeCount[x] ELSE rc[x]]
/\ UNCHANGED <<edges, roots, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
/\ UNCHANGED <<edges, roots, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
\* ============================================================================
\* Scan Phase
@@ -422,7 +471,7 @@ Scan(obj, desc) ==
ELSE \* Mark white (part of closed cycle)
/\ color' = [color EXCEPT ![obj] = colWhite]
/\ UNCHANGED <<rc>>
/\ UNCHANGED <<edges, roots, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
/\ UNCHANGED <<edges, roots, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
\* ============================================================================
\* Collection Phase: collectColor
@@ -442,7 +491,7 @@ CollectColor(obj, desc, targetColor) ==
edges' = [edges EXCEPT ![obj] = [x \in Objects |->
IF x = obj THEN FALSE ELSE edges[obj][x]]]
/\ color' = [color EXCEPT ![obj] = colBlack] \* Mark as freed
/\ UNCHANGED <<roots, rc, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
/\ UNCHANGED <<roots, rc, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
\* ============================================================================
\* Collection Cycle: collectCyclesBacon
@@ -454,7 +503,7 @@ StartCollection ==
/\ Len(mergedRoots) >= RootsThreshold
/\ collecting' = TRUE
/\ gcEnv' = [touched |-> 0, edges |-> 0, rcSum |-> 0, toFree |-> {}]
/\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, pendingWrites>>
/\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
EndCollection ==
/\ globalLock # NULL
@@ -464,7 +513,7 @@ EndCollection ==
IF x \in {r.obj : r \in mergedRoots} THEN FALSE ELSE inRoots[x]]
/\ mergedRoots' = <<>>
/\ collecting' = FALSE
/\ UNCHANGED <<edges, roots, rc, color, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, gcEnv, pendingWrites>>
/\ UNCHANGED <<edges, roots, rc, color, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
\* ============================================================================
\* Mutator Actions
@@ -491,7 +540,7 @@ MutatorWrite(thread, destObj, destField, oldVal, newVal, desc) ==
IF incOverflow \/ decOverflow
THEN \* Overflow: atomic store happened, but buffering is deferred
\* Buffers stay full, merge will happen when lock is available (via MutatorWriteMergeInc/Dec)
/\ UNCHANGED <<roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
/\ UNCHANGED <<roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
ELSE \* No overflow: buffer normally
/\ WriteBarrier(thread, destObj, destField, oldVal, newVal, desc)
/\ UNCHANGED <<roots, collecting, pendingWrites>>
@@ -521,16 +570,19 @@ MutatorRootAssign(thread, obj, val) ==
/\ collecting' = collecting
/\ gcEnv' = gcEnv
/\ pendingWrites' = pendingWrites
/\ UNCHANGED seqVars
\* ============================================================================
\* Collector Actions
\* ============================================================================
\* Collector acquires global lock for entire collection cycle
\* Collector acquires write lock (global lock) for entire collection cycle.
\* RWLock semantics: writer can only acquire when no readers hold the read lock.
CollectorAcquireLock(thread) ==
/\ globalLock = NULL
/\ rwLockReaders = {} \* RWLock: no readers allowed when acquiring write lock
/\ globalLock' = thread
/\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, mergedRoots, collecting, gcEnv, pendingWrites>>
/\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, mergedRoots, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
CollectorMerge ==
/\ globalLock # NULL
@@ -571,7 +623,93 @@ CollectorEnd ==
CollectorReleaseLock(thread) ==
/\ globalLock = thread
/\ globalLock' = NULL
/\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, mergedRoots, collecting, gcEnv, pendingWrites>>
/\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, mergedRoots, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
\* ============================================================================
\* Seq Payload Actions (RWLock-protected)
\* ============================================================================
\* These actions model the race between the collector tracing seq payloads
\* and mutators replacing/freeing seq payloads.
\*
\* The collector traces seq payloads in two steps:
\* 1. CollectorStartTraceSeq: reads seqData[obj] (gets payload pointer)
\* 2. CollectorFinishTraceSeq: accesses the payload data
\* Between these steps, a mutator could free the payload (the race).
\*
\* The RWLock prevents this:
\* - Collector holds write lock (globalLock) during tracing
\* - MutatorSeqAssign requires read lock (rwLockReaders)
\* - Read lock requires globalLock = NULL
\* - Therefore MutatorSeqAssign is blocked during collection
\*
\* Note: This models the memory safety aspect of seq tracing.
\* The cycle collection algorithm (MarkGray, Scan, etc.) operates on the
\* logical edge graph. Seq payloads are a physical representation detail
\* that affects memory safety but not GC correctness (which is already
\* covered by the existing Safety property).
\* Mutator acquires read lock for seq mutation.
\* RWLock semantics: read lock can be acquired when no writer holds the write lock.
\* Multiple readers can hold the read lock simultaneously.
MutatorAcquireSeqLock(thread) ==
    \* Reader entry of the RWLock: a mutator may join the reader set only
    \* while the collector (the writer) does not hold the write lock.
    \* Any number of readers may be in rwLockReaders simultaneously.
    /\ thread \notin rwLockReaders          \* not already holding the read lock
    /\ globalLock = NULL                    \* RWLock: writer excluded before readers enter
    /\ rwLockReaders' = rwLockReaders \cup {thread}
    /\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites, seqData, payloadAlive, collectorPayload>>
\* Mutator releases read lock after seq mutation completes.
MutatorReleaseSeqLock(thread) ==
    \* Reader exit of the RWLock: a thread that currently holds the read
    \* lock removes itself from the reader set; nothing else changes.
    /\ thread \in rwLockReaders
    /\ rwLockReaders' = rwLockReaders \ {thread}
    /\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites, seqData, payloadAlive, collectorPayload>>
\* Mutator installs or replaces a seq field's payload (e.g., r.list = newSeq).
\* If the object already had a payload, the old payload is freed immediately:
\* in the real implementation this is a value-type assignment (=sink/=copy)
\* that frees the old data array and installs a new one, NOT deferred to the
\* cycle collector.
\* Requires the read lock (RWLock protection against concurrent collection).
\*
\* NOTE(fix): the previous version was only enabled when seqData[obj] # NULL,
\* but Init sets every seqData[x] to NULL and no other action writes seqData,
\* so the action could never fire -- the payload machinery was unreachable and
\* SeqPayloadSafety held vacuously. Allowing seqData[obj] = NULL models the
\* FIRST payload installation and makes the modeled race actually reachable.
MutatorSeqAssign(thread, obj, newPayload) ==
    /\ thread \in rwLockReaders             \* Must hold read lock
    /\ newPayload \in SeqPayloads
    /\ ~payloadAlive[newPayload]            \* New payload is freshly allocated (not yet alive)
    /\ LET oldPayload == seqData[obj]
       IN
        /\ seqData' = [seqData EXCEPT ![obj] = newPayload]
        /\ payloadAlive' =
             IF oldPayload = NULL
             THEN \* First installation: nothing to free.
                  [payloadAlive EXCEPT ![newPayload] = TRUE]
             ELSE \* Replacement: the old payload's memory is freed immediately.
                  [payloadAlive EXCEPT ![oldPayload] = FALSE,
                                       ![newPayload] = TRUE]
    \* Note: In a complete model, this would also update edges[obj] to reflect
    \* the new seq elements and buffer RC changes (inc new elements, dec old elements).
    \* We omit this here to focus on the memory safety property (payload lifetime).
    /\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites, rwLockReaders, collectorPayload>>
\* Collector begins tracing an object's seq field.
\* Reads the seqData pointer and stores it in collectorPayload.
\* This is the first step of a two-step trace operation.
\* The collector must hold the write lock (globalLock).
CollectorStartTraceSeq(obj) ==
    \* Step 1 of the two-step seq trace: while holding the write lock in the
    \* middle of a collection, the collector snapshots obj's payload pointer
    \* into collectorPayload. The dereference happens later, in
    \* CollectorFinishTraceSeq -- that gap is where the race would live.
    /\ collecting = TRUE                    \* in collection phase
    /\ globalLock # NULL                    \* collector holds the write lock
    /\ collectorPayload = NULL              \* no trace currently in flight
    /\ seqData[obj] # NULL                  \* obj actually has a seq payload
    /\ collectorPayload' = seqData[obj]
    /\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders>>
\* Collector finishes tracing an object's seq field.
\* Accesses the payload data via collectorPayload.
\* The payload MUST still be alive (this is checked by SeqPayloadSafety).
\* After accessing the payload, clears collectorPayload.
CollectorFinishTraceSeq ==
    \* Step 2 of the two-step seq trace: the collector dereferences the
    \* payload pointer it snapshotted in CollectorStartTraceSeq and then
    \* clears the in-flight marker. The trace results themselves are not
    \* modeled; SeqPayloadSafety is what guarantees the dereference is of
    \* still-live memory.
    /\ collecting = TRUE                    \* in collection phase
    /\ globalLock # NULL                    \* collector holds the write lock
    /\ collectorPayload # NULL              \* a trace is in flight
    /\ collectorPayload' = NULL
    /\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders>>
\* ============================================================================
\* Next State Relation
@@ -607,6 +745,16 @@ Next ==
\/ CollectorEnd
\/ \E thread \in Threads:
CollectorReleaseLock(thread)
\* --- Seq payload actions ---
\/ \E thread \in Threads:
MutatorAcquireSeqLock(thread)
\/ \E thread \in Threads:
MutatorReleaseSeqLock(thread)
\/ \E thread \in Threads, obj \in Objects, p \in SeqPayloads:
MutatorSeqAssign(thread, obj, p)
\/ \E obj \in Objects:
CollectorStartTraceSeq(obj)
\/ CollectorFinishTraceSeq
\* ============================================================================
\* Initial State
@@ -642,6 +790,11 @@ Init ==
/\ collecting = FALSE
/\ gcEnv = [touched |-> 0, edges |-> 0, rcSum |-> 0, toFree |-> {}]
/\ pendingWrites = {}
\* Seq payload initial state
/\ seqData = [x \in Objects |-> NULL] \* No seq fields initially
/\ payloadAlive = [p \in SeqPayloads |-> FALSE] \* No payloads alive initially
/\ rwLockReaders = {} \* No threads hold read lock
/\ collectorPayload = NULL \* Collector not mid-trace
/\ TypeOK
\* ============================================================================
@@ -748,14 +901,53 @@ CycleInvariant ==
THEN ExternalRC(obj) = 0
ELSE TRUE
\* ============================================================================
\* Seq Payload Safety
\* ============================================================================
\* Memory safety: The collector never accesses a freed seq payload.
\*
\* collectorPayload holds the payload pointer the collector read during
\* CollectorStartTraceSeq. Between that action and CollectorFinishTraceSeq,
\* the collector will dereference this pointer to read the seq's elements.
\* If the payload has been freed in between, this is a use-after-free.
\*
\* The RWLock prevents this:
\* - collectorPayload is only set when globalLock # NULL (write lock held)
\* - MutatorSeqAssign (which frees payloads) requires rwLockReaders membership
\* - MutatorAcquireSeqLock requires globalLock = NULL (no writer)
\* - Therefore: while collectorPayload # NULL, no MutatorSeqAssign can execute
\* - Therefore: payloadAlive[collectorPayload] remains TRUE
\*
\* Without the RWLock (if MutatorSeqAssign didn't require the read lock),
\* the following interleaving would violate this property:
\* 1. Collector acquires write lock
\* 2. CollectorStartTraceSeq(obj) -- collectorPayload = P
\* 3. MutatorSeqAssign(thread, obj, Q) -- frees P, payloadAlive[P] = FALSE
\* 4. SeqPayloadSafety VIOLATED: collectorPayload = P but payloadAlive[P] = FALSE
SeqPayloadSafety ==
    \* Memory safety, stated as a disjunction (equivalent to the implication
    \* "mid-trace => payload alive"): either the collector is not currently
    \* tracing a payload, or the payload it is tracing is still live memory.
    \/ collectorPayload = NULL
    \/ payloadAlive[collectorPayload]
\* ============================================================================
\* RWLock Invariant
\* ============================================================================
\* The read-write lock ensures mutual exclusion between the collector (writer)
\* and seq mutations (readers). The writer and readers are never active at
\* the same time.
RWLockInvariant ==
    \* Writer/reader mutual exclusion, stated as a disjunction (equivalent to
    \* "writer held => no readers"): at all times either the write lock is
    \* free, or the reader set is empty.
    \/ globalLock = NULL
    \/ rwLockReaders = {}
\* ============================================================================
\* Specification
\* ============================================================================
Spec == Init /\ [][Next]_<<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
Spec == Init /\ [][Next]_<<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites, seqData, payloadAlive, rwLockReaders, collectorPayload>>
THEOREM Spec => []Safety
THEOREM Spec => []RCInvariant
THEOREM Spec => []CycleInvariant
THEOREM Spec => []SeqPayloadSafety
THEOREM Spec => []RWLockInvariant
====

View File

@@ -35,4 +35,5 @@ proc main() =
main()
GC_fullCollect()
echo getOccupiedMem() < 10 * 1024 * 1024, " peak memory: ", getMaxMem() < 10 * 1024 * 1024
when not defined(useMalloc):
echo getOccupiedMem() < 10 * 1024 * 1024, " peak memory: ", getMaxMem() < 10 * 1024 * 1024

View File

@@ -1,35 +1,7 @@
discard """
targets: "c"
matrix: "--debugger:native --mangle:nim"
ccodecheck: "'testFunc__titaniummangle95nim_u1316'"
ccodecheck: "'testFunc__titaniummangle95nim_u156'"
ccodecheck: "'testFunc__titaniummangle95nim_u1305'"
ccodecheck: "'testFunc__titaniummangle95nim_u241'"
ccodecheck: "'testFunc__titaniummangle95nim_u1357'"
ccodecheck: "'testFunc__titaniummangle95nim_u292'"
ccodecheck: "'testFunc__titaniummangle95nim_u38'"
ccodecheck: "'testFunc__titaniummangle95nim_u175'"
ccodecheck: "'testFunc__titaniummangle95nim_u1302'"
ccodecheck: "'testFunc__titaniummangle95nim_u1305'"
ccodecheck: "'testFunc__titaniummangle95nim_u535'"
ccodecheck: "'testFunc__titaniummangle95nim_u1294'"
ccodecheck: "'testFunc__titaniummangle95nim_u336'"
ccodecheck: "'testFunc__titaniummangle95nim_u425'"
ccodecheck: "'testFunc__titaniummangle95nim_u308'"
ccodecheck: "'testFunc__titaniummangle95nim_u129'"
ccodecheck: "'testFunc__titaniummangle95nim_u320'"
ccodecheck: "'testFunc__titaniummangle95nim_u223'"
ccodecheck: "'testFunc__titaniummangle95nim_u545'"
ccodecheck: "'testFunc__titaniummangle95nim_u543'"
ccodecheck: "'testFunc__titaniummangle95nim_u895'"
ccodecheck: "'testFunc__titaniummangle95nim_u1104'"
ccodecheck: "'testFunc__titaniummangle95nim_u1155'"
ccodecheck: "'testFunc__titaniummangle95nim_u636'"
ccodecheck: "'testFunc__titaniummangle95nim_u705'"
ccodecheck: "'testFunc__titaniummangle95nim_u800'"
ccodecheck: "'new__titaniummangle95nim_u1320'"
ccodecheck: "'xxx__titaniummangle95nim_u1391'"
ccodecheck: "'xxx__titaniummangle95nim_u1394'"
ccodecheck: "'testFunc__titaniummangle95nim_u'"
"""
#When debugging this notice that if one check fails, it can be due to any of the above.
@@ -48,7 +20,7 @@ type
Container[T] = object
data: T
Container2[T, T2] = object
data: T
data2: T2
@@ -57,7 +29,7 @@ type
Coo = Foo
Doo = Boo | Foo
Doo = Boo | Foo
TestProc = proc(a:string): string
@@ -67,87 +39,87 @@ type EnumSample = enum
type EnumAnotherSample = enum
a, b, c
proc testFunc(a: set[EnumSample]) =
proc testFunc(a: set[EnumSample]) =
echo $a
proc testFunc(a: typedesc) =
proc testFunc(a: typedesc) =
echo $a
proc testFunc(a: ptr Foo) =
proc testFunc(a: ptr Foo) =
echo repr a
proc testFunc(s: string, a: Coo) =
proc testFunc(s: string, a: Coo) =
echo repr a
proc testFunc(s: int, a: Comparable) =
proc testFunc(s: int, a: Comparable) =
echo repr a
proc testFunc(a: TestProc) =
proc testFunc(a: TestProc) =
let b = ""
echo repr a("")
proc testFunc(a: ref Foo) =
proc testFunc(a: ref Foo) =
echo repr a
proc testFunc(b: Boo) =
proc testFunc(b: Boo) =
echo repr b
proc testFunc(a: ptr UncheckedArray[int]) =
proc testFunc(a: ptr UncheckedArray[int]) =
echo repr a
proc testFunc(a: ptr int) =
proc testFunc(a: ptr int) =
echo repr a
proc testFunc(a: ptr ptr int) =
proc testFunc(a: ptr ptr int) =
echo repr a
proc testFunc(e: FooTuple, str: cstring) =
proc testFunc(e: FooTuple, str: cstring) =
echo e
proc testFunc(e: (float, float)) =
proc testFunc(e: (float, float)) =
echo e
proc testFunc(e: EnumSample) =
proc testFunc(e: EnumSample) =
echo e
proc testFunc(e: var int) =
proc testFunc(e: var int) =
echo e
proc testFunc(e: var Foo, a, b: int32, refFoo: ref Foo) =
proc testFunc(e: var Foo, a, b: int32, refFoo: ref Foo) =
echo e
proc testFunc(xs: Container[int]) =
proc testFunc(xs: Container[int]) =
let a = 2
echo xs
proc testFunc(xs: Container2[int32, int32]) =
proc testFunc(xs: Container2[int32, int32]) =
let a = 2
echo xs
proc testFunc(xs: Container[Container2[int32, int32]]) =
proc testFunc(xs: Container[Container2[int32, int32]]) =
let a = 2
echo xs
proc testFunc(xs: seq[int]) =
proc testFunc(xs: seq[int]) =
let a = 2
echo xs
proc testFunc(xs: openArray[string]) =
proc testFunc(xs: openArray[string]) =
let a = 2
echo xs
proc testFunc(xs: array[2, int]) =
proc testFunc(xs: array[2, int]) =
let a = 2
echo xs
proc testFunc(e: EnumAnotherSample) =
proc testFunc(e: EnumAnotherSample) =
echo e
proc testFunc(a, b: int) =
proc testFunc(a, b: int) =
echo "hola"
discard
proc testFunc(a: int, xs: varargs[string]) =
proc testFunc(a: int, xs: varargs[string]) =
let a = 10
for x in xs:
echo x
@@ -155,7 +127,7 @@ proc testFunc(a: int, xs: varargs[string]) =
proc xxx(v: static int) =
echo v
proc testFunc() =
proc testFunc() =
var a = 2
var aPtr = a.addr
var foo = Foo()