YRC: threadsafe cycle collection for Nim (#25495)

First performance numbers:

time tests/arc/torcbench   -- YRC
true peak memory: true

real    0m0,163s
user    0m0,161s
sys     0m0,002s


time tests/arc/torcbench   -- ORC
true peak memory: true

real    0m0,107s
user    0m0,104s
sys     0m0,003s


So it's about 1.5x slower (0.163 s vs. 0.107 s). But it's threadsafe and provably
correct (safety proven in Lean, plus model checking via TLA+).

Of course there is always the chance that the implementation is wrong
and doesn't match the model.
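
For orientation, a minimal sketch of a cyclic workload compiled under the new mode, assuming the `--mm:yrc` switch added in the option handling below (the file name and `Node` type are illustrative):

```nim
# cycle.nim -- build with: nim c --mm:yrc -d:release cycle.nim
type Node = ref object
  next: Node

proc makeCycle() =
  let a = Node()
  let b = Node(next: a)
  a.next = b            # a -> b -> a becomes garbage once the proc returns

for i in 0 ..< 100_000:
  makeCycle()
GC_fullCollect()        # under YRC this forwards to GC_runOrc (see yrc.nim below)
```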
Author: Andreas Rumpf
Date: 2026-02-10 00:04:11 +01:00 (committed by GitHub)
Parent: 9225d9e9e6  Commit: a690a9ac90
17 changed files with 1934 additions and 41 deletions


@@ -16,7 +16,7 @@
## implementation.
template detectVersion(field, corename) =
if m.g.config.selectedGC in {gcArc, gcOrc, gcAtomicArc, gcHooks}:
if m.g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc, gcHooks}:
result = 2
else:
result = 1


@@ -277,7 +277,7 @@ proc isInvalidReturnType(conf: ConfigRef; typ: PType, isProc = true): bool =
of ctStruct:
let t = skipTypes(rettype, typedescInst)
if rettype.isImportedCppType or t.isImportedCppType or
(typ.callConv == ccCDecl and conf.selectedGC in {gcArc, gcAtomicArc, gcOrc}):
(typ.callConv == ccCDecl and conf.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcYrc}):
# prevents nrvo for cdecl procs; # bug #23401
result = false
else:
@@ -1692,7 +1692,7 @@ proc genHook(m: BModule; t: PType; info: TLineInfo; op: TTypeAttachedOp; result:
echo "ayclic but has this =trace ", t, " ", theProc.ast
else:
when false:
if op == attachedTrace and m.config.selectedGC == gcOrc and
if op == attachedTrace and m.config.selectedGC in {gcOrc, gcYrc} and
containsGarbageCollectedRef(t):
# unfortunately this check is wrong for an object type that only contains
# .cursor fields like 'Node' inside 'cycleleak'.


@@ -245,7 +245,7 @@ proc processCompile(conf: ConfigRef; filename: string) =
extccomp.addExternalFileToCompile(conf, found)
const
errNoneBoehmRefcExpectedButXFound = "'arc', 'orc', 'atomicArc', 'markAndSweep', 'boehm', 'go', 'none', 'regions', or 'refc' expected, but '$1' found"
errNoneBoehmRefcExpectedButXFound = "'arc', 'orc', 'yrc', 'atomicArc', 'markAndSweep', 'boehm', 'go', 'none', 'regions', or 'refc' expected, but '$1' found"
errNoneSpeedOrSizeExpectedButXFound = "'none', 'speed' or 'size' expected, but '$1' found"
errGuiConsoleOrLibExpectedButXFound = "'gui', 'console', 'lib' or 'staticlib' expected, but '$1' found"
errInvalidExceptionSystem = "'goto', 'setjmp', 'cpp' or 'quirky' expected, but '$1' found"
@@ -266,6 +266,7 @@ proc testCompileOptionArg*(conf: ConfigRef; switch, arg: string, info: TLineInfo
of "markandsweep": result = conf.selectedGC == gcMarkAndSweep
of "destructors", "arc": result = conf.selectedGC == gcArc
of "orc": result = conf.selectedGC == gcOrc
of "yrc": result = conf.selectedGC == gcYrc
of "hooks": result = conf.selectedGC == gcHooks
of "go": result = conf.selectedGC == gcGo
of "none": result = conf.selectedGC == gcNone
@@ -570,6 +571,7 @@ proc unregisterArcOrc*(conf: ConfigRef) =
undefSymbol(conf.symbols, "gcdestructors")
undefSymbol(conf.symbols, "gcarc")
undefSymbol(conf.symbols, "gcorc")
undefSymbol(conf.symbols, "gcyrc")
undefSymbol(conf.symbols, "gcatomicarc")
undefSymbol(conf.symbols, "nimSeqsV2")
undefSymbol(conf.symbols, "nimV2")
@@ -603,6 +605,10 @@ proc processMemoryManagementOption(switch, arg: string, pass: TCmdLinePass,
conf.selectedGC = gcOrc
defineSymbol(conf.symbols, "gcorc")
registerArcOrc(pass, conf)
of "yrc":
conf.selectedGC = gcYrc
defineSymbol(conf.symbols, "gcyrc")
registerArcOrc(pass, conf)
of "atomicarc":
conf.selectedGC = gcAtomicArc
defineSymbol(conf.symbols, "gcatomicarc")


@@ -69,7 +69,7 @@ proc hasDestructor(c: Con; t: PType): bool {.inline.} =
result = ast.hasDestructor(t)
when toDebug.len > 0:
# for more effective debugging
if not result and c.graph.config.selectedGC in {gcArc, gcOrc, gcAtomicArc}:
if not result and c.graph.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}:
assert(not containsGarbageCollectedRef(t))
proc getTemp(c: var Con; s: var Scope; typ: PType; info: TLineInfo): PNode =
@@ -165,7 +165,7 @@ proc isLastReadImpl(n: PNode; c: var Con; scope: var Scope): bool =
template hasDestructorOrAsgn(c: var Con, typ: PType): bool =
# bug #23354; an object type could have a non-trivial assignements when it is passed to a sink parameter
hasDestructor(c, typ) or (c.graph.config.selectedGC in {gcArc, gcOrc, gcAtomicArc} and
hasDestructor(c, typ) or (c.graph.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc} and
typ.kind == tyObject and not isTrivial(getAttachedOp(c.graph, typ, attachedAsgn)))
proc isLastRead(n: PNode; c: var Con; s: var Scope): bool =
@@ -329,14 +329,14 @@ proc isCriticalLink(dest: PNode): bool {.inline.} =
result = dest.kind != nkSym
proc finishCopy(c: var Con; result, dest: PNode; flags: set[MoveOrCopyFlag]; isFromSink: bool) =
if c.graph.config.selectedGC == gcOrc and IsExplicitSink notin flags:
if c.graph.config.selectedGC in {gcOrc, gcYrc} and IsExplicitSink notin flags:
# add cyclic flag, but not to sink calls, which IsExplicitSink generates
let t = dest.typ.skipTypes(tyUserTypeClasses + {tyGenericInst, tyAlias, tySink, tyDistinct})
if cyclicType(c.graph, t):
result.add boolLit(c.graph, result.info, isFromSink or isCriticalLink(dest))
proc genMarkCyclic(c: var Con; result, dest: PNode) =
if c.graph.config.selectedGC == gcOrc:
if c.graph.config.selectedGC in {gcOrc, gcYrc}:
let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink, tyDistinct})
if cyclicType(c.graph, t):
if t.kind == tyRef:
@@ -495,7 +495,7 @@ proc passCopyToSink(n: PNode; c: var Con; s: var Scope): PNode =
localError(c.graph.config, n.info, errFailedMove,
("cannot move '$1', passing '$1' to a sink parameter introduces an implicit copy") % $n)
else:
if c.graph.config.selectedGC in {gcArc, gcOrc, gcAtomicArc}:
if c.graph.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}:
assert(not containsManagedMemory(nTyp))
if nTyp.skipTypes(abstractInst).kind in {tyOpenArray, tyVarargs}:
localError(c.graph.config, n.info, "cannot create an implicit openArray copy to be passed to a sink parameter")
@@ -926,7 +926,7 @@ proc p(n: PNode; c: var Con; s: var Scope; mode: ProcessMode; tmpFlags = {sfSing
if n[0].kind == nkSym and n[0].sym.magic in {mNew, mNewFinalize}:
result[0] = copyTree(n[0])
if c.graph.config.selectedGC in {gcHooks, gcArc, gcAtomicArc, gcOrc}:
if c.graph.config.selectedGC in {gcHooks, gcArc, gcAtomicArc, gcOrc, gcYrc}:
let destroyOld = c.genDestroy(result[1])
result = newTree(nkStmtList, destroyOld, result)
else:


@@ -163,7 +163,7 @@ proc fillBodyObj(c: var TLiftCtx; n, body, x, y: PNode; enforceDefaultOp: bool,
if c.filterDiscriminator != nil: return
let f = n.sym
let b = if c.kind == attachedTrace: y else: y.dotField(f)
if (sfCursor in f.flags and c.g.config.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcHooks}) or
if (sfCursor in f.flags and c.g.config.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcYrc, gcHooks}) or
enforceDefaultOp:
defaultOp(c, f.typ, body, x.dotField(f), b)
else:
@@ -730,14 +730,43 @@ proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
dest[] = source
decRef tmp
For YRC the write barrier is more complicated still and must be:
let tmp = dest
# assignment must come first so that the collector sees the most-recent graph:
atomic: dest[] = source
# Then teach the cycle collector about the changed edges (these use locks, see yrc.nim):
incRef source
decRef tmp
This is implemented as a single runtime call (nimAsgnYrc / nimSinkYrc).
]#
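#[ For illustration only, the ordering above spelled out as a sketch; `bufferedIncRef`
and `bufferedDecRef` are stand-ins for the per-stripe queueing that lib/system/yrc.nim
actually performs:

  proc yrcAsgnSketch(dest: ptr pointer; src: pointer) =
    let tmp = dest[]                         # remember the old target
    atomicStoreN(dest, src, ATOMIC_RELEASE)  # publish the new edge first
    if src != nil: bufferedIncRef(src)       # deferred: per-stripe toInc queue
    if tmp != nil: bufferedDecRef(tmp)       # deferred: per-stripe toDec queue
]#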
var actions = newNodeI(nkStmtList, c.info)
let elemType = t.elementType
createTypeBoundOps(c.g, c.c, elemType, c.info, c.idgen)
let isCyclic = c.g.config.selectedGC == gcOrc and types.canFormAcycle(c.g, elemType)
let isInheritableAcyclicRef = c.g.config.selectedGC == gcOrc and
# YRC uses dedicated runtime procs for the entire write barrier:
if c.g.config.selectedGC == gcYrc:
let desc =
if isFinal(elemType):
let ti = genBuiltin(c, mGetTypeInfoV2, "getTypeInfoV2", newNodeIT(nkType, x.info, elemType))
ti.typ = getSysType(c.g, c.info, tyPointer)
ti
else:
newNodeIT(nkNilLit, c.info, getSysType(c.g, c.info, tyPointer))
case c.kind
of attachedAsgn, attachedDup:
body.add callCodegenProc(c.g, "nimAsgnYrc", c.info, genAddr(c, x), y, desc)
return
of attachedSink:
body.add callCodegenProc(c.g, "nimSinkYrc", c.info, genAddr(c, x), y, desc)
return
else: discard # fall through for destructor, trace, wasMoved
let isCyclic = c.g.config.selectedGC in {gcOrc, gcYrc} and types.canFormAcycle(c.g, elemType)
let isInheritableAcyclicRef = c.g.config.selectedGC in {gcOrc, gcYrc} and
(not isPureObject(elemType)) and
tfAcyclic in skipTypes(elemType, abstractInst+{tyOwned}-{tyTypeDesc}).flags
# dynamic Acyclic refs need to use dyn decRef
@@ -819,7 +848,26 @@ proc atomicClosureOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
let xenv = genBuiltin(c, mAccessEnv, "accessEnv", x)
xenv.typ = getSysType(c.g, c.info, tyPointer)
let isCyclic = c.g.config.selectedGC == gcOrc
# Closures are (fnPtr, env) pairs. nimAsgnYrc/nimSinkYrc handle the env pointer
# (atomic store + buffered inc/dec). We also need newAsgnStmt to copy the fnPtr.
if c.g.config.selectedGC == gcYrc:
let nilDesc = newNodeIT(nkNilLit, c.info, getSysType(c.g, c.info, tyPointer))
let yenv = genBuiltin(c, mAccessEnv, "accessEnv", y)
yenv.typ = getSysType(c.g, c.info, tyPointer)
case c.kind
of attachedAsgn, attachedDup:
# nimAsgnYrc: save old env, atomic store new env, inc new env, dec old env
body.add callCodegenProc(c.g, "nimAsgnYrc", c.info, genAddr(c, xenv), yenv, nilDesc)
# Raw struct copy to also update the function pointer (env write is redundant but benign)
body.add newAsgnStmt(x, y)
return
of attachedSink:
body.add callCodegenProc(c.g, "nimSinkYrc", c.info, genAddr(c, xenv), yenv, nilDesc)
body.add newAsgnStmt(x, y)
return
else: discard # fall through for destructor, trace, wasMoved
let isCyclic = c.g.config.selectedGC in {gcOrc, gcYrc}
let tmp =
if isCyclic and c.kind in {attachedAsgn, attachedSink, attachedDup}:
declareTempOf(c, body, xenv)
@@ -852,7 +900,6 @@ proc atomicClosureOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
body.add genIf(c, cond, actions)
else:
body.add genIf(c, yenv, callCodegenProc(c.g, "nimIncRef", c.info, yenv))
body.add genIf(c, cond, actions)
body.add newAsgnStmt(x, y)
of attachedDup:
@@ -937,7 +984,7 @@ proc closureOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
call[1] = y
body.add newAsgnStmt(x, call)
elif (optOwnedRefs in c.g.config.globalOptions and
optRefCheck in c.g.config.options) or c.g.config.selectedGC in {gcArc, gcAtomicArc, gcOrc}:
optRefCheck in c.g.config.options) or c.g.config.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcYrc}:
let xx = genBuiltin(c, mAccessEnv, "accessEnv", x)
xx.typ = getSysType(c.g, c.info, tyPointer)
case c.kind
@@ -992,7 +1039,7 @@ proc fillBody(c: var TLiftCtx; t: PType; body, x, y: PNode) =
tyPtr, tyUncheckedArray, tyVar, tyLent:
defaultOp(c, t, body, x, y)
of tyRef:
if c.g.config.selectedGC in {gcArc, gcOrc, gcAtomicArc}:
if c.g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}:
atomicRefOp(c, t, body, x, y)
elif (optOwnedRefs in c.g.config.globalOptions and
optRefCheck in c.g.config.options):
@@ -1001,7 +1048,7 @@ proc fillBody(c: var TLiftCtx; t: PType; body, x, y: PNode) =
defaultOp(c, t, body, x, y)
of tyProc:
if t.callConv == ccClosure:
if c.g.config.selectedGC in {gcArc, gcOrc, gcAtomicArc}:
if c.g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}:
atomicClosureOp(c, t, body, x, y)
else:
closureOp(c, t, body, x, y)
@@ -1125,7 +1172,7 @@ proc symDupPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttache
result.typ.addParam src
if g.config.selectedGC == gcOrc and
if g.config.selectedGC in {gcOrc, gcYrc} and
cyclicType(g, typ.skipTypes(abstractInst)):
let cycleParam = newSym(skParam, getIdent(g.cache, "cyclic"),
idgen, result, info)
@@ -1152,7 +1199,7 @@ proc symPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttachedOp
let src = newSym(skParam, getIdent(g.cache, if kind == attachedTrace: "env" else: "src"),
idgen, result, info)
if kind == attachedDestructor and g.config.selectedGC in {gcArc, gcOrc, gcAtomicArc} and
if kind == attachedDestructor and g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc} and
((g.config.isDefined("nimPreviewNonVarDestructor") and not isDiscriminant) or (typ.kind in {tyRef, tyString, tySequence})):
dest.typ = typ
else:
@@ -1168,7 +1215,7 @@ proc symPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttachedOp
if kind notin {attachedDestructor, attachedWasMoved}:
result.typ.addParam src
if kind == attachedAsgn and g.config.selectedGC == gcOrc and
if kind == attachedAsgn and g.config.selectedGC in {gcOrc, gcYrc} and
cyclicType(g, typ.skipTypes(abstractInst)):
let cycleParam = newSym(skParam, getIdent(g.cache, "cyclic"),
idgen, result, info)
@@ -1226,7 +1273,7 @@ proc produceSym(g: ModuleGraph; c: PContext; typ: PType; kind: TTypeAttachedOp;
else:
var tk: TTypeKind
var skipped: PType = nil
if g.config.selectedGC in {gcArc, gcOrc, gcHooks, gcAtomicArc}:
if g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcHooks, gcAtomicArc}:
skipped = skipTypes(typ, {tyOrdinal, tyRange, tyInferred, tyGenericInst, tyStatic, tyAlias, tySink})
tk = skipped.kind
else:
@@ -1348,7 +1395,7 @@ proc createTypeBoundOps(g: ModuleGraph; c: PContext; orig: PType; info: TLineInf
# we do not generate '=trace' procs if we
# have the cycle detection disabled, saves code size.
let lastAttached = if g.config.selectedGC == gcOrc: attachedTrace
let lastAttached = if g.config.selectedGC in {gcOrc, gcYrc}: attachedTrace
else: attachedSink
# bug #15122: We need to produce all prototypes before entering the


@@ -195,6 +195,7 @@ type
gcRegions = "regions"
gcArc = "arc"
gcOrc = "orc"
gcYrc = "yrc" # thread-safe ORC (concurrent cycle collector)
gcAtomicArc = "atomicArc"
gcMarkAndSweep = "markAndSweep"
gcHooks = "hooks"


@@ -231,7 +231,7 @@ proc runNimScript*(cache: IdentCache; scriptName: AbsoluteFile;
if optOwnedRefs in oldGlobalOptions:
conf.globalOptions.incl {optTinyRtti, optOwnedRefs, optSeqDestructors}
defineSymbol(conf.symbols, "nimv2")
if conf.selectedGC in {gcArc, gcOrc, gcAtomicArc}:
if conf.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}:
conf.globalOptions.incl {optTinyRtti, optSeqDestructors}
defineSymbol(conf.symbols, "nimv2")
defineSymbol(conf.symbols, "gcdestructors")
@@ -241,6 +241,8 @@ proc runNimScript*(cache: IdentCache; scriptName: AbsoluteFile;
defineSymbol(conf.symbols, "gcarc")
of gcOrc:
defineSymbol(conf.symbols, "gcorc")
of gcYrc:
defineSymbol(conf.symbols, "gcyrc")
of gcAtomicArc:
defineSymbol(conf.symbols, "gcatomicarc")
else:


@@ -1756,7 +1756,7 @@ proc trackProc*(c: PContext; s: PSym, body: PNode) =
let param = params[i].sym
let typ = param.typ
if isSinkTypeForParam(typ) or
(t.config.selectedGC in {gcArc, gcOrc, gcAtomicArc} and
(t.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc} and
(isClosure(typ.skipTypes(abstractInst)) or param.id in t.escapingParams)):
createTypeBoundOps(t, typ, param.info)
if isOutParam(typ) and param.id notin t.init and s.magic == mNone:

View File

@@ -2175,7 +2175,7 @@ proc bindTypeHook(c: PContext; s: PSym; n: PNode; op: TTypeAttachedOp) =
template notRefc: bool =
# fixes refc with non-var destructor; cancel warnings (#23156)
c.config.backend == backendJs or
c.config.selectedGC in {gcArc, gcAtomicArc, gcOrc}
c.config.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcYrc}
let cond = case op
of attachedWasMoved:
t.len == 2 and t.returnType == nil and t.firstParamType.kind == tyVar

View File

@@ -125,7 +125,7 @@ proc unsafeAddr*[T](x: T): ptr T {.magic: "Addr", noSideEffect.} =
const ThisIsSystem = true
const arcLikeMem = defined(gcArc) or defined(gcAtomicArc) or defined(gcOrc)
const arcLikeMem = defined(gcArc) or defined(gcAtomicArc) or defined(gcOrc) or defined(gcYrc)
when defined(nimAllowNonVarDestructor) and arcLikeMem:
proc new*[T](a: var ref T, finalizer: proc (x: T) {.nimcall.}) {.
@@ -356,7 +356,7 @@ proc low*(x: string): int {.magic: "Low", noSideEffect.}
## See also:
## * `high(string) <#high,string>`_
when not defined(gcArc) and not defined(gcOrc) and not defined(gcAtomicArc):
when not defined(gcArc) and not defined(gcOrc) and not defined(gcYrc) and not defined(gcAtomicArc):
proc shallowCopy*[T](x: var T, y: T) {.noSideEffect, magic: "ShallowCopy".}
## Use this instead of `=` for a `shallow copy`:idx:.
##
@@ -407,7 +407,7 @@ when defined(nimHasDup):
proc `=sink`*[T](x: var T; y: T) {.inline, nodestroy, magic: "Asgn".} =
## Generic `sink`:idx: implementation that can be overridden.
when defined(gcArc) or defined(gcOrc) or defined(gcAtomicArc):
when defined(gcArc) or defined(gcOrc) or defined(gcYrc) or defined(gcAtomicArc):
x = y
else:
shallowCopy(x, y)
@@ -2561,7 +2561,7 @@ when compileOption("rangechecks"):
else:
template rangeCheck*(cond) = discard
when not defined(gcArc) and not defined(gcOrc) and not defined(gcAtomicArc):
when not defined(gcArc) and not defined(gcOrc) and not defined(gcYrc) and not defined(gcAtomicArc):
proc shallow*[T](s: var seq[T]) {.noSideEffect, inline.} =
## Marks a sequence `s` as `shallow`:idx:. Subsequent assignments will not
## perform deep copies of `s`.
@@ -2630,7 +2630,7 @@ when hasAlloc or defined(nimscript):
setLen(x, xl+item.len)
var j = xl-1
while j >= i:
when defined(gcArc) or defined(gcOrc) or defined(gcAtomicArc):
when defined(gcArc) or defined(gcOrc) or defined(gcYrc) or defined(gcAtomicArc):
x[j+item.len] = move x[j]
else:
shallowCopy(x[j+item.len], x[j])

View File

@@ -16,7 +16,7 @@ runtime type and only contains a reference count.
{.push raises: [], rangeChecks: off.}
when defined(gcOrc):
when defined(gcOrc) or defined(gcYrc):
const
rcIncrement = 0b10000 # so that lowest 4 bits are not touched
rcMask = 0b1111
@@ -36,12 +36,12 @@ type
rc: int # the object header is now a single RC field.
# we could remove it in non-debug builds for the 'owned ref'
# design but this seems unwise.
when defined(gcOrc):
when defined(gcOrc) or defined(gcYrc):
rootIdx: int # thanks to this we can delete potential cycle roots
# in O(1) without doubly linked lists
when defined(nimArcDebug) or defined(nimArcIds):
refId: int
when defined(gcOrc) and orcLeakDetector:
when (defined(gcOrc) or defined(gcYrc)) and orcLeakDetector:
filename: cstring
line: int
@@ -74,7 +74,7 @@ elif defined(nimArcIds):
const traceId = -1
when defined(gcAtomicArc) and hasThreadSupport:
when (defined(gcAtomicArc) or defined(gcYrc)) and hasThreadSupport:
template decrement(cell: Cell): untyped =
discard atomicDec(cell.rc, rcIncrement)
template increment(cell: Cell): untyped =
@@ -119,7 +119,7 @@ proc nimNewObjUninit(size, alignment: int): pointer {.compilerRtl.} =
else:
result = cast[ptr RefHeader](alignedAlloc(s, alignment) +! hdrSize)
head(result).rc = 0
when defined(gcOrc):
when defined(gcOrc) or defined(gcYrc):
head(result).rootIdx = 0
when defined(nimArcDebug):
head(result).refId = gRefId
@@ -157,7 +157,7 @@ proc nimIncRef(p: pointer) {.compilerRtl, inl.} =
when traceCollector:
cprintf("[INCREF] %p\n", head(p))
when not defined(gcOrc) or defined(nimThinout):
when not (defined(gcOrc) or defined(gcYrc)) or defined(nimThinout):
proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
# This is only used by the old RTTI mechanism and we know
# that 'dest[]' is nil and needs no destruction. Which is really handy
@@ -208,7 +208,9 @@ proc nimDestroyAndDispose(p: pointer) {.compilerRtl, quirky, raises: [].} =
cstderr.rawWrite "has destructor!\n"
nimRawDispose(p, rti.align)
when defined(gcOrc):
when defined(gcYrc):
include yrc
elif defined(gcOrc):
when defined(nimThinout):
include cyclebreaker
else:
@@ -225,7 +227,7 @@ proc nimDecRefIsLast(p: pointer): bool {.compilerRtl, inl.} =
writeStackTrace()
cfprintf(cstderr, "[DecRef] %p %ld\n", p, cell.count)
when defined(gcAtomicArc) and hasThreadSupport:
when (defined(gcAtomicArc) or defined(gcYrc)) and hasThreadSupport:
# `atomicDec` returns the new value
if atomicDec(cell.rc, rcIncrement) == -rcIncrement:
result = true
@@ -251,7 +253,7 @@ proc GC_ref*[T](x: ref T) =
## New runtime only supports this operation for 'ref T'.
if x != nil: nimIncRef(cast[pointer](x))
when not defined(gcOrc):
when not (defined(gcOrc) or defined(gcYrc)):
template GC_fullCollect* =
## Forces a full garbage collection pass. With `--mm:arc` a nop.
discard


@@ -50,7 +50,7 @@ proc deallocSharedImpl(p: pointer) = deallocImpl(p)
proc GC_disable() = discard
proc GC_enable() = discard
when not defined(gcOrc):
when not defined(gcOrc) and not defined(gcYrc):
proc GC_fullCollect() = discard
proc GC_enableMarkAndSweep() = discard
proc GC_disableMarkAndSweep() = discard

lib/system/yrc.nim  (new file, 549 lines)

@@ -0,0 +1,549 @@
#
# YRC: Thread-safe ORC (concurrent cycle collector).
# Same API as orc.nim, but with striped queues and a global lock for merge/collect.
# Destructors for refs run at collection time, not immediately on last decRef.
# See yrc_proof.lean for a Lean 4 proof of safety and deadlock freedom.
#
# ## Key Invariant: Topology vs. Reference Counts
#
# Only `obj.field = x` can change the topology of the heap graph (heap-to-heap
# edges). Local variable assignments (`var local = someRef`) affect reference
# counts but never create heap-to-heap edges and thus cannot create cycles.
#
# The actual pointer write in `obj.field = x` happens immediately and lock-free —
# the graph topology is always up-to-date in memory. Only the RC adjustments are
# deferred: increments and decrements are buffered into per-stripe queues
# (`toInc`, `toDec`) protected by fine-grained per-stripe locks.
#
# When `collectCycles` runs it takes the global lock, drains all stripe buffers
# via `mergePendingRoots`, and then traces the physical pointer graph (via
# `traceImpl`) to detect cycles. This is sound because `trace` follows the actual
# pointer values in memory — which are always current — and uses the reconciled
# RCs only to identify candidate roots and confirm garbage.
#
# In summary: the physical pointer graph is always consistent (writes are
# immediate); only the reference counts are eventually consistent (writes are
# buffered). The per-stripe locks are cheap; the expensive global lock is only
# needed when interpreting the RCs during collection.
#
# ## Why No Write Barrier Is Needed
#
# The classic concurrent-GC hazard is the "lost object" problem: during
# collection the mutator executes `A.field = B` where A is already scanned
# (black), B is reachable only through an unscanned (gray) object C, and then
# C's reference to B is removed. The collector never discovers B and frees it
# while A still points to it. Traditional concurrent collectors need write
# barriers to prevent this.
#
# This problem structurally cannot arise in YRC because the cycle collector only
# frees *closed cycles* — subgraphs where every reference to every member comes
# from within the group, with zero external references. To execute `A.field = B`
# the mutator must hold a reference to A, which means A has an external reference
# (from the stack) that is not a heap-to-heap edge. During trial deletion
# (`markGray`) only internal edges are subtracted from RCs, so A's external
# reference survives, `scan` finds A's RC >= 0, calls `scanBlack`, and rescues A
# and everything reachable from it — including B. In short: the mutator can only
# modify objects it can reach, but the cycle collector only frees objects nothing
# external can reach. The two conditions are mutually exclusive.
#
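# A concrete instance (hypothetical user code): a mutator that holds `a` in a local runs
#
#   a.field = b      # nimAsgnYrc publishes the edge a -> b atomically
#   c.field = nil    # the only other path to b disappears
#
# Even if collectCycles runs in between, trial deletion never subtracts a's stack
# reference, so a's RC stays >= 0, scanBlack(a) re-traces the current edge a -> b,
# and b is rescued.
#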
#[
The problem described in Bacon01 is: during markGray/scan, a mutator concurrently
does X.field = Z (was X→Y), changing the physical graph while the collector is tracing
it. The collector might see stale or new edges. The reasons this is still safe:
1. Stale edges cancel with unbuffered decrements: if the collector sees the old edge X→Y
   (the mutator already wrote X→Z and buffered dec(Y)), the phantom trial deletion and the
   unbuffered dec cancel — Y's effective RC is correct.
2. scanBlack rescues via current physical edges: if X has external refs (the merged RC reflects
   the mutator's access), scanBlack(X) re-traces X and follows the current physical edge X→Z,
   incrementing Z's RC and marking it black. Z survives.
3. The rcSum == edges fast path is conservative: any discrepancy between the physical graph and
   the merged state (stale or new edges) causes rcSum != edges, falling back to the slow path,
   which rescues anything with RC >= 0.
4. Unreachable cycles are truly unreachable: the mutator can only reach objects through chains
   rooted in merged references. If a cycle has zero external refs at merge time, no mutator
   can reach it.
]#
{.push raises: [].}
include cellseqs_v2
import std/locks
const
NumStripes = 64
QueueSize = 128
RootsThreshold = 10
colBlack = 0b000
colGray = 0b001
colWhite = 0b010
maybeCycle = 0b100
inRootsFlag = 0b1000
colorMask = 0b011
logOrc = defined(nimArcIds)
type
TraceProc = proc (p, env: pointer) {.nimcall, benign, raises: [].}
DisposeProc = proc (p: pointer) {.nimcall, benign, raises: [].}
template color(c): untyped = c.rc and colorMask
template setColor(c, col) =
when col == colBlack:
c.rc = c.rc and not colorMask
else:
c.rc = c.rc and not colorMask or col
const
optimizedOrc = false
useJumpStack = false
type
GcEnv = object
traceStack: CellSeq[ptr pointer]
when useJumpStack:
jumpStack: CellSeq[ptr pointer]
toFree: CellSeq[Cell]
freed, touched, edges, rcSum: int
keepThreshold: bool
proc trace(s: Cell; desc: PNimTypeV2; j: var GcEnv) {.inline.} =
if desc.traceImpl != nil:
var p = s +! sizeof(RefHeader)
cast[TraceProc](desc.traceImpl)(p, addr(j))
include threadids
type
Stripe = object
when not defined(yrcAtomics):
lockInc: Lock
toIncLen: int
toInc: array[QueueSize, Cell]
lockDec: Lock
toDecLen: int
toDec: array[QueueSize, (Cell, PNimTypeV2)]
type
PreventThreadFromCollectProc* = proc(): bool {.nimcall, benign, raises: [].}
## Callback run before this thread runs the cycle collector.
## Return `true` to prevent this thread from running the collector (e.g. a hard
## realtime thread), `false` to allow collection (see `mayRunCycleCollect`).
## Invoked while holding the global lock; must not call back into YRC.
var
gYrcGlobalLock: Lock
roots: CellSeq[Cell] # merged roots, used under global lock
stripes: array[NumStripes, Stripe]
rootsThreshold: int = 128
defaultThreshold = when defined(nimFixedOrc): 10_000 else: 128
gPreventThreadFromCollectProc: PreventThreadFromCollectProc = nil
proc GC_setPreventThreadFromCollectProc*(cb: PreventThreadFromCollectProc) =
##[ Can be used to customize the cycle collector for a thread. For example,
to ensure that a hard realtime thread cannot run the cycle collector use:
```nim
var hardRealTimeThread: int
GC_setPreventThreadFromCollectProc(proc(): bool {.nimcall.} = hardRealTimeThread == getThreadId())
```
To ensure that a hard realtime thread cannot be involved in any cycle collector activity use:
```nim
GC_setPreventThreadFromCollectProc(proc(): bool {.nimcall.} =
  if hardRealTimeThread == getThreadId():
    writeStackTrace()
    echo "Realtime thread involved in unpredictable cycle collector activity!"
  result = false)
```
]##
gPreventThreadFromCollectProc = cb
proc GC_getPreventThreadFromCollectProc*(): PreventThreadFromCollectProc =
## Returns the current "prevent thread from collecting proc".
## Typically `nil` if not set.
result = gPreventThreadFromCollectProc
proc mayRunCycleCollect(): bool {.inline.} =
if gPreventThreadFromCollectProc == nil: true
else: not gPreventThreadFromCollectProc()
proc getStripeIdx(): int {.inline.} =
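# NumStripes is a power of two, so masking the thread id is a cheap modulo into the stripes array.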
getThreadId() and (NumStripes - 1)
proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
let h = head(p)
when optimizedOrc:
if cyclic: h.rc = h.rc or maybeCycle
when defined(yrcAtomics):
let s = getStripeIdx()
let slot = atomicFetchAdd(addr stripes[s].toIncLen, 1, ATOMIC_ACQ_REL)
if slot < QueueSize:
atomicStoreN(addr stripes[s].toInc[slot], h, ATOMIC_RELEASE)
else:
withLock gYrcGlobalLock:
h.rc = h.rc +% rcIncrement
for i in 0..<NumStripes:
let len = atomicExchangeN(addr stripes[i].toIncLen, 0, ATOMIC_ACQUIRE)
for j in 0..<min(len, QueueSize):
let x = atomicLoadN(addr stripes[i].toInc[j], ATOMIC_ACQUIRE)
x.rc = x.rc +% rcIncrement
else:
let idx = getStripeIdx()
while true:
var overflow = false
withLock stripes[idx].lockInc:
if stripes[idx].toIncLen < QueueSize:
stripes[idx].toInc[stripes[idx].toIncLen] = h
stripes[idx].toIncLen += 1
else:
overflow = true
if overflow:
withLock gYrcGlobalLock:
for i in 0..<NumStripes:
withLock stripes[i].lockInc:
for j in 0..<stripes[i].toIncLen:
let x = stripes[i].toInc[j]
x.rc = x.rc +% rcIncrement
stripes[i].toIncLen = 0
else:
break
proc mergePendingRoots() =
for i in 0..<NumStripes:
when defined(yrcAtomics):
let incLen = atomicExchangeN(addr stripes[i].toIncLen, 0, ATOMIC_ACQUIRE)
for j in 0..<min(incLen, QueueSize):
let x = atomicLoadN(addr stripes[i].toInc[j], ATOMIC_ACQUIRE)
x.rc = x.rc +% rcIncrement
else:
withLock stripes[i].lockInc:
for j in 0..<stripes[i].toIncLen:
let x = stripes[i].toInc[j]
x.rc = x.rc +% rcIncrement
stripes[i].toIncLen = 0
withLock stripes[i].lockDec:
for j in 0..<stripes[i].toDecLen:
let (c, desc) = stripes[i].toDec[j]
c.rc = c.rc -% rcIncrement
if (c.rc and inRootsFlag) == 0:
c.rc = c.rc or inRootsFlag
if roots.d == nil: init(roots)
add(roots, c, desc)
stripes[i].toDecLen = 0
proc collectCycles()
when logOrc or orcLeakDetector:
proc writeCell(msg: cstring; s: Cell; desc: PNimTypeV2) =
when orcLeakDetector:
cfprintf(cstderr, "%s %s file: %s:%ld; color: %ld; thread: %ld\n",
msg, desc.name, s.filename, s.line, s.color, getThreadId())
else:
cfprintf(cstderr, "%s %s %ld root index: %ld; RC: %ld; color: %ld; thread: %ld\n",
msg, desc.name, s.refId, (if (s.rc and inRootsFlag) != 0: 1 else: 0), s.rc shr rcShift, s.color, getThreadId())
proc free(s: Cell; desc: PNimTypeV2) {.inline.} =
when traceCollector:
cprintf("[From ] %p rc %ld color %ld\n", s, s.rc shr rcShift, s.color)
let p = s +! sizeof(RefHeader)
when logOrc: writeCell("free", s, desc)
if desc.destructor != nil:
cast[DestructorProc](desc.destructor)(p)
nimRawDispose(p, desc.align)
template orcAssert(cond, msg) =
when logOrc:
if not cond:
cfprintf(cstderr, "[Bug!] %s\n", msg)
rawQuit 1
when logOrc:
proc strstr(s, sub: cstring): cstring {.header: "<string.h>", importc.}
proc nimTraceRef(q: pointer; desc: PNimTypeV2; env: pointer) {.compilerRtl, inl.} =
let p = cast[ptr pointer](q)
if p[] != nil:
orcAssert strstr(desc.name, "TType") == nil, "following a TType but it's acyclic!"
var j = cast[ptr GcEnv](env)
j.traceStack.add(p, desc)
proc nimTraceRefDyn(q: pointer; env: pointer) {.compilerRtl, inl.} =
let p = cast[ptr pointer](q)
if p[] != nil:
var j = cast[ptr GcEnv](env)
j.traceStack.add(p, cast[ptr PNimTypeV2](p[])[])
proc scanBlack(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
s.setColor colBlack
let until = j.traceStack.len
trace(s, desc, j)
when logOrc: writeCell("root still alive", s, desc)
while j.traceStack.len > until:
let (entry, desc) = j.traceStack.pop()
let t = head entry[]
t.rc = t.rc +% rcIncrement
if t.color != colBlack:
t.setColor colBlack
trace(t, desc, j)
when logOrc: writeCell("child still alive", t, desc)
proc markGray(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
if s.color != colGray:
s.setColor colGray
j.touched = j.touched +% 1
j.rcSum = j.rcSum +% (s.rc shr rcShift) +% 1
orcAssert(j.traceStack.len == 0, "markGray: trace stack not empty")
trace(s, desc, j)
while j.traceStack.len > 0:
let (entry, desc) = j.traceStack.pop()
let t = head entry[]
t.rc = t.rc -% rcIncrement
j.edges = j.edges +% 1
if t.color != colGray:
t.setColor colGray
j.touched = j.touched +% 1
j.rcSum = j.rcSum +% (t.rc shr rcShift) +% 2
trace(t, desc, j)
proc scan(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
if s.color == colGray:
if (s.rc shr rcShift) >= 0:
scanBlack(s, desc, j)
else:
orcAssert(j.traceStack.len == 0, "scan: trace stack not empty")
s.setColor(colWhite)
trace(s, desc, j)
while j.traceStack.len > 0:
let (entry, desc) = j.traceStack.pop()
let t = head entry[]
if t.color == colGray:
if (t.rc shr rcShift) >= 0:
scanBlack(t, desc, j)
else:
t.setColor(colWhite)
trace(t, desc, j)
proc collectColor(s: Cell; desc: PNimTypeV2; col: int; j: var GcEnv) =
if s.color == col and (s.rc and inRootsFlag) == 0:
orcAssert(j.traceStack.len == 0, "collectWhite: trace stack not empty")
s.setColor(colBlack)
j.toFree.add(s, desc)
trace(s, desc, j)
while j.traceStack.len > 0:
let (entry, desc) = j.traceStack.pop()
let t = head entry[]
entry[] = nil
if t.color == col and (t.rc and inRootsFlag) == 0:
j.toFree.add(t, desc)
t.setColor(colBlack)
trace(t, desc, j)
proc collectCyclesBacon(j: var GcEnv; lowMark: int) =
let last = roots.len -% 1
when logOrc:
for i in countdown(last, lowMark):
writeCell("root", roots.d[i][0], roots.d[i][1])
for i in countdown(last, lowMark):
markGray(roots.d[i][0], roots.d[i][1], j)
var colToCollect = colWhite
if j.rcSum == j.edges:
colToCollect = colGray
j.keepThreshold = true
else:
for i in countdown(last, lowMark):
scan(roots.d[i][0], roots.d[i][1], j)
init j.toFree
for i in 0 ..< roots.len:
let s = roots.d[i][0]
s.rc = s.rc and not inRootsFlag
collectColor(s, roots.d[i][1], colToCollect, j)
when not defined(nimStressOrc):
let oldThreshold = rootsThreshold
rootsThreshold = high(int)
roots.len = 0
for i in 0 ..< j.toFree.len:
when orcLeakDetector:
writeCell("CYCLIC OBJECT FREED", j.toFree.d[i][0], j.toFree.d[i][1])
free(j.toFree.d[i][0], j.toFree.d[i][1])
when not defined(nimStressOrc):
rootsThreshold = oldThreshold
j.freed = j.freed +% j.toFree.len
deinit j.toFree
when defined(nimOrcStats):
var freedCyclicObjects {.threadvar.}: int
proc collectCycles() =
when logOrc:
cfprintf(cstderr, "[collectCycles] begin\n")
withLock gYrcGlobalLock:
mergePendingRoots()
if roots.len >= RootsThreshold and mayRunCycleCollect():
var j: GcEnv
init j.traceStack
collectCyclesBacon(j, 0)
if roots.len == 0 and roots.d != nil:
deinit roots
when not defined(nimStressOrc):
if j.keepThreshold:
discard
elif j.freed *% 2 >= j.touched:
when not defined(nimFixedOrc):
rootsThreshold = max(rootsThreshold div 3 *% 2, 16)
else:
rootsThreshold = 0
elif rootsThreshold < high(int) div 4:
rootsThreshold = (if rootsThreshold <= 0: defaultThreshold else: rootsThreshold)
rootsThreshold = rootsThreshold div 2 +% rootsThreshold
when logOrc:
cfprintf(cstderr, "[collectCycles] end; freed %ld new threshold %ld\n", j.freed, rootsThreshold)
when defined(nimOrcStats):
inc freedCyclicObjects, j.freed
deinit j.traceStack
when defined(nimOrcStats):
type
OrcStats* = object
freedCyclicObjects*: int
proc GC_orcStats*(): OrcStats =
result = OrcStats(freedCyclicObjects: freedCyclicObjects)
proc GC_runOrc* =
withLock gYrcGlobalLock:
mergePendingRoots()
if mayRunCycleCollect():
var j: GcEnv
init j.traceStack
collectCyclesBacon(j, 0)
deinit j.traceStack
roots.len = 0
when logOrc: orcAssert roots.len == 0, "roots not empty!"
proc GC_enableOrc*() =
when not defined(nimStressOrc):
rootsThreshold = 0
proc GC_disableOrc*() =
when not defined(nimStressOrc):
rootsThreshold = high(int)
proc GC_prepareOrc*(): int {.inline.} =
withLock gYrcGlobalLock:
mergePendingRoots()
result = roots.len
proc GC_partialCollect*(limit: int) =
withLock gYrcGlobalLock:
mergePendingRoots()
if roots.len > limit and mayRunCycleCollect():
var j: GcEnv
init j.traceStack
collectCyclesBacon(j, limit)
deinit j.traceStack
roots.len = limit
proc GC_fullCollect* =
GC_runOrc()
proc GC_enableMarkAndSweep*() = GC_enableOrc()
proc GC_disableMarkAndSweep*() = GC_disableOrc()
const acyclicFlag = 1
when optimizedOrc:
template markedAsCyclic(s: Cell; desc: PNimTypeV2): bool =
(desc.flags and acyclicFlag) == 0 and (s.rc and maybeCycle) != 0
else:
template markedAsCyclic(s: Cell; desc: PNimTypeV2): bool =
(desc.flags and acyclicFlag) == 0
proc nimDecRefIsLastCyclicDyn(p: pointer): bool {.compilerRtl, inl.} =
result = false
if p != nil:
let cell = head(p)
let desc = cast[ptr PNimTypeV2](p)[]
let idx = getStripeIdx()
while true:
var overflow = false
withLock stripes[idx].lockDec:
if stripes[idx].toDecLen < QueueSize:
stripes[idx].toDec[stripes[idx].toDecLen] = (cell, desc)
stripes[idx].toDecLen += 1
else:
overflow = true
if overflow:
collectCycles()
else:
break
proc nimDecRefIsLastDyn(p: pointer): bool {.compilerRtl, inl.} =
nimDecRefIsLastCyclicDyn(p)
proc nimDecRefIsLastCyclicStatic(p: pointer; desc: PNimTypeV2): bool {.compilerRtl, inl.} =
result = false
if p != nil:
let cell = head(p)
let idx = getStripeIdx()
while true:
var overflow = false
withLock stripes[idx].lockDec:
if stripes[idx].toDecLen < QueueSize:
stripes[idx].toDec[stripes[idx].toDecLen] = (cell, desc)
stripes[idx].toDecLen += 1
else:
overflow = true
if overflow:
collectCycles()
else:
break
proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
dest[] = src
if src != nil: nimIncRefCyclic(src, true)
proc yrcDec(tmp: pointer; desc: PNimTypeV2) {.inline.} =
if desc != nil:
discard nimDecRefIsLastCyclicStatic(tmp, desc)
else:
discard nimDecRefIsLastCyclicDyn(tmp)
proc nimAsgnYrc(dest: ptr pointer; src: pointer; desc: PNimTypeV2) {.compilerRtl.} =
## YRC write barrier for ref copy assignment.
## Atomically stores src into dest, then buffers RC adjustments.
## Freeing is always done by the cycle collector, never inline.
let tmp = dest[]
atomicStoreN(dest, src, ATOMIC_RELEASE)
if src != nil:
nimIncRefCyclic(src, true)
if tmp != nil:
yrcDec(tmp, desc)
proc nimSinkYrc(dest: ptr pointer; src: pointer; desc: PNimTypeV2) {.compilerRtl.} =
## YRC write barrier for ref sink (move). No incRef on source.
## Freeing is always done by the cycle collector, never inline.
let tmp = dest[]
atomicStoreN(dest, src, ATOMIC_RELEASE)
if tmp != nil:
yrcDec(tmp, desc)
proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} =
when optimizedOrc:
if p != nil:
let h = head(p)
h.rc = h.rc or maybeCycle
# Initialize locks at module load
initLock(gYrcGlobalLock)
for i in 0..<NumStripes:
when not defined(yrcAtomics):
initLock(stripes[i].lockInc)
initLock(stripes[i].lockDec)
{.pop.}
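
The runtime above keeps ORC's explicit-collection entry points (`GC_prepareOrc`, `GC_runOrc`, `GC_partialCollect`). A minimal sketch of driving them from a latency-sensitive loop; `serving` and `handleRequest` are hypothetical application code:

```nim
while serving:
  handleRequest()
  # merge the per-stripe buffers and ask how many candidate roots are pending
  let pending = GC_prepareOrc()
  if pending > 500:
    # bounded work: collect until at most 100 candidate roots remain
    GC_partialCollect(100)
```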

lib/system/yrc_proof.lean  (new file, 353 lines)

@@ -0,0 +1,353 @@
/-
YRC Safety Proof (self-contained, no Mathlib)
==============================================
Formal model of YRC's key invariant: the cycle collector never frees
an object that any mutator thread can reach.
## Model overview
We model the heap as a set of objects with directed edges (ref fields).
Each thread owns a set of *stack roots* — objects reachable from local variables.
The write barrier (nimAsgnYrc) does:
1. atomic store dest ← src (graph is immediately current)
2. buffer inc(src) (deferred)
3. buffer dec(old) (deferred)
The collector (under global lock) does:
1. Merge all buffered inc/dec into merged RCs
2. Trial deletion (markGray): subtract internal edges from merged RCs
3. scan: objects with RC ≥ 0 after trial deletion are rescued (scanBlack)
4. Free objects that remain white (closed cycles with zero external refs)
-/
-- Objects and threads are just natural numbers for simplicity.
abbrev Obj := Nat
abbrev Thread := Nat
/-! ### State -/
/-- The state of the heap and collector at a point in time. -/
structure State where
/-- Physical heap edges: `edges x y` means object `x` has a ref field pointing to `y`.
Always up-to-date (atomic stores). -/
edges : Obj → Obj → Prop
/-- Stack roots per thread. `roots t x` means thread `t` has a local variable pointing to `x`. -/
roots : Thread → Obj → Prop
/-- Pending buffered increments (not yet merged). -/
pendingInc : Obj → Nat
/-- Pending buffered decrements (not yet merged). -/
pendingDec : Obj → Nat
/-! ### Reachability -/
/-- An object is *reachable* if some thread can reach it via stack roots + heap edges. -/
inductive Reachable (s : State) : Obj → Prop where
| root (t : Thread) (x : Obj) : s.roots t x → Reachable s x
| step (x y : Obj) : Reachable s x → s.edges x y → Reachable s y
/-- Directed reachability between heap objects (following physical edges only). -/
inductive HeapReachable (s : State) : Obj → Obj → Prop where
| refl (x : Obj) : HeapReachable s x x
| step (x y z : Obj) : HeapReachable s x y → s.edges y z → HeapReachable s x z
/-- If a root reaches `r` and `r` heap-reaches `x`, then `x` is Reachable. -/
theorem heapReachable_of_reachable (s : State) (r x : Obj)
(hr : Reachable s r) (hp : HeapReachable s r x) :
Reachable s x := by
induction hp with
| refl => exact hr
| step _ _ _ hedge ih => exact Reachable.step _ _ ih hedge
/-! ### What the collector frees -/
/-- An object has an *external reference* if some thread's stack roots point to it. -/
def hasExternalRef (s : State) (x : Obj) : Prop :=
∃ t, s.roots t x
/-- An object is *externally anchored* if it is heap-reachable from some
object that has an external reference. This is what scanBlack computes:
it starts from objects with trialRC ≥ 0 (= has external refs) and traces
the current physical graph. -/
def anchored (s : State) (x : Obj) : Prop :=
∃ r, hasExternalRef s r ∧ HeapReachable s r x
/-- The collector frees `x` only if `x` is *not anchored*:
no external ref, and not reachable from any externally-referenced object.
This models: after trial deletion, x remained white, and scanBlack
didn't rescue it. -/
def collectorFrees (s : State) (x : Obj) : Prop :=
¬ anchored s x
/-! ### Main safety theorem -/
/-- **Lemma**: Every reachable object is anchored.
If thread `t` reaches `x`, then there is a chain from a stack root
(which has an external ref) through heap edges to `x`. -/
theorem reachable_is_anchored (s : State) (x : Obj)
(h : Reachable s x) : anchored s x := by
induction h with
| root t x hroot =>
exact ⟨x, ⟨t, hroot⟩, HeapReachable.refl x⟩
| step a b h_reach_a h_edge ih =>
obtain ⟨r, h_ext_r, h_path_r_a⟩ := ih
exact ⟨r, h_ext_r, HeapReachable.step r a b h_path_r_a h_edge⟩
/-- **Main Safety Theorem**: If the collector frees `x`, then no thread
can reach `x`. Freed objects are unreachable.
This is the contrapositive of `reachable_is_anchored`. -/
theorem yrc_safety (s : State) (x : Obj)
(h_freed : collectorFrees s x) : ¬ Reachable s x := by
intro h_reach
exact h_freed (reachable_is_anchored s x h_reach)
/-! ### The write barrier preserves reachability -/
/-- Model of `nimAsgnYrc(dest_field_of_a, src)`:
Object `a` had a field pointing to `old`, now points to `src`.
Graph update is immediate. The new edge takes priority (handles src = old). -/
def writeBarrier (s : State) (a old src : Obj) : State :=
{ s with
edges := fun x y =>
if x = a ∧ y = src then True
else if x = a ∧ y = old then False
else s.edges x y
pendingInc := fun x => if x = src then s.pendingInc x + 1 else s.pendingInc x
pendingDec := fun x => if x = old then s.pendingDec x + 1 else s.pendingDec x }
/-- **No Lost Object Theorem**: If thread `t` holds a stack ref to `a` and
executes `a.field = b` (replacing old), then `b` is reachable afterward.
This is why the "lost object" problem from concurrent GC literature
doesn't arise in YRC: the atomic store makes `a→b` visible immediately,
and `a` is anchored (thread `t` holds it), so scanBlack traces `a→b`
and rescues `b`. -/
theorem no_lost_object (s : State) (t : Thread) (a old b : Obj)
(h_root_a : s.roots t a) :
Reachable (writeBarrier s a old b) b := by
apply Reachable.step a b
· exact Reachable.root t a h_root_a
· simp [writeBarrier]
/-! ### Non-atomic write barrier window safety
The write barrier does three steps non-atomically:
1. atomicStore(dest, src) — graph update
2. buffer inc(src) — deferred
3. buffer dec(old) — deferred
If the collector runs between steps 1 and 2 (inc not yet buffered):
- src has a new incoming heap edge not yet reflected in RCs
- But src is reachable from the mutator's stack (mutator held a ref to store it)
- So src has an external ref → trialRC ≥ 1 → scanBlack rescues src ✓
If the collector runs between steps 2 and 3 (dec not yet buffered):
- old's RC is inflated by 1 (the dec hasn't arrived)
- This is conservative: old appears to have more refs than it does
- Trial deletion won't spuriously free it ✓
-/
/-- Model the state between steps 1-2: graph updated, inc not yet buffered.
`src` has new edge but RC doesn't reflect it yet. -/
def stateAfterStore (s : State) (a old src : Obj) : State :=
{ s with
edges := fun x y =>
if x = a ∧ y = src then True
else if x = a ∧ y = old then False
else s.edges x y }
/-- Even in the window between atomic store and buffered inc,
src is still reachable (from the mutator's stack via a→src). -/
theorem src_reachable_in_window (s : State) (t : Thread) (a old src : Obj)
(h_root_a : s.roots t a) :
Reachable (stateAfterStore s a old src) src := by
apply Reachable.step a src
· exact Reachable.root t a h_root_a
· simp [stateAfterStore]
/-- Therefore src is anchored in the window → collector won't free it. -/
theorem src_safe_in_window (s : State) (t : Thread) (a old src : Obj)
(h_root_a : s.roots t a) :
¬ collectorFrees (stateAfterStore s a old src) src := by
intro h_freed
exact h_freed (reachable_is_anchored _ _ (src_reachable_in_window s t a old src h_root_a))
/-! ### Deadlock freedom
YRC uses three classes of locks:
• gYrcGlobalLock (level 0)
• stripes[i].lockInc (level 2*i + 1, for i in 0..N-1)
• stripes[i].lockDec (level 2*i + 2, for i in 0..N-1)
Total order: global < lockInc[0] < lockDec[0] < lockInc[1] < lockDec[1] < ...
Every code path in yrc.nim acquires locks in strictly ascending level order:
**nimIncRefCyclic** (mutator fast path):
acquire lockInc[myStripe] → release → done.
Holds exactly one lock. ✓
**nimIncRefCyclic** (overflow path):
acquire gYrcGlobalLock (level 0), then for i=0..N-1: acquire lockInc[i] → release.
Ascending: 0 < 1 < 3 < 5 < ... ✓
**nimDecRefIsLastCyclic{Dyn,Static}** (fast path):
acquire lockDec[myStripe] → release → done.
Holds exactly one lock. ✓
**nimDecRefIsLastCyclic{Dyn,Static}** (overflow path):
calls collectCycles → acquire gYrcGlobalLock (level 0),
then mergePendingRoots which for i=0..N-1:
acquire lockInc[i] → release, acquire lockDec[i] → release.
Ascending: 0 < 1 < 2 < 3 < 4 < ... ✓
**collectCycles / GC_runOrc** (collector):
acquire gYrcGlobalLock (level 0),
then mergePendingRoots (same ascending pattern as above). ✓
**nimAsgnYrc / nimSinkYrc** (write barrier):
Calls nimIncRefCyclic then nimDecRefIsLastCyclic*.
Each call acquires and releases its lock independently.
No nesting between the two calls. ✓
Since every path follows the total order, deadlock is impossible.
-/
/-- Lock levels in YRC. Each lock maps to a unique natural number. -/
inductive LockId (n : Nat) where
| global : LockId n
| lockInc (i : Nat) (h : i < n) : LockId n
| lockDec (i : Nat) (h : i < n) : LockId n
/-- The level (priority) of each lock in the total order. -/
def lockLevel {n : Nat} : LockId n → Nat
| .global => 0
| .lockInc i _ => 2 * i + 1
| .lockDec i _ => 2 * i + 2
/-- All lock levels are distinct (the level function is injective). -/
theorem lockLevel_injective {n : Nat} (a b : LockId n)
(h : lockLevel a = lockLevel b) : a = b := by
cases a with
| global =>
cases b with
| global => rfl
| lockInc j hj => simp [lockLevel] at h
| lockDec j hj => simp [lockLevel] at h
| lockInc i hi =>
cases b with
| global => simp [lockLevel] at h
| lockInc j hj =>
have : i = j := by simp [lockLevel] at h; omega
subst this; rfl
| lockDec j hj => simp [lockLevel] at h; omega
| lockDec i hi =>
cases b with
| global => simp [lockLevel] at h
| lockInc j hj => simp [lockLevel] at h; omega
| lockDec j hj =>
have : i = j := by simp [lockLevel] at h; omega
subst this; rfl
/-- Helper: stripe lock levels are strictly ascending across stripes. -/
theorem stripe_levels_ascending (i : Nat) :
2 * i + 1 < 2 * i + 2 ∧ 2 * i + 2 < 2 * (i + 1) + 1 := by
constructor <;> omega
/-- lockInc levels are strictly ascending with index. -/
theorem lockInc_level_strict_mono {n : Nat} (i j : Nat) (hi : i < n) (hj : j < n)
(hij : i < j) : lockLevel (.lockInc i hi : LockId n) < lockLevel (.lockInc j hj) := by
simp [lockLevel]; omega
/-- lockDec levels are strictly ascending with index. -/
theorem lockDec_level_strict_mono {n : Nat} (i j : Nat) (hi : i < n) (hj : j < n)
(hij : i < j) : lockLevel (.lockDec i hi : LockId n) < lockLevel (.lockDec j hj) := by
simp [lockLevel]; omega
/-- Global lock has the lowest level (level 0). -/
theorem global_level_min {n : Nat} (l : LockId n) (h : l ≠ .global) :
lockLevel (.global : LockId n) < lockLevel l := by
cases l with
| global => exact absurd rfl h
| lockInc i hi => simp [lockLevel]
| lockDec i hi => simp [lockLevel]
/-- **Deadlock Freedom**: Any sequence of lock acquisitions that follows the
"acquire in ascending level order" discipline cannot deadlock.
This is a standard result: a total order on locks with the invariant that
every thread acquires locks in strictly ascending order prevents cycles
in the wait-for graph, which is necessary and sufficient for deadlock.
We prove the 2-thread case (the general N-thread case follows by the
same transitivity argument on the wait-for cycle). -/
theorem no_deadlock_from_total_order {n : Nat}
-- Two threads each hold a lock and wait for another
(held₁ waited₁ held₂ waited₂ : LockId n)
-- Thread 1 holds held₁ and wants waited₁ (ascending order)
(h1 : lockLevel held₁ < lockLevel waited₁)
-- Thread 2 holds held₂ and wants waited₂ (ascending order)
(h2 : lockLevel held₂ < lockLevel waited₂)
-- Deadlock requires: thread 1 waits for what thread 2 holds,
-- and thread 2 waits for what thread 1 holds
(h_wait1 : waited₁ = held₂)
(h_wait2 : waited₂ = held₁) :
False := by
subst h_wait1; subst h_wait2
omega
/-! ### Summary of verified properties (all QED, no sorry)
1. `reachable_is_anchored`: Every reachable object is anchored
(has a path from an externally-referenced object via heap edges).
2. `yrc_safety`: The collector only frees unanchored objects,
which are unreachable by all threads. **No use-after-free.**
3. `no_lost_object`: After `a.field = b`, `b` is reachable
(atomic store makes the edge visible immediately).
4. `src_safe_in_window`: Even between the atomic store and
the buffered inc, the collector cannot free src.
5. `lockLevel_injective`: All lock levels are distinct (well-defined total order).
6. `global_level_min`: The global lock has the lowest level.
7. `lockInc_level_strict_mono`, `lockDec_level_strict_mono`:
Stripe locks are strictly ordered by index.
8. `no_deadlock_from_total_order`: A 2-thread deadlock cycle is impossible
when both threads acquire locks in ascending level order.
Together these establish that YRC's write barrier protocol
(atomic store → buffer inc → buffer dec) is safe under concurrent
collection, and the locking discipline prevents deadlock.
## What is NOT proved: Completeness (liveness)
This proof covers **safety** (no use-after-free) and **deadlock-freedom**,
but does NOT prove **completeness** — that all garbage cycles are eventually
collected.
Completeness depends on the trial deletion algorithm (Bacon 2001) correctly
identifying closed cycles. Specifically it requires proving:
1. After `mergePendingRoots`, merged RCs equal logical RCs
(buffered inc/dec exactly compensate graph changes since last merge).
2. `markGray` subtracts exactly the internal (heap→heap) edge count from
each node's merged RC, yielding `trialRC(x) = externalRefCount(x)`.
3. `scan` correctly partitions: nodes with `trialRC ≥ 0` are rescued by
`scanBlack`; nodes with `trialRC < 0` remain white.
4. White nodes form closed subgraphs with zero external refs → garbage.
These properties follow from the well-known Bacon trial-deletion algorithm
and are assumed here rather than re-proved. The YRC-specific contribution
(buffered RCs, striped queues, concurrent mutators) is what our safety
proof covers — showing that concurrency does not break the preconditions
that trial deletion relies on (physical graph consistency, eventual RC
consistency after merge).
Reference: D.F. Bacon and V.T. Rajan, "Concurrent Cycle Collection in
Reference Counted Systems", ECOOP 2001.
-/
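
Assuming a recent Lean 4 toolchain, the headline results can be sanity-checked by elaborating the file and inspecting their statements:

```lean
-- These succeed once yrc_proof.lean elaborates without errors or `sorry`.
#check @yrc_safety                    -- freed objects are unreachable
#check @no_lost_object                -- after `a.field = b`, b stays reachable
#check @src_safe_in_window            -- safe between the atomic store and the buffered inc
#check @no_deadlock_from_total_order  -- ascending lock order rules out a 2-thread wait cycle
```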

lib/system/yrc_proof.tla  (new file, 761 lines)

@@ -0,0 +1,761 @@
---- MODULE yrc_proof ----
\* TLA+ specification of YRC (Thread-safe ORC cycle collector)
\* Models the fine details of barriers, striped queues, and synchronization
\*
\* ## Key Barrier Semantics Modeled
\*
\* ### Write Barrier (nimAsgnYrc)
\* 1. atomicStoreN(dest, src, ATOMIC_RELEASE)
\* - Graph update is immediately visible to all threads (including collector)
\* - ATOMIC_RELEASE ensures all prior writes are visible before this store
\* - No lock required for graph updates (lock-free)
\*
\* 2. nimIncRefCyclic(src, true)
\* - Acquires per-stripe lockInc[stripe] (fine-grained)
\* - Buffers increment in toInc[stripe] queue
\* - On overflow: acquires global lock, merges all stripes, applies increment
\*
\* 3. yrcDec(tmp, desc)
\* - Acquires per-stripe lockDec[stripe] (fine-grained)
\* - Buffers decrement in toDec[stripe] queue
\* - On overflow: acquires global lock, merges all stripes, applies decrement,
\* adds to roots array if not already present
\*
\* ### Merge Operation (mergePendingRoots)
\* - Acquires global lock (exclusive access)
\* - Sequentially acquires each stripe's lockInc and lockDec
\* - Drains all buffers, applies RC adjustments
\* - Adds decremented objects to roots array
\* - After merge: mergedRC = logicalRC (current graph state)
\*
\* ### Collection Cycle (under global lock)
\* 1. mergePendingRoots: reconcile buffered changes
\* 2. markGray: trial deletion (subtract internal edges)
\* 3. scan: rescue objects with RC >= 0 (scanBlack follows current graph)
\* 4. collectColor: free white objects (closed cycles)
\*
\* ## Safety Argument
\*
\* The collector only frees closed cycles (zero external refs). Concurrent writes
\* cannot cause "lost objects" because:
\* - Graph updates are atomic and immediately visible
\* - Mutator must hold stack ref to modify object (external ref)
\* - scanBlack follows current physical edges (rescues newly written objects)
\* - Only objects unreachable from any stack root are freed
EXTENDS Naturals, Integers, Sequences, FiniteSets, TLC
CONSTANTS NumStripes, QueueSize, RootsThreshold, Objects, Threads, ObjTypes
ASSUME NumStripes \in Nat /\ NumStripes > 0
ASSUME QueueSize \in Nat /\ QueueSize > 0
ASSUME RootsThreshold \in Nat
ASSUME IsFiniteSet(Objects)
ASSUME IsFiniteSet(Threads)
ASSUME IsFiniteSet(ObjTypes)
\* NULL constant (represents "no thread" for locks)
\* We use a sentinel value that's guaranteed not to be in Threads or Objects
NULL == "NULL" \* String literal that won't conflict with Threads/Objects
ASSUME NULL \notin Threads /\ NULL \notin Objects
\* Helper functions
\* Note: GetStripeIdx is not used, GetStripe is used instead
\* Color constants
colBlack == 0
colGray == 1
colWhite == 2
maybeCycle == 4
inRootsFlag == 8
colorMask == 3
\* State variables
VARIABLES
\* Physical heap graph (always up-to-date, atomic stores)
edges, \* edges[obj1][obj2] = TRUE if obj1.field points to obj2
\* Stack roots per thread
roots, \* roots[thread][obj] = TRUE if thread has local var pointing to obj
\* Reference counts (stored in object header)
rc, \* rc[obj] = reference count (logical, after merge)
\* Color markers (stored in object header, bits 0-2)
color, \* color[obj] \in {colBlack, colGray, colWhite}
\* Root tracking flags
inRoots, \* inRoots[obj] = TRUE if obj is in roots array
\* Striped increment queues
toIncLen, \* toIncLen[stripe] = current length of increment queue
toInc, \* toInc[stripe][i] = object to increment
\* Striped decrement queues
toDecLen, \* toDecLen[stripe] = current length of decrement queue
toDec, \* toDec[stripe][i] = (object, type) pair to decrement
\* Per-stripe locks
lockInc, \* lockInc[stripe] = thread holding increment lock (or NULL)
lockDec, \* lockDec[stripe] = thread holding decrement lock (or NULL)
\* Global lock
globalLock, \* thread holding global lock (or NULL)
\* Merged roots array (used during collection)
mergedRoots, \* sequence of (object, type) pairs
\* Collection state
collecting, \* TRUE if collection is in progress
gcEnv, \* GC environment: {touched, edges, rcSum, toFree, ...}
\* Pending operations (for modeling atomicity)
pendingWrites \* set of pending write barrier operations
\* Type invariants
TypeOK ==
/\ edges \in [Objects -> [Objects -> BOOLEAN]]
/\ roots \in [Threads -> [Objects -> BOOLEAN]]
/\ rc \in [Objects -> Int]
/\ color \in [Objects -> {colBlack, colGray, colWhite}]
/\ inRoots \in [Objects -> BOOLEAN]
/\ toIncLen \in [0..(NumStripes-1) -> 0..QueueSize]
/\ toInc \in [0..(NumStripes-1) -> Seq(Objects)]
/\ toDecLen \in [0..(NumStripes-1) -> 0..QueueSize]
/\ toDec \in [0..(NumStripes-1) -> Seq([obj: Objects, desc: ObjTypes])]
/\ lockInc \in [0..(NumStripes-1) -> Threads \cup {NULL}]
/\ lockDec \in [0..(NumStripes-1) -> Threads \cup {NULL}]
/\ globalLock \in Threads \cup {NULL}
/\ mergedRoots \in Seq([obj: Objects, desc: ObjTypes])
/\ collecting \in BOOLEAN
/\ pendingWrites \in SUBSET ([thread: Threads, dest: Objects, old: Objects \cup {NULL}, src: Objects \cup {NULL}, phase: {"store", "inc", "dec"}])
\* Helper: internal reference count (heap-to-heap edges)
InternalRC(obj) ==
Cardinality({src \in Objects : edges[src][obj]})
\* Helper: external reference count (stack roots)
ExternalRC(obj) ==
Cardinality({t \in Threads : roots[t][obj]})
\* Helper: logical reference count
LogicalRC(obj) ==
InternalRC(obj) + ExternalRC(obj)
\* Helper: get stripe index for a thread.
\* Simplification: every thread maps to stripe 0. Threads are model values, so
\* there is no numeric id to hash; a single stripe is adequate for the small
\* model instances checked here.
GetStripe(thread) == 0
\* ============================================================================
\* Write Barrier: nimAsgnYrc
\* ============================================================================
\* The write barrier does:
\* 1. atomicStoreN(dest, src, ATOMIC_RELEASE) -- graph update is immediate
\* 2. nimIncRefCyclic(src, true) -- buffer inc(src)
\* 3. yrcDec(tmp, desc) -- buffer dec(old)
\*
\* Key barrier semantics:
\* - ATOMIC_RELEASE on store ensures all prior writes are visible before the graph update
\* - The graph update is immediately visible to all threads (including collector)
\* - RC adjustments are buffered and only applied during merge
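\*
\* For orientation, a minimal pseudocode sketch of the barrier being modeled
\* (assumed shape only -- parameter types and exact helper signatures in the
\* runtime may differ; the proc names are taken from the description above):
\*
\*   proc nimAsgnYrc(dest: var pointer; src: pointer; desc: PNimTypeV2) =
\*     let old = dest                                 # remember previous value
\*     atomicStoreN(addr dest, src, ATOMIC_RELEASE)   # 1. immediate topology update
\*     if src != nil: nimIncRefCyclic(src, true)      # 2. buffer inc(src)
\*     if old != nil: yrcDec(old, desc)               # 3. buffer dec(old)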
\* ============================================================================
\* Phase 1: Atomic Store (Topology Update)
\* ============================================================================
\* The atomic store always happens first, updating the graph topology.
\* This is independent of RC operations and never blocks.
MutatorWriteAtomicStore(thread, destObj, destField, oldVal, newVal, desc) ==
\* Atomic store with RELEASE barrier - updates graph topology immediately
\* Clear ALL edges from destObj first (atomic store replaces old value completely),
\* then set the new edge. This ensures destObj.field can only point to one object.
/\ edges' = [edges EXCEPT ![destObj] = [x \in Objects |->
IF x = newVal /\ newVal # NULL
THEN TRUE
ELSE FALSE]]
  /\ \* The RC buffers (toIncLen/toInc/toDecLen/toDec) are deliberately not listed
     \* here: they are constrained by Phase 2 (WriteBarrier) or by the overflow
     \* branch of MutatorWrite, which is conjoined with this action.
     UNCHANGED <<roots, rc, color, inRoots, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Phase 2: RC Buffering (if space available)
\* ============================================================================
\* Buffers increment/decrement if there's space. If overflow would happen,
\* this action is disabled (blocked) until merge can happen.
WriteBarrier(thread, destObj, destField, oldVal, newVal, desc) ==
LET stripe == GetStripe(thread)
IN
\* Determine if overflow happens for increment or decrement
/\ LET
incOverflow == (newVal # NULL) /\ (toIncLen[stripe] >= QueueSize)
decOverflow == (oldVal # NULL) /\ (toDecLen[stripe] >= QueueSize)
IN
\* Buffering: only enabled if no overflow (otherwise blocked until merge can happen)
/\ ~incOverflow \* Precondition: increment buffer has space (blocks if full)
/\ ~decOverflow \* Precondition: decrement buffer has space (blocks if full)
/\ toIncLen' = IF newVal # NULL /\ toIncLen[stripe] < QueueSize
THEN [toIncLen EXCEPT ![stripe] = toIncLen[stripe] + 1]
ELSE toIncLen
/\ toInc' = IF newVal # NULL /\ toIncLen[stripe] < QueueSize
THEN [toInc EXCEPT ![stripe] = Append(toInc[stripe], newVal)]
ELSE toInc
/\ toDecLen' = IF oldVal # NULL /\ toDecLen[stripe] < QueueSize
THEN [toDecLen EXCEPT ![stripe] = toDecLen[stripe] + 1]
ELSE toDecLen
/\ toDec' = IF oldVal # NULL /\ toDecLen[stripe] < QueueSize
THEN [toDec EXCEPT ![stripe] = Append(toDec[stripe], [obj |-> oldVal, desc |-> desc])]
ELSE toDec
     /\ \* edges is updated by Phase 1 (the atomic store), so it is not listed here.
        UNCHANGED <<roots, rc, color, inRoots, mergedRoots, lockInc, lockDec, globalLock, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Phase 3: Overflow Handling (separate actions that can block)
\* ============================================================================
\* Handle increment overflow: merge increment buffers when lock is available
\* This merges ALL increment buffers (for all stripes), not just the one that overflowed
MutatorWriteMergeInc(thread) ==
LET stripe == GetStripe(thread)
IN
/\ \E s \in 0..(NumStripes-1): toIncLen[s] >= QueueSize \* Some stripe has increment overflow
/\ globalLock = NULL \* Lock must be available (blocks if held)
/\ toIncLen' = [s \in 0..(NumStripes-1) |-> 0]
/\ toInc' = [s \in 0..(NumStripes-1) |-> <<>>]
/\ rc' = \* Compute RC from LogicalRC of current graph (increment buffers merged)
\* The graph is already updated by atomic store, so we compute from current edges
[x \in Objects |->
LET internalRC == Cardinality({src \in Objects : edges[src][x]})
externalRC == Cardinality({t \in Threads : roots[t][x]})
IN internalRC + externalRC]
  /\ globalLock' = NULL \* Lock acquired, merge done, lock released (the whole withLock block is one atomic step)
/\ UNCHANGED <<edges, roots, color, inRoots, toDecLen, toDec, lockInc, lockDec, mergedRoots, collecting, gcEnv, pendingWrites>>
\* Handle decrement overflow: merge ALL buffers when lock is available
\* This calls collectCycles() which merges both increment and decrement buffers
\* We inline MergePendingRoots here. The entire withLock block is atomic:
\* lock is acquired, merge happens, lock is released.
MutatorWriteMergeDec(thread) ==
LET stripe == GetStripe(thread)
IN
/\ \E s \in 0..(NumStripes-1): toDecLen[s] >= QueueSize \* Some stripe has decrement overflow
/\ globalLock = NULL \* Lock must be available (blocks if held)
/\ \* Merge all buffers (inlined MergePendingRoots logic)
     LET \* The buffered increments/decrements exactly compensate for graph changes
         \* that have already happened (atomic stores are immediate), so the merged
         \* RC of every object equals the logical RC of the current graph.
         newRC == [x \in Objects |->
                    LET internalRC == Cardinality({src \in Objects : edges[src][x]})
                        externalRC == Cardinality({t \in Threads : roots[t][x]})
                    IN internalRC + externalRC]
         \* Objects from the decrement buffers become cycle-root candidates. They are
         \* stored as (obj, desc) records so that mergedRoots keeps the type required
         \* by TypeOK (Seq([obj: Objects, desc: ObjTypes])).
         newRootsSet == UNION {{[obj |-> toDec[s][i].obj, desc |-> toDec[s][i].desc] :
                                 i \in 1..toDecLen[s]} : s \in 0..(NumStripes-1)}
         newRootsSeq == IF newRootsSet = {}
                        THEN <<>>
                        ELSE LET ordered == CHOOSE f \in [1..Cardinality(newRootsSet) -> newRootsSet] :
                                               \A i, j \in DOMAIN f : i # j => f[i] # f[j]
                             IN [i \in 1..Cardinality(newRootsSet) |-> ordered[i]]
IN
/\ rc' = newRC
/\ mergedRoots' = mergedRoots \o newRootsSeq
/\ inRoots' = [x \in Objects |->
IF newRootsSet = {}
THEN inRoots[x]
ELSE LET rootObjs == UNION {{mergedRoots'[i].obj : i \in DOMAIN mergedRoots'}}
IN IF x \in rootObjs THEN TRUE ELSE inRoots[x]]
/\ toIncLen' = [s \in 0..(NumStripes-1) |-> 0]
/\ toInc' = [s \in 0..(NumStripes-1) |-> <<>>]
/\ toDecLen' = [s \in 0..(NumStripes-1) |-> 0]
/\ toDec' = [s \in 0..(NumStripes-1) |-> <<>>]
/\ globalLock' = NULL \* Lock acquired, merge done, lock released (entire withLock block is atomic)
/\ UNCHANGED <<edges, roots, color, lockInc, lockDec, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Merge Operation: mergePendingRoots
\* ============================================================================
\* Drains all stripe buffers under global lock.
\* Sequentially acquires each stripe's lockInc and lockDec to drain buffers.
\* This reconciles buffered RC adjustments with the current graph state.
\*
\* Key invariant: After merge, mergedRC = logicalRC (current graph + buffered changes)
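\*
\* Worked example (assuming a heap write a.field := b that replaces an old
\* reference to c): the atomic store has already turned the edge a -> c into
\* a -> b, while the stripe buffers hold inc(b) and dec(c). Until the merge,
\* rc(b) is one too low and rc(c) one too high relative to the graph; draining
\* the buffers applies exactly the missing +1/-1, which is why the merged RC can
\* equivalently be computed as LogicalRC of the current graph. The object c,
\* taken from the decrement buffer, is appended to mergedRoots as a cycle-root
\* candidate.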
MergePendingRoots ==
/\ globalLock # NULL
  /\ LET
       \* The buffered RC adjustments exactly compensate for the graph changes that
       \* already happened (atomic stores are immediate), so after draining every
       \* stripe buffer the merged RC of each object equals the logical RC of the
       \* current graph:
       \*   mergedRC = currentRC + pendingInc - pendingDec = LogicalRC(current graph)
       \* We therefore compute the post-merge RC directly from the graph.
       newRC == [x \in Objects |-> LogicalRC(x)]
\* Add decremented objects to roots if not already there (check inRootsFlag)
\* Collect all new roots as a set, then convert to sequence
\* Build set by iterating over all (stripe, index) pairs
\* Use UNION with explicit per-stripe sets (avoiding function enumeration issues)
newRootsSet == UNION {UNION {IF inRoots[toDec[s][i].obj] = FALSE
THEN {[obj |-> toDec[s][i].obj, desc |-> toDec[s][i].desc]}
ELSE {} : i \in DOMAIN toDec[s]} : s \in 0..(NumStripes-1)}
newRootsSeq == IF newRootsSet = {}
THEN <<>>
ELSE LET ordered == CHOOSE f \in [1..Cardinality(newRootsSet) -> newRootsSet] :
\A i, j \in DOMAIN f : i # j => f[i] # f[j]
IN [i \in 1..Cardinality(newRootsSet) |-> ordered[i]]
IN
/\ rc' = newRC
/\ mergedRoots' = mergedRoots \o newRootsSeq \* Append new roots to sequence
/\ \* Update inRoots: mark objects in mergedRoots' as being in roots
\* Use explicit iteration to avoid enumeration issues
inRoots' = [x \in Objects |->
IF mergedRoots' = <<>>
THEN inRoots[x]
ELSE LET rootObjs == UNION {{mergedRoots'[i].obj : i \in DOMAIN mergedRoots'}}
IN IF x \in rootObjs THEN TRUE ELSE inRoots[x]]
/\ toIncLen' = [s \in 0..(NumStripes-1) |-> 0]
/\ toInc' = [s \in 0..(NumStripes-1) |-> <<>>]
/\ toDecLen' = [s \in 0..(NumStripes-1) |-> 0]
/\ toDec' = [s \in 0..(NumStripes-1) |-> <<>>]
/\ UNCHANGED <<edges, roots, color, lockInc, lockDec, globalLock, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Trial Deletion: markGray
\* ============================================================================
\* Subtracts internal (heap-to-heap) edges from reference counts.
\* This isolates external references (stack roots).
\*
\* Algorithm:
\* 1. Mark obj gray
\* 2. Trace obj's fields (via traceImpl)
\* 3. For each child c: decrement c.rc (subtract internal edge)
\* 4. Recursively markGray all children
\*
\* After markGray: trialRC(obj) = mergedRC(obj) - internalRefCount(obj)
\* = externalRefCount(obj) (if merge was correct)
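\*
\* Worked example (assuming a model instance with Objects = {A, B}, edges A -> B
\* and B -> A, and one stack root on A): after the merge rc(A) = 2 (edge B -> A
\* plus the root) and rc(B) = 1. MarkGray(A) visits the subgraph {A, B} and, for
\* every object reachable from A's children, subtracts one per incoming
\* in-subgraph edge: rc(A) = 1, rc(B) = 0, both gray. The remaining rc(A) = 1 is
\* exactly A's external reference.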
MarkGray(obj, desc) ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ color[obj] # colGray
/\ \* Compute transitive closure of all objects reachable from obj
\* This models the recursive traversal in the actual implementation
LET children == {c \in Objects : edges[obj][c]}
\* Compute all objects reachable from obj via heap edges
\* This is the transitive closure starting from obj's direct children
             allReachable == {c \in Objects :
                               \* Bound path length by |Objects|: TLC cannot enumerate
                               \* Seq(Objects), and any witness path can be shortened
                               \* to a simple path of at most that length.
                               \E n \in 1..Cardinality(Objects):
                                 \E path \in [1..n -> Objects]:
                                   /\ path[1] \in children
                                   /\ path[n] = c
                                   /\ \A i \in 1..(n-1):
                                        edges[path[i]][path[i+1]]}
\* All objects to mark gray: obj itself + all reachable descendants
objectsToMarkGray == {obj} \cup allReachable
\* For each reachable object, count internal edges pointing to it
\* from within the subgraph (obj + allReachable)
\* This is the number of times its RC should be decremented
subgraph == {obj} \cup allReachable
internalEdgeCount == [x \in Objects |->
IF x \in allReachable
THEN Cardinality({y \in subgraph : edges[y][x]})
ELSE 0]
IN
/\ \* Mark obj and all reachable objects gray
color' = [x \in Objects |->
IF x \in objectsToMarkGray THEN colGray ELSE color[x]]
/\ \* Subtract internal edges: for each reachable object, decrement its RC
\* by the number of internal edges pointing to it from within the subgraph.
\* This matches the Nim implementation which decrements once per edge traversed.
        \* Note: obj itself is only decremented if it is reachable from its own
        \* children (i.e. it lies on a cycle). External refs are never subtracted,
        \* so they survive trial deletion.
rc' = [x \in Objects |->
IF x \in allReachable THEN rc[x] - internalEdgeCount[x] ELSE rc[x]]
/\ UNCHANGED <<edges, roots, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Scan Phase
\* ============================================================================
\* Objects with RC >= 0 after trial deletion are rescued (scanBlack).
\* Objects with RC < 0 remain white (part of closed cycle).
\*
\* Key insight: scanBlack follows the *current* physical edges (which may have
\* changed since merge due to concurrent writes). This ensures objects written
\* during collection are still rescued.
\*
\* Algorithm:
\* IF rc[obj] >= 0:
\* scanBlack(obj): mark black, restore RC, trace and rescue all children
\* ELSE:
\* mark white (closed cycle with zero external refs)
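\*
\* Continuing the example above: Scan(A) sees rc(A) = 1 >= 0, so scanBlack marks
\* A and B black and restores the subtracted in-subgraph edges, giving rc(A) = 2
\* and rc(B) = 1 again; nothing is freed, as required, because the cycle is
\* still externally referenced.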
Scan(obj, desc) ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ color[obj] = colGray
/\ IF rc[obj] >= 0
THEN \* scanBlack: rescue obj and all reachable objects
\* This follows the current physical graph (atomic stores are visible)
\* Restore RC for all reachable objects by incrementing by the number of
\* internal edges pointing to each (matching what markGray subtracted)
LET children == {c \in Objects : edges[obj][c]}
                allReachable == {c \in Objects :
                                  \* Same bounded-path formulation as in MarkGray
                                  \E n \in 1..Cardinality(Objects):
                                    \E path \in [1..n -> Objects]:
                                      /\ path[1] \in children
                                      /\ path[n] = c
                                      /\ \A i \in 1..(n-1):
                                           edges[path[i]][path[i+1]]}
objectsToMarkBlack == {obj} \cup allReachable
\* For each reachable object, count internal edges pointing to it
\* from within the subgraph (obj + allReachable)
\* This is the number of times its RC should be incremented (restored)
subgraph == {obj} \cup allReachable
internalEdgeCount == [x \in Objects |->
IF x \in allReachable
THEN Cardinality({y \in subgraph : edges[y][x]})
ELSE 0]
IN
/\ \* Restore RC: increment by the number of internal edges pointing to each
\* reachable object. This restores what markGray subtracted.
          \* Note: obj itself is only incremented if it lies on a cycle, mirroring
          \* what markGray subtracted. External refs were never subtracted, so the
          \* root's RC still reflects them.
rc' = [x \in Objects |->
IF x \in allReachable THEN rc[x] + internalEdgeCount[x] ELSE rc[x]]
/\ \* Mark obj and all reachable objects black in one assignment
color' = [x \in Objects |->
IF x \in objectsToMarkBlack THEN colBlack ELSE color[x]]
ELSE \* Mark white (part of closed cycle)
/\ color' = [color EXCEPT ![obj] = colWhite]
/\ UNCHANGED <<rc>>
/\ UNCHANGED <<edges, roots, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Collection Phase: collectColor
\* ============================================================================
\* Frees objects of the target color that are not in roots.
\*
\* Safety: Only objects with color = targetColor AND ~inRoots[obj] are freed.
\* These are closed cycles (zero external refs, not reachable from roots).
CollectColor(obj, desc, targetColor) ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ color[obj] = targetColor
/\ ~inRoots[obj]
  /\ \* Free obj: nullify all of its outgoing edges (prevents use-after-free).
     \* In the actual implementation this happens during trace() when freeing.
     edges' = [edges EXCEPT ![obj] = [x \in Objects |-> FALSE]]
/\ color' = [color EXCEPT ![obj] = colBlack] \* Mark as freed
/\ UNCHANGED <<roots, rc, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Collection Cycle: collectCyclesBacon
\* ============================================================================
StartCollection ==
/\ globalLock # NULL
/\ ~collecting
/\ Len(mergedRoots) >= RootsThreshold
/\ collecting' = TRUE
/\ gcEnv' = [touched |-> 0, edges |-> 0, rcSum |-> 0, toFree |-> {}]
/\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, pendingWrites>>
EndCollection ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ \* Clear root flags
inRoots' = [x \in Objects |->
IF x \in {r.obj : r \in mergedRoots} THEN FALSE ELSE inRoots[x]]
/\ mergedRoots' = <<>>
/\ collecting' = FALSE
/\ UNCHANGED <<edges, roots, rc, color, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, gcEnv, pendingWrites>>
\* ============================================================================
\* Mutator Actions
\* ============================================================================
\* Mutator can write at any time (graph updates are lock-free)
\* The ATOMIC_RELEASE barrier ensures proper ordering
\* ASSUMPTION: Users synchronize pointer assignments with locks, so oldVal always
\* matches the current graph state (as read before the atomic store).
\* This prevents races at the user level - the GC itself is lock-free.
MutatorWrite(thread, destObj, destField, oldVal, newVal, desc) ==
\* ASSUMPTION: Users synchronize pointer assignments with locks, so oldVal always matches
\* the value read before the atomic store. This prevents races at the user level.
\* The precondition is enforced in the Next relation.
\* Phase 1: Atomic store (topology update) - ALWAYS happens first
/\ MutatorWriteAtomicStore(thread, destObj, destField, oldVal, newVal, desc)
\* Phase 2: RC buffering - happens if no overflow, otherwise overflow is handled separately
\* Note: In reality, if overflow happens, the thread blocks waiting for lock.
\* We model this as: atomic store happens, buffering is deferred (handled by merge actions).
/\ LET stripe == GetStripe(thread)
incOverflow == (newVal # NULL) /\ (toIncLen[stripe] >= QueueSize)
decOverflow == (oldVal # NULL) /\ (toDecLen[stripe] >= QueueSize)
IN
IF incOverflow \/ decOverflow
THEN \* Overflow: atomic store happened, but buffering is deferred
\* Buffers stay full, merge will happen when lock is available (via MutatorWriteMergeInc/Dec)
/\ UNCHANGED <<roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
ELSE \* No overflow: buffer normally
/\ WriteBarrier(thread, destObj, destField, oldVal, newVal, desc)
/\ UNCHANGED <<roots, collecting, pendingWrites>>
\* Stack root assignment: immediate RC increment (not buffered)
\* When assigning val to a root variable named obj, we set roots[thread][val] = TRUE
\* to indicate that thread has a stack reference to val
\* Semantics: obj is root variable name, val is the object being assigned
\* When val=NULL, obj was the old root value, so we decrement rc[obj]
MutatorRootAssign(thread, obj, val) ==
/\ IF val # NULL
THEN /\ roots' = [roots EXCEPT ![thread][val] = TRUE]
/\ rc' = [rc EXCEPT ![val] = IF roots[thread][val] THEN @ ELSE @ + 1] \* Increment only if not already a root
ELSE /\ roots' = [roots EXCEPT ![thread][obj] = FALSE] \* Clear root when assigning NULL
/\ rc' = [rc EXCEPT ![obj] = IF roots[thread][obj] THEN @ - 1 ELSE @] \* Decrement old root value
  /\ UNCHANGED <<edges, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Collector Actions
\* ============================================================================
\* Collector acquires global lock for entire collection cycle
CollectorAcquireLock(thread) ==
/\ globalLock = NULL
/\ globalLock' = thread
/\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, mergedRoots, collecting, gcEnv, pendingWrites>>
CollectorMerge ==
/\ globalLock # NULL
/\ MergePendingRoots
CollectorStart ==
/\ globalLock # NULL
/\ StartCollection
\* Mark all roots gray (trial deletion phase)
CollectorMarkGray ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ \E rootIdx \in DOMAIN mergedRoots:
LET root == mergedRoots[rootIdx]
IN MarkGray(root.obj, root.desc)
\* Scan all roots (rescue phase)
CollectorScan ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ \E rootIdx \in DOMAIN mergedRoots:
LET root == mergedRoots[rootIdx]
IN Scan(root.obj, root.desc)
\* Collect white/gray objects (free phase)
CollectorCollect ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ \E rootIdx \in DOMAIN mergedRoots, targetColor \in {colGray, colWhite}:
LET root == mergedRoots[rootIdx]
IN CollectColor(root.obj, root.desc, targetColor)
CollectorEnd ==
/\ globalLock # NULL
/\ EndCollection
CollectorReleaseLock(thread) ==
/\ globalLock = thread
/\ globalLock' = NULL
/\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, mergedRoots, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Next State Relation
\* ============================================================================
Next ==
\/ \E thread \in Threads:
\E destObj \in Objects, oldVal, newVal \in Objects \cup {NULL}, desc \in ObjTypes:
\* Precondition: oldVal must match current graph state (user-level synchronization)
\* ASSUMPTION: Users synchronize pointer assignments with locks, so oldVal always matches
\* the value read before the atomic store. This prevents races at the user level.
/\ LET oldValMatches == CASE oldVal = NULL -> TRUE
[] oldVal \in Objects -> edges[destObj][oldVal]
[] OTHER -> FALSE
IN oldValMatches
/\ MutatorWrite(thread, destObj, "field", oldVal, newVal, desc)
\/ \E thread \in Threads:
\* Handle increment overflow: merge increment buffers when lock becomes available
MutatorWriteMergeInc(thread)
\/ \E thread \in Threads:
\* Handle decrement overflow: merge all buffers when lock becomes available
MutatorWriteMergeDec(thread)
\/ \E thread \in Threads:
       \E obj \in Objects, val \in Objects \cup {NULL}:
         \* obj indexes roots/rc when val = NULL, so it must be a real object
MutatorRootAssign(thread, obj, val)
\/ \E thread \in Threads:
CollectorAcquireLock(thread)
\/ CollectorMerge
\/ CollectorStart
\/ CollectorMarkGray
\/ CollectorScan
\/ CollectorCollect
\/ CollectorEnd
\/ \E thread \in Threads:
CollectorReleaseLock(thread)
\* ============================================================================
\* Initial State
\* ============================================================================
Init ==
  /\ edges = [x \in Objects |-> [y \in Objects |-> FALSE]]   \* Empty graph initially
  /\ roots = [t \in Threads |-> [x \in Objects |-> FALSE]]   \* No stack roots initially
  /\ rc = [x \in Objects |-> 0]                              \* Zero reference counts
  /\ color = [x \in Objects |-> colBlack]                    \* All objects black initially
  /\ inRoots = [x \in Objects |-> FALSE]                     \* No objects in roots array
  /\ toIncLen = [s \in 0..(NumStripes-1) |-> 0]
  /\ toInc = [s \in 0..(NumStripes-1) |-> <<>>]
  /\ toDecLen = [s \in 0..(NumStripes-1) |-> 0]
  /\ toDec = [s \in 0..(NumStripes-1) |-> <<>>]
  /\ lockInc = [s \in 0..(NumStripes-1) |-> NULL]
  /\ lockDec = [s \in 0..(NumStripes-1) |-> NULL]
/\ globalLock = NULL
/\ mergedRoots = <<>>
/\ collecting = FALSE
/\ gcEnv = [touched |-> 0, edges |-> 0, rcSum |-> 0, toFree |-> {}]
/\ pendingWrites = {}
/\ TypeOK
\* ============================================================================
\* Safety Properties
\* ============================================================================
\* Safety: Objects are only freed if they are unreachable from any thread's stack
\*
\* An object is reachable if:
\* - It is a direct stack root (roots[t][obj] = TRUE), OR
\* - There exists a path from a stack root to obj via heap edges
\*
\* Safety guarantee: If an object is reachable, then:
\* - It is not white (not marked for collection), OR
\* - It is in roots array (protected from collection), OR
\* - It is reachable from an object that will be rescued by scanBlack
\*
\* More precisely: Only closed cycles (zero external refs, unreachable) are freed.
\* Helper: Compute next set of reachable objects (one step of transitive closure)
ReachableStep(current) ==
current \cup UNION {{y \in Objects : edges[x][y]} : x \in current}
\* Compute the set of all reachable objects using bounded iteration
\* Since Objects is finite, we iterate at most Cardinality(Objects) times
\* This computes the transitive closure of edges starting from stack roots
\* We unroll the iteration explicitly to avoid recursion issues with TLC
ReachableSet ==
LET StackRoots == {x \in Objects : \E t \in Threads : roots[t][x]}
Step1 == ReachableStep(StackRoots)
Step2 == ReachableStep(Step1)
Step3 == ReachableStep(Step2)
Step4 == ReachableStep(Step3)
\* Add more steps if needed for larger object sets
\* For small models (2 objects), 4 steps is sufficient
IN Step4
\* Check if an object is reachable
Reachable(obj) == obj \in ReachableSet
\* Helper: Check if there's a path from 'from' to 'to'
\* For small object sets, we check all possible paths by checking
\* all combinations of intermediate objects
\* Path of length 0: from = to
\* Path of length 1: edges[from][to]
\* Path of length 2: \E i1: edges[from][i1] /\ edges[i1][to]
\* Path of length 3: \E i1, i2: edges[from][i1] /\ edges[i1][i2] /\ edges[i2][to]
\* etc. up to Cardinality(Objects)
HasPath(from, to) ==
\/ from = to
\/ edges[from][to]
\/ \E i1 \in Objects:
edges[from][i1] /\ (edges[i1][to] \/ \E i2 \in Objects:
edges[i1][i2] /\ (edges[i2][to] \/ \E i3 \in Objects:
edges[i2][i3] /\ edges[i3][to]))
\* Helper: Compute set of objects reachable from a given starting object
\* Uses the same iterative approach as ReachableSet
ReachableFrom(start) ==
LET Step1 == ReachableStep({start})
Step2 == ReachableStep(Step1)
Step3 == ReachableStep(Step2)
Step4 == ReachableStep(Step3)
IN Step4
\* Safety: Reachable objects are never freed (remain white without being collected)
\* A reachable object is safe if:
\* - It's not white (not marked for collection), OR
\* - It's in roots array (protected from collection), OR
\* - There exists a black object in ReachableSet such that obj is reachable from it
\* (the black object will be rescued by scanBlack, which rescues all white objects
\* reachable from black objects)
Safety ==
\A obj \in Objects:
IF obj \in ReachableSet
THEN \/ color[obj] # colWhite \* Not marked for collection
\/ inRoots[obj] \* Protected in roots array
\/ \E blackObj \in ReachableSet:
/\ color[blackObj] = colBlack \* Black object will be rescued by scanBlack
/\ obj \in ReachableFrom(blackObj) \* obj is reachable from blackObj
ELSE TRUE \* Unreachable objects may be freed (this is safe)
\* Invariant: Reference counts match logical counts after merge
\* (This is maintained by MergePendingRoots)
\* Note: Between merge and collection, RC = logicalRC.
\* During collection (after markGray), RC may be modified by trial deletion.
\* RC may be inconsistent when:
\* - globalLock = NULL (buffered changes pending)
\* - globalLock # NULL but merge hasn't happened yet (buffers still have pending changes)
\* RC must equal LogicalRC when:
\* - After merge (buffers are empty) and before collection starts
RCInvariant ==
IF globalLock = NULL
THEN TRUE \* Not in collection, RC may be inconsistent (buffered changes pending)
ELSE IF collecting = FALSE /\ \A s \in 0..(NumStripes-1): toIncLen[s] = 0 /\ toDecLen[s] = 0
THEN \A obj \in Objects: rc[obj] = LogicalRC(obj) \* After merge, buffers empty, RC = logical RC
ELSE TRUE \* During collection or before merge, RC may differ from logicalRC
\* Invariant: Only closed cycles are collected
\* (Objects with external refs are rescued by scanBlack)
CycleInvariant ==
\A obj \in Objects:
IF color[obj] = colWhite /\ ~inRoots[obj]
THEN ExternalRC(obj) = 0
ELSE TRUE
\* ============================================================================
\* Specification
\* ============================================================================
Spec == Init /\ [][Next]_<<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
THEOREM Spec => []Safety
THEOREM Spec => []RCInvariant
THEOREM Spec => []CycleInvariant
====

View File

@@ -0,0 +1,98 @@
discard """
cmd: "nim c --mm:yrc -d:useMalloc --threads:on $file"
output: "ok"
valgrind: "leaks"
disabled: "windows"
disabled: "freebsd"
disabled: "openbsd"
"""
# Test concurrent traversal and mutation of a shared cyclic list under YRC.
# Multiple threads race to replace nodes using a lock for synchronization.
# This exercises YRC's write barrier and cycle collection under contention.
import std/locks
type
Node = ref object
value: int
next: Node
proc newCycle(start, count: int): Node =
result = Node(value: start)
var cur = result
for i in 1..<count:
cur.next = Node(value: start + i)
cur = cur.next
cur.next = result # close the cycle
proc sumCycle(head: Node; count: int): int =
var cur = head
for i in 0..<count:
result += cur.value
cur = cur.next
const
NumThreads = 4
CycleLen = 6
Iterations = 50
var
shared: Node
sharedLock: Lock
threads: array[NumThreads, Thread[int]]
wins: array[NumThreads, int]
proc worker(id: int) {.thread.} =
{.cast(gcsafe).}:
for iter in 0..<Iterations:
# Under the lock, walk the shared list and replace a node's next pointer
withLock sharedLock:
var cur = shared
if cur == nil: continue
for step in 0..<CycleLen:
let nxt = cur.next
if nxt == nil: break
# Replace cur.next with a fresh node that points to nxt.next
let replacement = Node(value: id * 1000 + iter, next: nxt.next)
cur.next = replacement
wins[id] += 1
cur = cur.next
if cur == nil: break
# Outside the lock, create a local cycle to exercise the collector
let local = newCycle(id * 100 + iter, 3)
discard sumCycle(local, 3)
# Create initial shared cyclic list: 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 0
initLock(sharedLock)
shared = newCycle(0, CycleLen)
for i in 0..<NumThreads:
createThread(threads[i], worker, i)
for i in 0..<NumThreads:
joinThread(threads[i])
# Verify: the list is still traversable (no crashes, no dangling pointers).
var totalWins = 0
for i in 0..<NumThreads:
totalWins += wins[i]
# Walk the list to verify it's still a valid cycle (or chain)
var cur = shared
var seen = 0
var maxSteps = CycleLen * 3 # generous bound
while cur != nil and seen < maxSteps:
seen += 1
cur = cur.next
if cur == shared: break # completed the cycle
shared = nil
GC_fullCollect()
deinitLock(sharedLock)
if totalWins > 0 and seen > 0:
echo "ok"
else:
echo "FAIL: wins=", totalWins, " seen=", seen

View File

@@ -0,0 +1,74 @@
discard """
cmd: "nim c --mm:yrc -d:useMalloc --threads:on $file"
output: "ok"
valgrind: "leaks"
disabled: "windows"
disabled: "freebsd"
disabled: "openbsd"
"""
# Test sharing a cyclic list between threads under YRC.
type
Node = ref object
value: int
next: Node
proc newCycle(start, count: int): Node =
## Create a cyclic linked list: start -> start+1 -> ... -> start+count-1 -> start
result = Node(value: start)
var cur = result
for i in 1..<count:
cur.next = Node(value: start + i)
cur = cur.next
cur.next = result # close the cycle
proc sumCycle(head: Node; count: int): int =
var cur = head
for i in 0..<count:
result += cur.value
cur = cur.next
const
NumThreads = 4
NodesPerCycle = 5
var
shared: Node
threads: array[NumThreads, Thread[int]]
results: array[NumThreads, int]
proc worker(id: int) {.thread.} =
# Each thread reads the shared cycle and computes a sum.
# Also creates its own local cycle to exercise the collector.
{.cast(gcsafe).}:
let local = newCycle(id * 100, NodesPerCycle)
let localSum = sumCycle(local, NodesPerCycle)
let sharedSum = sumCycle(shared, NodesPerCycle)
results[id] = sharedSum + localSum
# Create a shared cyclic list: 0 -> 1 -> 2 -> 3 -> 4 -> 0
shared = newCycle(0, NodesPerCycle)
let expectedSharedSum = 0 + 1 + 2 + 3 + 4 # = 10
for i in 0..<NumThreads:
createThread(threads[i], worker, i)
for i in 0..<NumThreads:
joinThread(threads[i])
var allOk = true
for i in 0..<NumThreads:
let expectedLocal = i * 100 * NodesPerCycle + (NodesPerCycle * (NodesPerCycle - 1) div 2)
# sum of id*100, id*100+1, ..., id*100+4
let expected = expectedSharedSum + expectedLocal
if results[i] != expected:
echo "FAIL thread ", i, ": got ", results[i], " expected ", expected
allOk = false
shared = nil # drop the shared cycle, collector should reclaim it
GC_fullCollect()
if allOk:
echo "ok"