YRC: threadsafe cycle collection for Nim (#25495)

First performance numbers:

time tests/arc/torcbench   -- YRC
true peak memory: true

real    0m0,163s
user    0m0,161s
sys     0m0,002s


time tests/arc/torcbench   -- ORC
true peak memory: true

real    0m0,107s
user    0m0,104s
sys     0m0,003s


So it's about 1.5x slower (0.163 s vs. 0.107 s). But it's threadsafe and provably
correct (safety proven in Lean, plus model checking via TLA+).

Of course there is always the chance that the implementation is wrong
and doesn't match the model.
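
For orientation, a minimal sketch of a cyclic workload compiled under the new mode, assuming the `--mm:yrc` switch added in the option handling below (the file name and `Node` type are illustrative):

```nim
# cycle.nim -- build with: nim c --mm:yrc -d:release cycle.nim
type Node = ref object
  next: Node

proc makeCycle() =
  let a = Node()
  let b = Node(next: a)
  a.next = b            # a -> b -> a becomes garbage once the proc returns

for i in 0 ..< 100_000:
  makeCycle()
GC_fullCollect()        # under YRC this forwards to GC_runOrc (see yrc.nim below)
```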
Author: Andreas Rumpf
Date: 2026-02-10 00:04:11 +01:00 (committed by GitHub)
Parent: 9225d9e9e6  Commit: a690a9ac90
17 changed files with 1934 additions and 41 deletions


@@ -16,7 +16,7 @@
## implementation.
template detectVersion(field, corename) =
if m.g.config.selectedGC in {gcArc, gcOrc, gcAtomicArc, gcHooks}:
if m.g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc, gcHooks}:
result = 2
else:
result = 1


@@ -277,7 +277,7 @@ proc isInvalidReturnType(conf: ConfigRef; typ: PType, isProc = true): bool =
of ctStruct:
let t = skipTypes(rettype, typedescInst)
if rettype.isImportedCppType or t.isImportedCppType or
(typ.callConv == ccCDecl and conf.selectedGC in {gcArc, gcAtomicArc, gcOrc}):
(typ.callConv == ccCDecl and conf.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcYrc}):
# prevents nrvo for cdecl procs; # bug #23401
result = false
else:
@@ -1692,7 +1692,7 @@ proc genHook(m: BModule; t: PType; info: TLineInfo; op: TTypeAttachedOp; result:
echo "ayclic but has this =trace ", t, " ", theProc.ast
else:
when false:
if op == attachedTrace and m.config.selectedGC == gcOrc and
if op == attachedTrace and m.config.selectedGC in {gcOrc, gcYrc} and
containsGarbageCollectedRef(t):
# unfortunately this check is wrong for an object type that only contains
# .cursor fields like 'Node' inside 'cycleleak'.


@@ -245,7 +245,7 @@ proc processCompile(conf: ConfigRef; filename: string) =
extccomp.addExternalFileToCompile(conf, found)
const
errNoneBoehmRefcExpectedButXFound = "'arc', 'orc', 'atomicArc', 'markAndSweep', 'boehm', 'go', 'none', 'regions', or 'refc' expected, but '$1' found"
errNoneBoehmRefcExpectedButXFound = "'arc', 'orc', 'yrc', 'atomicArc', 'markAndSweep', 'boehm', 'go', 'none', 'regions', or 'refc' expected, but '$1' found"
errNoneSpeedOrSizeExpectedButXFound = "'none', 'speed' or 'size' expected, but '$1' found"
errGuiConsoleOrLibExpectedButXFound = "'gui', 'console', 'lib' or 'staticlib' expected, but '$1' found"
errInvalidExceptionSystem = "'goto', 'setjmp', 'cpp' or 'quirky' expected, but '$1' found"
@@ -266,6 +266,7 @@ proc testCompileOptionArg*(conf: ConfigRef; switch, arg: string, info: TLineInfo
of "markandsweep": result = conf.selectedGC == gcMarkAndSweep
of "destructors", "arc": result = conf.selectedGC == gcArc
of "orc": result = conf.selectedGC == gcOrc
of "yrc": result = conf.selectedGC == gcYrc
of "hooks": result = conf.selectedGC == gcHooks
of "go": result = conf.selectedGC == gcGo
of "none": result = conf.selectedGC == gcNone
@@ -570,6 +571,7 @@ proc unregisterArcOrc*(conf: ConfigRef) =
undefSymbol(conf.symbols, "gcdestructors")
undefSymbol(conf.symbols, "gcarc")
undefSymbol(conf.symbols, "gcorc")
undefSymbol(conf.symbols, "gcyrc")
undefSymbol(conf.symbols, "gcatomicarc")
undefSymbol(conf.symbols, "nimSeqsV2")
undefSymbol(conf.symbols, "nimV2")
@@ -603,6 +605,10 @@ proc processMemoryManagementOption(switch, arg: string, pass: TCmdLinePass,
conf.selectedGC = gcOrc
defineSymbol(conf.symbols, "gcorc")
registerArcOrc(pass, conf)
of "yrc":
conf.selectedGC = gcYrc
defineSymbol(conf.symbols, "gcyrc")
registerArcOrc(pass, conf)
of "atomicarc":
conf.selectedGC = gcAtomicArc
defineSymbol(conf.symbols, "gcatomicarc")


@@ -69,7 +69,7 @@ proc hasDestructor(c: Con; t: PType): bool {.inline.} =
result = ast.hasDestructor(t)
when toDebug.len > 0:
# for more effective debugging
if not result and c.graph.config.selectedGC in {gcArc, gcOrc, gcAtomicArc}:
if not result and c.graph.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}:
assert(not containsGarbageCollectedRef(t))
proc getTemp(c: var Con; s: var Scope; typ: PType; info: TLineInfo): PNode =
@@ -165,7 +165,7 @@ proc isLastReadImpl(n: PNode; c: var Con; scope: var Scope): bool =
template hasDestructorOrAsgn(c: var Con, typ: PType): bool =
# bug #23354; an object type could have a non-trivial assignements when it is passed to a sink parameter
hasDestructor(c, typ) or (c.graph.config.selectedGC in {gcArc, gcOrc, gcAtomicArc} and
hasDestructor(c, typ) or (c.graph.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc} and
typ.kind == tyObject and not isTrivial(getAttachedOp(c.graph, typ, attachedAsgn)))
proc isLastRead(n: PNode; c: var Con; s: var Scope): bool =
@@ -329,14 +329,14 @@ proc isCriticalLink(dest: PNode): bool {.inline.} =
result = dest.kind != nkSym
proc finishCopy(c: var Con; result, dest: PNode; flags: set[MoveOrCopyFlag]; isFromSink: bool) =
if c.graph.config.selectedGC == gcOrc and IsExplicitSink notin flags:
if c.graph.config.selectedGC in {gcOrc, gcYrc} and IsExplicitSink notin flags:
# add cyclic flag, but not to sink calls, which IsExplicitSink generates
let t = dest.typ.skipTypes(tyUserTypeClasses + {tyGenericInst, tyAlias, tySink, tyDistinct})
if cyclicType(c.graph, t):
result.add boolLit(c.graph, result.info, isFromSink or isCriticalLink(dest))
proc genMarkCyclic(c: var Con; result, dest: PNode) =
if c.graph.config.selectedGC == gcOrc:
if c.graph.config.selectedGC in {gcOrc, gcYrc}:
let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink, tyDistinct})
if cyclicType(c.graph, t):
if t.kind == tyRef:
@@ -495,7 +495,7 @@ proc passCopyToSink(n: PNode; c: var Con; s: var Scope): PNode =
localError(c.graph.config, n.info, errFailedMove,
("cannot move '$1', passing '$1' to a sink parameter introduces an implicit copy") % $n)
else:
if c.graph.config.selectedGC in {gcArc, gcOrc, gcAtomicArc}:
if c.graph.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}:
assert(not containsManagedMemory(nTyp))
if nTyp.skipTypes(abstractInst).kind in {tyOpenArray, tyVarargs}:
localError(c.graph.config, n.info, "cannot create an implicit openArray copy to be passed to a sink parameter")
@@ -926,7 +926,7 @@ proc p(n: PNode; c: var Con; s: var Scope; mode: ProcessMode; tmpFlags = {sfSing
if n[0].kind == nkSym and n[0].sym.magic in {mNew, mNewFinalize}:
result[0] = copyTree(n[0])
if c.graph.config.selectedGC in {gcHooks, gcArc, gcAtomicArc, gcOrc}:
if c.graph.config.selectedGC in {gcHooks, gcArc, gcAtomicArc, gcOrc, gcYrc}:
let destroyOld = c.genDestroy(result[1])
result = newTree(nkStmtList, destroyOld, result)
else:


@@ -163,7 +163,7 @@ proc fillBodyObj(c: var TLiftCtx; n, body, x, y: PNode; enforceDefaultOp: bool,
if c.filterDiscriminator != nil: return
let f = n.sym
let b = if c.kind == attachedTrace: y else: y.dotField(f)
if (sfCursor in f.flags and c.g.config.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcHooks}) or
if (sfCursor in f.flags and c.g.config.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcYrc, gcHooks}) or
enforceDefaultOp:
defaultOp(c, f.typ, body, x.dotField(f), b)
else:
@@ -730,14 +730,43 @@ proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
dest[] = source
decRef tmp
For YRC the write barrier is more complicated still and must be:
let tmp = dest
# assignment must come first so that the collector sees the most-recent graph:
atomic: dest[] = source
# Then teach the cycle collector about the changed edges (these use locks, see yrc.nim):
incRef source
decRef tmp
This is implemented as a single runtime call (nimAsgnYrc / nimSinkYrc).
]#
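#[ For illustration only, the ordering above spelled out as a sketch; `bufferedIncRef`
and `bufferedDecRef` are stand-ins for the per-stripe queueing that lib/system/yrc.nim
actually performs:

  proc yrcAsgnSketch(dest: ptr pointer; src: pointer) =
    let tmp = dest[]                         # remember the old target
    atomicStoreN(dest, src, ATOMIC_RELEASE)  # publish the new edge first
    if src != nil: bufferedIncRef(src)       # deferred: per-stripe toInc queue
    if tmp != nil: bufferedDecRef(tmp)       # deferred: per-stripe toDec queue
]#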
var actions = newNodeI(nkStmtList, c.info)
let elemType = t.elementType
createTypeBoundOps(c.g, c.c, elemType, c.info, c.idgen)
let isCyclic = c.g.config.selectedGC == gcOrc and types.canFormAcycle(c.g, elemType)
let isInheritableAcyclicRef = c.g.config.selectedGC == gcOrc and
# YRC uses dedicated runtime procs for the entire write barrier:
if c.g.config.selectedGC == gcYrc:
let desc =
if isFinal(elemType):
let ti = genBuiltin(c, mGetTypeInfoV2, "getTypeInfoV2", newNodeIT(nkType, x.info, elemType))
ti.typ = getSysType(c.g, c.info, tyPointer)
ti
else:
newNodeIT(nkNilLit, c.info, getSysType(c.g, c.info, tyPointer))
case c.kind
of attachedAsgn, attachedDup:
body.add callCodegenProc(c.g, "nimAsgnYrc", c.info, genAddr(c, x), y, desc)
return
of attachedSink:
body.add callCodegenProc(c.g, "nimSinkYrc", c.info, genAddr(c, x), y, desc)
return
else: discard # fall through for destructor, trace, wasMoved
let isCyclic = c.g.config.selectedGC in {gcOrc, gcYrc} and types.canFormAcycle(c.g, elemType)
let isInheritableAcyclicRef = c.g.config.selectedGC in {gcOrc, gcYrc} and
(not isPureObject(elemType)) and
tfAcyclic in skipTypes(elemType, abstractInst+{tyOwned}-{tyTypeDesc}).flags
# dynamic Acyclic refs need to use dyn decRef
@@ -819,7 +848,26 @@ proc atomicClosureOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
let xenv = genBuiltin(c, mAccessEnv, "accessEnv", x)
xenv.typ = getSysType(c.g, c.info, tyPointer)
let isCyclic = c.g.config.selectedGC == gcOrc
# Closures are (fnPtr, env) pairs. nimAsgnYrc/nimSinkYrc handle the env pointer
# (atomic store + buffered inc/dec). We also need newAsgnStmt to copy the fnPtr.
if c.g.config.selectedGC == gcYrc:
let nilDesc = newNodeIT(nkNilLit, c.info, getSysType(c.g, c.info, tyPointer))
let yenv = genBuiltin(c, mAccessEnv, "accessEnv", y)
yenv.typ = getSysType(c.g, c.info, tyPointer)
case c.kind
of attachedAsgn, attachedDup:
# nimAsgnYrc: save old env, atomic store new env, inc new env, dec old env
body.add callCodegenProc(c.g, "nimAsgnYrc", c.info, genAddr(c, xenv), yenv, nilDesc)
# Raw struct copy to also update the function pointer (env write is redundant but benign)
body.add newAsgnStmt(x, y)
return
of attachedSink:
body.add callCodegenProc(c.g, "nimSinkYrc", c.info, genAddr(c, xenv), yenv, nilDesc)
body.add newAsgnStmt(x, y)
return
else: discard # fall through for destructor, trace, wasMoved
let isCyclic = c.g.config.selectedGC in {gcOrc, gcYrc}
let tmp =
if isCyclic and c.kind in {attachedAsgn, attachedSink, attachedDup}:
declareTempOf(c, body, xenv)
@@ -852,7 +900,6 @@ proc atomicClosureOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
body.add genIf(c, cond, actions)
else:
body.add genIf(c, yenv, callCodegenProc(c.g, "nimIncRef", c.info, yenv))
body.add genIf(c, cond, actions)
body.add newAsgnStmt(x, y)
of attachedDup:
@@ -937,7 +984,7 @@ proc closureOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
call[1] = y
body.add newAsgnStmt(x, call)
elif (optOwnedRefs in c.g.config.globalOptions and
optRefCheck in c.g.config.options) or c.g.config.selectedGC in {gcArc, gcAtomicArc, gcOrc}:
optRefCheck in c.g.config.options) or c.g.config.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcYrc}:
let xx = genBuiltin(c, mAccessEnv, "accessEnv", x)
xx.typ = getSysType(c.g, c.info, tyPointer)
case c.kind
@@ -992,7 +1039,7 @@ proc fillBody(c: var TLiftCtx; t: PType; body, x, y: PNode) =
tyPtr, tyUncheckedArray, tyVar, tyLent:
defaultOp(c, t, body, x, y)
of tyRef:
if c.g.config.selectedGC in {gcArc, gcOrc, gcAtomicArc}:
if c.g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}:
atomicRefOp(c, t, body, x, y)
elif (optOwnedRefs in c.g.config.globalOptions and
optRefCheck in c.g.config.options):
@@ -1001,7 +1048,7 @@ proc fillBody(c: var TLiftCtx; t: PType; body, x, y: PNode) =
defaultOp(c, t, body, x, y)
of tyProc:
if t.callConv == ccClosure:
if c.g.config.selectedGC in {gcArc, gcOrc, gcAtomicArc}:
if c.g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}:
atomicClosureOp(c, t, body, x, y)
else:
closureOp(c, t, body, x, y)
@@ -1125,7 +1172,7 @@ proc symDupPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttache
result.typ.addParam src
if g.config.selectedGC == gcOrc and
if g.config.selectedGC in {gcOrc, gcYrc} and
cyclicType(g, typ.skipTypes(abstractInst)):
let cycleParam = newSym(skParam, getIdent(g.cache, "cyclic"),
idgen, result, info)
@@ -1152,7 +1199,7 @@ proc symPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttachedOp
let src = newSym(skParam, getIdent(g.cache, if kind == attachedTrace: "env" else: "src"),
idgen, result, info)
if kind == attachedDestructor and g.config.selectedGC in {gcArc, gcOrc, gcAtomicArc} and
if kind == attachedDestructor and g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc} and
((g.config.isDefined("nimPreviewNonVarDestructor") and not isDiscriminant) or (typ.kind in {tyRef, tyString, tySequence})):
dest.typ = typ
else:
@@ -1168,7 +1215,7 @@ proc symPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttachedOp
if kind notin {attachedDestructor, attachedWasMoved}:
result.typ.addParam src
if kind == attachedAsgn and g.config.selectedGC == gcOrc and
if kind == attachedAsgn and g.config.selectedGC in {gcOrc, gcYrc} and
cyclicType(g, typ.skipTypes(abstractInst)):
let cycleParam = newSym(skParam, getIdent(g.cache, "cyclic"),
idgen, result, info)
@@ -1226,7 +1273,7 @@ proc produceSym(g: ModuleGraph; c: PContext; typ: PType; kind: TTypeAttachedOp;
else:
var tk: TTypeKind
var skipped: PType = nil
if g.config.selectedGC in {gcArc, gcOrc, gcHooks, gcAtomicArc}:
if g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcHooks, gcAtomicArc}:
skipped = skipTypes(typ, {tyOrdinal, tyRange, tyInferred, tyGenericInst, tyStatic, tyAlias, tySink})
tk = skipped.kind
else:
@@ -1348,7 +1395,7 @@ proc createTypeBoundOps(g: ModuleGraph; c: PContext; orig: PType; info: TLineInf
# we do not generate '=trace' procs if we
# have the cycle detection disabled, saves code size.
let lastAttached = if g.config.selectedGC == gcOrc: attachedTrace
let lastAttached = if g.config.selectedGC in {gcOrc, gcYrc}: attachedTrace
else: attachedSink
# bug #15122: We need to produce all prototypes before entering the


@@ -195,6 +195,7 @@ type
gcRegions = "regions"
gcArc = "arc"
gcOrc = "orc"
gcYrc = "yrc" # thread-safe ORC (concurrent cycle collector)
gcAtomicArc = "atomicArc"
gcMarkAndSweep = "markAndSweep"
gcHooks = "hooks"


@@ -231,7 +231,7 @@ proc runNimScript*(cache: IdentCache; scriptName: AbsoluteFile;
if optOwnedRefs in oldGlobalOptions:
conf.globalOptions.incl {optTinyRtti, optOwnedRefs, optSeqDestructors}
defineSymbol(conf.symbols, "nimv2")
if conf.selectedGC in {gcArc, gcOrc, gcAtomicArc}:
if conf.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}:
conf.globalOptions.incl {optTinyRtti, optSeqDestructors}
defineSymbol(conf.symbols, "nimv2")
defineSymbol(conf.symbols, "gcdestructors")
@@ -241,6 +241,8 @@ proc runNimScript*(cache: IdentCache; scriptName: AbsoluteFile;
defineSymbol(conf.symbols, "gcarc")
of gcOrc:
defineSymbol(conf.symbols, "gcorc")
of gcYrc:
defineSymbol(conf.symbols, "gcyrc")
of gcAtomicArc:
defineSymbol(conf.symbols, "gcatomicarc")
else:


@@ -1756,7 +1756,7 @@ proc trackProc*(c: PContext; s: PSym, body: PNode) =
let param = params[i].sym
let typ = param.typ
if isSinkTypeForParam(typ) or
(t.config.selectedGC in {gcArc, gcOrc, gcAtomicArc} and
(t.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc} and
(isClosure(typ.skipTypes(abstractInst)) or param.id in t.escapingParams)):
createTypeBoundOps(t, typ, param.info)
if isOutParam(typ) and param.id notin t.init and s.magic == mNone:

View File

@@ -2175,7 +2175,7 @@ proc bindTypeHook(c: PContext; s: PSym; n: PNode; op: TTypeAttachedOp) =
template notRefc: bool =
# fixes refc with non-var destructor; cancel warnings (#23156)
c.config.backend == backendJs or
c.config.selectedGC in {gcArc, gcAtomicArc, gcOrc}
c.config.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcYrc}
let cond = case op
of attachedWasMoved:
t.len == 2 and t.returnType == nil and t.firstParamType.kind == tyVar

View File

@@ -125,7 +125,7 @@ proc unsafeAddr*[T](x: T): ptr T {.magic: "Addr", noSideEffect.} =
const ThisIsSystem = true
const arcLikeMem = defined(gcArc) or defined(gcAtomicArc) or defined(gcOrc)
const arcLikeMem = defined(gcArc) or defined(gcAtomicArc) or defined(gcOrc) or defined(gcYrc)
when defined(nimAllowNonVarDestructor) and arcLikeMem:
proc new*[T](a: var ref T, finalizer: proc (x: T) {.nimcall.}) {.
@@ -356,7 +356,7 @@ proc low*(x: string): int {.magic: "Low", noSideEffect.}
## See also:
## * `high(string) <#high,string>`_
when not defined(gcArc) and not defined(gcOrc) and not defined(gcAtomicArc):
when not defined(gcArc) and not defined(gcOrc) and not defined(gcYrc) and not defined(gcAtomicArc):
proc shallowCopy*[T](x: var T, y: T) {.noSideEffect, magic: "ShallowCopy".}
## Use this instead of `=` for a `shallow copy`:idx:.
##
@@ -407,7 +407,7 @@ when defined(nimHasDup):
proc `=sink`*[T](x: var T; y: T) {.inline, nodestroy, magic: "Asgn".} =
## Generic `sink`:idx: implementation that can be overridden.
when defined(gcArc) or defined(gcOrc) or defined(gcAtomicArc):
when defined(gcArc) or defined(gcOrc) or defined(gcYrc) or defined(gcAtomicArc):
x = y
else:
shallowCopy(x, y)
@@ -2561,7 +2561,7 @@ when compileOption("rangechecks"):
else:
template rangeCheck*(cond) = discard
when not defined(gcArc) and not defined(gcOrc) and not defined(gcAtomicArc):
when not defined(gcArc) and not defined(gcOrc) and not defined(gcYrc) and not defined(gcAtomicArc):
proc shallow*[T](s: var seq[T]) {.noSideEffect, inline.} =
## Marks a sequence `s` as `shallow`:idx:. Subsequent assignments will not
## perform deep copies of `s`.
@@ -2630,7 +2630,7 @@ when hasAlloc or defined(nimscript):
setLen(x, xl+item.len)
var j = xl-1
while j >= i:
when defined(gcArc) or defined(gcOrc) or defined(gcAtomicArc):
when defined(gcArc) or defined(gcOrc) or defined(gcYrc) or defined(gcAtomicArc):
x[j+item.len] = move x[j]
else:
shallowCopy(x[j+item.len], x[j])

View File

@@ -16,7 +16,7 @@ runtime type and only contains a reference count.
{.push raises: [], rangeChecks: off.}
when defined(gcOrc):
when defined(gcOrc) or defined(gcYrc):
const
rcIncrement = 0b10000 # so that lowest 4 bits are not touched
rcMask = 0b1111
@@ -36,12 +36,12 @@ type
rc: int # the object header is now a single RC field.
# we could remove it in non-debug builds for the 'owned ref'
# design but this seems unwise.
when defined(gcOrc):
when defined(gcOrc) or defined(gcYrc):
rootIdx: int # thanks to this we can delete potential cycle roots
# in O(1) without doubly linked lists
when defined(nimArcDebug) or defined(nimArcIds):
refId: int
when defined(gcOrc) and orcLeakDetector:
when (defined(gcOrc) or defined(gcYrc)) and orcLeakDetector:
filename: cstring
line: int
@@ -74,7 +74,7 @@ elif defined(nimArcIds):
const traceId = -1
when defined(gcAtomicArc) and hasThreadSupport:
when (defined(gcAtomicArc) or defined(gcYrc)) and hasThreadSupport:
template decrement(cell: Cell): untyped =
discard atomicDec(cell.rc, rcIncrement)
template increment(cell: Cell): untyped =
@@ -119,7 +119,7 @@ proc nimNewObjUninit(size, alignment: int): pointer {.compilerRtl.} =
else:
result = cast[ptr RefHeader](alignedAlloc(s, alignment) +! hdrSize)
head(result).rc = 0
when defined(gcOrc):
when defined(gcOrc) or defined(gcYrc):
head(result).rootIdx = 0
when defined(nimArcDebug):
head(result).refId = gRefId
@@ -157,7 +157,7 @@ proc nimIncRef(p: pointer) {.compilerRtl, inl.} =
when traceCollector:
cprintf("[INCREF] %p\n", head(p))
when not defined(gcOrc) or defined(nimThinout):
when not (defined(gcOrc) or defined(gcYrc)) or defined(nimThinout):
proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
# This is only used by the old RTTI mechanism and we know
# that 'dest[]' is nil and needs no destruction. Which is really handy
@@ -208,7 +208,9 @@ proc nimDestroyAndDispose(p: pointer) {.compilerRtl, quirky, raises: [].} =
cstderr.rawWrite "has destructor!\n"
nimRawDispose(p, rti.align)
when defined(gcOrc):
when defined(gcYrc):
include yrc
elif defined(gcOrc):
when defined(nimThinout):
include cyclebreaker
else:
@@ -225,7 +227,7 @@ proc nimDecRefIsLast(p: pointer): bool {.compilerRtl, inl.} =
writeStackTrace()
cfprintf(cstderr, "[DecRef] %p %ld\n", p, cell.count)
when defined(gcAtomicArc) and hasThreadSupport:
when (defined(gcAtomicArc) or defined(gcYrc)) and hasThreadSupport:
# `atomicDec` returns the new value
if atomicDec(cell.rc, rcIncrement) == -rcIncrement:
result = true
@@ -251,7 +253,7 @@ proc GC_ref*[T](x: ref T) =
## New runtime only supports this operation for 'ref T'.
if x != nil: nimIncRef(cast[pointer](x))
when not defined(gcOrc):
when not (defined(gcOrc) or defined(gcYrc)):
template GC_fullCollect* =
## Forces a full garbage collection pass. With `--mm:arc` a nop.
discard


@@ -50,7 +50,7 @@ proc deallocSharedImpl(p: pointer) = deallocImpl(p)
proc GC_disable() = discard
proc GC_enable() = discard
when not defined(gcOrc):
when not defined(gcOrc) and not defined(gcYrc):
proc GC_fullCollect() = discard
proc GC_enableMarkAndSweep() = discard
proc GC_disableMarkAndSweep() = discard

lib/system/yrc.nim  (new file, 549 lines)

@@ -0,0 +1,549 @@
#
# YRC: Thread-safe ORC (concurrent cycle collector).
# Same API as orc.nim, but with striped queues and a global lock for merge/collect.
# Destructors for refs run at collection time, not immediately on last decRef.
# See yrc_proof.lean for a Lean 4 proof of safety and deadlock freedom.
#
# ## Key Invariant: Topology vs. Reference Counts
#
# Only `obj.field = x` can change the topology of the heap graph (heap-to-heap
# edges). Local variable assignments (`var local = someRef`) affect reference
# counts but never create heap-to-heap edges and thus cannot create cycles.
#
# The actual pointer write in `obj.field = x` happens immediately and lock-free —
# the graph topology is always up-to-date in memory. Only the RC adjustments are
# deferred: increments and decrements are buffered into per-stripe queues
# (`toInc`, `toDec`) protected by fine-grained per-stripe locks.
#
# When `collectCycles` runs it takes the global lock, drains all stripe buffers
# via `mergePendingRoots`, and then traces the physical pointer graph (via
# `traceImpl`) to detect cycles. This is sound because `trace` follows the actual
# pointer values in memory — which are always current — and uses the reconciled
# RCs only to identify candidate roots and confirm garbage.
#
# In summary: the physical pointer graph is always consistent (writes are
# immediate); only the reference counts are eventually consistent (writes are
# buffered). The per-stripe locks are cheap; the expensive global lock is only
# needed when interpreting the RCs during collection.
#
# ## Why No Write Barrier Is Needed
#
# The classic concurrent-GC hazard is the "lost object" problem: during
# collection the mutator executes `A.field = B` where A is already scanned
# (black), B is reachable only through an unscanned (gray) object C, and then
# C's reference to B is removed. The collector never discovers B and frees it
# while A still points to it. Traditional concurrent collectors need write
# barriers to prevent this.
#
# This problem structurally cannot arise in YRC because the cycle collector only
# frees *closed cycles* — subgraphs where every reference to every member comes
# from within the group, with zero external references. To execute `A.field = B`
# the mutator must hold a reference to A, which means A has an external reference
# (from the stack) that is not a heap-to-heap edge. During trial deletion
# (`markGray`) only internal edges are subtracted from RCs, so A's external
# reference survives, `scan` finds A's RC >= 0, calls `scanBlack`, and rescues A
# and everything reachable from it — including B. In short: the mutator can only
# modify objects it can reach, but the cycle collector only frees objects nothing
# external can reach. The two conditions are mutually exclusive.
#
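# A concrete instance (hypothetical user code): a mutator that holds `a` in a local runs
#
#   a.field = b      # nimAsgnYrc publishes the edge a -> b atomically
#   c.field = nil    # the only other path to b disappears
#
# Even if collectCycles runs in between, trial deletion never subtracts a's stack
# reference, so a's RC stays >= 0, scanBlack(a) re-traces the current edge a -> b,
# and b is rescued.
#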
#[
The problem described in Bacon01 is: during markGray/scan, a mutator concurrently
does X.field = Z (was X→Y), changing the physical graph while the collector is tracing
it. The collector might see stale or new edges. The reasons this is still safe:
1. Stale edges cancel with unbuffered decrements: if the collector sees the old edge X→Y
   (the mutator already wrote X→Z and buffered dec(Y)), the phantom trial deletion and the
   unbuffered dec cancel — Y's effective RC is correct.
2. scanBlack rescues via current physical edges: if X has external refs (the merged RC reflects
   the mutator's access), scanBlack(X) re-traces X and follows the current physical edge X→Z,
   incrementing Z's RC and marking it black. Z survives.
3. The rcSum == edges fast path is conservative: any discrepancy between the physical graph and
   the merged state (stale or new edges) causes rcSum != edges, falling back to the slow path,
   which rescues anything with RC >= 0.
4. Unreachable cycles are truly unreachable: the mutator can only reach objects through chains
   rooted in merged references. If a cycle has zero external refs at merge time, no mutator
   can reach it.
]#
{.push raises: [].}
include cellseqs_v2
import std/locks
const
NumStripes = 64
QueueSize = 128
RootsThreshold = 10
colBlack = 0b000
colGray = 0b001
colWhite = 0b010
maybeCycle = 0b100
inRootsFlag = 0b1000
colorMask = 0b011
logOrc = defined(nimArcIds)
type
TraceProc = proc (p, env: pointer) {.nimcall, benign, raises: [].}
DisposeProc = proc (p: pointer) {.nimcall, benign, raises: [].}
template color(c): untyped = c.rc and colorMask
template setColor(c, col) =
when col == colBlack:
c.rc = c.rc and not colorMask
else:
c.rc = c.rc and not colorMask or col
const
optimizedOrc = false
useJumpStack = false
type
GcEnv = object
traceStack: CellSeq[ptr pointer]
when useJumpStack:
jumpStack: CellSeq[ptr pointer]
toFree: CellSeq[Cell]
freed, touched, edges, rcSum: int
keepThreshold: bool
proc trace(s: Cell; desc: PNimTypeV2; j: var GcEnv) {.inline.} =
if desc.traceImpl != nil:
var p = s +! sizeof(RefHeader)
cast[TraceProc](desc.traceImpl)(p, addr(j))
include threadids
type
Stripe = object
when not defined(yrcAtomics):
lockInc: Lock
toIncLen: int
toInc: array[QueueSize, Cell]
lockDec: Lock
toDecLen: int
toDec: array[QueueSize, (Cell, PNimTypeV2)]
type
PreventThreadFromCollectProc* = proc(): bool {.nimcall, benign, raises: [].}
## Callback run before this thread runs the cycle collector.
## Return `true` to prevent this thread from running the collector (e.g. a hard
## realtime thread), `false` to allow collection (see `mayRunCycleCollect`).
## Invoked while holding the global lock; must not call back into YRC.
var
gYrcGlobalLock: Lock
roots: CellSeq[Cell] # merged roots, used under global lock
stripes: array[NumStripes, Stripe]
rootsThreshold: int = 128
defaultThreshold = when defined(nimFixedOrc): 10_000 else: 128
gPreventThreadFromCollectProc: PreventThreadFromCollectProc = nil
proc GC_setPreventThreadFromCollectProc*(cb: PreventThreadFromCollectProc) =
##[ Can be used to customize the cycle collector for a thread. For example,
to ensure that a hard realtime thread cannot run the cycle collector use:
```nim
var hardRealTimeThread: int
GC_setPreventThreadFromCollectProc(proc(): bool {.nimcall.} = hardRealTimeThread == getThreadId())
```
To ensure that a hard realtime thread cannot be involved in any cycle collector activity use:
```nim
GC_setPreventThreadFromCollectProc(proc(): bool {.nimcall.} =
  if hardRealTimeThread == getThreadId():
    writeStackTrace()
    echo "Realtime thread involved in unpredictable cycle collector activity!"
  result = false)
```
]##
gPreventThreadFromCollectProc = cb
proc GC_getPreventThreadFromCollectProc*(): PreventThreadFromCollectProc =
## Returns the current "prevent thread from collecting proc".
## Typically `nil` if not set.
result = gPreventThreadFromCollectProc
proc mayRunCycleCollect(): bool {.inline.} =
if gPreventThreadFromCollectProc == nil: true
else: not gPreventThreadFromCollectProc()
proc getStripeIdx(): int {.inline.} =
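# NumStripes is a power of two, so masking the thread id is a cheap modulo into the stripes array.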
getThreadId() and (NumStripes - 1)
proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
let h = head(p)
when optimizedOrc:
if cyclic: h.rc = h.rc or maybeCycle
when defined(yrcAtomics):
let s = getStripeIdx()
let slot = atomicFetchAdd(addr stripes[s].toIncLen, 1, ATOMIC_ACQ_REL)
if slot < QueueSize:
atomicStoreN(addr stripes[s].toInc[slot], h, ATOMIC_RELEASE)
else:
withLock gYrcGlobalLock:
h.rc = h.rc +% rcIncrement
for i in 0..<NumStripes:
let len = atomicExchangeN(addr stripes[i].toIncLen, 0, ATOMIC_ACQUIRE)
for j in 0..<min(len, QueueSize):
let x = atomicLoadN(addr stripes[i].toInc[j], ATOMIC_ACQUIRE)
x.rc = x.rc +% rcIncrement
else:
let idx = getStripeIdx()
while true:
var overflow = false
withLock stripes[idx].lockInc:
if stripes[idx].toIncLen < QueueSize:
stripes[idx].toInc[stripes[idx].toIncLen] = h
stripes[idx].toIncLen += 1
else:
overflow = true
if overflow:
withLock gYrcGlobalLock:
for i in 0..<NumStripes:
withLock stripes[i].lockInc:
for j in 0..<stripes[i].toIncLen:
let x = stripes[i].toInc[j]
x.rc = x.rc +% rcIncrement
stripes[i].toIncLen = 0
else:
break
proc mergePendingRoots() =
for i in 0..<NumStripes:
when defined(yrcAtomics):
let incLen = atomicExchangeN(addr stripes[i].toIncLen, 0, ATOMIC_ACQUIRE)
for j in 0..<min(incLen, QueueSize):
let x = atomicLoadN(addr stripes[i].toInc[j], ATOMIC_ACQUIRE)
x.rc = x.rc +% rcIncrement
else:
withLock stripes[i].lockInc:
for j in 0..<stripes[i].toIncLen:
let x = stripes[i].toInc[j]
x.rc = x.rc +% rcIncrement
stripes[i].toIncLen = 0
withLock stripes[i].lockDec:
for j in 0..<stripes[i].toDecLen:
let (c, desc) = stripes[i].toDec[j]
c.rc = c.rc -% rcIncrement
if (c.rc and inRootsFlag) == 0:
c.rc = c.rc or inRootsFlag
if roots.d == nil: init(roots)
add(roots, c, desc)
stripes[i].toDecLen = 0
proc collectCycles()
when logOrc or orcLeakDetector:
proc writeCell(msg: cstring; s: Cell; desc: PNimTypeV2) =
when orcLeakDetector:
cfprintf(cstderr, "%s %s file: %s:%ld; color: %ld; thread: %ld\n",
msg, desc.name, s.filename, s.line, s.color, getThreadId())
else:
cfprintf(cstderr, "%s %s %ld root index: %ld; RC: %ld; color: %ld; thread: %ld\n",
msg, desc.name, s.refId, (if (s.rc and inRootsFlag) != 0: 1 else: 0), s.rc shr rcShift, s.color, getThreadId())
proc free(s: Cell; desc: PNimTypeV2) {.inline.} =
when traceCollector:
cprintf("[From ] %p rc %ld color %ld\n", s, s.rc shr rcShift, s.color)
let p = s +! sizeof(RefHeader)
when logOrc: writeCell("free", s, desc)
if desc.destructor != nil:
cast[DestructorProc](desc.destructor)(p)
nimRawDispose(p, desc.align)
template orcAssert(cond, msg) =
when logOrc:
if not cond:
cfprintf(cstderr, "[Bug!] %s\n", msg)
rawQuit 1
when logOrc:
proc strstr(s, sub: cstring): cstring {.header: "<string.h>", importc.}
proc nimTraceRef(q: pointer; desc: PNimTypeV2; env: pointer) {.compilerRtl, inl.} =
let p = cast[ptr pointer](q)
if p[] != nil:
orcAssert strstr(desc.name, "TType") == nil, "following a TType but it's acyclic!"
var j = cast[ptr GcEnv](env)
j.traceStack.add(p, desc)
proc nimTraceRefDyn(q: pointer; env: pointer) {.compilerRtl, inl.} =
let p = cast[ptr pointer](q)
if p[] != nil:
var j = cast[ptr GcEnv](env)
j.traceStack.add(p, cast[ptr PNimTypeV2](p[])[])
proc scanBlack(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
s.setColor colBlack
let until = j.traceStack.len
trace(s, desc, j)
when logOrc: writeCell("root still alive", s, desc)
while j.traceStack.len > until:
let (entry, desc) = j.traceStack.pop()
let t = head entry[]
t.rc = t.rc +% rcIncrement
if t.color != colBlack:
t.setColor colBlack
trace(t, desc, j)
when logOrc: writeCell("child still alive", t, desc)
proc markGray(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
if s.color != colGray:
s.setColor colGray
j.touched = j.touched +% 1
j.rcSum = j.rcSum +% (s.rc shr rcShift) +% 1
orcAssert(j.traceStack.len == 0, "markGray: trace stack not empty")
trace(s, desc, j)
while j.traceStack.len > 0:
let (entry, desc) = j.traceStack.pop()
let t = head entry[]
t.rc = t.rc -% rcIncrement
j.edges = j.edges +% 1
if t.color != colGray:
t.setColor colGray
j.touched = j.touched +% 1
j.rcSum = j.rcSum +% (t.rc shr rcShift) +% 2
trace(t, desc, j)
proc scan(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
if s.color == colGray:
if (s.rc shr rcShift) >= 0:
scanBlack(s, desc, j)
else:
orcAssert(j.traceStack.len == 0, "scan: trace stack not empty")
s.setColor(colWhite)
trace(s, desc, j)
while j.traceStack.len > 0:
let (entry, desc) = j.traceStack.pop()
let t = head entry[]
if t.color == colGray:
if (t.rc shr rcShift) >= 0:
scanBlack(t, desc, j)
else:
t.setColor(colWhite)
trace(t, desc, j)
proc collectColor(s: Cell; desc: PNimTypeV2; col: int; j: var GcEnv) =
if s.color == col and (s.rc and inRootsFlag) == 0:
orcAssert(j.traceStack.len == 0, "collectWhite: trace stack not empty")
s.setColor(colBlack)
j.toFree.add(s, desc)
trace(s, desc, j)
while j.traceStack.len > 0:
let (entry, desc) = j.traceStack.pop()
let t = head entry[]
entry[] = nil
if t.color == col and (t.rc and inRootsFlag) == 0:
j.toFree.add(t, desc)
t.setColor(colBlack)
trace(t, desc, j)
proc collectCyclesBacon(j: var GcEnv; lowMark: int) =
let last = roots.len -% 1
when logOrc:
for i in countdown(last, lowMark):
writeCell("root", roots.d[i][0], roots.d[i][1])
for i in countdown(last, lowMark):
markGray(roots.d[i][0], roots.d[i][1], j)
var colToCollect = colWhite
if j.rcSum == j.edges:
colToCollect = colGray
j.keepThreshold = true
else:
for i in countdown(last, lowMark):
scan(roots.d[i][0], roots.d[i][1], j)
init j.toFree
for i in 0 ..< roots.len:
let s = roots.d[i][0]
s.rc = s.rc and not inRootsFlag
collectColor(s, roots.d[i][1], colToCollect, j)
when not defined(nimStressOrc):
let oldThreshold = rootsThreshold
rootsThreshold = high(int)
roots.len = 0
for i in 0 ..< j.toFree.len:
when orcLeakDetector:
writeCell("CYCLIC OBJECT FREED", j.toFree.d[i][0], j.toFree.d[i][1])
free(j.toFree.d[i][0], j.toFree.d[i][1])
when not defined(nimStressOrc):
rootsThreshold = oldThreshold
j.freed = j.freed +% j.toFree.len
deinit j.toFree
when defined(nimOrcStats):
var freedCyclicObjects {.threadvar.}: int
proc collectCycles() =
when logOrc:
cfprintf(cstderr, "[collectCycles] begin\n")
withLock gYrcGlobalLock:
mergePendingRoots()
if roots.len >= RootsThreshold and mayRunCycleCollect():
var j: GcEnv
init j.traceStack
collectCyclesBacon(j, 0)
if roots.len == 0 and roots.d != nil:
deinit roots
when not defined(nimStressOrc):
if j.keepThreshold:
discard
elif j.freed *% 2 >= j.touched:
when not defined(nimFixedOrc):
rootsThreshold = max(rootsThreshold div 3 *% 2, 16)
else:
rootsThreshold = 0
elif rootsThreshold < high(int) div 4:
rootsThreshold = (if rootsThreshold <= 0: defaultThreshold else: rootsThreshold)
rootsThreshold = rootsThreshold div 2 +% rootsThreshold
when logOrc:
cfprintf(cstderr, "[collectCycles] end; freed %ld new threshold %ld\n", j.freed, rootsThreshold)
when defined(nimOrcStats):
inc freedCyclicObjects, j.freed
deinit j.traceStack
when defined(nimOrcStats):
type
OrcStats* = object
freedCyclicObjects*: int
proc GC_orcStats*(): OrcStats =
result = OrcStats(freedCyclicObjects: freedCyclicObjects)
proc GC_runOrc* =
withLock gYrcGlobalLock:
mergePendingRoots()
if mayRunCycleCollect():
var j: GcEnv
init j.traceStack
collectCyclesBacon(j, 0)
deinit j.traceStack
roots.len = 0
when logOrc: orcAssert roots.len == 0, "roots not empty!"
proc GC_enableOrc*() =
when not defined(nimStressOrc):
rootsThreshold = 0
proc GC_disableOrc*() =
when not defined(nimStressOrc):
rootsThreshold = high(int)
proc GC_prepareOrc*(): int {.inline.} =
withLock gYrcGlobalLock:
mergePendingRoots()
result = roots.len
proc GC_partialCollect*(limit: int) =
withLock gYrcGlobalLock:
mergePendingRoots()
if roots.len > limit and mayRunCycleCollect():
var j: GcEnv
init j.traceStack
collectCyclesBacon(j, limit)
deinit j.traceStack
roots.len = limit
proc GC_fullCollect* =
GC_runOrc()
proc GC_enableMarkAndSweep*() = GC_enableOrc()
proc GC_disableMarkAndSweep*() = GC_disableOrc()
const acyclicFlag = 1
when optimizedOrc:
template markedAsCyclic(s: Cell; desc: PNimTypeV2): bool =
(desc.flags and acyclicFlag) == 0 and (s.rc and maybeCycle) != 0
else:
template markedAsCyclic(s: Cell; desc: PNimTypeV2): bool =
(desc.flags and acyclicFlag) == 0
proc nimDecRefIsLastCyclicDyn(p: pointer): bool {.compilerRtl, inl.} =
result = false
if p != nil:
let cell = head(p)
let desc = cast[ptr PNimTypeV2](p)[]
let idx = getStripeIdx()
while true:
var overflow = false
withLock stripes[idx].lockDec:
if stripes[idx].toDecLen < QueueSize:
stripes[idx].toDec[stripes[idx].toDecLen] = (cell, desc)
stripes[idx].toDecLen += 1
else:
overflow = true
if overflow:
collectCycles()
else:
break
proc nimDecRefIsLastDyn(p: pointer): bool {.compilerRtl, inl.} =
nimDecRefIsLastCyclicDyn(p)
proc nimDecRefIsLastCyclicStatic(p: pointer; desc: PNimTypeV2): bool {.compilerRtl, inl.} =
result = false
if p != nil:
let cell = head(p)
let idx = getStripeIdx()
while true:
var overflow = false
withLock stripes[idx].lockDec:
if stripes[idx].toDecLen < QueueSize:
stripes[idx].toDec[stripes[idx].toDecLen] = (cell, desc)
stripes[idx].toDecLen += 1
else:
overflow = true
if overflow:
collectCycles()
else:
break
proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
dest[] = src
if src != nil: nimIncRefCyclic(src, true)
proc yrcDec(tmp: pointer; desc: PNimTypeV2) {.inline.} =
if desc != nil:
discard nimDecRefIsLastCyclicStatic(tmp, desc)
else:
discard nimDecRefIsLastCyclicDyn(tmp)
proc nimAsgnYrc(dest: ptr pointer; src: pointer; desc: PNimTypeV2) {.compilerRtl.} =
## YRC write barrier for ref copy assignment.
## Atomically stores src into dest, then buffers RC adjustments.
## Freeing is always done by the cycle collector, never inline.
let tmp = dest[]
atomicStoreN(dest, src, ATOMIC_RELEASE)
if src != nil:
nimIncRefCyclic(src, true)
if tmp != nil:
yrcDec(tmp, desc)
proc nimSinkYrc(dest: ptr pointer; src: pointer; desc: PNimTypeV2) {.compilerRtl.} =
## YRC write barrier for ref sink (move). No incRef on source.
## Freeing is always done by the cycle collector, never inline.
let tmp = dest[]
atomicStoreN(dest, src, ATOMIC_RELEASE)
if tmp != nil:
yrcDec(tmp, desc)
proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} =
when optimizedOrc:
if p != nil:
let h = head(p)
h.rc = h.rc or maybeCycle
# Initialize locks at module load
initLock(gYrcGlobalLock)
for i in 0..<NumStripes:
when not defined(yrcAtomics):
initLock(stripes[i].lockInc)
initLock(stripes[i].lockDec)
{.pop.}
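
The runtime above keeps ORC's explicit-collection entry points (`GC_prepareOrc`, `GC_runOrc`, `GC_partialCollect`). A minimal sketch of driving them from a latency-sensitive loop; `serving` and `handleRequest` are hypothetical application code:

```nim
while serving:
  handleRequest()
  # merge the per-stripe buffers and ask how many candidate roots are pending
  let pending = GC_prepareOrc()
  if pending > 500:
    # bounded work: collect until at most 100 candidate roots remain
    GC_partialCollect(100)
```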

lib/system/yrc_proof.lean  (new file, 353 lines)

@@ -0,0 +1,353 @@
/-
YRC Safety Proof (self-contained, no Mathlib)
==============================================
Formal model of YRC's key invariant: the cycle collector never frees
an object that any mutator thread can reach.
## Model overview
We model the heap as a set of objects with directed edges (ref fields).
Each thread owns a set of *stack roots* — objects reachable from local variables.
The write barrier (nimAsgnYrc) does:
1. atomic store dest ← src (graph is immediately current)
2. buffer inc(src) (deferred)
3. buffer dec(old) (deferred)
The collector (under global lock) does:
1. Merge all buffered inc/dec into merged RCs
2. Trial deletion (markGray): subtract internal edges from merged RCs
3. scan: objects with RC ≥ 0 after trial deletion are rescued (scanBlack)
4. Free objects that remain white (closed cycles with zero external refs)
-/
-- Objects and threads are just natural numbers for simplicity.
abbrev Obj := Nat
abbrev Thread := Nat
/-! ### State -/
/-- The state of the heap and collector at a point in time. -/
structure State where
/-- Physical heap edges: `edges x y` means object `x` has a ref field pointing to `y`.
Always up-to-date (atomic stores). -/
edges : Obj → Obj → Prop
/-- Stack roots per thread. `roots t x` means thread `t` has a local variable pointing to `x`. -/
roots : Thread → Obj → Prop
/-- Pending buffered increments (not yet merged). -/
pendingInc : Obj → Nat
/-- Pending buffered decrements (not yet merged). -/
pendingDec : Obj → Nat
/-! ### Reachability -/
/-- An object is *reachable* if some thread can reach it via stack roots + heap edges. -/
inductive Reachable (s : State) : Obj → Prop where
| root (t : Thread) (x : Obj) : s.roots t x → Reachable s x
| step (x y : Obj) : Reachable s x → s.edges x y → Reachable s y
/-- Directed reachability between heap objects (following physical edges only). -/
inductive HeapReachable (s : State) : Obj → Obj → Prop where
| refl (x : Obj) : HeapReachable s x x
| step (x y z : Obj) : HeapReachable s x y → s.edges y z → HeapReachable s x z
/-- If a root reaches `r` and `r` heap-reaches `x`, then `x` is Reachable. -/
theorem heapReachable_of_reachable (s : State) (r x : Obj)
(hr : Reachable s r) (hp : HeapReachable s r x) :
Reachable s x := by
induction hp with
| refl => exact hr
| step _ _ _ hedge ih => exact Reachable.step _ _ ih hedge
/-! ### What the collector frees -/
/-- An object has an *external reference* if some thread's stack roots point to it. -/
def hasExternalRef (s : State) (x : Obj) : Prop :=
∃ t, s.roots t x
/-- An object is *externally anchored* if it is heap-reachable from some
object that has an external reference. This is what scanBlack computes:
it starts from objects with trialRC ≥ 0 (= has external refs) and traces
the current physical graph. -/
def anchored (s : State) (x : Obj) : Prop :=
∃ r, hasExternalRef s r ∧ HeapReachable s r x
/-- The collector frees `x` only if `x` is *not anchored*:
no external ref, and not reachable from any externally-referenced object.
This models: after trial deletion, x remained white, and scanBlack
didn't rescue it. -/
def collectorFrees (s : State) (x : Obj) : Prop :=
¬ anchored s x
/-! ### Main safety theorem -/
/-- **Lemma**: Every reachable object is anchored.
If thread `t` reaches `x`, then there is a chain from a stack root
(which has an external ref) through heap edges to `x`. -/
theorem reachable_is_anchored (s : State) (x : Obj)
(h : Reachable s x) : anchored s x := by
induction h with
| root t x hroot =>
exact ⟨x, ⟨t, hroot⟩, HeapReachable.refl x⟩
| step a b h_reach_a h_edge ih =>
obtain ⟨r, h_ext_r, h_path_r_a⟩ := ih
exact ⟨r, h_ext_r, HeapReachable.step r a b h_path_r_a h_edge⟩
/-- **Main Safety Theorem**: If the collector frees `x`, then no thread
can reach `x`. Freed objects are unreachable.
This is the contrapositive of `reachable_is_anchored`. -/
theorem yrc_safety (s : State) (x : Obj)
(h_freed : collectorFrees s x) : ¬ Reachable s x := by
intro h_reach
exact h_freed (reachable_is_anchored s x h_reach)
/-! ### The write barrier preserves reachability -/
/-- Model of `nimAsgnYrc(dest_field_of_a, src)`:
Object `a` had a field pointing to `old`, now points to `src`.
Graph update is immediate. The new edge takes priority (handles src = old). -/
def writeBarrier (s : State) (a old src : Obj) : State :=
{ s with
edges := fun x y =>
if x = a ∧ y = src then True
else if x = a ∧ y = old then False
else s.edges x y
pendingInc := fun x => if x = src then s.pendingInc x + 1 else s.pendingInc x
pendingDec := fun x => if x = old then s.pendingDec x + 1 else s.pendingDec x }
/-- **No Lost Object Theorem**: If thread `t` holds a stack ref to `a` and
executes `a.field = b` (replacing old), then `b` is reachable afterward.
This is why the "lost object" problem from concurrent GC literature
doesn't arise in YRC: the atomic store makes `a→b` visible immediately,
and `a` is anchored (thread `t` holds it), so scanBlack traces `a→b`
and rescues `b`. -/
theorem no_lost_object (s : State) (t : Thread) (a old b : Obj)
(h_root_a : s.roots t a) :
Reachable (writeBarrier s a old b) b := by
apply Reachable.step a b
· exact Reachable.root t a h_root_a
· simp [writeBarrier]
/-! ### Non-atomic write barrier window safety
The write barrier does three steps non-atomically:
1. atomicStore(dest, src) — graph update
2. buffer inc(src) — deferred
3. buffer dec(old) — deferred
If the collector runs between steps 1 and 2 (inc not yet buffered):
- src has a new incoming heap edge not yet reflected in RCs
- But src is reachable from the mutator's stack (mutator held a ref to store it)
- So src has an external ref → trialRC ≥ 1 → scanBlack rescues src ✓
If the collector runs between steps 2 and 3 (dec not yet buffered):
- old's RC is inflated by 1 (the dec hasn't arrived)
- This is conservative: old appears to have more refs than it does
- Trial deletion won't spuriously free it ✓
-/
/-- Model the state between steps 1-2: graph updated, inc not yet buffered.
`src` has new edge but RC doesn't reflect it yet. -/
def stateAfterStore (s : State) (a old src : Obj) : State :=
{ s with
edges := fun x y =>
if x = a ∧ y = src then True
else if x = a ∧ y = old then False
else s.edges x y }
/-- Even in the window between atomic store and buffered inc,
src is still reachable (from the mutator's stack via a→src). -/
theorem src_reachable_in_window (s : State) (t : Thread) (a old src : Obj)
(h_root_a : s.roots t a) :
Reachable (stateAfterStore s a old src) src := by
apply Reachable.step a src
· exact Reachable.root t a h_root_a
· simp [stateAfterStore]
/-- Therefore src is anchored in the window → collector won't free it. -/
theorem src_safe_in_window (s : State) (t : Thread) (a old src : Obj)
(h_root_a : s.roots t a) :
¬ collectorFrees (stateAfterStore s a old src) src := by
intro h_freed
exact h_freed (reachable_is_anchored _ _ (src_reachable_in_window s t a old src h_root_a))
/-! ### Deadlock freedom
YRC uses three classes of locks:
• gYrcGlobalLock (level 0)
• stripes[i].lockInc (level 2*i + 1, for i in 0..N-1)
• stripes[i].lockDec (level 2*i + 2, for i in 0..N-1)
Total order: global < lockInc[0] < lockDec[0] < lockInc[1] < lockDec[1] < ...
Every code path in yrc.nim acquires locks in strictly ascending level order:
**nimIncRefCyclic** (mutator fast path):
acquire lockInc[myStripe] → release → done.
Holds exactly one lock. ✓
**nimIncRefCyclic** (overflow path):
acquire gYrcGlobalLock (level 0), then for i=0..N-1: acquire lockInc[i] → release.
Ascending: 0 < 1 < 3 < 5 < ... ✓
**nimDecRefIsLastCyclic{Dyn,Static}** (fast path):
acquire lockDec[myStripe] → release → done.
Holds exactly one lock. ✓
**nimDecRefIsLastCyclic{Dyn,Static}** (overflow path):
calls collectCycles → acquire gYrcGlobalLock (level 0),
then mergePendingRoots which for i=0..N-1:
acquire lockInc[i] → release, acquire lockDec[i] → release.
Ascending: 0 < 1 < 2 < 3 < 4 < ... ✓
**collectCycles / GC_runOrc** (collector):
acquire gYrcGlobalLock (level 0),
then mergePendingRoots (same ascending pattern as above). ✓
**nimAsgnYrc / nimSinkYrc** (write barrier):
Calls nimIncRefCyclic then nimDecRefIsLastCyclic*.
Each call acquires and releases its lock independently.
No nesting between the two calls. ✓
Since every path follows the total order, deadlock is impossible.
-/
/-- Lock levels in YRC. Each lock maps to a unique natural number. -/
inductive LockId (n : Nat) where
| global : LockId n
| lockInc (i : Nat) (h : i < n) : LockId n
| lockDec (i : Nat) (h : i < n) : LockId n
/-- The level (priority) of each lock in the total order. -/
def lockLevel {n : Nat} : LockId n → Nat
| .global => 0
| .lockInc i _ => 2 * i + 1
| .lockDec i _ => 2 * i + 2
/-- All lock levels are distinct (the level function is injective). -/
theorem lockLevel_injective {n : Nat} (a b : LockId n)
(h : lockLevel a = lockLevel b) : a = b := by
cases a with
| global =>
cases b with
| global => rfl
| lockInc j hj => simp [lockLevel] at h
| lockDec j hj => simp [lockLevel] at h
| lockInc i hi =>
cases b with
| global => simp [lockLevel] at h
| lockInc j hj =>
have : i = j := by simp [lockLevel] at h; omega
subst this; rfl
| lockDec j hj => simp [lockLevel] at h; omega
| lockDec i hi =>
cases b with
| global => simp [lockLevel] at h
| lockInc j hj => simp [lockLevel] at h; omega
| lockDec j hj =>
have : i = j := by simp [lockLevel] at h; omega
subst this; rfl
/-- Helper: stripe lock levels are strictly ascending across stripes. -/
theorem stripe_levels_ascending (i : Nat) :
2 * i + 1 < 2 * i + 2 ∧ 2 * i + 2 < 2 * (i + 1) + 1 := by
constructor <;> omega
/-- lockInc levels are strictly ascending with index. -/
theorem lockInc_level_strict_mono {n : Nat} (i j : Nat) (hi : i < n) (hj : j < n)
(hij : i < j) : lockLevel (.lockInc i hi : LockId n) < lockLevel (.lockInc j hj) := by
simp [lockLevel]; omega
/-- lockDec levels are strictly ascending with index. -/
theorem lockDec_level_strict_mono {n : Nat} (i j : Nat) (hi : i < n) (hj : j < n)
(hij : i < j) : lockLevel (.lockDec i hi : LockId n) < lockLevel (.lockDec j hj) := by
simp [lockLevel]; omega
/-- Global lock has the lowest level (level 0). -/
theorem global_level_min {n : Nat} (l : LockId n) (h : l ≠ .global) :
lockLevel (.global : LockId n) < lockLevel l := by
cases l with
| global => exact absurd rfl h
| lockInc i hi => simp [lockLevel]
| lockDec i hi => simp [lockLevel]
/-- **Deadlock Freedom**: Any sequence of lock acquisitions that follows the
"acquire in ascending level order" discipline cannot deadlock.
This is a standard result: a total order on locks with the invariant that
every thread acquires locks in strictly ascending order prevents cycles
in the wait-for graph, which is necessary and sufficient for deadlock.
We prove the 2-thread case (the general N-thread case follows by the
same transitivity argument on the wait-for cycle). -/
theorem no_deadlock_from_total_order {n : Nat}
-- Two threads each hold a lock and wait for another
(held₁ waited₁ held₂ waited₂ : LockId n)
-- Thread 1 holds held₁ and wants waited₁ (ascending order)
(h1 : lockLevel held₁ < lockLevel waited₁)
-- Thread 2 holds held₂ and wants waited₂ (ascending order)
(h2 : lockLevel held₂ < lockLevel waited₂)
-- Deadlock requires: thread 1 waits for what thread 2 holds,
-- and thread 2 waits for what thread 1 holds
(h_wait1 : waited₁ = held₂)
(h_wait2 : waited₂ = held₁) :
False := by
subst h_wait1; subst h_wait2
omega
/-! ### Summary of verified properties (all QED, no sorry)
1. `reachable_is_anchored`: Every reachable object is anchored
(has a path from an externally-referenced object via heap edges).
2. `yrc_safety`: The collector only frees unanchored objects,
which are unreachable by all threads. **No use-after-free.**
3. `no_lost_object`: After `a.field = b`, `b` is reachable
(atomic store makes the edge visible immediately).
4. `src_safe_in_window`: Even between the atomic store and
the buffered inc, the collector cannot free src.
5. `lockLevel_injective`: All lock levels are distinct (well-defined total order).
6. `global_level_min`: The global lock has the lowest level.
7. `lockInc_level_strict_mono`, `lockDec_level_strict_mono`:
Stripe locks are strictly ordered by index.
8. `no_deadlock_from_total_order`: A 2-thread deadlock cycle is impossible
when both threads acquire locks in ascending level order.
Together these establish that YRC's write barrier protocol
(atomic store → buffer inc → buffer dec) is safe under concurrent
collection, and the locking discipline prevents deadlock.
## What is NOT proved: Completeness (liveness)
This proof covers **safety** (no use-after-free) and **deadlock-freedom**,
but does NOT prove **completeness** — that all garbage cycles are eventually
collected.
Completeness depends on the trial deletion algorithm (Bacon 2001) correctly
identifying closed cycles. Specifically it requires proving:
1. After `mergePendingRoots`, merged RCs equal logical RCs
(buffered inc/dec exactly compensate graph changes since last merge).
2. `markGray` subtracts exactly the internal (heap→heap) edge count from
each node's merged RC, yielding `trialRC(x) = externalRefCount(x)`.
3. `scan` correctly partitions: nodes with `trialRC ≥ 0` are rescued by
`scanBlack`; nodes with `trialRC < 0` remain white.
4. White nodes form closed subgraphs with zero external refs → garbage.
These properties follow from the well-known Bacon trial-deletion algorithm
and are assumed here rather than re-proved. The YRC-specific contribution
(buffered RCs, striped queues, concurrent mutators) is what our safety
proof covers — showing that concurrency does not break the preconditions
that trial deletion relies on (physical graph consistency, eventual RC
consistency after merge).
Reference: D.F. Bacon and V.T. Rajan, "Concurrent Cycle Collection in
Reference Counted Systems", ECOOP 2001.
-/
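
Assuming a recent Lean 4 toolchain, the headline results can be sanity-checked by elaborating the file and inspecting their statements:

```lean
-- These succeed once yrc_proof.lean elaborates without errors or `sorry`.
#check @yrc_safety                    -- freed objects are unreachable
#check @no_lost_object                -- after `a.field = b`, b stays reachable
#check @src_safe_in_window            -- safe between the atomic store and the buffered inc
#check @no_deadlock_from_total_order  -- ascending lock order rules out a 2-thread wait cycle
```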

lib/system/yrc_proof.tla  (new file, 761 lines)

@@ -0,0 +1,761 @@
---- MODULE yrc_proof ----
\* TLA+ specification of YRC (Thread-safe ORC cycle collector)
\* Models the fine details of barriers, striped queues, and synchronization
\*
\* ## Key Barrier Semantics Modeled
\*
\* ### Write Barrier (nimAsgnYrc)
\* 1. atomicStoreN(dest, src, ATOMIC_RELEASE)
\* - Graph update is immediately visible to all threads (including collector)
\* - ATOMIC_RELEASE ensures all prior writes are visible before this store
\* - No lock required for graph updates (lock-free)
\*
\* 2. nimIncRefCyclic(src, true)
\* - Acquires per-stripe lockInc[stripe] (fine-grained)
\* - Buffers increment in toInc[stripe] queue
\* - On overflow: acquires global lock, merges all stripes, applies increment
\*
\* 3. yrcDec(tmp, desc)
\* - Acquires per-stripe lockDec[stripe] (fine-grained)
\* - Buffers decrement in toDec[stripe] queue
\* - On overflow: acquires global lock, merges all stripes, applies decrement,
\* adds to roots array if not already present
\*
\* ### Merge Operation (mergePendingRoots)
\* - Acquires global lock (exclusive access)
\* - Sequentially acquires each stripe's lockInc and lockDec
\* - Drains all buffers, applies RC adjustments
\* - Adds decremented objects to roots array
\* - After merge: mergedRC = logicalRC (current graph state)
\*
\* ### Collection Cycle (under global lock)
\* 1. mergePendingRoots: reconcile buffered changes
\* 2. markGray: trial deletion (subtract internal edges)
\* 3. scan: rescue objects with RC >= 0 (scanBlack follows current graph)
\* 4. collectColor: free white objects (closed cycles)
\*
\* ## Safety Argument
\*
\* The collector only frees closed cycles (zero external refs). Concurrent writes
\* cannot cause "lost objects" because:
\* - Graph updates are atomic and immediately visible
\* - Mutator must hold stack ref to modify object (external ref)
\* - scanBlack follows current physical edges (rescues newly written objects)
\* - Only objects unreachable from any stack root are freed
EXTENDS Naturals, Integers, Sequences, FiniteSets, TLC
CONSTANTS NumStripes, QueueSize, RootsThreshold, Objects, Threads, ObjTypes
ASSUME NumStripes \in Nat /\ NumStripes > 0
ASSUME QueueSize \in Nat /\ QueueSize > 0
ASSUME RootsThreshold \in Nat
ASSUME IsFiniteSet(Objects)
ASSUME IsFiniteSet(Threads)
ASSUME IsFiniteSet(ObjTypes)
\* NULL constant (represents "no thread" for locks)
\* We use a sentinel value that's guaranteed not to be in Threads or Objects
NULL == "NULL" \* String literal that won't conflict with Threads/Objects
ASSUME NULL \notin Threads /\ NULL \notin Objects
\* Helper functions
\* Note: GetStripeIdx is not used, GetStripe is used instead
\* Color constants
colBlack == 0
colGray == 1
colWhite == 2
maybeCycle == 4
inRootsFlag == 8
colorMask == 3
\* State variables
VARIABLES
\* Physical heap graph (always up-to-date, atomic stores)
edges, \* edges[obj1][obj2] = TRUE if obj1.field points to obj2
\* Stack roots per thread
roots, \* roots[thread][obj] = TRUE if thread has local var pointing to obj
\* Reference counts (stored in object header)
rc, \* rc[obj] = reference count (logical, after merge)
\* Color markers (stored in object header, bits 0-2)
color, \* color[obj] \in {colBlack, colGray, colWhite}
\* Root tracking flags
inRoots, \* inRoots[obj] = TRUE if obj is in roots array
\* Striped increment queues
toIncLen, \* toIncLen[stripe] = current length of increment queue
toInc, \* toInc[stripe][i] = object to increment
\* Striped decrement queues
toDecLen, \* toDecLen[stripe] = current length of decrement queue
toDec, \* toDec[stripe][i] = (object, type) pair to decrement
\* Per-stripe locks
lockInc, \* lockInc[stripe] = thread holding increment lock (or NULL)
lockDec, \* lockDec[stripe] = thread holding decrement lock (or NULL)
\* Global lock
globalLock, \* thread holding global lock (or NULL)
\* Merged roots array (used during collection)
mergedRoots, \* sequence of (object, type) pairs
\* Collection state
collecting, \* TRUE if collection is in progress
gcEnv, \* GC environment: {touched, edges, rcSum, toFree, ...}
\* Pending operations (for modeling atomicity)
pendingWrites \* set of pending write barrier operations
\* Type invariants
TypeOK ==
/\ edges \in [Objects -> [Objects -> BOOLEAN]]
/\ roots \in [Threads -> [Objects -> BOOLEAN]]
/\ rc \in [Objects -> Int]
/\ color \in [Objects -> {colBlack, colGray, colWhite}]
/\ inRoots \in [Objects -> BOOLEAN]
/\ toIncLen \in [0..(NumStripes-1) -> 0..QueueSize]
/\ toInc \in [0..(NumStripes-1) -> Seq(Objects)]
/\ toDecLen \in [0..(NumStripes-1) -> 0..QueueSize]
/\ toDec \in [0..(NumStripes-1) -> Seq([obj: Objects, desc: ObjTypes])]
/\ lockInc \in [0..(NumStripes-1) -> Threads \cup {NULL}]
/\ lockDec \in [0..(NumStripes-1) -> Threads \cup {NULL}]
/\ globalLock \in Threads \cup {NULL}
/\ mergedRoots \in Seq([obj: Objects, desc: ObjTypes])
/\ collecting \in BOOLEAN
/\ pendingWrites \in SUBSET ([thread: Threads, dest: Objects, old: Objects \cup {NULL}, src: Objects \cup {NULL}, phase: {"store", "inc", "dec"}])
\* Helper: internal reference count (heap-to-heap edges)
InternalRC(obj) ==
Cardinality({src \in Objects : edges[src][obj]})
\* Helper: external reference count (stack roots)
ExternalRC(obj) ==
Cardinality({t \in Threads : roots[t][obj]})
\* Helper: logical reference count
LogicalRC(obj) ==
InternalRC(obj) + ExternalRC(obj)
\* Helper: get stripe index for a thread.
\* Simplification: every thread maps to stripe 0. Threads are model values, so
\* there is no numeric id to hash; a single stripe is adequate for the small
\* model instances checked here.
GetStripe(thread) == 0
\* ============================================================================
\* Write Barrier: nimAsgnYrc
\* ============================================================================
\* The write barrier does:
\* 1. atomicStoreN(dest, src, ATOMIC_RELEASE) -- graph update is immediate
\* 2. nimIncRefCyclic(src, true) -- buffer inc(src)
\* 3. yrcDec(tmp, desc) -- buffer dec(old)
\*
\* Key barrier semantics:
\* - ATOMIC_RELEASE on store ensures all prior writes are visible before the graph update
\* - The graph update is immediately visible to all threads (including collector)
\* - RC adjustments are buffered and only applied during merge
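\*
\* For orientation, a minimal pseudocode sketch of the barrier being modeled
\* (assumed shape only -- parameter types and exact helper signatures in the
\* runtime may differ; the proc names are taken from the description above):
\*
\*   proc nimAsgnYrc(dest: var pointer; src: pointer; desc: PNimTypeV2) =
\*     let old = dest                                 # remember previous value
\*     atomicStoreN(addr dest, src, ATOMIC_RELEASE)   # 1. immediate topology update
\*     if src != nil: nimIncRefCyclic(src, true)      # 2. buffer inc(src)
\*     if old != nil: yrcDec(old, desc)               # 3. buffer dec(old)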
\* ============================================================================
\* Phase 1: Atomic Store (Topology Update)
\* ============================================================================
\* The atomic store always happens first, updating the graph topology.
\* This is independent of RC operations and never blocks.
MutatorWriteAtomicStore(thread, destObj, destField, oldVal, newVal, desc) ==
\* Atomic store with RELEASE barrier - updates graph topology immediately
\* Clear ALL edges from destObj first (atomic store replaces old value completely),
\* then set the new edge. This ensures destObj.field can only point to one object.
/\ edges' = [edges EXCEPT ![destObj] = [x \in Objects |->
IF x = newVal /\ newVal # NULL
THEN TRUE
ELSE FALSE]]
  /\ \* The RC buffers (toIncLen/toInc/toDecLen/toDec) are deliberately not listed
     \* here: they are constrained by Phase 2 (WriteBarrier) or by the overflow
     \* branch of MutatorWrite, which is conjoined with this action.
     UNCHANGED <<roots, rc, color, inRoots, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Phase 2: RC Buffering (if space available)
\* ============================================================================
\* Buffers increment/decrement if there's space. If overflow would happen,
\* this action is disabled (blocked) until merge can happen.
WriteBarrier(thread, destObj, destField, oldVal, newVal, desc) ==
LET stripe == GetStripe(thread)
IN
\* Determine if overflow happens for increment or decrement
/\ LET
incOverflow == (newVal # NULL) /\ (toIncLen[stripe] >= QueueSize)
decOverflow == (oldVal # NULL) /\ (toDecLen[stripe] >= QueueSize)
IN
\* Buffering: only enabled if no overflow (otherwise blocked until merge can happen)
/\ ~incOverflow \* Precondition: increment buffer has space (blocks if full)
/\ ~decOverflow \* Precondition: decrement buffer has space (blocks if full)
/\ toIncLen' = IF newVal # NULL /\ toIncLen[stripe] < QueueSize
THEN [toIncLen EXCEPT ![stripe] = toIncLen[stripe] + 1]
ELSE toIncLen
/\ toInc' = IF newVal # NULL /\ toIncLen[stripe] < QueueSize
THEN [toInc EXCEPT ![stripe] = Append(toInc[stripe], newVal)]
ELSE toInc
/\ toDecLen' = IF oldVal # NULL /\ toDecLen[stripe] < QueueSize
THEN [toDecLen EXCEPT ![stripe] = toDecLen[stripe] + 1]
ELSE toDecLen
/\ toDec' = IF oldVal # NULL /\ toDecLen[stripe] < QueueSize
THEN [toDec EXCEPT ![stripe] = Append(toDec[stripe], [obj |-> oldVal, desc |-> desc])]
ELSE toDec
     /\ \* edges is updated by Phase 1 (the atomic store), so it is not listed here.
        UNCHANGED <<roots, rc, color, inRoots, mergedRoots, lockInc, lockDec, globalLock, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Phase 3: Overflow Handling (separate actions that can block)
\* ============================================================================
\* Handle increment overflow: merge increment buffers when lock is available
\* This merges ALL increment buffers (for all stripes), not just the one that overflowed
MutatorWriteMergeInc(thread) ==
LET stripe == GetStripe(thread)
IN
/\ \E s \in 0..(NumStripes-1): toIncLen[s] >= QueueSize \* Some stripe has increment overflow
/\ globalLock = NULL \* Lock must be available (blocks if held)
/\ toIncLen' = [s \in 0..(NumStripes-1) |-> 0]
/\ toInc' = [s \in 0..(NumStripes-1) |-> <<>>]
/\ rc' = \* Compute RC from LogicalRC of current graph (increment buffers merged)
\* The graph is already updated by atomic store, so we compute from current edges
[x \in Objects |->
LET internalRC == Cardinality({src \in Objects : edges[src][x]})
externalRC == Cardinality({t \in Threads : roots[t][x]})
IN internalRC + externalRC]
  /\ globalLock' = NULL \* Lock acquired, merge done, lock released (the whole withLock block is one atomic step)
/\ UNCHANGED <<edges, roots, color, inRoots, toDecLen, toDec, lockInc, lockDec, mergedRoots, collecting, gcEnv, pendingWrites>>
\* Handle decrement overflow: merge ALL buffers when lock is available
\* This calls collectCycles() which merges both increment and decrement buffers
\* We inline MergePendingRoots here. The entire withLock block is atomic:
\* lock is acquired, merge happens, lock is released.
MutatorWriteMergeDec(thread) ==
LET stripe == GetStripe(thread)
IN
/\ \E s \in 0..(NumStripes-1): toDecLen[s] >= QueueSize \* Some stripe has decrement overflow
/\ globalLock = NULL \* Lock must be available (blocks if held)
/\ \* Merge all buffers (inlined MergePendingRoots logic)
     LET \* The buffered increments/decrements exactly compensate for graph changes
         \* that have already happened (atomic stores are immediate), so the merged
         \* RC of every object equals the logical RC of the current graph.
         newRC == [x \in Objects |->
                    LET internalRC == Cardinality({src \in Objects : edges[src][x]})
                        externalRC == Cardinality({t \in Threads : roots[t][x]})
                    IN internalRC + externalRC]
         \* Objects from the decrement buffers become cycle-root candidates. They are
         \* stored as (obj, desc) records so that mergedRoots keeps the type required
         \* by TypeOK (Seq([obj: Objects, desc: ObjTypes])).
         newRootsSet == UNION {{[obj |-> toDec[s][i].obj, desc |-> toDec[s][i].desc] :
                                 i \in 1..toDecLen[s]} : s \in 0..(NumStripes-1)}
         newRootsSeq == IF newRootsSet = {}
                        THEN <<>>
                        ELSE LET ordered == CHOOSE f \in [1..Cardinality(newRootsSet) -> newRootsSet] :
                                               \A i, j \in DOMAIN f : i # j => f[i] # f[j]
                             IN [i \in 1..Cardinality(newRootsSet) |-> ordered[i]]
IN
/\ rc' = newRC
/\ mergedRoots' = mergedRoots \o newRootsSeq
/\ inRoots' = [x \in Objects |->
IF newRootsSet = {}
THEN inRoots[x]
ELSE LET rootObjs == UNION {{mergedRoots'[i].obj : i \in DOMAIN mergedRoots'}}
IN IF x \in rootObjs THEN TRUE ELSE inRoots[x]]
/\ toIncLen' = [s \in 0..(NumStripes-1) |-> 0]
/\ toInc' = [s \in 0..(NumStripes-1) |-> <<>>]
/\ toDecLen' = [s \in 0..(NumStripes-1) |-> 0]
/\ toDec' = [s \in 0..(NumStripes-1) |-> <<>>]
/\ globalLock' = NULL \* Lock acquired, merge done, lock released (entire withLock block is atomic)
/\ UNCHANGED <<edges, roots, color, lockInc, lockDec, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Merge Operation: mergePendingRoots
\* ============================================================================
\* Drains all stripe buffers under global lock.
\* Sequentially acquires each stripe's lockInc and lockDec to drain buffers.
\* This reconciles buffered RC adjustments with the current graph state.
\*
\* Key invariant: After merge, mergedRC = logicalRC (current graph + buffered changes)
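\*
\* Worked example (assuming a heap write a.field := b that replaces an old
\* reference to c): the atomic store has already turned the edge a -> c into
\* a -> b, while the stripe buffers hold inc(b) and dec(c). Until the merge,
\* rc(b) is one too low and rc(c) one too high relative to the graph; draining
\* the buffers applies exactly the missing +1/-1, which is why the merged RC can
\* equivalently be computed as LogicalRC of the current graph. The object c,
\* taken from the decrement buffer, is appended to mergedRoots as a cycle-root
\* candidate.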
MergePendingRoots ==
/\ globalLock # NULL
  /\ LET
       \* The buffered RC adjustments exactly compensate for the graph changes that
       \* already happened (atomic stores are immediate), so after draining every
       \* stripe buffer the merged RC of each object equals the logical RC of the
       \* current graph:
       \*   mergedRC = currentRC + pendingInc - pendingDec = LogicalRC(current graph)
       \* We therefore compute the post-merge RC directly from the graph.
       newRC == [x \in Objects |-> LogicalRC(x)]
\* Add decremented objects to roots if not already there (check inRootsFlag)
\* Collect all new roots as a set, then convert to sequence
\* Build set by iterating over all (stripe, index) pairs
\* Use UNION with explicit per-stripe sets (avoiding function enumeration issues)
newRootsSet == UNION {UNION {IF inRoots[toDec[s][i].obj] = FALSE
THEN {[obj |-> toDec[s][i].obj, desc |-> toDec[s][i].desc]}
ELSE {} : i \in DOMAIN toDec[s]} : s \in 0..(NumStripes-1)}
newRootsSeq == IF newRootsSet = {}
THEN <<>>
ELSE LET ordered == CHOOSE f \in [1..Cardinality(newRootsSet) -> newRootsSet] :
\A i, j \in DOMAIN f : i # j => f[i] # f[j]
IN [i \in 1..Cardinality(newRootsSet) |-> ordered[i]]
IN
/\ rc' = newRC
/\ mergedRoots' = mergedRoots \o newRootsSeq \* Append new roots to sequence
/\ \* Update inRoots: mark objects in mergedRoots' as being in roots
\* Use explicit iteration to avoid enumeration issues
inRoots' = [x \in Objects |->
IF mergedRoots' = <<>>
THEN inRoots[x]
ELSE LET rootObjs == UNION {{mergedRoots'[i].obj : i \in DOMAIN mergedRoots'}}
IN IF x \in rootObjs THEN TRUE ELSE inRoots[x]]
/\ toIncLen' = [s \in 0..(NumStripes-1) |-> 0]
/\ toInc' = [s \in 0..(NumStripes-1) |-> <<>>]
/\ toDecLen' = [s \in 0..(NumStripes-1) |-> 0]
/\ toDec' = [s \in 0..(NumStripes-1) |-> <<>>]
/\ UNCHANGED <<edges, roots, color, lockInc, lockDec, globalLock, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Trial Deletion: markGray
\* ============================================================================
\* Subtracts internal (heap-to-heap) edges from reference counts.
\* This isolates external references (stack roots).
\*
\* Algorithm:
\* 1. Mark obj gray
\* 2. Trace obj's fields (via traceImpl)
\* 3. For each child c: decrement c.rc (subtract internal edge)
\* 4. Recursively markGray all children
\*
\* After markGray: trialRC(obj) = mergedRC(obj) - internalRefCount(obj)
\* = externalRefCount(obj) (if merge was correct)
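\*
\* Worked example (assuming a model instance with Objects = {A, B}, edges A -> B
\* and B -> A, and one stack root on A): after the merge rc(A) = 2 (edge B -> A
\* plus the root) and rc(B) = 1. MarkGray(A) visits the subgraph {A, B} and, for
\* every object reachable from A's children, subtracts one per incoming
\* in-subgraph edge: rc(A) = 1, rc(B) = 0, both gray. The remaining rc(A) = 1 is
\* exactly A's external reference.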
MarkGray(obj, desc) ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ color[obj] # colGray
/\ \* Compute transitive closure of all objects reachable from obj
\* This models the recursive traversal in the actual implementation
LET children == {c \in Objects : edges[obj][c]}
\* Compute all objects reachable from obj via heap edges
\* This is the transitive closure starting from obj's direct children
             allReachable == {c \in Objects :
                               \* Bound path length by |Objects|: TLC cannot enumerate
                               \* Seq(Objects), and any witness path can be shortened
                               \* to a simple path of at most that length.
                               \E n \in 1..Cardinality(Objects):
                                 \E path \in [1..n -> Objects]:
                                   /\ path[1] \in children
                                   /\ path[n] = c
                                   /\ \A i \in 1..(n-1):
                                        edges[path[i]][path[i+1]]}
\* All objects to mark gray: obj itself + all reachable descendants
objectsToMarkGray == {obj} \cup allReachable
\* For each reachable object, count internal edges pointing to it
\* from within the subgraph (obj + allReachable)
\* This is the number of times its RC should be decremented
subgraph == {obj} \cup allReachable
internalEdgeCount == [x \in Objects |->
IF x \in allReachable
THEN Cardinality({y \in subgraph : edges[y][x]})
ELSE 0]
IN
/\ \* Mark obj and all reachable objects gray
color' = [x \in Objects |->
IF x \in objectsToMarkGray THEN colGray ELSE color[x]]
/\ \* Subtract internal edges: for each reachable object, decrement its RC
\* by the number of internal edges pointing to it from within the subgraph.
\* This matches the Nim implementation which decrements once per edge traversed.
        \* Note: obj itself is only decremented if it is reachable from its own
        \* children (i.e. it lies on a cycle). External refs are never subtracted,
        \* so they survive trial deletion.
rc' = [x \in Objects |->
IF x \in allReachable THEN rc[x] - internalEdgeCount[x] ELSE rc[x]]
/\ UNCHANGED <<edges, roots, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Scan Phase
\* ============================================================================
\* Objects with RC >= 0 after trial deletion are rescued (scanBlack).
\* Objects with RC < 0 remain white (part of closed cycle).
\*
\* Key insight: scanBlack follows the *current* physical edges (which may have
\* changed since merge due to concurrent writes). This ensures objects written
\* during collection are still rescued.
\*
\* Algorithm:
\* IF rc[obj] >= 0:
\* scanBlack(obj): mark black, restore RC, trace and rescue all children
\* ELSE:
\* mark white (closed cycle with zero external refs)
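\*
\* Continuing the example above: Scan(A) sees rc(A) = 1 >= 0, so scanBlack marks
\* A and B black and restores the subtracted in-subgraph edges, giving rc(A) = 2
\* and rc(B) = 1 again; nothing is freed, as required, because the cycle is
\* still externally referenced.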
Scan(obj, desc) ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ color[obj] = colGray
/\ IF rc[obj] >= 0
THEN \* scanBlack: rescue obj and all reachable objects
\* This follows the current physical graph (atomic stores are visible)
\* Restore RC for all reachable objects by incrementing by the number of
\* internal edges pointing to each (matching what markGray subtracted)
LET children == {c \in Objects : edges[obj][c]}
                allReachable == {c \in Objects :
                                  \* Same bounded-path formulation as in MarkGray
                                  \E n \in 1..Cardinality(Objects):
                                    \E path \in [1..n -> Objects]:
                                      /\ path[1] \in children
                                      /\ path[n] = c
                                      /\ \A i \in 1..(n-1):
                                           edges[path[i]][path[i+1]]}
objectsToMarkBlack == {obj} \cup allReachable
\* For each reachable object, count internal edges pointing to it
\* from within the subgraph (obj + allReachable)
\* This is the number of times its RC should be incremented (restored)
subgraph == {obj} \cup allReachable
internalEdgeCount == [x \in Objects |->
IF x \in allReachable
THEN Cardinality({y \in subgraph : edges[y][x]})
ELSE 0]
IN
/\ \* Restore RC: increment by the number of internal edges pointing to each
\* reachable object. This restores what markGray subtracted.
          \* Note: obj itself is only incremented if it lies on a cycle, mirroring
          \* what markGray subtracted. External refs were never subtracted, so the
          \* root's RC still reflects them.
rc' = [x \in Objects |->
IF x \in allReachable THEN rc[x] + internalEdgeCount[x] ELSE rc[x]]
/\ \* Mark obj and all reachable objects black in one assignment
color' = [x \in Objects |->
IF x \in objectsToMarkBlack THEN colBlack ELSE color[x]]
ELSE \* Mark white (part of closed cycle)
/\ color' = [color EXCEPT ![obj] = colWhite]
/\ UNCHANGED <<rc>>
/\ UNCHANGED <<edges, roots, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Collection Phase: collectColor
\* ============================================================================
\* Frees objects of the target color that are not in roots.
\*
\* Safety: Only objects with color = targetColor AND ~inRoots[obj] are freed.
\* These are closed cycles (zero external refs, not reachable from roots).
CollectColor(obj, desc, targetColor) ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ color[obj] = targetColor
/\ ~inRoots[obj]
  /\ \* Free obj: nullify all of its outgoing edges (prevents use-after-free).
     \* In the actual implementation this happens during trace() when freeing.
     edges' = [edges EXCEPT ![obj] = [x \in Objects |-> FALSE]]
/\ color' = [color EXCEPT ![obj] = colBlack] \* Mark as freed
/\ UNCHANGED <<roots, rc, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Collection Cycle: collectCyclesBacon
\* ============================================================================
StartCollection ==
/\ globalLock # NULL
/\ ~collecting
/\ Len(mergedRoots) >= RootsThreshold
/\ collecting' = TRUE
/\ gcEnv' = [touched |-> 0, edges |-> 0, rcSum |-> 0, toFree |-> {}]
/\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, pendingWrites>>
EndCollection ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ \* Clear root flags
inRoots' = [x \in Objects |->
IF x \in {r.obj : r \in mergedRoots} THEN FALSE ELSE inRoots[x]]
/\ mergedRoots' = <<>>
/\ collecting' = FALSE
/\ UNCHANGED <<edges, roots, rc, color, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, gcEnv, pendingWrites>>
\* ============================================================================
\* Mutator Actions
\* ============================================================================
\* Mutator can write at any time (graph updates are lock-free)
\* The ATOMIC_RELEASE barrier ensures proper ordering
\* ASSUMPTION: Users synchronize pointer assignments with locks, so oldVal always
\* matches the current graph state (as read before the atomic store).
\* This prevents races at the user level - the GC itself is lock-free.
MutatorWrite(thread, destObj, destField, oldVal, newVal, desc) ==
\* ASSUMPTION: Users synchronize pointer assignments with locks, so oldVal always matches
\* the value read before the atomic store. This prevents races at the user level.
\* The precondition is enforced in the Next relation.
\* Phase 1: Atomic store (topology update) - ALWAYS happens first
/\ MutatorWriteAtomicStore(thread, destObj, destField, oldVal, newVal, desc)
\* Phase 2: RC buffering - happens if no overflow, otherwise overflow is handled separately
\* Note: In reality, if overflow happens, the thread blocks waiting for lock.
\* We model this as: atomic store happens, buffering is deferred (handled by merge actions).
/\ LET stripe == GetStripe(thread)
incOverflow == (newVal # NULL) /\ (toIncLen[stripe] >= QueueSize)
decOverflow == (oldVal # NULL) /\ (toDecLen[stripe] >= QueueSize)
IN
IF incOverflow \/ decOverflow
THEN \* Overflow: atomic store happened, but buffering is deferred
\* Buffers stay full, merge will happen when lock is available (via MutatorWriteMergeInc/Dec)
/\ UNCHANGED <<roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
ELSE \* No overflow: buffer normally
/\ WriteBarrier(thread, destObj, destField, oldVal, newVal, desc)
/\ UNCHANGED <<roots, collecting, pendingWrites>>
\* Stack root assignment: immediate RC increment (not buffered)
\* When assigning val to a root variable named obj, we set roots[thread][val] = TRUE
\* to indicate that thread has a stack reference to val
\* Semantics: obj is root variable name, val is the object being assigned
\* When val=NULL, obj was the old root value, so we decrement rc[obj]
MutatorRootAssign(thread, obj, val) ==
/\ IF val # NULL
THEN /\ roots' = [roots EXCEPT ![thread][val] = TRUE]
/\ rc' = [rc EXCEPT ![val] = IF roots[thread][val] THEN @ ELSE @ + 1] \* Increment only if not already a root
ELSE /\ roots' = [roots EXCEPT ![thread][obj] = FALSE] \* Clear root when assigning NULL
/\ rc' = [rc EXCEPT ![obj] = IF roots[thread][obj] THEN @ - 1 ELSE @] \* Decrement old root value
  /\ UNCHANGED <<edges, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Collector Actions
\* ============================================================================
\* Collector acquires global lock for entire collection cycle
CollectorAcquireLock(thread) ==
/\ globalLock = NULL
/\ globalLock' = thread
/\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, mergedRoots, collecting, gcEnv, pendingWrites>>
CollectorMerge ==
/\ globalLock # NULL
/\ MergePendingRoots
CollectorStart ==
/\ globalLock # NULL
/\ StartCollection
\* Mark all roots gray (trial deletion phase)
CollectorMarkGray ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ \E rootIdx \in DOMAIN mergedRoots:
LET root == mergedRoots[rootIdx]
IN MarkGray(root.obj, root.desc)
\* Scan all roots (rescue phase)
CollectorScan ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ \E rootIdx \in DOMAIN mergedRoots:
LET root == mergedRoots[rootIdx]
IN Scan(root.obj, root.desc)
\* Collect white/gray objects (free phase)
CollectorCollect ==
/\ globalLock # NULL
/\ collecting = TRUE
/\ \E rootIdx \in DOMAIN mergedRoots, targetColor \in {colGray, colWhite}:
LET root == mergedRoots[rootIdx]
IN CollectColor(root.obj, root.desc, targetColor)
CollectorEnd ==
/\ globalLock # NULL
/\ EndCollection
CollectorReleaseLock(thread) ==
/\ globalLock = thread
/\ globalLock' = NULL
/\ UNCHANGED <<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, mergedRoots, collecting, gcEnv, pendingWrites>>
\* ============================================================================
\* Next State Relation
\* ============================================================================
Next ==
\/ \E thread \in Threads:
\E destObj \in Objects, oldVal, newVal \in Objects \cup {NULL}, desc \in ObjTypes:
\* Precondition: oldVal must match current graph state (user-level synchronization)
\* ASSUMPTION: Users synchronize pointer assignments with locks, so oldVal always matches
\* the value read before the atomic store. This prevents races at the user level.
/\ LET oldValMatches == CASE oldVal = NULL -> TRUE
[] oldVal \in Objects -> edges[destObj][oldVal]
[] OTHER -> FALSE
IN oldValMatches
/\ MutatorWrite(thread, destObj, "field", oldVal, newVal, desc)
\/ \E thread \in Threads:
\* Handle increment overflow: merge increment buffers when lock becomes available
MutatorWriteMergeInc(thread)
\/ \E thread \in Threads:
\* Handle decrement overflow: merge all buffers when lock becomes available
MutatorWriteMergeDec(thread)
\/ \E thread \in Threads:
       \E obj \in Objects, val \in Objects \cup {NULL}:
         \* obj indexes roots/rc when val = NULL, so it must be a real object
MutatorRootAssign(thread, obj, val)
\/ \E thread \in Threads:
CollectorAcquireLock(thread)
\/ CollectorMerge
\/ CollectorStart
\/ CollectorMarkGray
\/ CollectorScan
\/ CollectorCollect
\/ CollectorEnd
\/ \E thread \in Threads:
CollectorReleaseLock(thread)
\* ============================================================================
\* Initial State
\* ============================================================================
Init ==
  /\ edges = [x \in Objects |-> [y \in Objects |-> FALSE]]   \* Empty graph initially
  /\ roots = [t \in Threads |-> [x \in Objects |-> FALSE]]   \* No stack roots initially
  /\ rc = [x \in Objects |-> 0]                              \* Zero reference counts
  /\ color = [x \in Objects |-> colBlack]                    \* All objects black initially
  /\ inRoots = [x \in Objects |-> FALSE]                     \* No objects in roots array
  /\ toIncLen = [s \in 0..(NumStripes-1) |-> 0]
  /\ toInc = [s \in 0..(NumStripes-1) |-> <<>>]
  /\ toDecLen = [s \in 0..(NumStripes-1) |-> 0]
  /\ toDec = [s \in 0..(NumStripes-1) |-> <<>>]
  /\ lockInc = [s \in 0..(NumStripes-1) |-> NULL]
  /\ lockDec = [s \in 0..(NumStripes-1) |-> NULL]
/\ globalLock = NULL
/\ mergedRoots = <<>>
/\ collecting = FALSE
/\ gcEnv = [touched |-> 0, edges |-> 0, rcSum |-> 0, toFree |-> {}]
/\ pendingWrites = {}
/\ TypeOK
\* ============================================================================
\* Safety Properties
\* ============================================================================
\* Safety: Objects are only freed if they are unreachable from any thread's stack
\*
\* An object is reachable if:
\* - It is a direct stack root (roots[t][obj] = TRUE), OR
\* - There exists a path from a stack root to obj via heap edges
\*
\* Safety guarantee: If an object is reachable, then:
\* - It is not white (not marked for collection), OR
\* - It is in roots array (protected from collection), OR
\* - It is reachable from an object that will be rescued by scanBlack
\*
\* More precisely: Only closed cycles (zero external refs, unreachable) are freed.
\* Helper: Compute next set of reachable objects (one step of transitive closure)
ReachableStep(current) ==
current \cup UNION {{y \in Objects : edges[x][y]} : x \in current}
\* Compute the set of all reachable objects using bounded iteration
\* Since Objects is finite, we iterate at most Cardinality(Objects) times
\* This computes the transitive closure of edges starting from stack roots
\* We unroll the iteration explicitly to avoid recursion issues with TLC
ReachableSet ==
LET StackRoots == {x \in Objects : \E t \in Threads : roots[t][x]}
Step1 == ReachableStep(StackRoots)
Step2 == ReachableStep(Step1)
Step3 == ReachableStep(Step2)
Step4 == ReachableStep(Step3)
\* Add more steps if needed for larger object sets
\* For small models (2 objects), 4 steps is sufficient
IN Step4
\* Check if an object is reachable
Reachable(obj) == obj \in ReachableSet
\* Helper: Check if there's a path from 'from' to 'to'
\* For small object sets, we check all possible paths by checking
\* all combinations of intermediate objects
\* Path of length 0: from = to
\* Path of length 1: edges[from][to]
\* Path of length 2: \E i1: edges[from][i1] /\ edges[i1][to]
\* Path of length 3: \E i1, i2: edges[from][i1] /\ edges[i1][i2] /\ edges[i2][to]
\* etc. up to Cardinality(Objects)
HasPath(from, to) ==
\/ from = to
\/ edges[from][to]
\/ \E i1 \in Objects:
edges[from][i1] /\ (edges[i1][to] \/ \E i2 \in Objects:
edges[i1][i2] /\ (edges[i2][to] \/ \E i3 \in Objects:
edges[i2][i3] /\ edges[i3][to]))
\* Helper: Compute set of objects reachable from a given starting object
\* Uses the same iterative approach as ReachableSet
ReachableFrom(start) ==
LET Step1 == ReachableStep({start})
Step2 == ReachableStep(Step1)
Step3 == ReachableStep(Step2)
Step4 == ReachableStep(Step3)
IN Step4
\* Safety: Reachable objects are never freed (remain white without being collected)
\* A reachable object is safe if:
\* - It's not white (not marked for collection), OR
\* - It's in roots array (protected from collection), OR
\* - There exists a black object in ReachableSet such that obj is reachable from it
\* (the black object will be rescued by scanBlack, which rescues all white objects
\* reachable from black objects)
Safety ==
\A obj \in Objects:
IF obj \in ReachableSet
THEN \/ color[obj] # colWhite \* Not marked for collection
\/ inRoots[obj] \* Protected in roots array
\/ \E blackObj \in ReachableSet:
/\ color[blackObj] = colBlack \* Black object will be rescued by scanBlack
/\ obj \in ReachableFrom(blackObj) \* obj is reachable from blackObj
ELSE TRUE \* Unreachable objects may be freed (this is safe)
\* Invariant: Reference counts match logical counts after merge
\* (This is maintained by MergePendingRoots)
\* Note: Between merge and collection, RC = logicalRC.
\* During collection (after markGray), RC may be modified by trial deletion.
\* RC may be inconsistent when:
\* - globalLock = NULL (buffered changes pending)
\* - globalLock # NULL but merge hasn't happened yet (buffers still have pending changes)
\* RC must equal LogicalRC when:
\* - After merge (buffers are empty) and before collection starts
RCInvariant ==
IF globalLock = NULL
THEN TRUE \* Not in collection, RC may be inconsistent (buffered changes pending)
ELSE IF collecting = FALSE /\ \A s \in 0..(NumStripes-1): toIncLen[s] = 0 /\ toDecLen[s] = 0
THEN \A obj \in Objects: rc[obj] = LogicalRC(obj) \* After merge, buffers empty, RC = logical RC
ELSE TRUE \* During collection or before merge, RC may differ from logicalRC
\* Invariant: Only closed cycles are collected
\* (Objects with external refs are rescued by scanBlack)
CycleInvariant ==
\A obj \in Objects:
IF color[obj] = colWhite /\ ~inRoots[obj]
THEN ExternalRC(obj) = 0
ELSE TRUE
\* ============================================================================
\* Specification
\* ============================================================================
Spec == Init /\ [][Next]_<<edges, roots, rc, color, inRoots, toIncLen, toInc, toDecLen, toDec, lockInc, lockDec, globalLock, mergedRoots, collecting, gcEnv, pendingWrites>>
THEOREM Spec => []Safety
THEOREM Spec => []RCInvariant
THEOREM Spec => []CycleInvariant
====

View File

@@ -0,0 +1,98 @@
discard """
cmd: "nim c --mm:yrc -d:useMalloc --threads:on $file"
output: "ok"
valgrind: "leaks"
disabled: "windows"
disabled: "freebsd"
disabled: "openbsd"
"""
# Test concurrent traversal and mutation of a shared cyclic list under YRC.
# Multiple threads race to replace nodes using a lock for synchronization.
# This exercises YRC's write barrier and cycle collection under contention.
import std/locks
type
Node = ref object
value: int
next: Node
proc newCycle(start, count: int): Node =
result = Node(value: start)
var cur = result
for i in 1..<count:
cur.next = Node(value: start + i)
cur = cur.next
cur.next = result # close the cycle
proc sumCycle(head: Node; count: int): int =
var cur = head
for i in 0..<count:
result += cur.value
cur = cur.next
const
NumThreads = 4
CycleLen = 6
Iterations = 50
var
shared: Node
sharedLock: Lock
threads: array[NumThreads, Thread[int]]
wins: array[NumThreads, int]
proc worker(id: int) {.thread.} =
{.cast(gcsafe).}:
for iter in 0..<Iterations:
# Under the lock, walk the shared list and replace a node's next pointer
withLock sharedLock:
var cur = shared
if cur == nil: continue
for step in 0..<CycleLen:
let nxt = cur.next
if nxt == nil: break
# Replace cur.next with a fresh node that points to nxt.next
let replacement = Node(value: id * 1000 + iter, next: nxt.next)
cur.next = replacement
wins[id] += 1
cur = cur.next
if cur == nil: break
# Outside the lock, create a local cycle to exercise the collector
let local = newCycle(id * 100 + iter, 3)
discard sumCycle(local, 3)
# Create initial shared cyclic list: 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 0
initLock(sharedLock)
shared = newCycle(0, CycleLen)
for i in 0..<NumThreads:
createThread(threads[i], worker, i)
for i in 0..<NumThreads:
joinThread(threads[i])
# Verify: the list is still traversable (no crashes, no dangling pointers).
var totalWins = 0
for i in 0..<NumThreads:
totalWins += wins[i]
# Walk the list to verify it's still a valid cycle (or chain)
var cur = shared
var seen = 0
var maxSteps = CycleLen * 3 # generous bound
while cur != nil and seen < maxSteps:
seen += 1
cur = cur.next
if cur == shared: break # completed the cycle
shared = nil
GC_fullCollect()
deinitLock(sharedLock)
if totalWins > 0 and seen > 0:
echo "ok"
else:
echo "FAIL: wins=", totalWins, " seen=", seen

View File

@@ -0,0 +1,74 @@
discard """
cmd: "nim c --mm:yrc -d:useMalloc --threads:on $file"
output: "ok"
valgrind: "leaks"
disabled: "windows"
disabled: "freebsd"
disabled: "openbsd"
"""
# Test sharing a cyclic list between threads under YRC.
type
Node = ref object
value: int
next: Node
proc newCycle(start, count: int): Node =
## Create a cyclic linked list: start -> start+1 -> ... -> start+count-1 -> start
result = Node(value: start)
var cur = result
for i in 1..<count:
cur.next = Node(value: start + i)
cur = cur.next
cur.next = result # close the cycle
proc sumCycle(head: Node; count: int): int =
var cur = head
for i in 0..<count:
result += cur.value
cur = cur.next
const
NumThreads = 4
NodesPerCycle = 5
var
shared: Node
threads: array[NumThreads, Thread[int]]
results: array[NumThreads, int]
proc worker(id: int) {.thread.} =
# Each thread reads the shared cycle and computes a sum.
# Also creates its own local cycle to exercise the collector.
{.cast(gcsafe).}:
let local = newCycle(id * 100, NodesPerCycle)
let localSum = sumCycle(local, NodesPerCycle)
let sharedSum = sumCycle(shared, NodesPerCycle)
results[id] = sharedSum + localSum
# Create a shared cyclic list: 0 -> 1 -> 2 -> 3 -> 4 -> 0
shared = newCycle(0, NodesPerCycle)
let expectedSharedSum = 0 + 1 + 2 + 3 + 4 # = 10
for i in 0..<NumThreads:
createThread(threads[i], worker, i)
for i in 0..<NumThreads:
joinThread(threads[i])
var allOk = true
for i in 0..<NumThreads:
let expectedLocal = i * 100 * NodesPerCycle + (NodesPerCycle * (NodesPerCycle - 1) div 2)
# sum of id*100, id*100+1, ..., id*100+4
let expected = expectedSharedSum + expectedLocal
if results[i] != expected:
echo "FAIL thread ", i, ": got ", results[i], " expected ", expected
allOk = false
shared = nil # drop the shared cycle, collector should reclaim it
GC_fullCollect()
if allOk:
echo "ok"