diff --git a/compiler/ccgliterals.nim b/compiler/ccgliterals.nim index 069ed48df7..a1ad3ae047 100644 --- a/compiler/ccgliterals.nim +++ b/compiler/ccgliterals.nim @@ -16,7 +16,7 @@ ## implementation. template detectVersion(field, corename) = - if m.g.config.selectedGC in {gcArc, gcOrc, gcAtomicArc, gcHooks}: + if m.g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc, gcHooks}: result = 2 else: result = 1 diff --git a/compiler/ccgtypes.nim b/compiler/ccgtypes.nim index 11fe701c18..98b9ab9a60 100644 --- a/compiler/ccgtypes.nim +++ b/compiler/ccgtypes.nim @@ -277,7 +277,7 @@ proc isInvalidReturnType(conf: ConfigRef; typ: PType, isProc = true): bool = of ctStruct: let t = skipTypes(rettype, typedescInst) if rettype.isImportedCppType or t.isImportedCppType or - (typ.callConv == ccCDecl and conf.selectedGC in {gcArc, gcAtomicArc, gcOrc}): + (typ.callConv == ccCDecl and conf.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcYrc}): # prevents nrvo for cdecl procs; # bug #23401 result = false else: @@ -1692,7 +1692,7 @@ proc genHook(m: BModule; t: PType; info: TLineInfo; op: TTypeAttachedOp; result: echo "ayclic but has this =trace ", t, " ", theProc.ast else: when false: - if op == attachedTrace and m.config.selectedGC == gcOrc and + if op == attachedTrace and m.config.selectedGC in {gcOrc, gcYrc} and containsGarbageCollectedRef(t): # unfortunately this check is wrong for an object type that only contains # .cursor fields like 'Node' inside 'cycleleak'. diff --git a/compiler/commands.nim b/compiler/commands.nim index 9aa66f7887..3d2aabdc03 100644 --- a/compiler/commands.nim +++ b/compiler/commands.nim @@ -245,7 +245,7 @@ proc processCompile(conf: ConfigRef; filename: string) = extccomp.addExternalFileToCompile(conf, found) const - errNoneBoehmRefcExpectedButXFound = "'arc', 'orc', 'atomicArc', 'markAndSweep', 'boehm', 'go', 'none', 'regions', or 'refc' expected, but '$1' found" + errNoneBoehmRefcExpectedButXFound = "'arc', 'orc', 'yrc', 'atomicArc', 'markAndSweep', 'boehm', 'go', 'none', 'regions', or 'refc' expected, but '$1' found" errNoneSpeedOrSizeExpectedButXFound = "'none', 'speed' or 'size' expected, but '$1' found" errGuiConsoleOrLibExpectedButXFound = "'gui', 'console', 'lib' or 'staticlib' expected, but '$1' found" errInvalidExceptionSystem = "'goto', 'setjmp', 'cpp' or 'quirky' expected, but '$1' found" @@ -266,6 +266,7 @@ proc testCompileOptionArg*(conf: ConfigRef; switch, arg: string, info: TLineInfo of "markandsweep": result = conf.selectedGC == gcMarkAndSweep of "destructors", "arc": result = conf.selectedGC == gcArc of "orc": result = conf.selectedGC == gcOrc + of "yrc": result = conf.selectedGC == gcYrc of "hooks": result = conf.selectedGC == gcHooks of "go": result = conf.selectedGC == gcGo of "none": result = conf.selectedGC == gcNone @@ -570,6 +571,7 @@ proc unregisterArcOrc*(conf: ConfigRef) = undefSymbol(conf.symbols, "gcdestructors") undefSymbol(conf.symbols, "gcarc") undefSymbol(conf.symbols, "gcorc") + undefSymbol(conf.symbols, "gcyrc") undefSymbol(conf.symbols, "gcatomicarc") undefSymbol(conf.symbols, "nimSeqsV2") undefSymbol(conf.symbols, "nimV2") @@ -603,6 +605,10 @@ proc processMemoryManagementOption(switch, arg: string, pass: TCmdLinePass, conf.selectedGC = gcOrc defineSymbol(conf.symbols, "gcorc") registerArcOrc(pass, conf) + of "yrc": + conf.selectedGC = gcYrc + defineSymbol(conf.symbols, "gcyrc") + registerArcOrc(pass, conf) of "atomicarc": conf.selectedGC = gcAtomicArc defineSymbol(conf.symbols, "gcatomicarc") diff --git a/compiler/injectdestructors.nim 
b/compiler/injectdestructors.nim index e6ddf79a8a..f23e6ed04d 100644 --- a/compiler/injectdestructors.nim +++ b/compiler/injectdestructors.nim @@ -69,7 +69,7 @@ proc hasDestructor(c: Con; t: PType): bool {.inline.} = result = ast.hasDestructor(t) when toDebug.len > 0: # for more effective debugging - if not result and c.graph.config.selectedGC in {gcArc, gcOrc, gcAtomicArc}: + if not result and c.graph.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}: assert(not containsGarbageCollectedRef(t)) proc getTemp(c: var Con; s: var Scope; typ: PType; info: TLineInfo): PNode = @@ -165,7 +165,7 @@ proc isLastReadImpl(n: PNode; c: var Con; scope: var Scope): bool = template hasDestructorOrAsgn(c: var Con, typ: PType): bool = # bug #23354; an object type could have a non-trivial assignements when it is passed to a sink parameter - hasDestructor(c, typ) or (c.graph.config.selectedGC in {gcArc, gcOrc, gcAtomicArc} and + hasDestructor(c, typ) or (c.graph.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc} and typ.kind == tyObject and not isTrivial(getAttachedOp(c.graph, typ, attachedAsgn))) proc isLastRead(n: PNode; c: var Con; s: var Scope): bool = @@ -329,14 +329,14 @@ proc isCriticalLink(dest: PNode): bool {.inline.} = result = dest.kind != nkSym proc finishCopy(c: var Con; result, dest: PNode; flags: set[MoveOrCopyFlag]; isFromSink: bool) = - if c.graph.config.selectedGC == gcOrc and IsExplicitSink notin flags: + if c.graph.config.selectedGC in {gcOrc, gcYrc} and IsExplicitSink notin flags: # add cyclic flag, but not to sink calls, which IsExplicitSink generates let t = dest.typ.skipTypes(tyUserTypeClasses + {tyGenericInst, tyAlias, tySink, tyDistinct}) if cyclicType(c.graph, t): result.add boolLit(c.graph, result.info, isFromSink or isCriticalLink(dest)) proc genMarkCyclic(c: var Con; result, dest: PNode) = - if c.graph.config.selectedGC == gcOrc: + if c.graph.config.selectedGC in {gcOrc, gcYrc}: let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink, tyDistinct}) if cyclicType(c.graph, t): if t.kind == tyRef: @@ -495,7 +495,7 @@ proc passCopyToSink(n: PNode; c: var Con; s: var Scope): PNode = localError(c.graph.config, n.info, errFailedMove, ("cannot move '$1', passing '$1' to a sink parameter introduces an implicit copy") % $n) else: - if c.graph.config.selectedGC in {gcArc, gcOrc, gcAtomicArc}: + if c.graph.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}: assert(not containsManagedMemory(nTyp)) if nTyp.skipTypes(abstractInst).kind in {tyOpenArray, tyVarargs}: localError(c.graph.config, n.info, "cannot create an implicit openArray copy to be passed to a sink parameter") @@ -926,7 +926,7 @@ proc p(n: PNode; c: var Con; s: var Scope; mode: ProcessMode; tmpFlags = {sfSing if n[0].kind == nkSym and n[0].sym.magic in {mNew, mNewFinalize}: result[0] = copyTree(n[0]) - if c.graph.config.selectedGC in {gcHooks, gcArc, gcAtomicArc, gcOrc}: + if c.graph.config.selectedGC in {gcHooks, gcArc, gcAtomicArc, gcOrc, gcYrc}: let destroyOld = c.genDestroy(result[1]) result = newTree(nkStmtList, destroyOld, result) else: diff --git a/compiler/liftdestructors.nim b/compiler/liftdestructors.nim index 24badb364a..1459a05da3 100644 --- a/compiler/liftdestructors.nim +++ b/compiler/liftdestructors.nim @@ -163,7 +163,7 @@ proc fillBodyObj(c: var TLiftCtx; n, body, x, y: PNode; enforceDefaultOp: bool, if c.filterDiscriminator != nil: return let f = n.sym let b = if c.kind == attachedTrace: y else: y.dotField(f) - if (sfCursor in f.flags and c.g.config.selectedGC in {gcArc, gcAtomicArc, gcOrc, 
gcHooks}) or + if (sfCursor in f.flags and c.g.config.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcYrc, gcHooks}) or enforceDefaultOp: defaultOp(c, f.typ, body, x.dotField(f), b) else: @@ -730,14 +730,43 @@ proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) = dest[] = source decRef tmp + For YRC the write barrier is more complicated still and must be: + + let tmp = dest + # assignment must come first so that the collector sees the most-recent graph: + atomic: dest[] = source + # Then teach the cycle collector about the changes edge (these use locks, see yrc.nim): + incRef source + decRef tmp + + This is implemented as a single runtime call (nimAsgnYrc / nimSinkYrc). ]# var actions = newNodeI(nkStmtList, c.info) let elemType = t.elementType createTypeBoundOps(c.g, c.c, elemType, c.info, c.idgen) - let isCyclic = c.g.config.selectedGC == gcOrc and types.canFormAcycle(c.g, elemType) - let isInheritableAcyclicRef = c.g.config.selectedGC == gcOrc and + # YRC uses dedicated runtime procs for the entire write barrier: + if c.g.config.selectedGC == gcYrc: + let desc = + if isFinal(elemType): + let ti = genBuiltin(c, mGetTypeInfoV2, "getTypeInfoV2", newNodeIT(nkType, x.info, elemType)) + ti.typ = getSysType(c.g, c.info, tyPointer) + ti + else: + newNodeIT(nkNilLit, c.info, getSysType(c.g, c.info, tyPointer)) + case c.kind + of attachedAsgn, attachedDup: + body.add callCodegenProc(c.g, "nimAsgnYrc", c.info, genAddr(c, x), y, desc) + return + of attachedSink: + body.add callCodegenProc(c.g, "nimSinkYrc", c.info, genAddr(c, x), y, desc) + return + else: discard # fall through for destructor, trace, wasMoved + + let isCyclic = c.g.config.selectedGC in {gcOrc, gcYrc} and types.canFormAcycle(c.g, elemType) + + let isInheritableAcyclicRef = c.g.config.selectedGC in {gcOrc, gcYrc} and (not isPureObject(elemType)) and tfAcyclic in skipTypes(elemType, abstractInst+{tyOwned}-{tyTypeDesc}).flags # dynamic Acyclic refs need to use dyn decRef @@ -819,7 +848,26 @@ proc atomicClosureOp(c: var TLiftCtx; t: PType; body, x, y: PNode) = let xenv = genBuiltin(c, mAccessEnv, "accessEnv", x) xenv.typ = getSysType(c.g, c.info, tyPointer) - let isCyclic = c.g.config.selectedGC == gcOrc + # Closures are (fnPtr, env) pairs. nimAsgnYrc/nimSinkYrc handle the env pointer + # (atomic store + buffered inc/dec). We also need newAsgnStmt to copy the fnPtr. 
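+  # A sketch of what this generates for a closure copy `x = y` under --mm:yrc
+  # (illustrative pseudo-lowering, not the literal generated code):
+  #
+  #   nimAsgnYrc(addr x.env, y.env, nil)   # env: atomic store + buffered inc/dec
+  #   x = y                                # raw pair copy from newAsgnStmt; updates the fn pointer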
+ if c.g.config.selectedGC == gcYrc: + let nilDesc = newNodeIT(nkNilLit, c.info, getSysType(c.g, c.info, tyPointer)) + let yenv = genBuiltin(c, mAccessEnv, "accessEnv", y) + yenv.typ = getSysType(c.g, c.info, tyPointer) + case c.kind + of attachedAsgn, attachedDup: + # nimAsgnYrc: save old env, atomic store new env, inc new env, dec old env + body.add callCodegenProc(c.g, "nimAsgnYrc", c.info, genAddr(c, xenv), yenv, nilDesc) + # Raw struct copy to also update the function pointer (env write is redundant but benign) + body.add newAsgnStmt(x, y) + return + of attachedSink: + body.add callCodegenProc(c.g, "nimSinkYrc", c.info, genAddr(c, xenv), yenv, nilDesc) + body.add newAsgnStmt(x, y) + return + else: discard # fall through for destructor, trace, wasMoved + + let isCyclic = c.g.config.selectedGC in {gcOrc, gcYrc} let tmp = if isCyclic and c.kind in {attachedAsgn, attachedSink, attachedDup}: declareTempOf(c, body, xenv) @@ -852,7 +900,6 @@ proc atomicClosureOp(c: var TLiftCtx; t: PType; body, x, y: PNode) = body.add genIf(c, cond, actions) else: body.add genIf(c, yenv, callCodegenProc(c.g, "nimIncRef", c.info, yenv)) - body.add genIf(c, cond, actions) body.add newAsgnStmt(x, y) of attachedDup: @@ -937,7 +984,7 @@ proc closureOp(c: var TLiftCtx; t: PType; body, x, y: PNode) = call[1] = y body.add newAsgnStmt(x, call) elif (optOwnedRefs in c.g.config.globalOptions and - optRefCheck in c.g.config.options) or c.g.config.selectedGC in {gcArc, gcAtomicArc, gcOrc}: + optRefCheck in c.g.config.options) or c.g.config.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcYrc}: let xx = genBuiltin(c, mAccessEnv, "accessEnv", x) xx.typ = getSysType(c.g, c.info, tyPointer) case c.kind @@ -992,7 +1039,7 @@ proc fillBody(c: var TLiftCtx; t: PType; body, x, y: PNode) = tyPtr, tyUncheckedArray, tyVar, tyLent: defaultOp(c, t, body, x, y) of tyRef: - if c.g.config.selectedGC in {gcArc, gcOrc, gcAtomicArc}: + if c.g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}: atomicRefOp(c, t, body, x, y) elif (optOwnedRefs in c.g.config.globalOptions and optRefCheck in c.g.config.options): @@ -1001,7 +1048,7 @@ proc fillBody(c: var TLiftCtx; t: PType; body, x, y: PNode) = defaultOp(c, t, body, x, y) of tyProc: if t.callConv == ccClosure: - if c.g.config.selectedGC in {gcArc, gcOrc, gcAtomicArc}: + if c.g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}: atomicClosureOp(c, t, body, x, y) else: closureOp(c, t, body, x, y) @@ -1125,7 +1172,7 @@ proc symDupPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttache result.typ.addParam src - if g.config.selectedGC == gcOrc and + if g.config.selectedGC in {gcOrc, gcYrc} and cyclicType(g, typ.skipTypes(abstractInst)): let cycleParam = newSym(skParam, getIdent(g.cache, "cyclic"), idgen, result, info) @@ -1152,7 +1199,7 @@ proc symPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttachedOp let src = newSym(skParam, getIdent(g.cache, if kind == attachedTrace: "env" else: "src"), idgen, result, info) - if kind == attachedDestructor and g.config.selectedGC in {gcArc, gcOrc, gcAtomicArc} and + if kind == attachedDestructor and g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc} and ((g.config.isDefined("nimPreviewNonVarDestructor") and not isDiscriminant) or (typ.kind in {tyRef, tyString, tySequence})): dest.typ = typ else: @@ -1168,7 +1215,7 @@ proc symPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttachedOp if kind notin {attachedDestructor, attachedWasMoved}: result.typ.addParam src - if kind == attachedAsgn and 
g.config.selectedGC == gcOrc and + if kind == attachedAsgn and g.config.selectedGC in {gcOrc, gcYrc} and cyclicType(g, typ.skipTypes(abstractInst)): let cycleParam = newSym(skParam, getIdent(g.cache, "cyclic"), idgen, result, info) @@ -1226,7 +1273,7 @@ proc produceSym(g: ModuleGraph; c: PContext; typ: PType; kind: TTypeAttachedOp; else: var tk: TTypeKind var skipped: PType = nil - if g.config.selectedGC in {gcArc, gcOrc, gcHooks, gcAtomicArc}: + if g.config.selectedGC in {gcArc, gcOrc, gcYrc, gcHooks, gcAtomicArc}: skipped = skipTypes(typ, {tyOrdinal, tyRange, tyInferred, tyGenericInst, tyStatic, tyAlias, tySink}) tk = skipped.kind else: @@ -1348,7 +1395,7 @@ proc createTypeBoundOps(g: ModuleGraph; c: PContext; orig: PType; info: TLineInf # we do not generate '=trace' procs if we # have the cycle detection disabled, saves code size. - let lastAttached = if g.config.selectedGC == gcOrc: attachedTrace + let lastAttached = if g.config.selectedGC in {gcOrc, gcYrc}: attachedTrace else: attachedSink # bug #15122: We need to produce all prototypes before entering the diff --git a/compiler/options.nim b/compiler/options.nim index 993c90205d..fc15ee9792 100644 --- a/compiler/options.nim +++ b/compiler/options.nim @@ -195,6 +195,7 @@ type gcRegions = "regions" gcArc = "arc" gcOrc = "orc" + gcYrc = "yrc" # thread-safe ORC (concurrent cycle collector) gcAtomicArc = "atomicArc" gcMarkAndSweep = "markAndSweep" gcHooks = "hooks" diff --git a/compiler/scriptconfig.nim b/compiler/scriptconfig.nim index 10d3f73bc0..6b3f96cf03 100644 --- a/compiler/scriptconfig.nim +++ b/compiler/scriptconfig.nim @@ -231,7 +231,7 @@ proc runNimScript*(cache: IdentCache; scriptName: AbsoluteFile; if optOwnedRefs in oldGlobalOptions: conf.globalOptions.incl {optTinyRtti, optOwnedRefs, optSeqDestructors} defineSymbol(conf.symbols, "nimv2") - if conf.selectedGC in {gcArc, gcOrc, gcAtomicArc}: + if conf.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc}: conf.globalOptions.incl {optTinyRtti, optSeqDestructors} defineSymbol(conf.symbols, "nimv2") defineSymbol(conf.symbols, "gcdestructors") @@ -241,6 +241,8 @@ proc runNimScript*(cache: IdentCache; scriptName: AbsoluteFile; defineSymbol(conf.symbols, "gcarc") of gcOrc: defineSymbol(conf.symbols, "gcorc") + of gcYrc: + defineSymbol(conf.symbols, "gcyrc") of gcAtomicArc: defineSymbol(conf.symbols, "gcatomicarc") else: diff --git a/compiler/sempass2.nim b/compiler/sempass2.nim index 02ec23fc3e..c206b7f075 100644 --- a/compiler/sempass2.nim +++ b/compiler/sempass2.nim @@ -1756,7 +1756,7 @@ proc trackProc*(c: PContext; s: PSym, body: PNode) = let param = params[i].sym let typ = param.typ if isSinkTypeForParam(typ) or - (t.config.selectedGC in {gcArc, gcOrc, gcAtomicArc} and + (t.config.selectedGC in {gcArc, gcOrc, gcYrc, gcAtomicArc} and (isClosure(typ.skipTypes(abstractInst)) or param.id in t.escapingParams)): createTypeBoundOps(t, typ, param.info) if isOutParam(typ) and param.id notin t.init and s.magic == mNone: diff --git a/compiler/semstmts.nim b/compiler/semstmts.nim index 4ca9302d8d..7e07143837 100644 --- a/compiler/semstmts.nim +++ b/compiler/semstmts.nim @@ -2175,7 +2175,7 @@ proc bindTypeHook(c: PContext; s: PSym; n: PNode; op: TTypeAttachedOp) = template notRefc: bool = # fixes refc with non-var destructor; cancel warnings (#23156) c.config.backend == backendJs or - c.config.selectedGC in {gcArc, gcAtomicArc, gcOrc} + c.config.selectedGC in {gcArc, gcAtomicArc, gcOrc, gcYrc} let cond = case op of attachedWasMoved: t.len == 2 and t.returnType == nil and t.firstParamType.kind == 
tyVar diff --git a/lib/system.nim b/lib/system.nim index b19f6d828b..6104c1b928 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -125,7 +125,7 @@ proc unsafeAddr*[T](x: T): ptr T {.magic: "Addr", noSideEffect.} = const ThisIsSystem = true -const arcLikeMem = defined(gcArc) or defined(gcAtomicArc) or defined(gcOrc) +const arcLikeMem = defined(gcArc) or defined(gcAtomicArc) or defined(gcOrc) or defined(gcYrc) when defined(nimAllowNonVarDestructor) and arcLikeMem: proc new*[T](a: var ref T, finalizer: proc (x: T) {.nimcall.}) {. @@ -356,7 +356,7 @@ proc low*(x: string): int {.magic: "Low", noSideEffect.} ## See also: ## * `high(string) <#high,string>`_ -when not defined(gcArc) and not defined(gcOrc) and not defined(gcAtomicArc): +when not defined(gcArc) and not defined(gcOrc) and not defined(gcYrc) and not defined(gcAtomicArc): proc shallowCopy*[T](x: var T, y: T) {.noSideEffect, magic: "ShallowCopy".} ## Use this instead of `=` for a `shallow copy`:idx:. ## @@ -407,7 +407,7 @@ when defined(nimHasDup): proc `=sink`*[T](x: var T; y: T) {.inline, nodestroy, magic: "Asgn".} = ## Generic `sink`:idx: implementation that can be overridden. - when defined(gcArc) or defined(gcOrc) or defined(gcAtomicArc): + when defined(gcArc) or defined(gcOrc) or defined(gcYrc) or defined(gcAtomicArc): x = y else: shallowCopy(x, y) @@ -2561,7 +2561,7 @@ when compileOption("rangechecks"): else: template rangeCheck*(cond) = discard -when not defined(gcArc) and not defined(gcOrc) and not defined(gcAtomicArc): +when not defined(gcArc) and not defined(gcOrc) and not defined(gcYrc) and not defined(gcAtomicArc): proc shallow*[T](s: var seq[T]) {.noSideEffect, inline.} = ## Marks a sequence `s` as `shallow`:idx:. Subsequent assignments will not ## perform deep copies of `s`. @@ -2630,7 +2630,7 @@ when hasAlloc or defined(nimscript): setLen(x, xl+item.len) var j = xl-1 while j >= i: - when defined(gcArc) or defined(gcOrc) or defined(gcAtomicArc): + when defined(gcArc) or defined(gcOrc) or defined(gcYrc) or defined(gcAtomicArc): x[j+item.len] = move x[j] else: shallowCopy(x[j+item.len], x[j]) diff --git a/lib/system/arc.nim b/lib/system/arc.nim index 14da1531c2..3ac84be3bb 100644 --- a/lib/system/arc.nim +++ b/lib/system/arc.nim @@ -16,7 +16,7 @@ runtime type and only contains a reference count. {.push raises: [], rangeChecks: off.} -when defined(gcOrc): +when defined(gcOrc) or defined(gcYrc): const rcIncrement = 0b10000 # so that lowest 4 bits are not touched rcMask = 0b1111 @@ -36,12 +36,12 @@ type rc: int # the object header is now a single RC field. # we could remove it in non-debug builds for the 'owned ref' # design but this seems unwise. - when defined(gcOrc): + when defined(gcOrc) or defined(gcYrc): rootIdx: int # thanks to this we can delete potential cycle roots # in O(1) without doubly linked lists when defined(nimArcDebug) or defined(nimArcIds): refId: int - when defined(gcOrc) and orcLeakDetector: + when (defined(gcOrc) or defined(gcYrc)) and orcLeakDetector: filename: cstring line: int @@ -74,7 +74,7 @@ elif defined(nimArcIds): const traceId = -1 -when defined(gcAtomicArc) and hasThreadSupport: +when (defined(gcAtomicArc) or defined(gcYrc)) and hasThreadSupport: template decrement(cell: Cell): untyped = discard atomicDec(cell.rc, rcIncrement) template increment(cell: Cell): untyped = @@ -119,7 +119,7 @@ proc nimNewObjUninit(size, alignment: int): pointer {.compilerRtl.} = else: result = cast[ptr RefHeader](alignedAlloc(s, alignment) +! 
hdrSize) head(result).rc = 0 - when defined(gcOrc): + when defined(gcOrc) or defined(gcYrc): head(result).rootIdx = 0 when defined(nimArcDebug): head(result).refId = gRefId @@ -157,7 +157,7 @@ proc nimIncRef(p: pointer) {.compilerRtl, inl.} = when traceCollector: cprintf("[INCREF] %p\n", head(p)) -when not defined(gcOrc) or defined(nimThinout): +when not (defined(gcOrc) or defined(gcYrc)) or defined(nimThinout): proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} = # This is only used by the old RTTI mechanism and we know # that 'dest[]' is nil and needs no destruction. Which is really handy @@ -208,7 +208,9 @@ proc nimDestroyAndDispose(p: pointer) {.compilerRtl, quirky, raises: [].} = cstderr.rawWrite "has destructor!\n" nimRawDispose(p, rti.align) -when defined(gcOrc): +when defined(gcYrc): + include yrc +elif defined(gcOrc): when defined(nimThinout): include cyclebreaker else: @@ -225,7 +227,7 @@ proc nimDecRefIsLast(p: pointer): bool {.compilerRtl, inl.} = writeStackTrace() cfprintf(cstderr, "[DecRef] %p %ld\n", p, cell.count) - when defined(gcAtomicArc) and hasThreadSupport: + when (defined(gcAtomicArc) or defined(gcYrc)) and hasThreadSupport: # `atomicDec` returns the new value if atomicDec(cell.rc, rcIncrement) == -rcIncrement: result = true @@ -251,7 +253,7 @@ proc GC_ref*[T](x: ref T) = ## New runtime only supports this operation for 'ref T'. if x != nil: nimIncRef(cast[pointer](x)) -when not defined(gcOrc): +when not (defined(gcOrc) or defined(gcYrc)): template GC_fullCollect* = ## Forces a full garbage collection pass. With `--mm:arc` a nop. discard diff --git a/lib/system/mm/malloc.nim b/lib/system/mm/malloc.nim index 47f1a95ae4..b3f81b5421 100644 --- a/lib/system/mm/malloc.nim +++ b/lib/system/mm/malloc.nim @@ -50,7 +50,7 @@ proc deallocSharedImpl(p: pointer) = deallocImpl(p) proc GC_disable() = discard proc GC_enable() = discard -when not defined(gcOrc): +when not defined(gcOrc) and not defined(gcYrc): proc GC_fullCollect() = discard proc GC_enableMarkAndSweep() = discard proc GC_disableMarkAndSweep() = discard diff --git a/lib/system/yrc.nim b/lib/system/yrc.nim new file mode 100644 index 0000000000..2ce6cf1111 --- /dev/null +++ b/lib/system/yrc.nim @@ -0,0 +1,549 @@ +# +# YRC: Thread-safe ORC (concurrent cycle collector). +# Same API as orc.nim but with striped queues and global lock for merge/collect. +# Destructors for refs run at collection time, not immediately on last decRef. +# See yrc_proof.lean for a Lean 4 proof of safety and deadlock freedom. +# +# ## Key Invariant: Topology vs. Reference Counts +# +# Only `obj.field = x` can change the topology of the heap graph (heap-to-heap +# edges). Local variable assignments (`var local = someRef`) affect reference +# counts but never create heap-to-heap edges and thus cannot create cycles. +# +# The actual pointer write in `obj.field = x` happens immediately and lock-free — +# the graph topology is always up-to-date in memory. Only the RC adjustments are +# deferred: increments and decrements are buffered into per-stripe queues +# (`toInc`, `toDec`) protected by fine-grained per-stripe locks. +# +# When `collectCycles` runs it takes the global lock, drains all stripe buffers +# via `mergePendingRoots`, and then traces the physical pointer graph (via +# `traceImpl`) to detect cycles. This is sound because `trace` follows the actual +# pointer values in memory — which are always current — and uses the reconciled +# RCs only to identify candidate roots and confirm garbage. 
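+#
+# A minimal illustration (hypothetical user code, not part of the runtime) of
+# which operations change the topology and which only touch the counts:
+#
+#   type Node = ref object
+#     next: Node
+#
+#   proc demo =
+#     var a = Node()    # locals: RC traffic only, never a heap-to-heap edge
+#     var b = Node()
+#     a.next = b        # heap-to-heap edge: the pointer store is immediate,
+#                       # the RC adjustments are buffered in this thread's stripe
+#     let c = a.next    # read plus local assignment: RC traffic only
+#     a.next = nil      # edge removed immediately; the dec is buffered until
+#                       # the next merge/collect reconciles the counts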
+# +# In summary: the physical pointer graph is always consistent (writes are +# immediate); only the reference counts are eventually consistent (writes are +# buffered). The per-stripe locks are cheap; the expensive global lock is only +# needed when interpreting the RCs during collection. +# +# ## Why No Write Barrier Is Needed +# +# The classic concurrent-GC hazard is the "lost object" problem: during +# collection the mutator executes `A.field = B` where A is already scanned +# (black), B is reachable only through an unscanned (gray) object C, and then +# C's reference to B is removed. The collector never discovers B and frees it +# while A still points to it. Traditional concurrent collectors need write +# barriers to prevent this. +# +# This problem structurally cannot arise in YRC because the cycle collector only +# frees *closed cycles* — subgraphs where every reference to every member comes +# from within the group, with zero external references. To execute `A.field = B` +# the mutator must hold a reference to A, which means A has an external reference +# (from the stack) that is not a heap-to-heap edge. During trial deletion +# (`markGray`) only internal edges are subtracted from RCs, so A's external +# reference survives, `scan` finds A's RC >= 0, calls `scanBlack`, and rescues A +# and everything reachable from it — including B. In short: the mutator can only +# modify objects it can reach, but the cycle collector only frees objects nothing +# external can reach. The two conditions are mutually exclusive. +# +#[ + +The problem described in Bacon01 is: during markGray/scan, a mutator concurrently +does X.field = Z (was X→Y), changing the physical graph while the collector is tracing +it. The collector might see stale or new edges. The reasons this is still safe: + +Stale edges cancel with unbuffered decrements: If the collector sees old edge X→Y +(mutator already wrote X→Z and buffered dec(Y)), the phantom trial deletion and the +unbuffered dec cancel — Y's effective RC is correct. + +scanBlack rescues via current physical edges: If X has external refs (merged RC reflects +the mutator's access), scanBlack(X) re-traces X and follows the current physical edge X→Z, +incrementing Z's RC and marking it black. Z survives. + +rcSum==edges fast path is conservative: Any discrepancy between physical graph and merged +state (stale or new edges) causes rcSum != edges, falling back to the slow path which +rescues anything with RC >= 0. + +Unreachable cycles are truly unreachable: The mutator can only reach objects through chains +rooted in merged references. If a cycle has zero external refs at merge time, no mutator +can reach it. 
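+
+Concrete interleaving (illustrative): thread T holds `x` on its stack and runs
+`x.field = z` (previously `x.field == y`) while the collector is inside markGray.
+Either the collector saw the old edge x→y, and the phantom trial dec(y) is
+cancelled by the not-yet-merged buffered dec(y); or it saw the new edge x→z
+before inc(z) was merged, and then x's merged RC still reflects T's stack
+reference, so scan() keeps x and scanBlack(x) follows the current edge x→z and
+rescues z.
+
+Terminology note: nimAsgnYrc/nimSinkYrc below are "write barriers" only in the
+reference-counting sense (they make the store atomic and buffer the RC traffic);
+what YRC does not need is a marking barrier that synchronizes the mutator with a
+concurrent tracer, for the reasons given above.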
+
+]#
+
+{.push raises: [].}
+
+include cellseqs_v2
+
+import std/locks
+
+const
+  NumStripes = 64
+  QueueSize = 128
+  RootsThreshold = 10
+
+  colBlack = 0b000
+  colGray = 0b001
+  colWhite = 0b010
+  maybeCycle = 0b100
+  inRootsFlag = 0b1000
+  colorMask = 0b011
+  logOrc = defined(nimArcIds)
+
+type
+  TraceProc = proc (p, env: pointer) {.nimcall, benign, raises: [].}
+  DisposeProc = proc (p: pointer) {.nimcall, benign, raises: [].}
+
+template color(c): untyped = c.rc and colorMask
+template setColor(c, col) =
+  when col == colBlack:
+    c.rc = c.rc and not colorMask
+  else:
+    c.rc = c.rc and not colorMask or col
+
+const
+  optimizedOrc = false
+  useJumpStack = false
+
+type
+  GcEnv = object
+    traceStack: CellSeq[ptr pointer]
+    when useJumpStack:
+      jumpStack: CellSeq[ptr pointer]
+    toFree: CellSeq[Cell]
+    freed, touched, edges, rcSum: int
+    keepThreshold: bool
+
+proc trace(s: Cell; desc: PNimTypeV2; j: var GcEnv) {.inline.} =
+  if desc.traceImpl != nil:
+    var p = s +! sizeof(RefHeader)
+    cast[TraceProc](desc.traceImpl)(p, addr(j))
+
+include threadids
+
+type
+  Stripe = object
+    when not defined(yrcAtomics):
+      lockInc: Lock
+    toIncLen: int
+    toInc: array[QueueSize, Cell]
+    lockDec: Lock
+    toDecLen: int
+    toDec: array[QueueSize, (Cell, PNimTypeV2)]
+
+type
+  PreventThreadFromCollectProc* = proc(): bool {.nimcall, benign, raises: [].}
+    ## Callback run before this thread runs the cycle collector.
+    ## Return `true` to prevent this thread from running the cycle collector
+    ## (e.g. for a hard real-time thread), `false` to allow collection.
+    ## Invoked while holding the global lock; must not call back into YRC.
+
+var
+  gYrcGlobalLock: Lock
+  roots: CellSeq[Cell] # merged roots, used under global lock
+  stripes: array[NumStripes, Stripe]
+  rootsThreshold: int = 128
+  defaultThreshold = when defined(nimFixedOrc): 10_000 else: 128
+  gPreventThreadFromCollectProc: PreventThreadFromCollectProc = nil
+
+proc GC_setPreventThreadFromCollectProc*(cb: PreventThreadFromCollectProc) =
+  ##[ Can be used to customize the cycle collector for a thread. For example,
+  to ensure that a hard realtime thread cannot run the cycle collector use:
+
+  ```nim
+  var hardRealTimeThread: int
+  GC_setPreventThreadFromCollectProc(proc(): bool {.nimcall.} = hardRealTimeThread == getThreadId())
+  ```
+
+  To ensure that a hard realtime thread cannot be involved in any cycle collector activity use:
+
+  ```nim
+  GC_setPreventThreadFromCollectProc(proc(): bool {.nimcall.} =
+    if hardRealTimeThread == getThreadId():
+      writeStackTrace()
+      echo "Realtime thread involved in unpredictable cycle collector activity!"
+      result = true)
+  ```
+  ]##
+  gPreventThreadFromCollectProc = cb
+
+proc GC_getPreventThreadFromCollectProc*(): PreventThreadFromCollectProc =
+  ## Returns the current "prevent thread from collecting proc".
+  ## Typically `nil` if not set.
+ result = gPreventThreadFromCollectProc + +proc mayRunCycleCollect(): bool {.inline.} = + if gPreventThreadFromCollectProc == nil: true + else: not gPreventThreadFromCollectProc() + +proc getStripeIdx(): int {.inline.} = + getThreadId() and (NumStripes - 1) + +proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} = + let h = head(p) + when optimizedOrc: + if cyclic: h.rc = h.rc or maybeCycle + when defined(yrcAtomics): + let s = getStripeIdx() + let slot = atomicFetchAdd(addr stripes[s].toIncLen, 1, ATOMIC_ACQ_REL) + if slot < QueueSize: + atomicStoreN(addr stripes[s].toInc[slot], h, ATOMIC_RELEASE) + else: + withLock gYrcGlobalLock: + h.rc = h.rc +% rcIncrement + for i in 0..", importc.} + +proc nimTraceRef(q: pointer; desc: PNimTypeV2; env: pointer) {.compilerRtl, inl.} = + let p = cast[ptr pointer](q) + if p[] != nil: + orcAssert strstr(desc.name, "TType") == nil, "following a TType but it's acyclic!" + var j = cast[ptr GcEnv](env) + j.traceStack.add(p, desc) + +proc nimTraceRefDyn(q: pointer; env: pointer) {.compilerRtl, inl.} = + let p = cast[ptr pointer](q) + if p[] != nil: + var j = cast[ptr GcEnv](env) + j.traceStack.add(p, cast[ptr PNimTypeV2](p[])[]) + +proc scanBlack(s: Cell; desc: PNimTypeV2; j: var GcEnv) = + s.setColor colBlack + let until = j.traceStack.len + trace(s, desc, j) + when logOrc: writeCell("root still alive", s, desc) + while j.traceStack.len > until: + let (entry, desc) = j.traceStack.pop() + let t = head entry[] + t.rc = t.rc +% rcIncrement + if t.color != colBlack: + t.setColor colBlack + trace(t, desc, j) + when logOrc: writeCell("child still alive", t, desc) + +proc markGray(s: Cell; desc: PNimTypeV2; j: var GcEnv) = + if s.color != colGray: + s.setColor colGray + j.touched = j.touched +% 1 + j.rcSum = j.rcSum +% (s.rc shr rcShift) +% 1 + orcAssert(j.traceStack.len == 0, "markGray: trace stack not empty") + trace(s, desc, j) + while j.traceStack.len > 0: + let (entry, desc) = j.traceStack.pop() + let t = head entry[] + t.rc = t.rc -% rcIncrement + j.edges = j.edges +% 1 + if t.color != colGray: + t.setColor colGray + j.touched = j.touched +% 1 + j.rcSum = j.rcSum +% (t.rc shr rcShift) +% 2 + trace(t, desc, j) + +proc scan(s: Cell; desc: PNimTypeV2; j: var GcEnv) = + if s.color == colGray: + if (s.rc shr rcShift) >= 0: + scanBlack(s, desc, j) + else: + orcAssert(j.traceStack.len == 0, "scan: trace stack not empty") + s.setColor(colWhite) + trace(s, desc, j) + while j.traceStack.len > 0: + let (entry, desc) = j.traceStack.pop() + let t = head entry[] + if t.color == colGray: + if (t.rc shr rcShift) >= 0: + scanBlack(t, desc, j) + else: + t.setColor(colWhite) + trace(t, desc, j) + +proc collectColor(s: Cell; desc: PNimTypeV2; col: int; j: var GcEnv) = + if s.color == col and (s.rc and inRootsFlag) == 0: + orcAssert(j.traceStack.len == 0, "collectWhite: trace stack not empty") + s.setColor(colBlack) + j.toFree.add(s, desc) + trace(s, desc, j) + while j.traceStack.len > 0: + let (entry, desc) = j.traceStack.pop() + let t = head entry[] + entry[] = nil + if t.color == col and (t.rc and inRootsFlag) == 0: + j.toFree.add(t, desc) + t.setColor(colBlack) + trace(t, desc, j) + +proc collectCyclesBacon(j: var GcEnv; lowMark: int) = + let last = roots.len -% 1 + when logOrc: + for i in countdown(last, lowMark): + writeCell("root", roots.d[i][0], roots.d[i][1]) + for i in countdown(last, lowMark): + markGray(roots.d[i][0], roots.d[i][1], j) + var colToCollect = colWhite + if j.rcSum == j.edges: + colToCollect = colGray + j.keepThreshold = true + else: + for 
i in countdown(last, lowMark): + scan(roots.d[i][0], roots.d[i][1], j) + init j.toFree + for i in 0 ..< roots.len: + let s = roots.d[i][0] + s.rc = s.rc and not inRootsFlag + collectColor(s, roots.d[i][1], colToCollect, j) + when not defined(nimStressOrc): + let oldThreshold = rootsThreshold + rootsThreshold = high(int) + roots.len = 0 + for i in 0 ..< j.toFree.len: + when orcLeakDetector: + writeCell("CYCLIC OBJECT FREED", j.toFree.d[i][0], j.toFree.d[i][1]) + free(j.toFree.d[i][0], j.toFree.d[i][1]) + when not defined(nimStressOrc): + rootsThreshold = oldThreshold + j.freed = j.freed +% j.toFree.len + deinit j.toFree + +when defined(nimOrcStats): + var freedCyclicObjects {.threadvar.}: int + +proc collectCycles() = + when logOrc: + cfprintf(cstderr, "[collectCycles] begin\n") + withLock gYrcGlobalLock: + mergePendingRoots() + if roots.len >= RootsThreshold and mayRunCycleCollect(): + var j: GcEnv + init j.traceStack + collectCyclesBacon(j, 0) + if roots.len == 0 and roots.d != nil: + deinit roots + when not defined(nimStressOrc): + if j.keepThreshold: + discard + elif j.freed *% 2 >= j.touched: + when not defined(nimFixedOrc): + rootsThreshold = max(rootsThreshold div 3 *% 2, 16) + else: + rootsThreshold = 0 + elif rootsThreshold < high(int) div 4: + rootsThreshold = (if rootsThreshold <= 0: defaultThreshold else: rootsThreshold) + rootsThreshold = rootsThreshold div 2 +% rootsThreshold + when logOrc: + cfprintf(cstderr, "[collectCycles] end; freed %ld new threshold %ld\n", j.freed, rootsThreshold) + when defined(nimOrcStats): + inc freedCyclicObjects, j.freed + deinit j.traceStack + +when defined(nimOrcStats): + type + OrcStats* = object + freedCyclicObjects*: int + proc GC_orcStats*(): OrcStats = + result = OrcStats(freedCyclicObjects: freedCyclicObjects) + +proc GC_runOrc* = + withLock gYrcGlobalLock: + mergePendingRoots() + if mayRunCycleCollect(): + var j: GcEnv + init j.traceStack + collectCyclesBacon(j, 0) + deinit j.traceStack + roots.len = 0 + when logOrc: orcAssert roots.len == 0, "roots not empty!" 
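+
+# Usage sketch (hypothetical application code; `serveRequest` is a placeholder):
+# the ORC entry points keep their names under --mm:yrc, so manual scheduling of
+# cycle collection works the same way as with ORC:
+#
+#   GC_disableOrc()                 # raise the adaptive threshold
+#   while serveRequest():
+#     if GC_prepareOrc() > 1_000:   # merge the stripe buffers, inspect the root count
+#       GC_partialCollect(100)      # bounded amount of cycle work per iteration
+#   GC_runOrc()                     # full collection, e.g. at shutdown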
+ +proc GC_enableOrc*() = + when not defined(nimStressOrc): + rootsThreshold = 0 + +proc GC_disableOrc*() = + when not defined(nimStressOrc): + rootsThreshold = high(int) + +proc GC_prepareOrc*(): int {.inline.} = + withLock gYrcGlobalLock: + mergePendingRoots() + result = roots.len + +proc GC_partialCollect*(limit: int) = + withLock gYrcGlobalLock: + mergePendingRoots() + if roots.len > limit and mayRunCycleCollect(): + var j: GcEnv + init j.traceStack + collectCyclesBacon(j, limit) + deinit j.traceStack + roots.len = limit + +proc GC_fullCollect* = + GC_runOrc() + +proc GC_enableMarkAndSweep*() = GC_enableOrc() +proc GC_disableMarkAndSweep*() = GC_disableOrc() + +const acyclicFlag = 1 + +when optimizedOrc: + template markedAsCyclic(s: Cell; desc: PNimTypeV2): bool = + (desc.flags and acyclicFlag) == 0 and (s.rc and maybeCycle) != 0 +else: + template markedAsCyclic(s: Cell; desc: PNimTypeV2): bool = + (desc.flags and acyclicFlag) == 0 + +proc nimDecRefIsLastCyclicDyn(p: pointer): bool {.compilerRtl, inl.} = + result = false + if p != nil: + let cell = head(p) + let desc = cast[ptr PNimTypeV2](p)[] + let idx = getStripeIdx() + while true: + var overflow = false + withLock stripes[idx].lockDec: + if stripes[idx].toDecLen < QueueSize: + stripes[idx].toDec[stripes[idx].toDecLen] = (cell, desc) + stripes[idx].toDecLen += 1 + else: + overflow = true + if overflow: + collectCycles() + else: + break + +proc nimDecRefIsLastDyn(p: pointer): bool {.compilerRtl, inl.} = + nimDecRefIsLastCyclicDyn(p) + +proc nimDecRefIsLastCyclicStatic(p: pointer; desc: PNimTypeV2): bool {.compilerRtl, inl.} = + result = false + if p != nil: + let cell = head(p) + let idx = getStripeIdx() + while true: + var overflow = false + withLock stripes[idx].lockDec: + if stripes[idx].toDecLen < QueueSize: + stripes[idx].toDec[stripes[idx].toDecLen] = (cell, desc) + stripes[idx].toDecLen += 1 + else: + overflow = true + if overflow: + collectCycles() + else: + break + +proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} = + dest[] = src + if src != nil: nimIncRefCyclic(src, true) + +proc yrcDec(tmp: pointer; desc: PNimTypeV2) {.inline.} = + if desc != nil: + discard nimDecRefIsLastCyclicStatic(tmp, desc) + else: + discard nimDecRefIsLastCyclicDyn(tmp) + +proc nimAsgnYrc(dest: ptr pointer; src: pointer; desc: PNimTypeV2) {.compilerRtl.} = + ## YRC write barrier for ref copy assignment. + ## Atomically stores src into dest, then buffers RC adjustments. + ## Freeing is always done by the cycle collector, never inline. + let tmp = dest[] + atomicStoreN(dest, src, ATOMIC_RELEASE) + if src != nil: + nimIncRefCyclic(src, true) + if tmp != nil: + yrcDec(tmp, desc) + +proc nimSinkYrc(dest: ptr pointer; src: pointer; desc: PNimTypeV2) {.compilerRtl.} = + ## YRC write barrier for ref sink (move). No incRef on source. + ## Freeing is always done by the cycle collector, never inline. + let tmp = dest[] + atomicStoreN(dest, src, ATOMIC_RELEASE) + if tmp != nil: + yrcDec(tmp, desc) + +proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} = + when optimizedOrc: + if p != nil: + let h = head(p) + h.rc = h.rc or maybeCycle + +# Initialize locks at module load +initLock(gYrcGlobalLock) +for i in 0.. exact hr + | step _ _ _ hedge ih => exact Reachable.step _ _ ih hedge + +/-! ### What the collector frees -/ + +/-- An object has an *external reference* if some thread's stack roots point to it. 
-/ +def hasExternalRef (s : State) (x : Obj) : Prop := + ∃ t, s.roots t x + +/-- An object is *externally anchored* if it is heap-reachable from some + object that has an external reference. This is what scanBlack computes: + it starts from objects with trialRC ≥ 0 (= has external refs) and traces + the current physical graph. -/ +def anchored (s : State) (x : Obj) : Prop := + ∃ r, hasExternalRef s r ∧ HeapReachable s r x + +/-- The collector frees `x` only if `x` is *not anchored*: + no external ref, and not reachable from any externally-referenced object. + This models: after trial deletion, x remained white, and scanBlack + didn't rescue it. -/ +def collectorFrees (s : State) (x : Obj) : Prop := + ¬ anchored s x + +/-! ### Main safety theorem -/ + +/-- **Lemma**: Every reachable object is anchored. + If thread `t` reaches `x`, then there is a chain from a stack root + (which has an external ref) through heap edges to `x`. -/ +theorem reachable_is_anchored (s : State) (x : Obj) + (h : Reachable s x) : anchored s x := by + induction h with + | root t x hroot => + exact ⟨x, ⟨t, hroot⟩, HeapReachable.refl x⟩ + | step a b h_reach_a h_edge ih => + obtain ⟨r, h_ext_r, h_path_r_a⟩ := ih + exact ⟨r, h_ext_r, HeapReachable.step r a b h_path_r_a h_edge⟩ + +/-- **Main Safety Theorem**: If the collector frees `x`, then no thread + can reach `x`. Freed objects are unreachable. + + This is the contrapositive of `reachable_is_anchored`. -/ +theorem yrc_safety (s : State) (x : Obj) + (h_freed : collectorFrees s x) : ¬ Reachable s x := by + intro h_reach + exact h_freed (reachable_is_anchored s x h_reach) + +/-! ### The write barrier preserves reachability -/ + +/-- Model of `nimAsgnYrc(dest_field_of_a, src)`: + Object `a` had a field pointing to `old`, now points to `src`. + Graph update is immediate. The new edge takes priority (handles src = old). -/ +def writeBarrier (s : State) (a old src : Obj) : State := + { s with + edges := fun x y => + if x = a ∧ y = src then True + else if x = a ∧ y = old then False + else s.edges x y + pendingInc := fun x => if x = src then s.pendingInc x + 1 else s.pendingInc x + pendingDec := fun x => if x = old then s.pendingDec x + 1 else s.pendingDec x } + +/-- **No Lost Object Theorem**: If thread `t` holds a stack ref to `a` and + executes `a.field = b` (replacing old), then `b` is reachable afterward. + + This is why the "lost object" problem from concurrent GC literature + doesn't arise in YRC: the atomic store makes `a→b` visible immediately, + and `a` is anchored (thread `t` holds it), so scanBlack traces `a→b` + and rescues `b`. -/ +theorem no_lost_object (s : State) (t : Thread) (a old b : Obj) + (h_root_a : s.roots t a) : + Reachable (writeBarrier s a old b) b := by + apply Reachable.step a b + · exact Reachable.root t a h_root_a + · simp [writeBarrier] + +/-! ### Non-atomic write barrier window safety + + The write barrier does three steps non-atomically: + 1. atomicStore(dest, src) — graph update + 2. buffer inc(src) — deferred + 3. 
buffer dec(old) — deferred + + If the collector runs between steps 1 and 2 (inc not yet buffered): + - src has a new incoming heap edge not yet reflected in RCs + - But src is reachable from the mutator's stack (mutator held a ref to store it) + - So src has an external ref → trialRC ≥ 1 → scanBlack rescues src ✓ + + If the collector runs between steps 2 and 3 (dec not yet buffered): + - old's RC is inflated by 1 (the dec hasn't arrived) + - This is conservative: old appears to have more refs than it does + - Trial deletion won't spuriously free it ✓ +-/ + +/-- Model the state between steps 1-2: graph updated, inc not yet buffered. + `src` has new edge but RC doesn't reflect it yet. -/ +def stateAfterStore (s : State) (a old src : Obj) : State := + { s with + edges := fun x y => + if x = a ∧ y = src then True + else if x = a ∧ y = old then False + else s.edges x y } + +/-- Even in the window between atomic store and buffered inc, + src is still reachable (from the mutator's stack via a→src). -/ +theorem src_reachable_in_window (s : State) (t : Thread) (a old src : Obj) + (h_root_a : s.roots t a) : + Reachable (stateAfterStore s a old src) src := by + apply Reachable.step a src + · exact Reachable.root t a h_root_a + · simp [stateAfterStore] + +/-- Therefore src is anchored in the window → collector won't free it. -/ +theorem src_safe_in_window (s : State) (t : Thread) (a old src : Obj) + (h_root_a : s.roots t a) : + ¬ collectorFrees (stateAfterStore s a old src) src := by + intro h_freed + exact h_freed (reachable_is_anchored _ _ (src_reachable_in_window s t a old src h_root_a)) + +/-! ### Deadlock freedom + + YRC uses three classes of locks: + • gYrcGlobalLock (level 0) + • stripes[i].lockInc (level 2*i + 1, for i in 0..N-1) + • stripes[i].lockDec (level 2*i + 2, for i in 0..N-1) + + Total order: global < lockInc[0] < lockDec[0] < lockInc[1] < lockDec[1] < ... + + Every code path in yrc.nim acquires locks in strictly ascending level order: + + **nimIncRefCyclic** (mutator fast path): + acquire lockInc[myStripe] → release → done. + Holds exactly one lock. ✓ + + **nimIncRefCyclic** (overflow path): + acquire gYrcGlobalLock (level 0), then for i=0..N-1: acquire lockInc[i] → release. + Ascending: 0 < 1 < 3 < 5 < ... ✓ + + **nimDecRefIsLastCyclic{Dyn,Static}** (fast path): + acquire lockDec[myStripe] → release → done. + Holds exactly one lock. ✓ + + **nimDecRefIsLastCyclic{Dyn,Static}** (overflow path): + calls collectCycles → acquire gYrcGlobalLock (level 0), + then mergePendingRoots which for i=0..N-1: + acquire lockInc[i] → release, acquire lockDec[i] → release. + Ascending: 0 < 1 < 2 < 3 < 4 < ... ✓ + + **collectCycles / GC_runOrc** (collector): + acquire gYrcGlobalLock (level 0), + then mergePendingRoots (same ascending pattern as above). ✓ + + **nimAsgnYrc / nimSinkYrc** (write barrier): + Calls nimIncRefCyclic then nimDecRefIsLastCyclic*. + Each call acquires and releases its lock independently. + No nesting between the two calls. ✓ + + Since every path follows the total order, deadlock is impossible. +-/ + +/-- Lock levels in YRC. Each lock maps to a unique natural number. -/ +inductive LockId (n : Nat) where + | global : LockId n + | lockInc (i : Nat) (h : i < n) : LockId n + | lockDec (i : Nat) (h : i < n) : LockId n + +/-- The level (priority) of each lock in the total order. -/ +def lockLevel {n : Nat} : LockId n → Nat + | .global => 0 + | .lockInc i _ => 2 * i + 1 + | .lockDec i _ => 2 * i + 2 + +/-- All lock levels are distinct (the level function is injective). 
-/ +theorem lockLevel_injective {n : Nat} (a b : LockId n) + (h : lockLevel a = lockLevel b) : a = b := by + cases a with + | global => + cases b with + | global => rfl + | lockInc j hj => simp [lockLevel] at h + | lockDec j hj => simp [lockLevel] at h + | lockInc i hi => + cases b with + | global => simp [lockLevel] at h + | lockInc j hj => + have : i = j := by simp [lockLevel] at h; omega + subst this; rfl + | lockDec j hj => simp [lockLevel] at h; omega + | lockDec i hi => + cases b with + | global => simp [lockLevel] at h + | lockInc j hj => simp [lockLevel] at h; omega + | lockDec j hj => + have : i = j := by simp [lockLevel] at h; omega + subst this; rfl + +/-- Helper: stripe lock levels are strictly ascending across stripes. -/ +theorem stripe_levels_ascending (i : Nat) : + 2 * i + 1 < 2 * i + 2 ∧ 2 * i + 2 < 2 * (i + 1) + 1 := by + constructor <;> omega + +/-- lockInc levels are strictly ascending with index. -/ +theorem lockInc_level_strict_mono {n : Nat} (i j : Nat) (hi : i < n) (hj : j < n) + (hij : i < j) : lockLevel (.lockInc i hi : LockId n) < lockLevel (.lockInc j hj) := by + simp [lockLevel]; omega + +/-- lockDec levels are strictly ascending with index. -/ +theorem lockDec_level_strict_mono {n : Nat} (i j : Nat) (hi : i < n) (hj : j < n) + (hij : i < j) : lockLevel (.lockDec i hi : LockId n) < lockLevel (.lockDec j hj) := by + simp [lockLevel]; omega + +/-- Global lock has the lowest level (level 0). -/ +theorem global_level_min {n : Nat} (l : LockId n) (h : l ≠ .global) : + lockLevel (.global : LockId n) < lockLevel l := by + cases l with + | global => exact absurd rfl h + | lockInc i hi => simp [lockLevel] + | lockDec i hi => simp [lockLevel] + +/-- **Deadlock Freedom**: Any sequence of lock acquisitions that follows the + "acquire in ascending level order" discipline cannot deadlock. + + This is a standard result: a total order on locks with the invariant that + every thread acquires locks in strictly ascending order prevents cycles + in the wait-for graph, which is necessary and sufficient for deadlock. + + We prove the 2-thread case (the general N-thread case follows by the + same transitivity argument on the wait-for cycle). -/ +theorem no_deadlock_from_total_order {n : Nat} + -- Two threads each hold a lock and wait for another + (held₁ waited₁ held₂ waited₂ : LockId n) + -- Thread 1 holds held₁ and wants waited₁ (ascending order) + (h1 : lockLevel held₁ < lockLevel waited₁) + -- Thread 2 holds held₂ and wants waited₂ (ascending order) + (h2 : lockLevel held₂ < lockLevel waited₂) + -- Deadlock requires: thread 1 waits for what thread 2 holds, + -- and thread 2 waits for what thread 1 holds + (h_wait1 : waited₁ = held₂) + (h_wait2 : waited₂ = held₁) : + False := by + subst h_wait1; subst h_wait2 + omega + +/-! ### Summary of verified properties (all QED, no sorry) + + 1. `reachable_is_anchored`: Every reachable object is anchored + (has a path from an externally-referenced object via heap edges). + + 2. `yrc_safety`: The collector only frees unanchored objects, + which are unreachable by all threads. **No use-after-free.** + + 3. `no_lost_object`: After `a.field = b`, `b` is reachable + (atomic store makes the edge visible immediately). + + 4. `src_safe_in_window`: Even between the atomic store and + the buffered inc, the collector cannot free src. + + 5. `lockLevel_injective`: All lock levels are distinct (well-defined total order). + + 6. `global_level_min`: The global lock has the lowest level. + + 7. 
`lockInc_level_strict_mono`, `lockDec_level_strict_mono`: + Stripe locks are strictly ordered by index. + + 8. `no_deadlock_from_total_order`: A 2-thread deadlock cycle is impossible + when both threads acquire locks in ascending level order. + + Together these establish that YRC's write barrier protocol + (atomic store → buffer inc → buffer dec) is safe under concurrent + collection, and the locking discipline prevents deadlock. + + ## What is NOT proved: Completeness (liveness) + + This proof covers **safety** (no use-after-free) and **deadlock-freedom**, + but does NOT prove **completeness** — that all garbage cycles are eventually + collected. + + Completeness depends on the trial deletion algorithm (Bacon 2001) correctly + identifying closed cycles. Specifically it requires proving: + + 1. After `mergePendingRoots`, merged RCs equal logical RCs + (buffered inc/dec exactly compensate graph changes since last merge). + 2. `markGray` subtracts exactly the internal (heap→heap) edge count from + each node's merged RC, yielding `trialRC(x) = externalRefCount(x)`. + 3. `scan` correctly partitions: nodes with `trialRC ≥ 0` are rescued by + `scanBlack`; nodes with `trialRC < 0` remain white. + 4. White nodes form closed subgraphs with zero external refs → garbage. + + These properties follow from the well-known Bacon trial-deletion algorithm + and are assumed here rather than re-proved. The YRC-specific contribution + (buffered RCs, striped queues, concurrent mutators) is what our safety + proof covers — showing that concurrency does not break the preconditions + that trial deletion relies on (physical graph consistency, eventual RC + consistency after merge). + + Reference: D.F. Bacon and V.T. Rajan, "Concurrent Cycle Collection in + Reference Counted Systems", ECOOP 2001. +-/ diff --git a/lib/system/yrc_proof.tla b/lib/system/yrc_proof.tla new file mode 100644 index 0000000000..67d6d31a74 --- /dev/null +++ b/lib/system/yrc_proof.tla @@ -0,0 +1,761 @@ +---- MODULE yrc_proof ---- +\* TLA+ specification of YRC (Thread-safe ORC cycle collector) +\* Models the fine details of barriers, striped queues, and synchronization +\* +\* ## Key Barrier Semantics Modeled +\* +\* ### Write Barrier (nimAsgnYrc) +\* 1. atomicStoreN(dest, src, ATOMIC_RELEASE) +\* - Graph update is immediately visible to all threads (including collector) +\* - ATOMIC_RELEASE ensures all prior writes are visible before this store +\* - No lock required for graph updates (lock-free) +\* +\* 2. nimIncRefCyclic(src, true) +\* - Acquires per-stripe lockInc[stripe] (fine-grained) +\* - Buffers increment in toInc[stripe] queue +\* - On overflow: acquires global lock, merges all stripes, applies increment +\* +\* 3. yrcDec(tmp, desc) +\* - Acquires per-stripe lockDec[stripe] (fine-grained) +\* - Buffers decrement in toDec[stripe] queue +\* - On overflow: acquires global lock, merges all stripes, applies decrement, +\* adds to roots array if not already present +\* +\* ### Merge Operation (mergePendingRoots) +\* - Acquires global lock (exclusive access) +\* - Sequentially acquires each stripe's lockInc and lockDec +\* - Drains all buffers, applies RC adjustments +\* - Adds decremented objects to roots array +\* - After merge: mergedRC = logicalRC (current graph state) +\* +\* ### Collection Cycle (under global lock) +\* 1. mergePendingRoots: reconcile buffered changes +\* 2. markGray: trial deletion (subtract internal edges) +\* 3. scan: rescue objects with RC >= 0 (scanBlack follows current graph) +\* 4. 
collectColor: free white objects (closed cycles) +\* +\* ## Safety Argument +\* +\* The collector only frees closed cycles (zero external refs). Concurrent writes +\* cannot cause "lost objects" because: +\* - Graph updates are atomic and immediately visible +\* - Mutator must hold stack ref to modify object (external ref) +\* - scanBlack follows current physical edges (rescues newly written objects) +\* - Only objects unreachable from any stack root are freed + +EXTENDS Naturals, Integers, Sequences, FiniteSets, TLC + +CONSTANTS NumStripes, QueueSize, RootsThreshold, Objects, Threads, ObjTypes +ASSUME NumStripes \in Nat /\ NumStripes > 0 +ASSUME QueueSize \in Nat /\ QueueSize > 0 +ASSUME RootsThreshold \in Nat +ASSUME IsFiniteSet(Objects) +ASSUME IsFiniteSet(Threads) +ASSUME IsFiniteSet(ObjTypes) + +\* NULL constant (represents "no thread" for locks) +\* We use a sentinel value that's guaranteed not to be in Threads or Objects +NULL == "NULL" \* String literal that won't conflict with Threads/Objects +ASSUME NULL \notin Threads /\ NULL \notin Objects + +\* Helper functions +\* Note: GetStripeIdx is not used, GetStripe is used instead + +\* Color constants +colBlack == 0 +colGray == 1 +colWhite == 2 +maybeCycle == 4 +inRootsFlag == 8 +colorMask == 3 + +\* State variables +VARIABLES + \* Physical heap graph (always up-to-date, atomic stores) + edges, \* edges[obj1][obj2] = TRUE if obj1.field points to obj2 + \* Stack roots per thread + roots, \* roots[thread][obj] = TRUE if thread has local var pointing to obj + \* Reference counts (stored in object header) + rc, \* rc[obj] = reference count (logical, after merge) + \* Color markers (stored in object header, bits 0-2) + color, \* color[obj] \in {colBlack, colGray, colWhite} + \* Root tracking flags + inRoots, \* inRoots[obj] = TRUE if obj is in roots array + \* Striped increment queues + toIncLen, \* toIncLen[stripe] = current length of increment queue + toInc, \* toInc[stripe][i] = object to increment + \* Striped decrement queues + toDecLen, \* toDecLen[stripe] = current length of decrement queue + toDec, \* toDec[stripe][i] = (object, type) pair to decrement + \* Per-stripe locks + lockInc, \* lockInc[stripe] = thread holding increment lock (or NULL) + lockDec, \* lockDec[stripe] = thread holding decrement lock (or NULL) + \* Global lock + globalLock, \* thread holding global lock (or NULL) + \* Merged roots array (used during collection) + mergedRoots, \* sequence of (object, type) pairs + \* Collection state + collecting, \* TRUE if collection is in progress + gcEnv, \* GC environment: {touched, edges, rcSum, toFree, ...} + \* Pending operations (for modeling atomicity) + pendingWrites \* set of pending write barrier operations + +\* Type invariants +TypeOK == + /\ edges \in [Objects -> [Objects -> BOOLEAN]] + /\ roots \in [Threads -> [Objects -> BOOLEAN]] + /\ rc \in [Objects -> Int] + /\ color \in [Objects -> {colBlack, colGray, colWhite}] + /\ inRoots \in [Objects -> BOOLEAN] + /\ toIncLen \in [0..(NumStripes-1) -> 0..QueueSize] + /\ toInc \in [0..(NumStripes-1) -> Seq(Objects)] + /\ toDecLen \in [0..(NumStripes-1) -> 0..QueueSize] + /\ toDec \in [0..(NumStripes-1) -> Seq([obj: Objects, desc: ObjTypes])] + /\ lockInc \in [0..(NumStripes-1) -> Threads \cup {NULL}] + /\ lockDec \in [0..(NumStripes-1) -> Threads \cup {NULL}] + /\ globalLock \in Threads \cup {NULL} + /\ mergedRoots \in Seq([obj: Objects, desc: ObjTypes]) + /\ collecting \in BOOLEAN + /\ pendingWrites \in SUBSET ([thread: Threads, dest: Objects, old: Objects \cup {NULL}, 
src: Objects \cup {NULL}, phase: {"store", "inc", "dec"}]) + +\* Helper: internal reference count (heap-to-heap edges) +InternalRC(obj) == + Cardinality({src \in Objects : edges[src][obj]}) + +\* Helper: external reference count (stack roots) +ExternalRC(obj) == + Cardinality({t \in Threads : roots[t][obj]}) + +\* Helper: logical reference count +LogicalRC(obj) == + InternalRC(obj) + ExternalRC(obj) + +\* Helper: get stripe index for thread +\* Map threads to stripe indices deterministically +\* Since threads are ModelValues, we use a simple deterministic mapping: +\* Assign each thread to stripe 0 (for small models, this is fine) +\* For larger models, TLC will handle the mapping deterministically +GetStripe(thread) == 0 + +\* ============================================================================ +\* Write Barrier: nimAsgnYrc +\* ============================================================================ +\* The write barrier does: +\* 1. atomicStoreN(dest, src, ATOMIC_RELEASE) -- graph update is immediate +\* 2. nimIncRefCyclic(src, true) -- buffer inc(src) +\* 3. yrcDec(tmp, desc) -- buffer dec(old) +\* +\* Key barrier semantics: +\* - ATOMIC_RELEASE on store ensures all prior writes are visible before the graph update +\* - The graph update is immediately visible to all threads (including collector) +\* - RC adjustments are buffered and only applied during merge + +\* ============================================================================ +\* Phase 1: Atomic Store (Topology Update) +\* ============================================================================ +\* The atomic store always happens first, updating the graph topology. +\* This is independent of RC operations and never blocks. +MutatorWriteAtomicStore(thread, destObj, destField, oldVal, newVal, desc) == + \* Atomic store with RELEASE barrier - updates graph topology immediately + \* Clear ALL edges from destObj first (atomic store replaces old value completely), + \* then set the new edge. This ensures destObj.field can only point to one object. + /\ edges' = [edges EXCEPT ![destObj] = [x \in Objects |-> + IF x = newVal /\ newVal # NULL + THEN TRUE + ELSE FALSE]] + /\ UNCHANGED <> + +\* ============================================================================ +\* Phase 2: RC Buffering (if space available) +\* ============================================================================ +\* Buffers increment/decrement if there's space. If overflow would happen, +\* this action is disabled (blocked) until merge can happen. 
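+\* In the Nim implementation this "blocked until merge" behaviour corresponds to
+\* the overflow handling in nimIncRefCyclic and nimDecRefIsLastCyclic{Dyn,Static}:
+\* on a full stripe the mutator falls through to a merge (collectCycles) and then
+\* retries the buffering. Here the same effect is modeled declaratively by
+\* disabling the buffering action until a merge action has drained the stripes.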
+WriteBarrier(thread, destObj, destField, oldVal, newVal, desc) == + LET stripe == GetStripe(thread) + IN + \* Determine if overflow happens for increment or decrement + /\ LET + incOverflow == (newVal # NULL) /\ (toIncLen[stripe] >= QueueSize) + decOverflow == (oldVal # NULL) /\ (toDecLen[stripe] >= QueueSize) + IN + \* Buffering: only enabled if no overflow (otherwise blocked until merge can happen) + /\ ~incOverflow \* Precondition: increment buffer has space (blocks if full) + /\ ~decOverflow \* Precondition: decrement buffer has space (blocks if full) + /\ toIncLen' = IF newVal # NULL /\ toIncLen[stripe] < QueueSize + THEN [toIncLen EXCEPT ![stripe] = toIncLen[stripe] + 1] + ELSE toIncLen + /\ toInc' = IF newVal # NULL /\ toIncLen[stripe] < QueueSize + THEN [toInc EXCEPT ![stripe] = Append(toInc[stripe], newVal)] + ELSE toInc + /\ toDecLen' = IF oldVal # NULL /\ toDecLen[stripe] < QueueSize + THEN [toDecLen EXCEPT ![stripe] = toDecLen[stripe] + 1] + ELSE toDecLen + /\ toDec' = IF oldVal # NULL /\ toDecLen[stripe] < QueueSize + THEN [toDec EXCEPT ![stripe] = Append(toDec[stripe], [obj |-> oldVal, desc |-> desc])] + ELSE toDec + /\ UNCHANGED <> + +\* ============================================================================ +\* Phase 3: Overflow Handling (separate actions that can block) +\* ============================================================================ + +\* Handle increment overflow: merge increment buffers when lock is available +\* This merges ALL increment buffers (for all stripes), not just the one that overflowed +MutatorWriteMergeInc(thread) == + LET stripe == GetStripe(thread) + IN + /\ \E s \in 0..(NumStripes-1): toIncLen[s] >= QueueSize \* Some stripe has increment overflow + /\ globalLock = NULL \* Lock must be available (blocks if held) + /\ toIncLen' = [s \in 0..(NumStripes-1) |-> 0] + /\ toInc' = [s \in 0..(NumStripes-1) |-> <<>>] + /\ rc' = \* Compute RC from LogicalRC of current graph (increment buffers merged) + \* The graph is already updated by atomic store, so we compute from current edges + [x \in Objects |-> + LET internalRC == Cardinality({src \in Objects : edges[src][x]}) + externalRC == Cardinality({t \in Threads : roots[t][x]}) + IN internalRC + externalRC] + /\ globalLock' = NULL \* Release lock after merge + /\ UNCHANGED <> + +\* Handle decrement overflow: merge ALL buffers when lock is available +\* This calls collectCycles() which merges both increment and decrement buffers +\* We inline MergePendingRoots here. The entire withLock block is atomic: +\* lock is acquired, merge happens, lock is released. 
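+
+\* A small sketch for intuition only (the action below inlines the same
+\* condition rather than using this operator): the stripes whose decrement
+\* queues have reached capacity and therefore force a full merge.
+DecOverflowStripes == {s \in 0..(NumStripes-1) : toDecLen[s] >= QueueSize}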
+MutatorWriteMergeDec(thread) == + LET stripe == GetStripe(thread) + IN + /\ \E s \in 0..(NumStripes-1): toDecLen[s] >= QueueSize \* Some stripe has decrement overflow + /\ globalLock = NULL \* Lock must be available (blocks if held) + /\ \* Merge all buffers (inlined MergePendingRoots logic) + LET \* Compute new RC by merging all buffered increments and decrements + \* For each object, count buffered increments and decrements + bufferedInc == UNION {{toInc[s][i] : i \in 1..toIncLen[s]} : s \in 0..(NumStripes-1)} + bufferedDec == UNION {{toDec[s][i].obj : i \in 1..toDecLen[s]} : s \in 0..(NumStripes-1)} + \* Compute RC: current graph state (edges) + roots - buffered decrements + buffered increments + \* Actually, we compute from LogicalRC of current graph (buffers are merged) + newRC == [x \in Objects |-> + LET internalRC == Cardinality({src \in Objects : edges[src][x]}) + externalRC == Cardinality({t \in Threads : roots[t][x]}) + IN internalRC + externalRC] + \* Collect objects from decrement buffers for mergedRoots + newRootsSet == UNION {{toDec[s][i].obj : i \in 1..toDecLen[s]} : s \in 0..(NumStripes-1)} + newRootsSeq == IF newRootsSet = {} + THEN <<>> + ELSE LET ordered == CHOOSE f \in [1..Cardinality(newRootsSet) -> newRootsSet] : + \A i, j \in DOMAIN f : i # j => f[i] # f[j] + IN [i \in 1..Cardinality(newRootsSet) |-> ordered[i]] + IN + /\ rc' = newRC + /\ mergedRoots' = mergedRoots \o newRootsSeq + /\ inRoots' = [x \in Objects |-> + IF newRootsSet = {} + THEN inRoots[x] + ELSE LET rootObjs == UNION {{mergedRoots'[i].obj : i \in DOMAIN mergedRoots'}} + IN IF x \in rootObjs THEN TRUE ELSE inRoots[x]] + /\ toIncLen' = [s \in 0..(NumStripes-1) |-> 0] + /\ toInc' = [s \in 0..(NumStripes-1) |-> <<>>] + /\ toDecLen' = [s \in 0..(NumStripes-1) |-> 0] + /\ toDec' = [s \in 0..(NumStripes-1) |-> <<>>] + /\ globalLock' = NULL \* Lock acquired, merge done, lock released (entire withLock block is atomic) + /\ UNCHANGED <> + +\* ============================================================================ +\* Merge Operation: mergePendingRoots +\* ============================================================================ +\* Drains all stripe buffers under global lock. +\* Sequentially acquires each stripe's lockInc and lockDec to drain buffers. +\* This reconciles buffered RC adjustments with the current graph state. 
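+\*
+\* In symbols (a restatement for readability, under the same modeling
+\* assumptions as the definitions above): for every object x the merge leaves
+\*   rc'[x] = Cardinality({src \in Objects : edges[src][x]})
+\*            + Cardinality({t \in Threads : roots[t][x]})
+\* i.e. rc'[x] = LogicalRC(x) evaluated over the already-updated graph.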
+\* +\* Key invariant: After merge, mergedRC = logicalRC (current graph + buffered changes) + +MergePendingRoots == + /\ globalLock # NULL + /\ LET + \* Count pending increments per object (across all stripes) + pendingInc == [x \in Objects |-> + Cardinality(UNION {{i \in DOMAIN toInc[s] : toInc[s][i] = x} : + s \in 0..(NumStripes-1)})] + \* Count pending decrements per object (across all stripes) + pendingDec == [x \in Objects |-> + Cardinality(UNION {{i \in DOMAIN toDec[s] : toDec[s][i].obj = x} : + s \in 0..(NumStripes-1)})] + \* After merge, RC should equal LogicalRC (current graph state) + \* The buffered changes compensate for graph changes that already happened, + \* so: mergedRC = currentRC + pendingInc - pendingDec = LogicalRC(current graph) + \* But to ensure correctness, we compute directly from the current graph: + newRC == [x \in Objects |-> + LogicalRC(x)] \* RC after merge equals logical RC of current graph + \* Add decremented objects to roots if not already there (check inRootsFlag) + \* Collect all new roots as a set, then convert to sequence + \* Build set by iterating over all (stripe, index) pairs + \* Use UNION with explicit per-stripe sets (avoiding function enumeration issues) + newRootsSet == UNION {UNION {IF inRoots[toDec[s][i].obj] = FALSE + THEN {[obj |-> toDec[s][i].obj, desc |-> toDec[s][i].desc]} + ELSE {} : i \in DOMAIN toDec[s]} : s \in 0..(NumStripes-1)} + newRootsSeq == IF newRootsSet = {} + THEN <<>> + ELSE LET ordered == CHOOSE f \in [1..Cardinality(newRootsSet) -> newRootsSet] : + \A i, j \in DOMAIN f : i # j => f[i] # f[j] + IN [i \in 1..Cardinality(newRootsSet) |-> ordered[i]] + IN + /\ rc' = newRC + /\ mergedRoots' = mergedRoots \o newRootsSeq \* Append new roots to sequence + /\ \* Update inRoots: mark objects in mergedRoots' as being in roots + \* Use explicit iteration to avoid enumeration issues + inRoots' = [x \in Objects |-> + IF mergedRoots' = <<>> + THEN inRoots[x] + ELSE LET rootObjs == UNION {{mergedRoots'[i].obj : i \in DOMAIN mergedRoots'}} + IN IF x \in rootObjs THEN TRUE ELSE inRoots[x]] + /\ toIncLen' = [s \in 0..(NumStripes-1) |-> 0] + /\ toInc' = [s \in 0..(NumStripes-1) |-> <<>>] + /\ toDecLen' = [s \in 0..(NumStripes-1) |-> 0] + /\ toDec' = [s \in 0..(NumStripes-1) |-> <<>>] + /\ UNCHANGED <> + +\* ============================================================================ +\* Trial Deletion: markGray +\* ============================================================================ +\* Subtracts internal (heap-to-heap) edges from reference counts. +\* This isolates external references (stack roots). +\* +\* Algorithm: +\* 1. Mark obj gray +\* 2. Trace obj's fields (via traceImpl) +\* 3. For each child c: decrement c.rc (subtract internal edge) +\* 4. 
Recursively markGray all children +\* +\* After markGray: trialRC(obj) = mergedRC(obj) - internalRefCount(obj) +\* = externalRefCount(obj) (if merge was correct) + +MarkGray(obj, desc) == + /\ globalLock # NULL + /\ collecting = TRUE + /\ color[obj] # colGray + /\ \* Compute transitive closure of all objects reachable from obj + \* This models the recursive traversal in the actual implementation + LET children == {c \in Objects : edges[obj][c]} + \* Compute all objects reachable from obj via heap edges + \* This is the transitive closure starting from obj's direct children + allReachable == {c \in Objects : + \E path \in Seq(Objects): + Len(path) > 0 /\ + path[1] \in children /\ + path[Len(path)] = c /\ + \A i \in 1..(Len(path)-1): + edges[path[i]][path[i+1]]} + \* All objects to mark gray: obj itself + all reachable descendants + objectsToMarkGray == {obj} \cup allReachable + \* For each reachable object, count internal edges pointing to it + \* from within the subgraph (obj + allReachable) + \* This is the number of times its RC should be decremented + subgraph == {obj} \cup allReachable + internalEdgeCount == [x \in Objects |-> + IF x \in allReachable + THEN Cardinality({y \in subgraph : edges[y][x]}) + ELSE 0] + IN + /\ \* Mark obj and all reachable objects gray + color' = [x \in Objects |-> + IF x \in objectsToMarkGray THEN colGray ELSE color[x]] + /\ \* Subtract internal edges: for each reachable object, decrement its RC + \* by the number of internal edges pointing to it from within the subgraph. + \* This matches the Nim implementation which decrements once per edge traversed. + \* Note: obj's RC is not decremented here (it has no parent in this subgraph). + \* For roots, the RC includes external refs which survive trial deletion. + rc' = [x \in Objects |-> + IF x \in allReachable THEN rc[x] - internalEdgeCount[x] ELSE rc[x]] + /\ UNCHANGED <> + +\* ============================================================================ +\* Scan Phase +\* ============================================================================ +\* Objects with RC >= 0 after trial deletion are rescued (scanBlack). +\* Objects with RC < 0 remain white (part of closed cycle). +\* +\* Key insight: scanBlack follows the *current* physical edges (which may have +\* changed since merge due to concurrent writes). This ensures objects written +\* during collection are still rescued. 
+\* +\* Algorithm: +\* IF rc[obj] >= 0: +\* scanBlack(obj): mark black, restore RC, trace and rescue all children +\* ELSE: +\* mark white (closed cycle with zero external refs) + +Scan(obj, desc) == + /\ globalLock # NULL + /\ collecting = TRUE + /\ color[obj] = colGray + /\ IF rc[obj] >= 0 + THEN \* scanBlack: rescue obj and all reachable objects + \* This follows the current physical graph (atomic stores are visible) + \* Restore RC for all reachable objects by incrementing by the number of + \* internal edges pointing to each (matching what markGray subtracted) + LET children == {c \in Objects : edges[obj][c]} + allReachable == {c \in Objects : + \E path \in Seq(Objects): + Len(path) > 0 /\ + path[1] \in children /\ + path[Len(path)] = c /\ + \A i \in 1..(Len(path)-1): + edges[path[i]][path[i+1]]} + objectsToMarkBlack == {obj} \cup allReachable + \* For each reachable object, count internal edges pointing to it + \* from within the subgraph (obj + allReachable) + \* This is the number of times its RC should be incremented (restored) + subgraph == {obj} \cup allReachable + internalEdgeCount == [x \in Objects |-> + IF x \in allReachable + THEN Cardinality({y \in subgraph : edges[y][x]}) + ELSE 0] + IN + /\ \* Restore RC: increment by the number of internal edges pointing to each + \* reachable object. This restores what markGray subtracted. + \* Note: obj's RC is not incremented here (it wasn't decremented in markGray). + \* The root's RC already reflects external refs which survived trial deletion. + rc' = [x \in Objects |-> + IF x \in allReachable THEN rc[x] + internalEdgeCount[x] ELSE rc[x]] + /\ \* Mark obj and all reachable objects black in one assignment + color' = [x \in Objects |-> + IF x \in objectsToMarkBlack THEN colBlack ELSE color[x]] + ELSE \* Mark white (part of closed cycle) + /\ color' = [color EXCEPT ![obj] = colWhite] + /\ UNCHANGED <> + /\ UNCHANGED <> + +\* ============================================================================ +\* Collection Phase: collectColor +\* ============================================================================ +\* Frees objects of the target color that are not in roots. +\* +\* Safety: Only objects with color = targetColor AND ~inRoots[obj] are freed. +\* These are closed cycles (zero external refs, not reachable from roots). 
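+\*
+\* Stated as a set (a minimal sketch for illustration; the action below frees
+\* one such object per step rather than using this operator):
+FreeCandidates(targetColor) ==
+  {x \in Objects : color[x] = targetColor /\ ~inRoots[x]}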
+
+CollectColor(obj, desc, targetColor) ==
+  /\ globalLock # NULL
+  /\ collecting = TRUE
+  /\ color[obj] = targetColor
+  /\ ~inRoots[obj]
+  /\ \* Free obj: nullify all its outgoing edges (prevents use-after-free)
+     \* In the actual implementation, this happens during trace() when freeing
+     edges' = [edges EXCEPT ![obj] = [x \in Objects |-> FALSE]]
+  /\ color' = [color EXCEPT ![obj] = colBlack] \* Mark as freed
+  /\ UNCHANGED <>
+
+\* ============================================================================
+\* Collection Cycle: collectCyclesBacon
+\* ============================================================================
+
+StartCollection ==
+  /\ globalLock # NULL
+  /\ ~collecting
+  /\ Len(mergedRoots) >= RootsThreshold
+  /\ collecting' = TRUE
+  /\ gcEnv' = [touched |-> 0, edges |-> 0, rcSum |-> 0, toFree |-> {}]
+  /\ UNCHANGED <>
+
+EndCollection ==
+  /\ globalLock # NULL
+  /\ collecting = TRUE
+  /\ \* Clear root flags
+     inRoots' = [x \in Objects |->
+       IF x \in {mergedRoots[i].obj : i \in DOMAIN mergedRoots} THEN FALSE ELSE inRoots[x]]
+  /\ mergedRoots' = <<>>
+  /\ collecting' = FALSE
+  /\ UNCHANGED <>
+
+\* ============================================================================
+\* Mutator Actions
+\* ============================================================================
+
+\* Mutator can write at any time (graph updates are lock-free)
+\* The ATOMIC_RELEASE barrier ensures proper ordering
+\* ASSUMPTION: Users synchronize pointer assignments with locks, so oldVal always
+\* matches the current graph state (as read before the atomic store).
+\* This prevents races at the user level - the GC itself is lock-free.
+MutatorWrite(thread, destObj, destField, oldVal, newVal, desc) ==
+  \* ASSUMPTION: Users synchronize pointer assignments with locks, so oldVal always matches
+  \* the value read before the atomic store. This prevents races at the user level.
+  \* The precondition is enforced in the Next relation.
+  \* Phase 1: Atomic store (topology update) - ALWAYS happens first
+  /\ MutatorWriteAtomicStore(thread, destObj, destField, oldVal, newVal, desc)
+  \* Phase 2: RC buffering - happens if no overflow, otherwise overflow is handled separately
+  \* Note: In reality, if overflow happens, the thread blocks waiting for lock.
+  \* We model this as: atomic store happens, buffering is deferred (handled by merge actions).
+ /\ LET stripe == GetStripe(thread) + incOverflow == (newVal # NULL) /\ (toIncLen[stripe] >= QueueSize) + decOverflow == (oldVal # NULL) /\ (toDecLen[stripe] >= QueueSize) + IN + IF incOverflow \/ decOverflow + THEN \* Overflow: atomic store happened, but buffering is deferred + \* Buffers stay full, merge will happen when lock is available (via MutatorWriteMergeInc/Dec) + /\ UNCHANGED <> + ELSE \* No overflow: buffer normally + /\ WriteBarrier(thread, destObj, destField, oldVal, newVal, desc) + /\ UNCHANGED <> + +\* Stack root assignment: immediate RC increment (not buffered) +\* When assigning val to a root variable named obj, we set roots[thread][val] = TRUE +\* to indicate that thread has a stack reference to val +\* Semantics: obj is root variable name, val is the object being assigned +\* When val=NULL, obj was the old root value, so we decrement rc[obj] +MutatorRootAssign(thread, obj, val) == + /\ IF val # NULL + THEN /\ roots' = [roots EXCEPT ![thread][val] = TRUE] + /\ rc' = [rc EXCEPT ![val] = IF roots[thread][val] THEN @ ELSE @ + 1] \* Increment only if not already a root + ELSE /\ roots' = [roots EXCEPT ![thread][obj] = FALSE] \* Clear root when assigning NULL + /\ rc' = [rc EXCEPT ![obj] = IF roots[thread][obj] THEN @ - 1 ELSE @] \* Decrement old root value + /\ edges' = edges + /\ color' = color + /\ inRoots' = inRoots + /\ toIncLen' = toIncLen + /\ toInc' = toInc + /\ toDecLen' = toDecLen + /\ toDec' = toDec + /\ lockInc' = lockInc + /\ lockDec' = lockDec + /\ globalLock' = globalLock + /\ mergedRoots' = mergedRoots + /\ collecting' = collecting + /\ gcEnv' = gcEnv + /\ pendingWrites' = pendingWrites + +\* ============================================================================ +\* Collector Actions +\* ============================================================================ + +\* Collector acquires global lock for entire collection cycle +CollectorAcquireLock(thread) == + /\ globalLock = NULL + /\ globalLock' = thread + /\ UNCHANGED <> + +CollectorMerge == + /\ globalLock # NULL + /\ MergePendingRoots + +CollectorStart == + /\ globalLock # NULL + /\ StartCollection + +\* Mark all roots gray (trial deletion phase) +CollectorMarkGray == + /\ globalLock # NULL + /\ collecting = TRUE + /\ \E rootIdx \in DOMAIN mergedRoots: + LET root == mergedRoots[rootIdx] + IN MarkGray(root.obj, root.desc) + +\* Scan all roots (rescue phase) +CollectorScan == + /\ globalLock # NULL + /\ collecting = TRUE + /\ \E rootIdx \in DOMAIN mergedRoots: + LET root == mergedRoots[rootIdx] + IN Scan(root.obj, root.desc) + +\* Collect white/gray objects (free phase) +CollectorCollect == + /\ globalLock # NULL + /\ collecting = TRUE + /\ \E rootIdx \in DOMAIN mergedRoots, targetColor \in {colGray, colWhite}: + LET root == mergedRoots[rootIdx] + IN CollectColor(root.obj, root.desc, targetColor) + +CollectorEnd == + /\ globalLock # NULL + /\ EndCollection + +CollectorReleaseLock(thread) == + /\ globalLock = thread + /\ globalLock' = NULL + /\ UNCHANGED <> + +\* ============================================================================ +\* Next State Relation +\* ============================================================================ + +Next == + \/ \E thread \in Threads: + \E destObj \in Objects, oldVal, newVal \in Objects \cup {NULL}, desc \in ObjTypes: + \* Precondition: oldVal must match current graph state (user-level synchronization) + \* ASSUMPTION: Users synchronize pointer assignments with locks, so oldVal always matches + \* the value read before the atomic store. 
This prevents races at the user level. + /\ LET oldValMatches == CASE oldVal = NULL -> TRUE + [] oldVal \in Objects -> edges[destObj][oldVal] + [] OTHER -> FALSE + IN oldValMatches + /\ MutatorWrite(thread, destObj, "field", oldVal, newVal, desc) + \/ \E thread \in Threads: + \* Handle increment overflow: merge increment buffers when lock becomes available + MutatorWriteMergeInc(thread) + \/ \E thread \in Threads: + \* Handle decrement overflow: merge all buffers when lock becomes available + MutatorWriteMergeDec(thread) + \/ \E thread \in Threads: + \E obj, val \in Objects \cup {NULL}: + MutatorRootAssign(thread, obj, val) + \/ \E thread \in Threads: + CollectorAcquireLock(thread) + \/ CollectorMerge + \/ CollectorStart + \/ CollectorMarkGray + \/ CollectorScan + \/ CollectorCollect + \/ CollectorEnd + \/ \E thread \in Threads: + CollectorReleaseLock(thread) + +\* ============================================================================ +\* Initial State +\* ============================================================================ + +Init == + /\ edges = [x \in Objects |-> + [y \in Objects |-> + IF x = y THEN FALSE ELSE FALSE]] \* Empty graph initially + /\ roots = [t \in Threads |-> + [x \in Objects |-> + FALSE]] \* No stack roots initially + /\ rc = [x \in Objects |-> + 0] \* Zero reference counts + /\ color = [x \in Objects |-> + colBlack] \* All objects black initially + /\ inRoots = [x \in Objects |-> + FALSE] \* No objects in roots array + /\ toIncLen = [s \in 0..(NumStripes-1) |-> + 0] + /\ toInc = [s \in 0..(NumStripes-1) |-> + <<>>] + /\ toDecLen = [s \in 0..(NumStripes-1) |-> + 0] + /\ toDec = [s \in 0..(NumStripes-1) |-> + <<>>] + /\ lockInc = [s \in 0..(NumStripes-1) |-> + NULL] + /\ lockDec = [s \in 0..(NumStripes-1) |-> + NULL] + /\ globalLock = NULL + /\ mergedRoots = <<>> + /\ collecting = FALSE + /\ gcEnv = [touched |-> 0, edges |-> 0, rcSum |-> 0, toFree |-> {}] + /\ pendingWrites = {} + /\ TypeOK + +\* ============================================================================ +\* Safety Properties +\* ============================================================================ + +\* Safety: Objects are only freed if they are unreachable from any thread's stack +\* +\* An object is reachable if: +\* - It is a direct stack root (roots[t][obj] = TRUE), OR +\* - There exists a path from a stack root to obj via heap edges +\* +\* Safety guarantee: If an object is reachable, then: +\* - It is not white (not marked for collection), OR +\* - It is in roots array (protected from collection), OR +\* - It is reachable from an object that will be rescued by scanBlack +\* +\* More precisely: Only closed cycles (zero external refs, unreachable) are freed. 
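+\*
+\* The phrase "closed cycle" can be made precise (a sketch shown for
+\* illustration; the properties below do not reference it): a non-empty set of
+\* objects is closed when every heap edge into the set originates inside the
+\* set and no member is held by any thread's stack.
+ClosedCycleSet(S) ==
+  /\ S # {}
+  /\ \A x \in S : \A src \in Objects : edges[src][x] => src \in S
+  /\ \A x \in S : \A t \in Threads : ~roots[t][x]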
+ +\* Helper: Compute next set of reachable objects (one step of transitive closure) +ReachableStep(current) == + current \cup UNION {{y \in Objects : edges[x][y]} : x \in current} + +\* Compute the set of all reachable objects using bounded iteration +\* Since Objects is finite, we iterate at most Cardinality(Objects) times +\* This computes the transitive closure of edges starting from stack roots +\* We unroll the iteration explicitly to avoid recursion issues with TLC +ReachableSet == + LET StackRoots == {x \in Objects : \E t \in Threads : roots[t][x]} + Step1 == ReachableStep(StackRoots) + Step2 == ReachableStep(Step1) + Step3 == ReachableStep(Step2) + Step4 == ReachableStep(Step3) + \* Add more steps if needed for larger object sets + \* For small models (2 objects), 4 steps is sufficient + IN Step4 + +\* Check if an object is reachable +Reachable(obj) == obj \in ReachableSet + +\* Helper: Check if there's a path from 'from' to 'to' +\* For small object sets, we check all possible paths by checking +\* all combinations of intermediate objects +\* Path of length 0: from = to +\* Path of length 1: edges[from][to] +\* Path of length 2: \E i1: edges[from][i1] /\ edges[i1][to] +\* Path of length 3: \E i1, i2: edges[from][i1] /\ edges[i1][i2] /\ edges[i2][to] +\* etc. up to Cardinality(Objects) +HasPath(from, to) == + \/ from = to + \/ edges[from][to] + \/ \E i1 \in Objects: + edges[from][i1] /\ (edges[i1][to] \/ \E i2 \in Objects: + edges[i1][i2] /\ (edges[i2][to] \/ \E i3 \in Objects: + edges[i2][i3] /\ edges[i3][to])) + +\* Helper: Compute set of objects reachable from a given starting object +\* Uses the same iterative approach as ReachableSet +ReachableFrom(start) == + LET Step1 == ReachableStep({start}) + Step2 == ReachableStep(Step1) + Step3 == ReachableStep(Step2) + Step4 == ReachableStep(Step3) + IN Step4 + +\* Safety: Reachable objects are never freed (remain white without being collected) +\* A reachable object is safe if: +\* - It's not white (not marked for collection), OR +\* - It's in roots array (protected from collection), OR +\* - There exists a black object in ReachableSet such that obj is reachable from it +\* (the black object will be rescued by scanBlack, which rescues all white objects +\* reachable from black objects) +Safety == + \A obj \in Objects: + IF obj \in ReachableSet + THEN \/ color[obj] # colWhite \* Not marked for collection + \/ inRoots[obj] \* Protected in roots array + \/ \E blackObj \in ReachableSet: + /\ color[blackObj] = colBlack \* Black object will be rescued by scanBlack + /\ obj \in ReachableFrom(blackObj) \* obj is reachable from blackObj + ELSE TRUE \* Unreachable objects may be freed (this is safe) + +\* Invariant: Reference counts match logical counts after merge +\* (This is maintained by MergePendingRoots) +\* Note: Between merge and collection, RC = logicalRC. +\* During collection (after markGray), RC may be modified by trial deletion. 
+\* RC may be inconsistent when: +\* - globalLock = NULL (buffered changes pending) +\* - globalLock # NULL but merge hasn't happened yet (buffers still have pending changes) +\* RC must equal LogicalRC when: +\* - After merge (buffers are empty) and before collection starts +RCInvariant == + IF globalLock = NULL + THEN TRUE \* Not in collection, RC may be inconsistent (buffered changes pending) + ELSE IF collecting = FALSE /\ \A s \in 0..(NumStripes-1): toIncLen[s] = 0 /\ toDecLen[s] = 0 + THEN \A obj \in Objects: rc[obj] = LogicalRC(obj) \* After merge, buffers empty, RC = logical RC + ELSE TRUE \* During collection or before merge, RC may differ from logicalRC + +\* Invariant: Only closed cycles are collected +\* (Objects with external refs are rescued by scanBlack) +CycleInvariant == + \A obj \in Objects: + IF color[obj] = colWhite /\ ~inRoots[obj] + THEN ExternalRC(obj) = 0 + ELSE TRUE + +\* ============================================================================ +\* Specification +\* ============================================================================ + +Spec == Init /\ [][Next]_<> + +THEOREM Spec => []Safety +THEOREM Spec => []RCInvariant +THEOREM Spec => []CycleInvariant + +==== diff --git a/tests/yrc/tyrc_cas_race.nim b/tests/yrc/tyrc_cas_race.nim new file mode 100644 index 0000000000..6e7dde9bff --- /dev/null +++ b/tests/yrc/tyrc_cas_race.nim @@ -0,0 +1,98 @@ +discard """ + cmd: "nim c --mm:yrc -d:useMalloc --threads:on $file" + output: "ok" + valgrind: "leaks" + disabled: "windows" + disabled: "freebsd" + disabled: "openbsd" +""" + +# Test concurrent traversal and mutation of a shared cyclic list under YRC. +# Multiple threads race to replace nodes using a lock for synchronization. +# This exercises YRC's write barrier and cycle collection under contention. + +import std/locks + +type + Node = ref object + value: int + next: Node + +proc newCycle(start, count: int): Node = + result = Node(value: start) + var cur = result + for i in 1.. 1 -> 2 -> 3 -> 4 -> 5 -> 0 +initLock(sharedLock) +shared = newCycle(0, CycleLen) + +for i in 0.. 0 and seen > 0: + echo "ok" +else: + echo "FAIL: wins=", totalWins, " seen=", seen diff --git a/tests/yrc/tyrc_shared_cycle.nim b/tests/yrc/tyrc_shared_cycle.nim new file mode 100644 index 0000000000..969f2f5583 --- /dev/null +++ b/tests/yrc/tyrc_shared_cycle.nim @@ -0,0 +1,74 @@ +discard """ + cmd: "nim c --mm:yrc -d:useMalloc --threads:on $file" + output: "ok" + valgrind: "leaks" + disabled: "windows" + disabled: "freebsd" + disabled: "openbsd" +""" + +# Test sharing a cyclic list between threads under YRC. + +type + Node = ref object + value: int + next: Node + +proc newCycle(start, count: int): Node = + ## Create a cyclic linked list: start -> start+1 -> ... -> start+count-1 -> start + result = Node(value: start) + var cur = result + for i in 1.. 1 -> 2 -> 3 -> 4 -> 0 +shared = newCycle(0, NodesPerCycle) +let expectedSharedSum = 0 + 1 + 2 + 3 + 4 # = 10 + +for i in 0..