diff --git a/lib/system/gc2.nim b/lib/system/gc2.nim index 4ca0d144fc..78901b1dc9 100644 --- a/lib/system/gc2.nim +++ b/lib/system/gc2.nim @@ -1,7 +1,7 @@ # # # Nim's Runtime Library -# (c) Copyright 2012 Andreas Rumpf +# (c) Copyright 2015 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -9,13 +9,13 @@ # Garbage Collector # -# The basic algorithm is *Deferrent Reference Counting* with cycle detection. -# This is achieved by combining a Deutsch-Bobrow garbage collector -# together with Christoper's partial mark-sweep garbage collector. -# -# Special care has been taken to avoid recursion as far as possible to avoid -# stack overflows when traversing deep datastructures. It is well-suited -# for soft real time applications (like games). +# The basic algorithm is *Deferred Reference Counting* with an incremental mark +# and sweep GC to free cycles. It is hard realtime in that if you play +# according to its rules, no deadline will ever be missed. + +when defined(nimCoroutines): + import arch + {.push profiler:off.} const @@ -29,82 +29,30 @@ const when withRealTime and not declared(getTicks): include "system/timers" when defined(memProfiler): - proc nimProfile(requestedSize: int) + proc nimProfile(requestedSize: int) {.benign.} const - rcShift = 6 # the reference count is shifted so we can use - # the least significat bits for additinal flags: - - rcAlive = 0b00000 # object is reachable. - # color *black* in the original paper - - rcCycleCandidate = 0b00001 # possible root of a cycle. *purple* - - rcDecRefApplied = 0b00010 # the first dec-ref phase of the - # collector was already applied to this - # object. *gray* - - rcMaybeDead = 0b00011 # this object is a candidate for deletion - # during the collect cycles algorithm. - # *white*. - - rcReallyDead = 0b00100 # this is proved to be garbage - - rcRetiredBuffer = 0b00101 # this is a seq or string buffer that - # was replaced by a resize operation. - # see growObj for details - - rcColorMask = RefCount(0b00111) - - rcZct = 0b01000 # already added to ZCT - rcInCycleRoots = 0b10000 # already buffered as cycle candidate - rcHasStackRef = 0b100000 # the object had a stack ref in the last - # cycle collection - - rcMarkBit = rcHasStackRef # this is currently used for leak detection - # when traceGC is on - - rcBufferedAnywhere = rcZct or rcInCycleRoots - - rcIncrement = 1 shl rcShift # don't touch the color bits - -const - NewObjectsAreCycleRoots = true - # the alternative is to use the old strategy of adding cycle roots - # in incRef (in the compiler itself, this doesn't change much) - - IncRefRemovesCandidates = false - # this is safe only if we can reliably track the fact that the object - # has stack references. This could be easily done by adding another bit - # to the refcount field and setting it up in unmarkStackAndRegisters. - # The bit must also be set for new objects that are not rc1 and it must be - # examined in the decref loop in collectCycles. - # XXX: not implemented yet as tests didn't show any improvement from this - - MarkingSkipsAcyclicObjects = true - # Acyclic objects can be safely ignored in the mark and scan phases, - # because they cannot contribute to the internal count. - # XXX: if we generate specialized `markCyclic` and `markAcyclic` - # procs we can further optimize this as there won't be need for any - # checks in the code - - MinimumStackMarking = false - # Try to scan only the user stack and ignore the part of the stack - # belonging to the GC itself. see setStackTop for further info. - # XXX: still has problems in release mode in the compiler itself. - # investigate how it affects growObj - - CollectCyclesStats = false - + rcIncrement = 0b1000 # so that lowest 3 bits are not touched + rcWhite = 0b000 # cell is colored white + rcGrey = 0b001 # traditional color for incremental mark&sweep + rcBlack = 0b010 # marked as alive; direct children also have been processed + rcUnused = 0b011 + ZctFlag = 0b100 # in ZCT + rcShift = 3 # shift by rcShift to get the reference counter + colorMask = 0b011 type WalkOp = enum - waPush + waMarkGlobal, # part of the backup mark&sweep + waMarkGrey, + waZctDecRef #, waDebug - Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall.} + Phase {.pure.} = enum + None, Marking, Sweeping + Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.} # A ref type can have a finalizer that is called before the object's # storage is freed. - GcStat {.final, pure.} = object + GcStat = object stackScans: int # number of performed stack scans (for statistics) cycleCollections: int # number of performed full collections maxThreshold: int # max threshold that has been set @@ -113,373 +61,37 @@ type cycleTableSize: int # max entries in cycle table maxPause: int64 # max measured GC pause in nanoseconds - GcHeap {.final, pure.} = object # this contains the zero count and - # non-zero count table + GcStack = object + prev: ptr GcStack + next: ptr GcStack + starts: pointer + pos: pointer + maxStackSize: int + + GcHeap = object # this contains the zero count and + # non-zero count table + stack: ptr GcStack stackBottom: pointer - stackTop: pointer + phase: Phase cycleThreshold: int + when useCellIds: + idGenerator: int zct: CellSeq # the zero count table decStack: CellSeq # cells in the stack that are to decref again - cycleRoots: CellSeq - tempStack: CellSeq # temporary stack for recursion elimination - freeStack: CellSeq # objects ready to be freed + greyStack: CellSeq recGcLock: int # prevent recursion via finalizers; no thread lock - cycleRootsTrimIdx: int # Trimming is a light-weight collection of the - # cycle roots table that uses a cheap linear scan - # to find only possitively dead objects. - # One strategy is to perform it only for new objects - # allocated between the invocations of collectZCT. - # This index indicates the start of the range of - # such new objects within the table. when withRealTime: maxPause: Nanos # max allowed pause in nanoseconds; active if > 0 region: MemRegion # garbage collected region stat: GcStat -{.deprecated: [TWalkOp: WalkOp, TFinalizer: Finalizer, TGcStat: GcStat, - TGcHeap: GcHeap].} + additionalRoots: CellSeq # dummy roots for GC_ref/unref + var - gch* {.rtlThreadVar.}: GcHeap + gch {.rtlThreadVar.}: GcHeap when not defined(useNimRtl): instantiateForRegion(gch.region) -template acquire(gch: GcHeap) = - when hasThreadSupport and hasSharedHeap: - AcquireSys(HeapLock) - -template release(gch: GcHeap) = - when hasThreadSupport and hasSharedHeap: - releaseSys(HeapLock) - -template setColor(c: PCell, color) = - c.refcount = (c.refcount and not rcColorMask) or color - -template color(c: PCell): expr = - c.refcount and rcColorMask - -template isBitDown(c: PCell, bit): expr = - (c.refcount and bit) == 0 - -template isBitUp(c: PCell, bit): expr = - (c.refcount and bit) != 0 - -template setBit(c: PCell, bit): expr = - c.refcount = c.refcount or bit - -template isDead(c: Pcell): expr = - c.isBitUp(rcReallyDead) # also covers rcRetiredBuffer - -template clearBit(c: PCell, bit): expr = - c.refcount = c.refcount and (not RefCount(bit)) - -when debugGC: - var gcCollectionIdx = 0 - - proc colorStr(c: PCell): cstring = - let color = c.color - case color - of rcAlive: return "alive" - of rcMaybeDead: return "maybedead" - of rcCycleCandidate: return "candidate" - of rcDecRefApplied: return "marked" - of rcRetiredBuffer: return "retired" - of rcReallyDead: return "dead" - else: return "unknown?" - - proc inCycleRootsStr(c: PCell): cstring = - if c.isBitUp(rcInCycleRoots): result = "cycleroot" - else: result = "" - - proc inZctStr(c: PCell): cstring = - if c.isBitUp(rcZct): result = "zct" - else: result = "" - - proc writeCell*(msg: CString, c: PCell, force = false) = - var kind = -1 - if c.typ != nil: kind = ord(c.typ.kind) - when trackAllocationSource: - c_fprintf(c_stdout, "[GC %d] %s: %p %d rc=%ld %s %s %s from %s(%ld)\n", - gcCollectionIdx, - msg, c, kind, c.refcount shr rcShift, - c.colorStr, c.inCycleRootsStr, c.inZctStr, - c.filename, c.line) - else: - c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld\n", - msg, c, kind, c.refcount shr rcShift) - -proc addZCT(zct: var CellSeq, c: PCell) {.noinline.} = - if c.isBitDown(rcZct): - c.setBit rcZct - zct.add c - -template setStackTop(gch) = - # This must be called immediately after we enter the GC code - # to minimize the size of the scanned stack. The stack consumed - # by the GC procs may amount to 200-400 bytes depending on the - # build settings and this contributes to false-positives - # in the conservative stack marking - when MinimumStackMarking: - var stackTop {.volatile.}: pointer - gch.stackTop = addr(stackTop) - -template addCycleRoot(cycleRoots: var CellSeq, c: PCell) = - if c.color != rcCycleCandidate: - c.setColor rcCycleCandidate - - # the object may be buffered already. for example, consider: - # decref; incref; decref - if c.isBitDown(rcInCycleRoots): - c.setBit rcInCycleRoots - cycleRoots.add c - -proc cellToUsr(cell: PCell): pointer {.inline.} = - # convert object (=pointer to refcount) to pointer to userdata - result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell))) - -proc usrToCell*(usr: pointer): PCell {.inline.} = - # convert pointer to userdata to object (=pointer to refcount) - result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell))) - -proc canbeCycleRoot(c: PCell): bool {.inline.} = - result = ntfAcyclic notin c.typ.flags - -proc extGetCellType(c: pointer): PNimType {.compilerproc.} = - # used for code generation concerning debugging - result = usrToCell(c).typ - -proc internRefcount(p: pointer): int {.exportc: "getRefcount".} = - result = int(usrToCell(p).refcount) shr rcShift - -# this that has to equals zero, otherwise we have to round up UnitsPerPage: -when BitsPerPage mod (sizeof(int)*8) != 0: - {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".} - -# forward declarations: -proc collectCT(gch: var GcHeap) -proc isOnStack*(p: pointer): bool {.noinline.} -proc forAllChildren(cell: PCell, op: WalkOp) -proc doOperation(p: pointer, op: WalkOp) -proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) -# we need the prototype here for debugging purposes - -proc prepareDealloc(cell: PCell) = - if cell.typ.finalizer != nil: - # the finalizer could invoke something that - # allocates memory; this could trigger a garbage - # collection. Since we are already collecting we - # prevend recursive entering here by a lock. - # XXX: we should set the cell's children to nil! - inc(gch.recGcLock) - (cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell)) - dec(gch.recGcLock) - -when traceGC: - # traceGC is a special switch to enable extensive debugging - type - CellState = enum - csAllocated, csFreed - {.deprecated: [TCellState: CellState].} - var - states: array[CellState, CellSet] - - proc traceCell(c: PCell, state: CellState) = - case state - of csAllocated: - if c in states[csAllocated]: - writeCell("attempt to alloc an already allocated cell", c) - sysAssert(false, "traceCell 1") - excl(states[csFreed], c) - # writecell("allocated", c) - of csFreed: - if c in states[csFreed]: - writeCell("attempt to free a cell twice", c) - sysAssert(false, "traceCell 2") - if c notin states[csAllocated]: - writeCell("attempt to free not an allocated cell", c) - sysAssert(false, "traceCell 3") - excl(states[csAllocated], c) - # writecell("freed", c) - incl(states[state], c) - - proc computeCellWeight(c: PCell): int = - var x: CellSet - x.init - - let startLen = gch.tempStack.len - c.forAllChildren waPush - - while startLen != gch.tempStack.len: - dec gch.tempStack.len - var c = gch.tempStack.d[gch.tempStack.len] - if c in states[csFreed]: continue - inc result - if c notin x: - x.incl c - c.forAllChildren waPush - - template markChildrenRec(cell) = - let startLen = gch.tempStack.len - cell.forAllChildren waPush - let isMarked = cell.isBitUp(rcMarkBit) - while startLen != gch.tempStack.len: - dec gch.tempStack.len - var c = gch.tempStack.d[gch.tempStack.len] - if c in states[csFreed]: continue - if c.isBitDown(rcMarkBit): - c.setBit rcMarkBit - c.forAllChildren waPush - if c.isBitUp(rcMarkBit) and not isMarked: - writecell("cyclic cell", cell) - cprintf "Weight %d\n", cell.computeCellWeight - - proc writeLeakage(onlyRoots: bool) = - if onlyRoots: - for c in elements(states[csAllocated]): - if c notin states[csFreed]: - markChildrenRec(c) - var f = 0 - var a = 0 - for c in elements(states[csAllocated]): - inc a - if c in states[csFreed]: inc f - elif c.isBitDown(rcMarkBit): - writeCell("leak", c) - cprintf "Weight %d\n", c.computeCellWeight - cfprintf(cstdout, "Allocations: %ld; freed: %ld\n", a, f) - -template gcTrace(cell, state: expr): stmt {.immediate.} = - when logGC: writeCell($state, cell) - when traceGC: traceCell(cell, state) - -template WithHeapLock(blk: stmt): stmt = - when hasThreadSupport and hasSharedHeap: AcquireSys(HeapLock) - blk - when hasThreadSupport and hasSharedHeap: ReleaseSys(HeapLock) - -proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} = - # we MUST access gch as a global here, because this crosses DLL boundaries! - WithHeapLock: addCycleRoot(gch.cycleRoots, c) - -proc rtlAddZCT(c: PCell) {.rtl, inl.} = - # we MUST access gch as a global here, because this crosses DLL boundaries! - WithHeapLock: addZCT(gch.zct, c) - -type - CyclicMode = enum - Cyclic, - Acyclic, - MaybeCyclic - - ReleaseType = enum - AddToZTC - FreeImmediately - - HeapType = enum - LocalHeap - SharedHeap -{.deprecated: [TCyclicMode: CyclicMode, TReleaseType: ReleaseType, - THeapType: HeapType].} - -template `++` (rc: RefCount, heapType: HeapType): stmt = - when heapType == SharedHeap: - discard atomicInc(rc, rcIncrement) - else: - inc rc, rcIncrement - -template `--`(rc: RefCount): expr = - dec rc, rcIncrement - rc <% rcIncrement - -template `--` (rc: RefCount, heapType: HeapType): expr = - (when heapType == SharedHeap: atomicDec(rc, rcIncrement) <% rcIncrement else: --rc) - -template doDecRef(cc: PCell, - heapType = LocalHeap, - cycleFlag = MaybeCyclic): stmt = - var c = cc - sysAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr") - # XXX: move this elesewhere - - sysAssert(c.refcount >=% rcIncrement, "decRef") - if c.refcount--(heapType): - # this is the last reference from the heap - # add to a zero-count-table that will be matched against stack pointers - rtlAddZCT(c) - else: - when cycleFlag != Acyclic: - if cycleFlag == Cyclic or canBeCycleRoot(c): - # a cycle may have been broken - rtlAddCycleRoot(c) - -template doIncRef(cc: PCell, - heapType = LocalHeap, - cycleFlag = MaybeCyclic): stmt = - var c = cc - c.refcount++(heapType) - when cycleFlag != Acyclic: - when NewObjectsAreCycleRoots: - if canbeCycleRoot(c): - addCycleRoot(gch.cycleRoots, c) - elif IncRefRemovesCandidates: - c.setColor rcAlive - # XXX: this is not really atomic enough! - -proc nimGCref(p: pointer) {.compilerProc, inline.} = doIncRef(usrToCell(p)) -proc nimGCunref(p: pointer) {.compilerProc, inline.} = doDecRef(usrToCell(p)) - -proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} = - sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle") - var c = usrToCell(p) - sysAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr") - if c.refcount--(LocalHeap): - rtlAddZCT(c) - sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2") - sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5") - -template doAsgnRef(dest: PPointer, src: pointer, - heapType = LocalHeap, cycleFlag = MaybeCyclic): stmt = - sysAssert(not isOnStack(dest), "asgnRef") - # BUGFIX: first incRef then decRef! - if src != nil: doIncRef(usrToCell(src), heapType, cycleFlag) - if dest[] != nil: doDecRef(usrToCell(dest[]), heapType, cycleFlag) - dest[] = src - -proc asgnRef(dest: PPointer, src: pointer) {.compilerProc, inline.} = - # the code generator calls this proc! - doAsgnRef(dest, src, LocalHeap, MaybeCyclic) - -proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerProc, inline.} = - # the code generator calls this proc if it is known at compile time that no - # cycle is possible. - doAsgnRef(dest, src, LocalHeap, Acyclic) - -proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerProc.} = - # unsureAsgnRef updates the reference counters only if dest is not on the - # stack. It is used by the code generator if it cannot decide wether a - # reference is in the stack or not (this can happen for var parameters). - if not isOnStack(dest): - if src != nil: doIncRef(usrToCell(src)) - # XXX we must detect a shared heap here - # better idea may be to just eliminate the need for unsureAsgnRef - # - # XXX finally use assembler for the stack checking instead! - # the test for '!= nil' is correct, but I got tired of the segfaults - # resulting from the crappy stack checking: - if cast[int](dest[]) >=% PageSize: doDecRef(usrToCell(dest[])) - else: - # can't be an interior pointer if it's a stack location! - sysAssert(interiorAllocatedPtr(gch.region, dest)==nil, - "stack loc AND interior pointer") - dest[] = src - -when hasThreadSupport and hasSharedHeap: - # shared heap version of the above procs - proc asgnRefSh(dest: PPointer, src: pointer) {.compilerProc, inline.} = - doAsgnRef(dest, src, SharedHeap, MaybeCyclic) - - proc asgnRefNoCycleSh(dest: PPointer, src: pointer) {.compilerProc, inline.} = - doAsgnRef(dest, src, SharedHeap, Acyclic) - proc initGC() = when not defined(useNimRtl): when traceGC: @@ -493,26 +105,214 @@ proc initGC() = gch.stat.cycleTableSize = 0 # init the rt init(gch.zct) - init(gch.tempStack) - init(gch.freeStack) - init(gch.cycleRoots) init(gch.decStack) + init(gch.additionalRoots) + init(gch.greyStack) -proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) = +template gcAssert(cond: bool, msg: string) = + when defined(useGcAssert): + if not cond: + echo "[GCASSERT] ", msg + GC_disable() + writeStackTrace() + quit 1 + +proc addZCT(s: var CellSeq, c: PCell) {.noinline.} = + if (c.refcount and ZctFlag) == 0: + c.refcount = c.refcount or ZctFlag + add(s, c) + +proc cellToUsr(cell: PCell): pointer {.inline.} = + # convert object (=pointer to refcount) to pointer to userdata + result = cast[pointer](cast[ByteAddress](cell)+%ByteAddress(sizeof(Cell))) + +proc usrToCell(usr: pointer): PCell {.inline.} = + # convert pointer to userdata to object (=pointer to refcount) + result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell))) + +proc canBeCycleRoot(c: PCell): bool {.inline.} = + result = ntfAcyclic notin c.typ.flags + +proc extGetCellType(c: pointer): PNimType {.compilerproc.} = + # used for code generation concerning debugging + result = usrToCell(c).typ + +proc internRefcount(p: pointer): int {.exportc: "getRefcount".} = + result = int(usrToCell(p).refcount) shr rcShift + +# this that has to equals zero, otherwise we have to round up UnitsPerPage: +when BitsPerPage mod (sizeof(int)*8) != 0: + {.error: "(BitsPerPage mod BitsPerUnit) should be zero!".} + +template color(c): expr = c.refCount and colorMask +template setColor(c, col) = + when col == rcWhite: + c.refcount = c.refcount and not colorMask + else: + c.refcount = c.refcount and not colorMask or col + +proc writeCell(msg: cstring, c: PCell) = + var kind = -1 + if c.typ != nil: kind = ord(c.typ.kind) + when leakDetector: + c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld from %s(%ld)\n", + msg, c, kind, c.refcount shr rcShift, c.filename, c.line) + else: + c_fprintf(c_stdout, "[GC] %s: %p %d rc=%ld; color=%ld\n", + msg, c, kind, c.refcount shr rcShift, c.color) + +template gcTrace(cell, state: expr): stmt {.immediate.} = + when traceGC: traceCell(cell, state) + +# forward declarations: +proc collectCT(gch: var GcHeap) {.benign.} +proc isOnStack(p: pointer): bool {.noinline, benign.} +proc forAllChildren(cell: PCell, op: WalkOp) {.benign.} +proc doOperation(p: pointer, op: WalkOp) {.benign.} +proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.} +# we need the prototype here for debugging purposes + +when hasThreadSupport and hasSharedHeap: + template `--`(x: expr): expr = atomicDec(x, rcIncrement) <% rcIncrement + template `++`(x: expr): stmt = discard atomicInc(x, rcIncrement) +else: + template `--`(x: expr): expr = + dec(x, rcIncrement) + x <% rcIncrement + template `++`(x: expr): stmt = inc(x, rcIncrement) + +proc prepareDealloc(cell: PCell) = + if cell.typ.finalizer != nil: + # the finalizer could invoke something that + # allocates memory; this could trigger a garbage + # collection. Since we are already collecting we + # prevend recursive entering here by a lock. + # XXX: we should set the cell's children to nil! + inc(gch.recGcLock) + (cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell)) + dec(gch.recGcLock) + +proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} = + # we MUST access gch as a global here, because this crosses DLL boundaries! + discard + +proc rtlAddZCT(c: PCell) {.rtl, inl.} = + # we MUST access gch as a global here, because this crosses DLL boundaries! + addZCT(gch.zct, c) + +proc decRef(c: PCell) {.inline.} = + gcAssert(isAllocatedPtr(gch.region, c), "decRef: interiorPtr") + gcAssert(c.refcount >=% rcIncrement, "decRef") + if --c.refcount: + rtlAddZCT(c) + +proc incRef(c: PCell) {.inline.} = + gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr") + c.refcount = c.refcount +% rcIncrement + +proc nimGCref(p: pointer) {.compilerProc.} = + let cell = usrToCell(p) + incRef(cell) + add(gch.additionalRoots, cell) + +proc nimGCunref(p: pointer) {.compilerProc.} = + let cell = usrToCell(p) + decRef(cell) + var L = gch.additionalRoots.len-1 + var i = L + let d = gch.additionalRoots.d + while i >= 0: + if d[i] == cell: + d[i] = d[L] + dec gch.additionalRoots.len + break + dec(i) + +template markGrey(x: PCell) = + if x.color == rcWhite and gch.phase == Phase.Marking: + x.setColor(rcGrey) + add(gch.greyStack, x) + +proc GC_addCycleRoot*[T](p: ref T) {.inline.} = + ## adds 'p' to the cycle candidate set for the cycle collector. It is + ## necessary if you used the 'acyclic' pragma for optimization + ## purposes and need to break cycles manually. + rtlAddCycleRoot(usrToCell(cast[pointer](p))) + +proc nimGCunrefNoCycle(p: pointer) {.compilerProc, inline.} = + sysAssert(allocInv(gch.region), "begin nimGCunrefNoCycle") + var c = usrToCell(p) + gcAssert(isAllocatedPtr(gch.region, c), "nimGCunrefNoCycle: isAllocatedPtr") + if --c.refcount: + rtlAddZCT(c) + sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 2") + sysAssert(allocInv(gch.region), "end nimGCunrefNoCycle 5") + +proc asgnRef(dest: PPointer, src: pointer) {.compilerProc, inline.} = + # the code generator calls this proc! + gcAssert(not isOnStack(dest), "asgnRef") + # BUGFIX: first incRef then decRef! + if src != nil: + let s = usrToCell(src) + incRef(s) + markGrey(s) + if dest[] != nil: decRef(usrToCell(dest[])) + dest[] = src + +proc asgnRefNoCycle(dest: PPointer, src: pointer) {.compilerProc, inline.} = + # the code generator calls this proc if it is known at compile time that no + # cycle is possible. + gcAssert(not isOnStack(dest), "asgnRefNoCycle") + if src != nil: + var c = usrToCell(src) + ++c.refcount + markGrey(c) + if dest[] != nil: + var c = usrToCell(dest[]) + if --c.refcount: + rtlAddZCT(c) + dest[] = src + +proc unsureAsgnRef(dest: PPointer, src: pointer) {.compilerProc.} = + # unsureAsgnRef updates the reference counters only if dest is not on the + # stack. It is used by the code generator if it cannot decide wether a + # reference is in the stack or not (this can happen for var parameters). + if not isOnStack(dest): + if src != nil: + let s = usrToCell(src) + incRef(s) + markGrey(s) + # XXX finally use assembler for the stack checking instead! + # the test for '!= nil' is correct, but I got tired of the segfaults + # resulting from the crappy stack checking: + if cast[int](dest[]) >=% PageSize: decRef(usrToCell(dest[])) + else: + # can't be an interior pointer if it's a stack location! + gcAssert(interiorAllocatedPtr(gch.region, dest) == nil, + "stack loc AND interior pointer") + dest[] = src + +type + GlobalMarkerProc = proc () {.nimcall, benign.} +var + globalMarkersLen: int + globalMarkers: array[0.. 7_000, GlobalMarkerProc] + +proc nimRegisterGlobalMarker(markerProc: GlobalMarkerProc) {.compilerProc.} = + if globalMarkersLen <= high(globalMarkers): + globalMarkers[globalMarkersLen] = markerProc + inc globalMarkersLen + else: + echo "[GC] cannot register global variable; too many global variables" + quit 1 + +proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} = var d = cast[ByteAddress](dest) case n.kind of nkSlot: forAllChildrenAux(cast[pointer](d +% n.offset), n.typ, op) of nkList: for i in 0..n.len-1: - # inlined for speed - if n.sons[i].kind == nkSlot: - if n.sons[i].typ.kind in {tyRef, tyString, tySequence}: - doOperation(cast[PPointer](d +% n.sons[i].offset)[], op) - else: - forAllChildrenAux(cast[pointer](d +% n.sons[i].offset), - n.sons[i].typ, op) - else: - forAllSlotsAux(dest, n.sons[i], op) + forAllSlotsAux(dest, n.sons[i], op) of nkCase: var m = selectBranch(dest, n) if m != nil: forAllSlotsAux(dest, m, op) @@ -533,9 +333,10 @@ proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) = else: discard proc forAllChildren(cell: PCell, op: WalkOp) = - sysAssert(cell != nil, "forAllChildren: 1") - sysAssert(cell.typ != nil, "forAllChildren: 2") - sysAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 3" + gcAssert(cell != nil, "forAllChildren: 1") + gcAssert(isAllocatedPtr(gch.region, cell), "forAllChildren: 2") + gcAssert(cell.typ != nil, "forAllChildren: 3") + gcAssert cell.typ.kind in {tyRef, tySequence, tyString}, "forAllChildren: 4" let marker = cell.typ.marker if marker != nil: marker(cellToUsr(cell), op.int) @@ -547,10 +348,9 @@ proc forAllChildren(cell: PCell, op: WalkOp) = var d = cast[ByteAddress](cellToUsr(cell)) var s = cast[PGenericSeq](d) if s != nil: - let baseAddr = d +% GenericSeqSize for i in 0..s.len-1: - forAllChildrenAux(cast[pointer](baseAddr +% i *% cell.typ.base.size), - cell.typ.base, op) + forAllChildrenAux(cast[pointer](d +% i *% cell.typ.base.size +% + GenericSeqSize), cell.typ.base, op) else: discard proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} = @@ -571,7 +371,7 @@ proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} = template replaceZctEntry(i: expr) = c = d[i] if c.refcount >=% rcIncrement: - c.clearBit(rcZct) + c.refcount = c.refcount and not ZctFlag d[i] = res return if L > 8: @@ -592,408 +392,302 @@ proc addNewObjToZCT(res: PCell, gch: var GcHeap) {.inline.} = for i in countdown(L-1, max(0, L-8)): var c = d[i] if c.refcount >=% rcIncrement: - c.clearBit(rcZct) + c.refcount = c.refcount and not ZctFlag d[i] = res return add(gch.zct, res) -proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap, rc1 = false): pointer = +{.push stackTrace: off, profiler:off.} +proc gcInvariant*() = + sysAssert(allocInv(gch.region), "injected") + when declared(markForDebug): + markForDebug(gch) +{.pop.} + +proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer = # generates a new object and sets its reference counter to 0 - acquire(gch) sysAssert(allocInv(gch.region), "rawNewObj begin") - sysAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1") - + gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1") collectCT(gch) - sysAssert(allocInv(gch.region), "rawNewObj after collect") - var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell))) - sysAssert(allocInv(gch.region), "rawNewObj after rawAlloc") - - sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2") - + gcAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2") + # now it is buffered in the ZCT res.typ = typ - - when trackAllocationSource and not hasThreadSupport: - if framePtr != nil and framePtr.prev != nil and framePtr.prev.prev != nil: - res.filename = framePtr.prev.prev.filename - res.line = framePtr.prev.prev.line - else: - res.filename = "nofile" - - if rc1: - res.refcount = rcIncrement # refcount is 1 - else: - # its refcount is zero, so add it to the ZCT: - res.refcount = rcZct - addNewObjToZCT(res, gch) - - if NewObjectsAreCycleRoots and canBeCycleRoot(res): - res.setBit(rcInCycleRoots) - res.setColor rcCycleCandidate - gch.cycleRoots.add res - + when leakDetector and not hasThreadSupport: + if framePtr != nil and framePtr.prev != nil: + res.filename = framePtr.prev.filename + res.line = framePtr.prev.line + # refcount is zero, color is black, but mark it to be in the ZCT + res.refcount = ZctFlag sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3") - + # its refcount is zero, so add it to the ZCT: + addNewObjToZCT(res, gch) when logGC: writeCell("new cell", res) gcTrace(res, csAllocated) - release(gch) + when useCellIds: + inc gch.idGenerator + res.id = gch.idGenerator result = cellToUsr(res) sysAssert(allocInv(gch.region), "rawNewObj end") {.pop.} -proc freeCell(gch: var GcHeap, c: PCell) = - # prepareDealloc(c) - gcTrace(c, csFreed) - - when reallyDealloc: rawDealloc(gch.region, c) - else: - sysAssert(c.typ != nil, "collectCycles") - zeroMem(c, sizeof(Cell)) - -template eraseAt(cells: var CellSeq, at: int): stmt = - cells.d[at] = cells.d[cells.len - 1] - dec cells.len - -template trimAt(roots: var CellSeq, at: int): stmt = - # This will remove a cycle root candidate during trimming. - # a candidate is removed either because it received a refup and - # it's no longer a candidate or because it received further refdowns - # and now it's dead for sure. - let c = roots.d[at] - c.clearBit(rcInCycleRoots) - roots.eraseAt(at) - if c.isBitUp(rcReallyDead) and c.refcount <% rcIncrement: - # This case covers both dead objects and retired buffers - # That's why we must also check the refcount (it may be - # kept possitive by stack references). - freeCell(gch, c) +proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} = + result = rawNewObj(typ, size, gch) + when defined(memProfiler): nimProfile(size) proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} = - setStackTop(gch) - result = rawNewObj(typ, size, gch, false) + result = rawNewObj(typ, size, gch) zeroMem(result, size) when defined(memProfiler): nimProfile(size) -proc newObjNoInit(typ: PNimType, size: int): pointer {.compilerRtl.} = - setStackTop(gch) - result = rawNewObj(typ, size, gch, false) - when defined(memProfiler): nimProfile(size) - proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} = - setStackTop(gch) # `newObj` already uses locks, so no need for them here. let size = addInt(mulInt(len, typ.base.size), GenericSeqSize) result = newObj(typ, size) cast[PGenericSeq](result).len = len cast[PGenericSeq](result).reserved = len + when defined(memProfiler): nimProfile(size) proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} = - setStackTop(gch) - result = rawNewObj(typ, size, gch, true) + # generates a new object and sets its reference counter to 1 + sysAssert(allocInv(gch.region), "newObjRC1 begin") + gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1") + collectCT(gch) + sysAssert(allocInv(gch.region), "newObjRC1 after collectCT") + + var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell))) + sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc") + sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "newObj: 2") + # now it is buffered in the ZCT + res.typ = typ + when leakDetector and not hasThreadSupport: + if framePtr != nil and framePtr.prev != nil: + res.filename = framePtr.prev.filename + res.line = framePtr.prev.line + res.refcount = rcIncrement # refcount is 1 + sysAssert(isAllocatedPtr(gch.region, res), "newObj: 3") + when logGC: writeCell("new cell", res) + gcTrace(res, csAllocated) + when useCellIds: + inc gch.idGenerator + res.id = gch.idGenerator + result = cellToUsr(res) + zeroMem(result, size) + sysAssert(allocInv(gch.region), "newObjRC1 end") when defined(memProfiler): nimProfile(size) proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} = - setStackTop(gch) let size = addInt(mulInt(len, typ.base.size), GenericSeqSize) result = newObjRC1(typ, size) cast[PGenericSeq](result).len = len cast[PGenericSeq](result).reserved = len + when defined(memProfiler): nimProfile(size) proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer = - acquire(gch) collectCT(gch) var ol = usrToCell(old) sysAssert(ol.typ != nil, "growObj: 1") - sysAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2") + gcAssert(ol.typ.kind in {tyString, tySequence}, "growObj: 2") sysAssert(allocInv(gch.region), "growObj begin") var res = cast[PCell](rawAlloc(gch.region, newsize + sizeof(Cell))) - var elemSize = if ol.typ.kind != tyString: ol.typ.base.size - else: 1 + var elemSize = 1 + if ol.typ.kind != tyString: elemSize = ol.typ.base.size var oldsize = cast[PGenericSeq](old).len*elemSize + GenericSeqSize - - # XXX: This should happen outside - # call user-defined move code - # call user-defined default constructor copyMem(res, ol, oldsize + sizeof(Cell)) zeroMem(cast[pointer](cast[ByteAddress](res)+% oldsize +% sizeof(Cell)), newsize-oldsize) - sysAssert((cast[ByteAddress](res) and (MemAlign-1)) == 0, "growObj: 3") - sysAssert(res.refcount shr rcShift <=% 1, "growObj: 4") - - when false: - if ol.isBitUp(rcZct): - var j = gch.zct.len-1 - var d = gch.zct.d - while j >= 0: - if d[j] == ol: - d[j] = res - break - dec(j) - - if ol.isBitUp(rcInCycleRoots): - for i in 0 .. = 0: + if d[j] == ol: + d[j] = res + break + dec(j) + rawDealloc(gch.region, ol) + else: + # we split the old refcount in 2 parts. XXX This is still not entirely + # correct if the pointer that receives growObj's result is on the stack. + # A better fix would be to emit the location specific write barrier for + # 'growObj', but this is lots of more work and who knows what new problems + # this would create. + res.refcount = rcIncrement + decRef(ol) + else: + sysAssert(ol.typ != nil, "growObj: 5") + zeroMem(ol, sizeof(Cell)) + when useCellIds: + inc gch.idGenerator + res.id = gch.idGenerator result = cellToUsr(res) sysAssert(allocInv(gch.region), "growObj end") when defined(memProfiler): nimProfile(newsize-oldsize) proc growObj(old: pointer, newsize: int): pointer {.rtl.} = - setStackTop(gch) result = growObj(old, newsize, gch) {.push profiler:off.} + +template takeStartTime(workPackageSize) {.dirty.} = + const workPackage = workPackageSize + when withRealTime: + var steps = workPackage + var t0: Ticks + if gch.maxPause > 0: t0 = getticks() + +template takeTime {.dirty.} = + when withRealTime: dec steps + +template checkTime {.dirty.} = + when withRealTime: + if steps == 0: + steps = workPackage + if gch.maxPause > 0: + let duration = getticks() - t0 + # the GC's measuring is not accurate and needs some cleanup actions + # (stack unmarking), so subtract some short amount of time in + # order to miss deadlines less often: + if duration >= gch.maxPause - 50_000: + return false + # ---------------- cycle collector ------------------------------------------- +proc freeCyclicCell(gch: var GcHeap, c: PCell) = + prepareDealloc(c) + gcTrace(c, csCycFreed) + when logGC: writeCell("cycle collector dealloc cell", c) + when reallyDealloc: + sysAssert(allocInv(gch.region), "free cyclic cell") + rawDealloc(gch.region, c) + else: + gcAssert(c.typ != nil, "freeCyclicCell") + zeroMem(c, sizeof(Cell)) + +proc sweep(gch: var GcHeap) = + # XXX make this incremental! + for x in allObjects(gch.region): + if isCell(x): + # cast to PCell is correct here: + var c = cast[PCell](x) + if c.color == rcWhite: freeCyclicCell(gch, c) + else: c.setColor(rcWhite) + +proc markS(gch: var GcHeap, c: PCell) = + if x.color != rcGrey: + x.setColor(rcGrey) + add(gch.greyStack, x) + #forAllChildren(c, waMarkGrey) + #x.setColor(rcBlack) + +proc markIncrementally(gch: var GcHeap): bool = + var L = addr(gch.greyStack.len) + takeStartTime(100) + while L[] > 0: + var c = gch.greyStack.d[0] + sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr") + gch.greyStack.d[0] = gch.greyStack.d[L[] - 1] + dec(L[]) + takeTime() + if c.color == clGrey: + forAllChildren(c, waMarkGrey) + c.setColor(clBlack) + checkTime() + result = true + +proc markGlobals(gch: var GcHeap) = + for i in 0 .. < globalMarkersLen: globalMarkers[i]() + +proc markLocals(gch: var GcHeap) = + var d = gch.decStack.d + for i in 0..=% rcIncrement, "doOperation 2") + #c.refcount = c.refcount -% rcIncrement + when logGC: writeCell("decref (from doOperation)", c) + decRef(c) + #if c.refcount <% rcIncrement: addZCT(gch.zct, c) + of waMarkGlobal: + when hasThreadSupport: + # could point to a cell which we don't own and don't want to touch/trace + if isAllocatedPtr(gch.region, c): + markS(gch, c) + else: + markS(gch, c) + of waMarkGrey: + if x.color != rcGrey: + c.setColor(rcGrey) + add(gch.greyStack, c) + #of waDebug: debugGraph(c) proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} = doOperation(d, WalkOp(op)) -type - RecursionType = enum - FromChildren, - FromRoot -{.deprecated: [TRecursionType: RecursionType].} +proc collectZCT(gch: var GcHeap): bool {.benign.} -proc collectZCT(gch: var GcHeap): bool - -template pseudoRecursion(typ: RecursionType, body: stmt): stmt = - discard - -proc trimCycleRoots(gch: var GcHeap, startIdx = gch.cycleRootsTrimIdx) = - var i = startIdx - while i < gch.cycleRoots.len: - if gch.cycleRoots.d[i].color != rcCycleCandidate: - gch.cycleRoots.trimAt i - else: - inc i - - gch.cycleRootsTrimIdx = gch.cycleRoots.len - -# we now use a much simpler and non-recursive algorithm for cycle removal -proc collectCycles(gch: var GcHeap) = - if gch.cycleRoots.len == 0: return - gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, gch.cycleRoots.len) - - when CollectCyclesStats: - let l0 = gch.cycleRoots.len - let tStart = getTicks() - - var - decrefs = 0 - increfs = 0 - collected = 0 - maybedeads = 0 - - template ignoreObject(c: PCell): expr = - # This controls which objects will be ignored in the mark and scan stages - (when MarkingSkipsAcyclicObjects: not canbeCycleRoot(c) else: false) - # not canbeCycleRoot(c) - # false - # c.isBitUp(rcHasStackRef) - - template earlyMarkAliveRec(cell) = - let startLen = gch.tempStack.len - cell.setColor rcAlive - cell.forAllChildren waPush - - while startLen != gch.tempStack.len: - dec gch.tempStack.len - var c = gch.tempStack.d[gch.tempStack.len] - if c.color != rcAlive: - c.setColor rcAlive - c.forAllChildren waPush - - template earlyMarkAlive(stackRoots) = - # This marks all objects reachable from the stack as alive before any - # of the other stages is executed. Such objects cannot be garbage and - # they don't need to participate in the recursive decref/incref. - for i in 0 .. =% rcIncrement, "recursive dec ref") - dec c.refcount, rcIncrement - inc decrefs - if c.color != rcDecRefApplied: - c.setColor rcDecRefApplied - c.forAllChildren waPush - - template markRoots(roots) = - var i = 0 - while i < roots.len: - if roots.d[i].color == rcCycleCandidate: - recursiveDecRef(roots.d[i]) - inc i - else: - roots.trimAt i - - markRoots(gch.cycleRoots) - - when CollectCyclesStats: - let tAfterMark = getTicks() - c_printf "COLLECT CYCLES %d: %d/%d\n", gcCollectionIdx, gch.cycleRoots.len, l0 - - template recursiveMarkAlive(cell) = - let startLen = gch.tempStack.len - cell.setColor rcAlive - cell.forAllChildren waPush - - while startLen != gch.tempStack.len: - dec gch.tempStack.len - var c = gch.tempStack.d[gch.tempStack.len] - if ignoreObject(c): continue - inc c.refcount, rcIncrement - inc increfs - - if c.color != rcAlive: - c.setColor rcAlive - c.forAllChildren waPush - - template scanRoots(roots) = - for i in 0 .. =% rcIncrement: - recursiveMarkAlive(c) - else: - # note that this is not necessarily the ultimate - # destiny of the object. we may still mark it alive - # later if we encounter another node from where it's - # reachable. - c.setColor rcMaybeDead - inc maybedeads - c.forAllChildren waPush - - scanRoots(gch.cycleRoots) - - when CollectCyclesStats: - let tAfterScan = getTicks() - - template collectDead(roots) = - for i in 0 .. 0: repeat - -var gcDebugging* = false - -var seqdbg* : proc (s: PGenericSeq) {.cdecl.} +proc collectCycles(gch: var GcHeap): bool = + # ensure the ZCT 'color' is not used: + while gch.zct.len > 0: discard collectZCT(gch) + markGlobals(gch) + markLocals(gch) + if markIncrementally(gch): + gch.phase = Phase.Sweeping + sweep(gch) + gch.phase = Phase.None + result = true + else: + gch.phase = Phase.Marking proc gcMark(gch: var GcHeap, p: pointer) {.inline.} = # the addresses are not as cells on the stack, so turn them to cells: @@ -1005,235 +699,33 @@ proc gcMark(gch: var GcHeap, p: pointer) {.inline.} = var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell)) if objStart != nil: # mark the cell: - if objStart.color != rcReallyDead: - if gcDebugging: - # writeCell("marking ", objStart) - discard - else: - inc objStart.refcount, rcIncrement - gch.decStack.add objStart - else: - # With incremental clean-up, objects spend some time - # in various lists before being deallocated. - # We just found a reference on the stack to an object, - # which we have previously labeled as unreachable. - # This is either a bug in the GC or a pure accidental - # coincidence due to the conservative stack marking. - when debugGC: - # writeCell("marking dead object", objStart) - discard - when false: - if isAllocatedPtr(gch.region, cell): - sysAssert false, "allocated pointer but not interior?" - # mark the cell: - inc cell.refcount, rcIncrement - add(gch.decStack, cell) + objStart.refcount = objStart.refcount +% rcIncrement + add(gch.decStack, objStart) sysAssert(allocInv(gch.region), "gcMark end") -proc markThreadStacks(gch: var GcHeap) = - when hasThreadSupport and hasSharedHeap: - {.error: "not fully implemented".} - var it = threadList - while it != nil: - # mark registers: - for i in 0 .. high(it.registers): gcMark(gch, it.registers[i]) - var sp = cast[ByteAddress](it.stackBottom) - var max = cast[ByteAddress](it.stackTop) - # XXX stack direction? - # XXX unroll this loop: - while sp <=% max: - gcMark(gch, cast[PPointer](sp)[]) - sp = sp +% sizeof(pointer) - it = it.next +include gc_common -# ----------------- stack management -------------------------------------- -# inspired from Smart Eiffel - -when defined(sparc): - const stackIncreases = false -elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or - defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820): - const stackIncreases = true -else: - const stackIncreases = false - -when not defined(useNimRtl): - {.push stack_trace: off.} - proc setStackBottom(theStackBottom: pointer) = - #c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom) - # the first init must be the one that defines the stack bottom: - if gch.stackBottom == nil: gch.stackBottom = theStackBottom - else: - var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2 - var b = cast[ByteAddress](gch.stackBottom) - #c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom) - when stackIncreases: - gch.stackBottom = cast[pointer](min(a, b)) - else: - gch.stackBottom = cast[pointer](max(a, b)) - {.pop.} - -proc stackSize(): int {.noinline.} = - var stackTop {.volatile.}: pointer - result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom)) - -var - jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int - # a little hack to get the size of a JmpBuf in the generated C code - # in a platform independent way - -when defined(sparc): # For SPARC architecture. - proc isOnStack(p: pointer): bool = - var stackTop {.volatile.}: pointer - stackTop = addr(stackTop) - var b = cast[ByteAddress](gch.stackBottom) - var a = cast[ByteAddress](stackTop) - var x = cast[ByteAddress](p) - result = a <=% x and x <=% b - - proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} = - when defined(sparcv9): - asm """"flushw \n" """ - else: - asm """"ta 0x3 ! ST_FLUSH_WINDOWS\n" """ - - var - max = gch.stackBottom - sp: PPointer - stackTop: array[0..1, pointer] - sp = addr(stackTop[0]) - # Addresses decrease as the stack grows. - while sp <= max: - gcMark(gch, sp[]) - sp = cast[PPointer](cast[ByteAddress](sp) +% sizeof(pointer)) - -elif defined(ELATE): - {.error: "stack marking code is to be written for this architecture".} - -elif stackIncreases: - # --------------------------------------------------------------------------- - # Generic code for architectures where addresses increase as the stack grows. - # --------------------------------------------------------------------------- - proc isOnStack(p: pointer): bool = - var stackTop {.volatile.}: pointer - stackTop = addr(stackTop) - var a = cast[ByteAddress](gch.stackBottom) - var b = cast[ByteAddress](stackTop) - var x = cast[ByteAddress](p) - result = a <=% x and x <=% b - - proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} = - var registers: C_JmpBuf - if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. - var max = cast[ByteAddress](gch.stackBottom) - var sp = cast[ByteAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer) - # sp will traverse the JMP_BUF as well (jmp_buf size is added, - # otherwise sp would be below the registers structure). - while sp >=% max: - gcMark(gch, cast[PPointer](sp)[]) - sp = sp -% sizeof(pointer) - -else: - # --------------------------------------------------------------------------- - # Generic code for architectures where addresses decrease as the stack grows. - # --------------------------------------------------------------------------- - proc isOnStack(p: pointer): bool = - var stackTop {.volatile.}: pointer - stackTop = addr(stackTop) - var b = cast[ByteAddress](gch.stackBottom) - var a = cast[ByteAddress](stackTop) - var x = cast[ByteAddress](p) - result = a <=% x and x <=% b - - proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} = - # We use a jmp_buf buffer that is in the C stack. - # Used to traverse the stack and registers assuming - # that 'setjmp' will save registers in the C stack. - type PStackSlice = ptr array [0..7, pointer] - var registers: C_JmpBuf - if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. - when MinimumStackMarking: - # mark the registers - var jmpbufPtr = cast[ByteAddress](addr(registers)) - var jmpbufEnd = jmpbufPtr +% jmpbufSize - - while jmpbufPtr <=% jmpbufEnd: - gcMark(gch, cast[PPointer](jmpbufPtr)[]) - jmpbufPtr = jmpbufPtr +% sizeof(pointer) - - var sp = cast[ByteAddress](gch.stackTop) - else: - var sp = cast[ByteAddress](addr(registers)) - # mark the user stack - var max = cast[ByteAddress](gch.stackBottom) - # loop unrolled: - while sp <% max - 8*sizeof(pointer): - gcMark(gch, cast[PStackSlice](sp)[0]) - gcMark(gch, cast[PStackSlice](sp)[1]) - gcMark(gch, cast[PStackSlice](sp)[2]) - gcMark(gch, cast[PStackSlice](sp)[3]) - gcMark(gch, cast[PStackSlice](sp)[4]) - gcMark(gch, cast[PStackSlice](sp)[5]) - gcMark(gch, cast[PStackSlice](sp)[6]) - gcMark(gch, cast[PStackSlice](sp)[7]) - sp = sp +% sizeof(pointer)*8 - # last few entries: - while sp <=% max: - gcMark(gch, cast[PPointer](sp)[]) - sp = sp +% sizeof(pointer) - -# ---------------------------------------------------------------------------- -# end of non-portable code -# ---------------------------------------------------------------------------- - -proc releaseCell(gch: var GcHeap, cell: PCell) = - if cell.color != rcReallyDead: - prepareDealloc(cell) - cell.setColor rcReallyDead - - let l1 = gch.tempStack.len - cell.forAllChildren waPush - let l2 = gch.tempStack.len - for i in l1 .. 0: t0 = getticks() + takeStartTime(100) while L[] > 0: var c = gch.zct.d[0] - sysAssert c.isBitUp(rcZct), "collectZCT: rcZct missing!" - sysAssert(isAllocatedPtr(gch.region, c), "collectZCT: isAllocatedPtr") - + sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr") # remove from ZCT: - c.clearBit(rcZct) + gcAssert((c.refcount and ZctFlag) == ZctFlag, "collectZCT") + + c.refcount = c.refcount and not ZctFlag gch.zct.d[0] = gch.zct.d[L[] - 1] dec(L[]) - when withRealtime: dec steps + takeTime() if c.refcount <% rcIncrement: # It may have a RC > 0, if it is in the hardware stack or # it has not been removed yet from the ZCT. This is because @@ -1241,92 +733,78 @@ proc collectZCT(gch: var GcHeap): bool = # as this might be too slow. # In any case, it should be removed from the ZCT. But not # freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!** - if c.color == rcRetiredBuffer: - if c.isBitDown(rcInCycleRoots): - freeCell(gch, c) + when logGC: writeCell("zct dealloc cell", c) + gcTrace(c, csZctFreed) + # We are about to free the object, call the finalizer BEFORE its + # children are deleted as well, because otherwise the finalizer may + # access invalid memory. This is done by prepareDealloc(): + prepareDealloc(c) + forAllChildren(c, waZctDecRef) + when reallyDealloc: + sysAssert(allocInv(gch.region), "collectZCT: rawDealloc") + rawDealloc(gch.region, c) else: - # if c.color == rcReallyDead: writeCell("ReallyDead in ZCT?", c) - releaseCell(gch, c) - when withRealtime: - if steps == 0: - steps = workPackage - if gch.maxPause > 0: - let duration = getticks() - t0 - # the GC's measuring is not accurate and needs some cleanup actions - # (stack unmarking), so subtract some short amount of time in to - # order to miss deadlines less often: - if duration >= gch.maxPause - 50_000: - return false + sysAssert(c.typ != nil, "collectZCT 2") + zeroMem(c, sizeof(Cell)) + checkTime() result = true - gch.trimCycleRoots - #deInit(gch.zct) - #init(gch.zct) proc unmarkStackAndRegisters(gch: var GcHeap) = var d = gch.decStack.d - for i in 0 .. = gch.cycleThreshold or alwaysCycleGC: - collectCycles(gch) - sysAssert gch.zct.len == 0, "zct is not null after collect cycles" - inc(gch.stat.cycleCollections) - gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() * - CycleIncrease) - gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold) + if collectCycles(gch): + inc(gch.stat.cycleCollections) + gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() * + CycleIncrease) + gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold) unmarkStackAndRegisters(gch) sysAssert(allocInv(gch.region), "collectCT: end") - when withRealtime: + when withRealTime: let duration = getticks() - t0 gch.stat.maxPause = max(gch.stat.maxPause, duration) when defined(reportMissedDeadlines): if gch.maxPause > 0 and duration > gch.maxPause: c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration) +when defined(nimCoroutines): + proc currentStackSizes(): int = + for stack in items(gch.stack): + result = result + stackSize(stack.starts, stack.pos) + proc collectCT(gch: var GcHeap) = - if (gch.zct.len >= ZctThreshold or (cycleGC and + # stackMarkCosts prevents some pathological behaviour: Stack marking + # becomes more expensive with large stacks and large stacks mean that + # cells with RC=0 are more likely to be kept alive by the stack. + when defined(nimCoroutines): + let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold) + else: + let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold) + if (gch.zct.len >= stackMarkCosts or (cycleGC and getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and gch.recGcLock == 0: collectCTBody(gch) -when withRealtime: +when withRealTime: proc toNano(x: int): Nanos {.inline.} = result = x * 1000 @@ -1334,13 +812,11 @@ when withRealtime: gch.maxPause = MaxPauseInUs.toNano proc GC_step(gch: var GcHeap, us: int, strongAdvice: bool) = - acquire(gch) gch.maxPause = us.toNano if (gch.zct.len >= ZctThreshold or (cycleGC and getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) or strongAdvice: collectCTBody(gch) - release(gch) proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice) @@ -1358,11 +834,7 @@ when not defined(useNimRtl): dec(gch.recGcLock) proc GC_setStrategy(strategy: GC_Strategy) = - case strategy - of gcThroughput: discard - of gcResponsiveness: discard - of gcOptimizeSpace: discard - of gcOptimizeTime: discard + discard proc GC_enableMarkAndSweep() = gch.cycleThreshold = InitialCycleThreshold @@ -1372,13 +844,10 @@ when not defined(useNimRtl): # set to the max value to suppress the cycle detector proc GC_fullCollect() = - setStackTop(gch) - acquire(gch) var oldThreshold = gch.cycleThreshold gch.cycleThreshold = 0 # forces cycle collection collectCT(gch) gch.cycleThreshold = oldThreshold - release(gch) proc GC_getStatistics(): string = GC_disable() @@ -1390,9 +859,13 @@ when not defined(useNimRtl): "[GC] max threshold: " & $gch.stat.maxThreshold & "\n" & "[GC] zct capacity: " & $gch.zct.cap & "\n" & "[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" & - "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" & "[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) - when traceGC: writeLeakage(true) + when defined(nimCoroutines): + result = result & "[GC] number of stacks: " & $gch.stack.len & "\n" + for stack in items(gch.stack): + result = result & "[GC] stack " & stack.starts.repr & "[GC] max stack size " & $stack.maxStackSize & "\n" + else: + result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" GC_enable() {.pop.}