Merge branch 'prim-gc' into devel

Andreas Rumpf
2016-05-22 15:01:36 +02:00
6 changed files with 549 additions and 186 deletions


@@ -16,7 +16,7 @@ const
# above X strings a hash-switch for strings is generated
proc registerGcRoot(p: BProc, v: PSym) =
if gSelectedGC in {gcMarkAndSweep, gcGenerational, gcV2} and
if gSelectedGC in {gcMarkAndSweep, gcGenerational, gcV2, gcRefc} and
containsGarbageCollectedRef(v.loc.t):
# we register a specialized marked proc here; this has the advantage
# that it works out of the box for thread local storage then :-)
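
The hunk above extends root registration to the plain refcounting GC (gcRefc): a global that contains traced refs now gets a marker proc registered, so the mark&sweep pass enabled later in this commit can reach it. A minimal illustration of the mechanism; the registry and the names globalMarkers/markCache are invented for this sketch and are not the compiler's actual output:

type Node = ref object
  next: Node

var
  globalMarkers: seq[proc () {.nimcall.}] = @[]  # assumed name for the root registry
  cache: Node                                    # a global holding a GC'd ref

proc markCache() {.nimcall.} =
  # a real marker would hand 'cache' to the collector's mark routine
  if cache != nil: discard cache

globalMarkers.add markCache  # what registerGcRoot arranges once per such global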


@@ -27,15 +27,14 @@ const
type
PTrunk = ptr Trunk
Trunk {.final.} = object
Trunk = object
next: PTrunk # all nodes are connected with this pointer
key: int # start address at bit 0
bits: array[0..IntsPerTrunk-1, int] # a bit vector
TrunkBuckets = array[0..255, PTrunk]
IntSet {.final.} = object
IntSet = object
data: TrunkBuckets
{.deprecated: [TIntSet: IntSet, TTrunk: Trunk, TTrunkBuckets: TrunkBuckets].}
type
AlignType = BiggestFloat
@@ -64,8 +63,6 @@ type
next, prev: PBigChunk # chunks of the same (or bigger) size
align: int
data: AlignType # start of usable memory
{.deprecated: [TAlignType: AlignType, TFreeCell: FreeCell, TBaseChunk: BaseChunk,
TBigChunk: BigChunk, TSmallChunk: SmallChunk].}
template smallChunkOverhead(): expr = sizeof(SmallChunk)-sizeof(AlignType)
template bigChunkOverhead(): expr = sizeof(BigChunk)-sizeof(AlignType)
@@ -79,18 +76,18 @@ template bigChunkOverhead(): expr = sizeof(BigChunk)-sizeof(AlignType)
type
PLLChunk = ptr LLChunk
LLChunk {.pure.} = object ## *low-level* chunk
LLChunk = object ## *low-level* chunk
size: int # remaining size
acc: int # accumulator
next: PLLChunk # next low-level chunk; only needed for dealloc
PAvlNode = ptr AvlNode
AvlNode {.pure, final.} = object
AvlNode = object
link: array[0..1, PAvlNode] # Left (0) and right (1) links
key, upperBound: int
level: int
MemRegion {.final, pure.} = object
MemRegion = object
minLargeObj, maxLargeObj: int
freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk]
llmem: PLLChunk
@@ -99,6 +96,7 @@ type
freeChunksList: PBigChunk # XXX make this a datastructure with O(1) access
chunkStarts: IntSet
root, deleted, last, freeAvlNodes: PAvlNode
locked: bool # if locked, we cannot free pages.
{.deprecated: [TLLChunk: LLChunk, TAvlNode: AvlNode, TMemRegion: MemRegion].}
# shared:
@@ -234,7 +232,8 @@ proc isSmallChunk(c: PChunk): bool {.inline.} =
proc chunkUnused(c: PChunk): bool {.inline.} =
result = not c.used
iterator allObjects(m: MemRegion): pointer {.inline.} =
iterator allObjects(m: var MemRegion): pointer {.inline.} =
m.locked = true
for s in elements(m.chunkStarts):
# we need to check here again as it could have been modified:
if s in m.chunkStarts:
@@ -252,6 +251,7 @@ iterator allObjects(m: MemRegion): pointer {.inline.} =
else:
let c = cast[PBigChunk](c)
yield addr(c.data)
m.locked = false
proc iterToProc*(iter: typed, envType: typedesc; procName: untyped) {.
magic: "Plugin", compileTime.}
@@ -385,7 +385,7 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
excl(a.chunkStarts, pageIndex(c))
c = cast[PBigChunk](le)
if c.size < ChunkOsReturn or doNotUnmap:
if c.size < ChunkOsReturn or doNotUnmap or a.locked:
incl(a, a.chunkStarts, pageIndex(c))
updatePrevSize(a, c, c.size)
listAdd(a.freeChunksList, c)
@@ -442,26 +442,29 @@ proc getSmallChunk(a: var MemRegion): PSmallChunk =
# -----------------------------------------------------------------------------
proc isAllocatedPtr(a: MemRegion, p: pointer): bool {.benign.}
proc allocInv(a: MemRegion): bool =
## checks some (not all yet) invariants of the allocator's data structures.
for s in low(a.freeSmallChunks)..high(a.freeSmallChunks):
var c = a.freeSmallChunks[s]
while not (c == nil):
if c.next == c:
echo "[SYSASSERT] c.next == c"
return false
if not (c.size == s * MemAlign):
echo "[SYSASSERT] c.size != s * MemAlign"
return false
var it = c.freeList
while not (it == nil):
if not (it.zeroField == 0):
echo "[SYSASSERT] it.zeroField != 0"
c_printf("%ld %p\n", it.zeroField, it)
when true:
template allocInv(a: MemRegion): bool = true
else:
proc allocInv(a: MemRegion): bool =
## checks some (not all yet) invariants of the allocator's data structures.
for s in low(a.freeSmallChunks)..high(a.freeSmallChunks):
var c = a.freeSmallChunks[s]
while not (c == nil):
if c.next == c:
echo "[SYSASSERT] c.next == c"
return false
it = it.next
c = c.next
result = true
if not (c.size == s * MemAlign):
echo "[SYSASSERT] c.size != s * MemAlign"
return false
var it = c.freeList
while not (it == nil):
if not (it.zeroField == 0):
echo "[SYSASSERT] it.zeroField != 0"
c_printf("%ld %p\n", it.zeroField, it)
return false
it = it.next
c = c.next
result = true
proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
sysAssert(allocInv(a), "rawAlloc: begin")
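
The preceding hunk compiles the invariant checker out: under 'when true' allocInv becomes a template that always yields true, so every sysAssert(allocInv(a), ...) folds away at compile time, and flipping the condition restores the full walk over the free lists. The same pattern in miniature, under an assumed checkInvariants switch:

const checkInvariants = false        # the diff hard-codes 'when true' instead

when not checkInvariants:
  template allocatorOk(x: seq[int]): bool = true   # zero-cost stub
else:
  proc allocatorOk(x: seq[int]): bool =
    # stand-in for the real structural checks over the allocator's lists
    for v in x:
      if v < 0: return false
    result = true

assert allocatorOk(@[1, 2, 3])       # the call site stays the same either way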


@@ -1,7 +1,7 @@
#
#
# Nim's Runtime Library
# (c) Copyright 2015 Andreas Rumpf
# (c) Copyright 2016 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
@@ -9,13 +9,8 @@
# Garbage Collector
#
# The basic algorithm is *Deferred Reference Counting* with cycle detection.
# This is achieved by combining a Deutsch-Bobrow garbage collector
# together with Christoper's partial mark-sweep garbage collector.
#
# Special care has been taken to avoid recursion as far as possible to avoid
# stack overflows when traversing deep datastructures. It is well-suited
# for soft real time applications (like games).
# Refcounting + Mark&Sweep. Complex algorithms avoided.
# Been there, done that, didn't work.
when defined(nimCoroutines):
import arch
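
The rewritten header sums up the whole commit: deferred reference counting keeps handling the common case, and the simple mark&sweep that used to be the optional "backup GC" is now always used to reclaim reference cycles, while the trial-deletion machinery (cycleRoots, markGray/scan/collectWhite) is deleted further down. A compressed, self-contained sketch of that division of labour, with toy types and invented names rather than the runtime's:

type Cell = ref object
  rc: int
  refs: seq[Cell]

var heap: seq[Cell]            # every live allocation, for the sweep

proc mark(c: Cell; marked: var seq[Cell]) =
  if c != nil and c notin marked:
    marked.add c
    for child in c.refs: mark(child, marked)

proc collectCycles(stackRoots, globalRoots: seq[Cell]) =
  # mark everything reachable from the stack and the registered globals,
  # then sweep the rest; this reclaims cycles that refcounting never frees
  var marked: seq[Cell]
  for r in stackRoots: mark(r, marked)
  for r in globalRoots: mark(r, marked)
  var survivors: seq[Cell]
  for c in heap:
    if c in marked: survivors.add c   # unmarked cells would be rawDealloc'ed
  heap = survivors
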
@@ -30,7 +25,7 @@ const
# this seems to be a good value
withRealTime = defined(useRealtimeGC)
useMarkForDebug = defined(gcGenerational)
useBackupGc = false # use a simple M&S GC to collect
useBackupGc = true # use a simple M&S GC to collect
# cycles instead of the complex
# algorithm
@@ -55,8 +50,7 @@ type
WalkOp = enum
waMarkGlobal, # part of the backup/debug mark&sweep
waMarkPrecise, # part of the backup/debug mark&sweep
waZctDecRef, waPush, waCycleDecRef, waMarkGray, waScan, waScanBlack,
waCollectWhite #, waDebug
waZctDecRef, waPush
Finalizer {.compilerproc.} = proc (self: pointer) {.nimcall, benign.}
# A ref type can have a finalizer that is called before the object's
@@ -87,7 +81,6 @@ type
idGenerator: int
zct: CellSeq # the zero count table
decStack: CellSeq # cells in the stack that are to decref again
cycleRoots: CellSet
tempStack: CellSeq # temporary stack for recursion elimination
recGcLock: int # prevent recursion via finalizers; no thread lock
when withRealTime:
@@ -136,9 +129,6 @@ proc usrToCell(usr: pointer): PCell {.inline.} =
# convert pointer to userdata to object (=pointer to refcount)
result = cast[PCell](cast[ByteAddress](usr)-%ByteAddress(sizeof(Cell)))
proc canBeCycleRoot(c: PCell): bool {.inline.} =
result = ntfAcyclic notin c.typ.flags
proc extGetCellType(c: pointer): PNimType {.compilerproc.} =
# used for code generation concerning debugging
result = usrToCell(c).typ
@@ -200,14 +190,16 @@ proc prepareDealloc(cell: PCell) =
(cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell))
dec(gch.recGcLock)
template beforeDealloc(gch: var GcHeap; c: PCell; msg: typed) =
when false:
for i in 0..gch.decStack.len-1:
if gch.decStack.d[i] == c:
sysAssert(false, msg)
proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
# we MUST access gch as a global here, because this crosses DLL boundaries!
when hasThreadSupport and hasSharedHeap:
acquireSys(HeapLock)
when cycleGC:
if c.color != rcPurple:
c.setColor(rcPurple)
incl(gch.cycleRoots, c)
when hasThreadSupport and hasSharedHeap:
releaseSys(HeapLock)
@@ -224,19 +216,12 @@ proc decRef(c: PCell) {.inline.} =
gcAssert(c.refcount >=% rcIncrement, "decRef")
if --c.refcount:
rtlAddZCT(c)
elif canbeCycleRoot(c):
# unfortunately this is necessary here too, because a cycle might just
# have been broken up and we could recycle it.
rtlAddCycleRoot(c)
#writeCell("decRef", c)
proc incRef(c: PCell) {.inline.} =
gcAssert(isAllocatedPtr(gch.region, c), "incRef: interiorPtr")
c.refcount = c.refcount +% rcIncrement
# and not colorMask
#writeCell("incRef", c)
if canbeCycleRoot(c):
rtlAddCycleRoot(c)
proc nimGCref(p: pointer) {.compilerProc, inline.} = incRef(usrToCell(p))
proc nimGCunref(p: pointer) {.compilerProc, inline.} = decRef(usrToCell(p))
@@ -306,7 +291,6 @@ proc initGC() =
# init the rt
init(gch.zct)
init(gch.tempStack)
init(gch.cycleRoots)
init(gch.decStack)
when useMarkForDebug or useBackupGc:
init(gch.marked)
@@ -563,7 +547,7 @@ proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
d[j] = res
break
dec(j)
if canbeCycleRoot(ol): excl(gch.cycleRoots, ol)
beforeDealloc(gch, ol, "growObj stack trash")
rawDealloc(gch.region, ol)
else:
# we split the old refcount in 2 parts. XXX This is still not entirely
@@ -597,54 +581,12 @@ proc freeCyclicCell(gch: var GcHeap, c: PCell) =
when logGC: writeCell("cycle collector dealloc cell", c)
when reallyDealloc:
sysAssert(allocInv(gch.region), "free cyclic cell")
beforeDealloc(gch, c, "freeCyclicCell: stack trash")
rawDealloc(gch.region, c)
else:
gcAssert(c.typ != nil, "freeCyclicCell")
zeroMem(c, sizeof(Cell))
proc markGray(s: PCell) =
if s.color != rcGray:
setColor(s, rcGray)
forAllChildren(s, waMarkGray)
proc scanBlack(s: PCell) =
s.setColor(rcBlack)
forAllChildren(s, waScanBlack)
proc scan(s: PCell) =
if s.color == rcGray:
if s.refcount >=% rcIncrement:
scanBlack(s)
else:
s.setColor(rcWhite)
forAllChildren(s, waScan)
proc collectWhite(s: PCell) =
# This is a hacky way to deal with the following problem (bug #1796)
# Consider this content in cycleRoots:
# x -> a; y -> a where 'a' is an acyclic object so not included in
# cycleRoots itself. Then 'collectWhite' used to free 'a' twice. The
# 'isAllocatedPtr' check prevents this. This also means we do not need
# to query 's notin gch.cycleRoots' at all.
if isAllocatedPtr(gch.region, s) and s.color == rcWhite:
s.setColor(rcBlack)
forAllChildren(s, waCollectWhite)
freeCyclicCell(gch, s)
proc markRoots(gch: var GcHeap) =
var tabSize = 0
for s in elements(gch.cycleRoots):
#writeCell("markRoot", s)
inc tabSize
if s.color == rcPurple and s.refcount >=% rcIncrement:
markGray(s)
else:
excl(gch.cycleRoots, s)
# (s.color == rcBlack and rc == 0) as 1 condition:
if s.refcount == 0:
freeCyclicCell(gch, s)
gch.stat.cycleTableSize = max(gch.stat.cycleTableSize, tabSize)
when useBackupGc:
proc sweep(gch: var GcHeap) =
for x in allObjects(gch.region):
@@ -667,16 +609,6 @@ when useMarkForDebug or useBackupGc:
proc markGlobals(gch: var GcHeap) =
for i in 0 .. < globalMarkersLen: globalMarkers[i]()
proc stackMarkS(gch: var GcHeap, p: pointer) {.inline.} =
# the addresses are not as cells on the stack, so turn them to cells:
var cell = usrToCell(p)
var c = cast[ByteAddress](cell)
if c >% PageSize:
# fast check: does it look like a cell?
var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
if objStart != nil:
markS(gch, objStart)
when logGC:
var
cycleCheckA: array[100, PCell]
@@ -717,19 +649,6 @@ proc doOperation(p: pointer, op: WalkOp) =
#if c.refcount <% rcIncrement: addZCT(gch.zct, c)
of waPush:
add(gch.tempStack, c)
of waCycleDecRef:
gcAssert(c.refcount >=% rcIncrement, "doOperation 3")
c.refcount = c.refcount -% rcIncrement
of waMarkGray:
gcAssert(c.refcount >=% rcIncrement, "waMarkGray")
c.refcount = c.refcount -% rcIncrement
markGray(c)
of waScan: scan(c)
of waScanBlack:
c.refcount = c.refcount +% rcIncrement
if c.color != rcBlack:
scanBlack(c)
of waCollectWhite: collectWhite(c)
of waMarkGlobal:
when useMarkForDebug or useBackupGc:
when hasThreadSupport:
@@ -748,14 +667,6 @@ proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
proc collectZCT(gch: var GcHeap): bool {.benign.}
when useMarkForDebug or useBackupGc:
proc markStackAndRegistersForSweep(gch: var GcHeap) {.noinline, cdecl,
benign.}
proc collectRoots(gch: var GcHeap) =
for s in elements(gch.cycleRoots):
collectWhite(s)
proc collectCycles(gch: var GcHeap) =
when hasThreadSupport:
for c in gch.toDispose:
@@ -764,33 +675,12 @@ proc collectCycles(gch: var GcHeap) =
while gch.zct.len > 0: discard collectZCT(gch)
when useBackupGc:
cellsetReset(gch.marked)
markStackAndRegistersForSweep(gch)
var d = gch.decStack.d
for i in 0..gch.decStack.len-1:
sysAssert isAllocatedPtr(gch.region, d[i]), "collectCycles"
markS(gch, d[i])
markGlobals(gch)
sweep(gch)
else:
markRoots(gch)
# scanRoots:
for s in elements(gch.cycleRoots): scan(s)
collectRoots(gch)
cellsetReset(gch.cycleRoots)
# alive cycles need to be kept in 'cycleRoots' if they are referenced
# from the stack; otherwise the write barrier will add the cycle root again
# anyway:
when false:
var d = gch.decStack.d
var cycleRootsLen = 0
for i in 0..gch.decStack.len-1:
var c = d[i]
gcAssert isAllocatedPtr(gch.region, c), "addBackStackRoots"
gcAssert c.refcount >=% rcIncrement, "addBackStackRoots: dead cell"
if canBeCycleRoot(c):
#if c notin gch.cycleRoots:
inc cycleRootsLen
incl(gch.cycleRoots, c)
gcAssert c.typ != nil, "addBackStackRoots 2"
if cycleRootsLen != 0:
cfprintf(cstdout, "cycle roots: %ld\n", cycleRootsLen)
proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
# the addresses are not as cells on the stack, so turn them to cells:
@@ -812,31 +702,11 @@ proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
add(gch.decStack, cell)
sysAssert(allocInv(gch.region), "gcMark end")
proc markThreadStacks(gch: var GcHeap) =
when hasThreadSupport and hasSharedHeap:
{.error: "not fully implemented".}
var it = threadList
while it != nil:
# mark registers:
for i in 0 .. high(it.registers): gcMark(gch, it.registers[i])
var sp = cast[ByteAddress](it.stackBottom)
var max = cast[ByteAddress](it.stackTop)
# XXX stack direction?
# XXX unroll this loop:
while sp <=% max:
gcMark(gch, cast[ppointer](sp)[])
sp = sp +% sizeof(pointer)
it = it.next
include gc_common
proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
forEachStackSlot(gch, gcMark)
when useMarkForDebug or useBackupGc:
proc markStackAndRegistersForSweep(gch: var GcHeap) =
forEachStackSlot(gch, stackMarkS)
proc collectZCT(gch: var GcHeap): bool =
# Note: Freeing may add child objects to the ZCT! So essentially we do
# deep freeing, which is bad for incremental operation. In order to
@@ -866,8 +736,6 @@ proc collectZCT(gch: var GcHeap): bool =
# as this might be too slow.
# In any case, it should be removed from the ZCT. But not
# freed. **KEEP THIS IN MIND WHEN MAKING THIS INCREMENTAL!**
when cycleGC:
if canbeCycleRoot(c): excl(gch.cycleRoots, c)
when logGC: writeCell("zct dealloc cell", c)
gcTrace(c, csZctFreed)
# We are about to free the object, call the finalizer BEFORE its
@@ -877,6 +745,7 @@ proc collectZCT(gch: var GcHeap): bool =
forAllChildren(c, waZctDecRef)
when reallyDealloc:
sysAssert(allocInv(gch.region), "collectZCT: rawDealloc")
beforeDealloc(gch, c, "collectZCT: stack trash")
rawDealloc(gch.region, c)
else:
sysAssert(c.typ != nil, "collectZCT 2")
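
The retained note above ("Freeing may add child objects to the ZCT! So essentially we do deep freeing ...") refers to the behaviour sketched here: deallocating a cell dec-refs its children, which can push them onto the very table being processed, so one collectZCT run may free an unbounded object graph. Toy types, not the runtime's:

type Obj = ref object
  rc: int
  children: seq[Obj]

var zct: seq[Obj]              # zero count table

proc decRef(o: Obj) =
  dec o.rc
  if o.rc == 0: zct.add o      # a candidate for freeing, decided later

proc collectZct() =
  var i = 0
  while i < zct.len:           # the table may grow while we walk it
    let c = zct[i]
    if c.rc == 0:              # still dead after the stack scan
      for child in c.children: decRef(child)   # the "deep freeing" part
      # the cell itself would be rawDealloc'ed here
    inc i
  zct.setLen 0
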
@@ -915,7 +784,6 @@ proc collectCTBody(gch: var GcHeap) =
sysAssert(gch.decStack.len == 0, "collectCT")
prepareForInteriorPointerChecking(gch.region)
markStackAndRegisters(gch)
markThreadStacks(gch)
gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
inc(gch.stat.stackScans)
if collectZCT(gch):
@@ -937,11 +805,6 @@ proc collectCTBody(gch: var GcHeap) =
if gch.maxPause > 0 and duration > gch.maxPause:
c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
when useMarkForDebug or useBackupGc:
proc markForDebug(gch: var GcHeap) =
markStackAndRegistersForSweep(gch)
markGlobals(gch)
when defined(nimCoroutines):
proc currentStackSizes(): int =
for stack in items(gch.stack):
@@ -1035,7 +898,7 @@ when not defined(useNimRtl):
"[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
"[GC] zct capacity: " & $gch.zct.cap & "\n" &
"[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
"[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
"[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000) & "\n"
when defined(nimCoroutines):
result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
for stack in items(gch.stack):

tests/gc/thavlak.nim (new file)

@@ -0,0 +1,457 @@
discard """
output: '''Welcome to LoopTesterApp, Nim edition
Constructing Simple CFG...
15000 dummy loops
Constructing CFG...
Performing Loop Recognition
1 Iteration
Another 50 iterations...
..................................................
Found 1 loops (including artificial root node) (50)'''
"""
# bug #3184
import tables
import sequtils
import sets
type
BasicBlock = object
inEdges: seq[ref BasicBlock]
outEdges: seq[ref BasicBlock]
name: int
proc newBasicBlock(name: int): ref BasicBlock =
new(result)
result.inEdges = newSeq[ref BasicBlock]()
result.outEdges = newSeq[ref BasicBlock]()
result.name = name
proc hash(x: ref BasicBlock): int {.inline.} =
result = x.name
type
BasicBlockEdge = object
fr: ref BasicBlock
to: ref BasicBlock
Cfg = object
basicBlockMap: Table[int, ref BasicBlock]
edgeList: seq[BasicBlockEdge]
startNode: ref BasicBlock
proc newCfg(): Cfg =
result.basicBlockMap = initTable[int, ref BasicBlock]()
result.edgeList = newSeq[BasicBlockEdge]()
proc createNode(self: var Cfg, name: int): ref BasicBlock =
result = self.basicBlockMap.getOrDefault(name)
if result == nil:
result = newBasicBlock(name)
self.basicBlockMap.add name, result
if self.startNode == nil:
self.startNode = result
proc addEdge(self: var Cfg, edge: BasicBlockEdge) =
self.edgeList.add(edge)
proc getNumNodes(self: Cfg): int =
self.basicBlockMap.len
proc newBasicBlockEdge(cfg: var Cfg, fromName: int, toName: int): BasicBlockEdge =
result.fr = cfg.createNode(fromName)
result.to = cfg.createNode(toName)
result.fr.outEdges.add(result.to)
result.to.inEdges.add(result.fr)
cfg.addEdge(result)
type
SimpleLoop = object
basicBlocks: seq[ref BasicBlock] # TODO: set here
children: seq[ref SimpleLoop] # TODO: set here
parent: ref SimpleLoop
header: ref BasicBlock
isRoot: bool
isReducible: bool
counter: int
nestingLevel: int
depthLevel: int
proc newSimpleLoop(): ref SimpleLoop =
new(result)
result.basicBlocks = newSeq[ref BasicBlock]()
result.children = newSeq[ref SimpleLoop]()
result.parent = nil
result.header = nil
result.isRoot = false
result.isReducible = true
result.counter = 0
result.nestingLevel = 0
result.depthLevel = 0
proc addNode(self: ref SimpleLoop, bb: ref BasicBlock) =
self.basicBlocks.add bb
proc addChildLoop(self: ref SimpleLoop, loop: ref SimpleLoop) =
self.children.add loop
proc setParent(self: ref SimpleLoop, parent: ref SimpleLoop) =
self.parent = parent
self.parent.addChildLoop(self)
proc setHeader(self: ref SimpleLoop, bb: ref BasicBlock) =
self.basicBlocks.add(bb)
self.header = bb
proc setNestingLevel(self: ref SimpleLoop, level: int) =
self.nestingLevel = level
if level == 0: self.isRoot = true
var loop_counter: int = 0
type
Lsg = object
loops: seq[ref SimpleLoop]
root: ref SimpleLoop
proc createNewLoop(self: var Lsg): ref SimpleLoop =
result = newSimpleLoop()
loop_counter += 1
result.counter = loop_counter
proc addLoop(self: var Lsg, l: ref SimpleLoop) =
self.loops.add l
proc newLsg(): Lsg =
result.loops = newSeq[ref SimpleLoop]()
result.root = result.createNewLoop()
result.root.setNestingLevel(0)
result.addLoop(result.root)
proc getNumLoops(self: Lsg): int =
self.loops.len
type
UnionFindNode = object
parent: ref UnionFindNode
bb: ref BasicBlock
l: ref SimpleLoop
dfsNumber: int
proc newUnionFindNode(): ref UnionFindNode =
new(result)
when false:
result.parent = nil
result.bb = nil
result.l = nil
result.dfsNumber = 0
proc initNode(self: ref UnionFindNode, bb: ref BasicBlock, dfsNumber: int) =
self.parent = self
self.bb = bb
self.dfsNumber = dfsNumber
proc findSet(self: ref UnionFindNode): ref UnionFindNode =
var nodeList = newSeq[ref UnionFindNode]()
result = self
while result != result.parent:
var parent = result.parent
if parent != parent.parent: nodeList.add result
result = parent
for iter in nodeList: iter.parent = result.parent
proc union(self: ref UnionFindNode, unionFindNode: ref UnionFindNode) =
self.parent = unionFindNode
const
BB_TOP = 0 # uninitialized
BB_NONHEADER = 1 # a regular BB
BB_REDUCIBLE = 2 # reducible loop
BB_SELF = 3 # single BB loop
BB_IRREDUCIBLE = 4 # irreducible loop
BB_DEAD = 5 # a dead BB
BB_LAST = 6 # Sentinel
# # Marker for uninitialized nodes.
UNVISITED = -1
# # Safeguard against pathologic algorithm behavior.
MAXNONBACKPREDS = (32 * 1024)
type
HavlakLoopFinder = object
cfg: Cfg
lsg: Lsg
proc newHavlakLoopFinder(cfg: Cfg, lsg: Lsg): HavlakLoopFinder =
result.cfg = cfg
result.lsg = lsg
proc isAncestor(w: int, v: int, last: seq[int]): bool =
w <= v and v <= last[w]
proc dfs(currentNode: ref BasicBlock, nodes: var seq[ref UnionFindNode], number: var Table[ref BasicBlock, int], last: var seq[int], current: int): int =
var stack = @[(currentNode, current)]
while stack.len > 0:
let (currentNode, current) = stack.pop()
nodes[current].initNode(currentNode, current)
number[currentNode] = current
result = current
for target in currentNode.outEdges:
if number[target] == UNVISITED:
stack.add((target, result+1))
#result = dfs(target, nodes, number, last, result + 1)
last[number[currentNode]] = result
proc findLoops(self: var HavlakLoopFinder): int =
var startNode = self.cfg.startNode
if startNode == nil: return 0
var size = self.cfg.getNumNodes
var nonBackPreds = newSeq[HashSet[int]]()
var backPreds = newSeq[seq[int]]()
var number = initTable[ref BasicBlock, int]()
var header = newSeq[int](size)
var types = newSeq[int](size)
var last = newSeq[int](size)
var nodes = newSeq[ref UnionFindNode]()
for i in 1..size:
nonBackPreds.add initSet[int](1)
backPreds.add newSeq[int]()
nodes.add newUnionFindNode()
# Step a:
# - initialize all nodes as unvisited.
# - depth-first traversal and numbering.
# - unreached BB's are marked as dead.
#
for v in self.cfg.basicBlockMap.values: number[v] = UNVISITED
var res = dfs(startNode, nodes, number, last, 0)
# Step b:
# - iterate over all nodes.
#
# A backedge comes from a descendant in the DFS tree, and non-backedges
# from non-descendants (following Tarjan).
#
# - check incoming edges 'v' and add them to either
# - the list of backedges (backPreds) or
# - the list of non-backedges (nonBackPreds)
#
for w in 0 .. <size:
header[w] = 0
types[w] = BB_NONHEADER
var nodeW = nodes[w].bb
if nodeW != nil:
for nodeV in nodeW.inEdges:
var v = number[nodeV]
if v != UNVISITED:
if isAncestor(w, v, last):
backPreds[w].add v
else:
nonBackPreds[w].incl v
else:
types[w] = BB_DEAD
# Start node is root of all other loops.
header[0] = 0
# Step c:
#
# The outer loop, unchanged from Tarjan. It does nothing except
# for those nodes which are the destinations of backedges.
# For a header node w, we chase backward from the sources of the
# backedges adding nodes to the set P, representing the body of
# the loop headed by w.
#
# By running through the nodes in reverse of the DFST preorder,
# we ensure that inner loop headers will be processed before the
# headers for surrounding loops.
for w in countdown(size - 1, 0):
# this is 'P' in Havlak's paper
var nodePool = newSeq[ref UnionFindNode]()
var nodeW = nodes[w].bb
if nodeW != nil: # dead BB
# Step d:
for v in backPreds[w]:
if v != w:
nodePool.add nodes[v].findSet
else:
types[w] = BB_SELF
# Copy nodePool to workList.
#
var workList = newSeq[ref UnionFindNode]()
for x in nodePool: workList.add x
if nodePool.len != 0: types[w] = BB_REDUCIBLE
# work the list...
#
while workList.len > 0:
var x = workList[0]
workList.del(0)
# Step e:
#
# Step e represents the main difference from Tarjan's method.
# Chasing upwards from the sources of a node w's backedges. If
# there is a node y' that is not a descendant of w, w is marked
# the header of an irreducible loop, there is another entry
# into this loop that avoids w.
#
# The algorithm has degenerated. Break and
# return in this case.
#
var nonBackSize = nonBackPreds[x.dfsNumber].len
if nonBackSize > MAXNONBACKPREDS: return 0
for iter in nonBackPreds[x.dfsNumber]:
var y = nodes[iter]
var ydash = y.findSet
if not isAncestor(w, ydash.dfsNumber, last):
types[w] = BB_IRREDUCIBLE
nonBackPreds[w].incl ydash.dfsNumber
else:
if ydash.dfsNumber != w and not nodePool.contains(ydash):
workList.add ydash
nodePool.add ydash
# Collapse/Unionize nodes in a SCC to a single node
# For every SCC found, create a loop descriptor and link it in.
#
if (nodePool.len > 0) or (types[w] == BB_SELF):
var l = self.lsg.createNewLoop
l.setHeader(nodeW)
l.isReducible = types[w] != BB_IRREDUCIBLE
# At this point, one can set attributes to the loop, such as:
#
# the bottom node:
# iter = backPreds(w).begin();
# loop bottom is: nodes(iter).node;
#
# the number of backedges:
# backPreds(w).size()
#
# whether this loop is reducible:
# types(w) != BB_IRREDUCIBLE
#
nodes[w].l = l
for node in nodePool:
# Add nodes to loop descriptor.
header[node.dfsNumber] = w
node.union(nodes[w])
# Nested loops are not added, but linked together.
var node_l = node.l
if node_l != nil:
node_l.setParent(l)
else:
l.addNode(node.bb)
self.lsg.addLoop(l)
result = self.lsg.getNumLoops
type
LoopTesterApp = object
cfg: Cfg
lsg: Lsg
proc newLoopTesterApp(): LoopTesterApp =
result.cfg = newCfg()
result.lsg = newLsg()
proc buildDiamond(self: var LoopTesterApp, start: int): int =
var bb0 = start
var x1 = newBasicBlockEdge(self.cfg, bb0, bb0 + 1)
var x2 = newBasicBlockEdge(self.cfg, bb0, bb0 + 2)
var x3 = newBasicBlockEdge(self.cfg, bb0 + 1, bb0 + 3)
var x4 = newBasicBlockEdge(self.cfg, bb0 + 2, bb0 + 3)
result = bb0 + 3
proc buildConnect(self: var LoopTesterApp, start1: int, end1: int) =
var x1 = newBasicBlockEdge(self.cfg, start1, end1)
proc buildStraight(self: var LoopTesterApp, start: int, n: int): int =
for i in 0..n-1:
self.buildConnect(start + i, start + i + 1)
result = start + n
proc buildBaseLoop(self: var LoopTesterApp, from1: int): int =
var header = self.buildStraight(from1, 1)
var diamond1 = self.buildDiamond(header)
var d11 = self.buildStraight(diamond1, 1)
var diamond2 = self.buildDiamond(d11)
var footer = self.buildStraight(diamond2, 1)
self.buildConnect(diamond2, d11)
self.buildConnect(diamond1, header)
self.buildConnect(footer, from1)
result = self.buildStraight(footer, 1)
proc run(self: var LoopTesterApp) =
echo "Welcome to LoopTesterApp, Nim edition"
echo "Constructing Simple CFG..."
var x1 = self.cfg.createNode(0)
var x2 = self.buildBaseLoop(0)
var x3 = self.cfg.createNode(1)
self.buildConnect(0, 2)
echo "15000 dummy loops"
for i in 1..15000:
var h = newHavlakLoopFinder(self.cfg, newLsg())
var res = h.findLoops
echo "Constructing CFG..."
var n = 2
for parlooptrees in 1..10:
var x6 = self.cfg.createNode(n + 1)
self.buildConnect(2, n + 1)
n += 1
for i in 1..100:
var top = n
n = self.buildStraight(n, 1)
for j in 1..25: n = self.buildBaseLoop(n)
var bottom = self.buildStraight(n, 1)
self.buildConnect n, top
n = bottom
self.buildConnect(n, 1)
echo "Performing Loop Recognition\n1 Iteration"
var h = newHavlakLoopFinder(self.cfg, newLsg())
var loops = h.findLoops
echo "Another 50 iterations..."
var sum = 0
for i in 1..50:
write stdout, "."
flushFile(stdout)
var hlf = newHavlakLoopFinder(self.cfg, newLsg())
sum += hlf.findLoops
#echo getOccupiedMem()
echo "\nFound ", loops, " loops (including artificial root node) (", sum, ")"
var l = newLoopTesterApp()
l.run

tests/gc/tlists.nim (new file)

@@ -0,0 +1,37 @@
discard """
output: '''Success'''
"""
# bug #3793
import os
import math
import lists
import strutils
proc mkleak() =
# allocate 10 MB via linked lists
let numberOfLists = 100
for i in countUp(1, numberOfLists):
var leakList = initDoublyLinkedList[string]()
let numberOfLeaks = 50000
for j in countUp(1, numberOfLeaks):
let leakSize = 200
let leaked = newString(leakSize)
leakList.append(leaked)
proc mkManyLeaks() =
for i in 0..0:
when false: echo getOccupiedMem()
mkleak()
when false: echo getOccupiedMem()
# Force a full collection. This should free all of the
# lists and bring the memory usage down to a few MB's.
GC_fullCollect()
when false: echo getOccupiedMem()
if getOccupiedMem() > 8 * 200 * 50_000 * 2:
echo GC_getStatistics()
quit "leaking"
echo "Success"
mkManyLeaks()


@@ -152,6 +152,9 @@ proc gcTests(r: var TResults, cat: Category, options: string) =
testWithoutBoehm "closureleak"
testWithoutMs "refarrayleak"
testWithoutBoehm "tlists"
testWithoutBoehm "thavlak"
test "stackrefleak"
test "cyclecollector"