This commit is contained in:
Andreas Rumpf
2016-05-22 14:28:53 +02:00
parent d691867c84
commit dcf830bba9
4 changed files with 503 additions and 53 deletions

View File

@@ -27,15 +27,14 @@ const
type
PTrunk = ptr Trunk
Trunk {.final.} = object
Trunk = object
next: PTrunk # all nodes are connected with this pointer
key: int # start address at bit 0
bits: array[0..IntsPerTrunk-1, int] # a bit vector
TrunkBuckets = array[0..255, PTrunk]
IntSet {.final.} = object
IntSet = object
data: TrunkBuckets
{.deprecated: [TIntSet: IntSet, TTrunk: Trunk, TTrunkBuckets: TrunkBuckets].}
type
AlignType = BiggestFloat
@@ -64,8 +63,6 @@ type
next, prev: PBigChunk # chunks of the same (or bigger) size
align: int
data: AlignType # start of usable memory
{.deprecated: [TAlignType: AlignType, TFreeCell: FreeCell, TBaseChunk: BaseChunk,
TBigChunk: BigChunk, TSmallChunk: SmallChunk].}
template smallChunkOverhead(): expr = sizeof(SmallChunk)-sizeof(AlignType)
template bigChunkOverhead(): expr = sizeof(BigChunk)-sizeof(AlignType)
@@ -79,18 +76,18 @@ template bigChunkOverhead(): expr = sizeof(BigChunk)-sizeof(AlignType)
type
PLLChunk = ptr LLChunk
LLChunk {.pure.} = object ## *low-level* chunk
LLChunk = object ## *low-level* chunk
size: int # remaining size
acc: int # accumulator
next: PLLChunk # next low-level chunk; only needed for dealloc
PAvlNode = ptr AvlNode
AvlNode {.pure, final.} = object
AvlNode = object
link: array[0..1, PAvlNode] # Left (0) and right (1) links
key, upperBound: int
level: int
MemRegion {.final, pure.} = object
MemRegion = object
minLargeObj, maxLargeObj: int
freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk]
llmem: PLLChunk
@@ -99,6 +96,7 @@ type
freeChunksList: PBigChunk # XXX make this a datastructure with O(1) access
chunkStarts: IntSet
root, deleted, last, freeAvlNodes: PAvlNode
locked: bool # if locked, we cannot free pages.
{.deprecated: [TLLChunk: LLChunk, TAvlNode: AvlNode, TMemRegion: MemRegion].}
# shared:
@@ -234,7 +232,8 @@ proc isSmallChunk(c: PChunk): bool {.inline.} =
proc chunkUnused(c: PChunk): bool {.inline.} =
result = not c.used
iterator allObjects(m: MemRegion): pointer {.inline.} =
iterator allObjects(m: var MemRegion): pointer {.inline.} =
m.locked = true
for s in elements(m.chunkStarts):
# we need to check here again as it could have been modified:
if s in m.chunkStarts:
@@ -252,6 +251,7 @@ iterator allObjects(m: MemRegion): pointer {.inline.} =
else:
let c = cast[PBigChunk](c)
yield addr(c.data)
m.locked = false
proc iterToProc*(iter: typed, envType: typedesc; procName: untyped) {.
magic: "Plugin", compileTime.}
@@ -385,7 +385,7 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
excl(a.chunkStarts, pageIndex(c))
c = cast[PBigChunk](le)
if c.size < ChunkOsReturn or doNotUnmap:
if c.size < ChunkOsReturn or doNotUnmap or a.locked:
incl(a, a.chunkStarts, pageIndex(c))
updatePrevSize(a, c, c.size)
listAdd(a.freeChunksList, c)
@@ -442,26 +442,29 @@ proc getSmallChunk(a: var MemRegion): PSmallChunk =
# -----------------------------------------------------------------------------
proc isAllocatedPtr(a: MemRegion, p: pointer): bool {.benign.}
proc allocInv(a: MemRegion): bool =
## checks some (not all yet) invariants of the allocator's data structures.
for s in low(a.freeSmallChunks)..high(a.freeSmallChunks):
var c = a.freeSmallChunks[s]
while not (c == nil):
if c.next == c:
echo "[SYSASSERT] c.next == c"
return false
if not (c.size == s * MemAlign):
echo "[SYSASSERT] c.size != s * MemAlign"
return false
var it = c.freeList
while not (it == nil):
if not (it.zeroField == 0):
echo "[SYSASSERT] it.zeroField != 0"
c_printf("%ld %p\n", it.zeroField, it)
when true:
template allocInv(a: MemRegion): bool = true
else:
proc allocInv(a: MemRegion): bool =
## checks some (not all yet) invariants of the allocator's data structures.
for s in low(a.freeSmallChunks)..high(a.freeSmallChunks):
var c = a.freeSmallChunks[s]
while not (c == nil):
if c.next == c:
echo "[SYSASSERT] c.next == c"
return false
it = it.next
c = c.next
result = true
if not (c.size == s * MemAlign):
echo "[SYSASSERT] c.size != s * MemAlign"
return false
var it = c.freeList
while not (it == nil):
if not (it.zeroField == 0):
echo "[SYSASSERT] it.zeroField != 0"
c_printf("%ld %p\n", it.zeroField, it)
return false
it = it.next
c = c.next
result = true
proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
sysAssert(allocInv(a), "rawAlloc: begin")

View File

@@ -190,6 +190,12 @@ proc prepareDealloc(cell: PCell) =
(cast[Finalizer](cell.typ.finalizer))(cellToUsr(cell))
dec(gch.recGcLock)
template beforeDealloc(gch: var GcHeap; c: PCell; msg: typed) =
when false:
for i in 0..gch.decStack.len-1:
if gch.decStack.d[i] == c:
sysAssert(false, msg)
proc rtlAddCycleRoot(c: PCell) {.rtl, inl.} =
# we MUST access gch as a global here, because this crosses DLL boundaries!
when hasThreadSupport and hasSharedHeap:
@@ -541,6 +547,7 @@ proc growObj(old: pointer, newsize: int, gch: var GcHeap): pointer =
d[j] = res
break
dec(j)
beforeDealloc(gch, ol, "growObj stack trash")
rawDealloc(gch.region, ol)
else:
# we split the old refcount in 2 parts. XXX This is still not entirely
@@ -574,6 +581,7 @@ proc freeCyclicCell(gch: var GcHeap, c: PCell) =
when logGC: writeCell("cycle collector dealloc cell", c)
when reallyDealloc:
sysAssert(allocInv(gch.region), "free cyclic cell")
beforeDealloc(gch, c, "freeCyclicCell: stack trash")
rawDealloc(gch.region, c)
else:
gcAssert(c.typ != nil, "freeCyclicCell")
@@ -601,16 +609,6 @@ when useMarkForDebug or useBackupGc:
proc markGlobals(gch: var GcHeap) =
for i in 0 .. < globalMarkersLen: globalMarkers[i]()
proc stackMarkS(gch: var GcHeap, p: pointer) {.inline.} =
# the addresses are not as cells on the stack, so turn them to cells:
var cell = usrToCell(p)
var c = cast[ByteAddress](cell)
if c >% PageSize:
# fast check: does it look like a cell?
var objStart = cast[PCell](interiorAllocatedPtr(gch.region, cell))
if objStart != nil:
markS(gch, objStart)
when logGC:
var
cycleCheckA: array[100, PCell]
@@ -669,10 +667,6 @@ proc nimGCvisit(d: pointer, op: int) {.compilerRtl.} =
proc collectZCT(gch: var GcHeap): bool {.benign.}
when useMarkForDebug or useBackupGc:
proc markStackAndRegistersForSweep(gch: var GcHeap) {.noinline, cdecl,
benign.}
proc collectCycles(gch: var GcHeap) =
when hasThreadSupport:
for c in gch.toDispose:
@@ -681,7 +675,10 @@ proc collectCycles(gch: var GcHeap) =
while gch.zct.len > 0: discard collectZCT(gch)
when useBackupGc:
cellsetReset(gch.marked)
markStackAndRegistersForSweep(gch)
var d = gch.decStack.d
for i in 0..gch.decStack.len-1:
sysAssert isAllocatedPtr(gch.region, d[i]), "collectCycles"
markS(gch, d[i])
markGlobals(gch)
sweep(gch)
@@ -710,10 +707,6 @@ include gc_common
proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
forEachStackSlot(gch, gcMark)
when useMarkForDebug or useBackupGc:
proc markStackAndRegistersForSweep(gch: var GcHeap) =
forEachStackSlot(gch, stackMarkS)
proc collectZCT(gch: var GcHeap): bool =
# Note: Freeing may add child objects to the ZCT! So essentially we do
# deep freeing, which is bad for incremental operation. In order to
@@ -752,6 +745,7 @@ proc collectZCT(gch: var GcHeap): bool =
forAllChildren(c, waZctDecRef)
when reallyDealloc:
sysAssert(allocInv(gch.region), "collectZCT: rawDealloc")
beforeDealloc(gch, c, "collectZCT: stack trash")
rawDealloc(gch.region, c)
else:
sysAssert(c.typ != nil, "collectZCT 2")
@@ -811,11 +805,6 @@ proc collectCTBody(gch: var GcHeap) =
if gch.maxPause > 0 and duration > gch.maxPause:
c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
when useMarkForDebug or useBackupGc:
proc markForDebug(gch: var GcHeap) =
markStackAndRegistersForSweep(gch)
markGlobals(gch)
when defined(nimCoroutines):
proc currentStackSizes(): int =
for stack in items(gch.stack):

457
tests/gc/thavlak.nim Normal file
View File

@@ -0,0 +1,457 @@
discard """
output: '''Welcome to LoopTesterApp, Nim edition
Constructing Simple CFG...
15000 dummy loops
Constructing CFG...
Performing Loop Recognition
1 Iteration
Another 50 iterations...
..................................................
Found 1 loops (including artificial root node) (50)'''
"""
# bug #3184
import tables
import sequtils
import sets
type
BasicBlock = object
inEdges: seq[ref BasicBlock]
outEdges: seq[ref BasicBlock]
name: int
proc newBasicBlock(name: int): ref BasicBlock =
new(result)
result.inEdges = newSeq[ref BasicBlock]()
result.outEdges = newSeq[ref BasicBlock]()
result.name = name
proc hash(x: ref BasicBlock): int {.inline.} =
result = x.name
type
BasicBlockEdge = object
fr: ref BasicBlock
to: ref BasicBlock
Cfg = object
basicBlockMap: Table[int, ref BasicBlock]
edgeList: seq[BasicBlockEdge]
startNode: ref BasicBlock
proc newCfg(): Cfg =
result.basicBlockMap = initTable[int, ref BasicBlock]()
result.edgeList = newSeq[BasicBlockEdge]()
proc createNode(self: var Cfg, name: int): ref BasicBlock =
result = self.basicBlockMap.getOrDefault(name)
if result == nil:
result = newBasicBlock(name)
self.basicBlockMap.add name, result
if self.startNode == nil:
self.startNode = result
proc addEdge(self: var Cfg, edge: BasicBlockEdge) =
self.edgeList.add(edge)
proc getNumNodes(self: Cfg): int =
self.basicBlockMap.len
proc newBasicBlockEdge(cfg: var Cfg, fromName: int, toName: int): BasicBlockEdge =
result.fr = cfg.createNode(fromName)
result.to = cfg.createNode(toName)
result.fr.outEdges.add(result.to)
result.to.inEdges.add(result.fr)
cfg.addEdge(result)
type
SimpleLoop = object
basicBlocks: seq[ref BasicBlock] # TODO: set here
children: seq[ref SimpleLoop] # TODO: set here
parent: ref SimpleLoop
header: ref BasicBlock
isRoot: bool
isReducible: bool
counter: int
nestingLevel: int
depthLevel: int
proc newSimpleLoop(): ref SimpleLoop =
new(result)
result.basicBlocks = newSeq[ref BasicBlock]()
result.children = newSeq[ref SimpleLoop]()
result.parent = nil
result.header = nil
result.isRoot = false
result.isReducible = true
result.counter = 0
result.nestingLevel = 0
result.depthLevel = 0
proc addNode(self: ref SimpleLoop, bb: ref BasicBlock) =
self.basicBlocks.add bb
proc addChildLoop(self: ref SimpleLoop, loop: ref SimpleLoop) =
self.children.add loop
proc setParent(self: ref SimpleLoop, parent: ref SimpleLoop) =
self.parent = parent
self.parent.addChildLoop(self)
proc setHeader(self: ref SimpleLoop, bb: ref BasicBlock) =
self.basicBlocks.add(bb)
self.header = bb
proc setNestingLevel(self: ref SimpleLoop, level: int) =
self.nestingLevel = level
if level == 0: self.isRoot = true
var loop_counter: int = 0
type
Lsg = object
loops: seq[ref SimpleLoop]
root: ref SimpleLoop
proc createNewLoop(self: var Lsg): ref SimpleLoop =
result = newSimpleLoop()
loop_counter += 1
result.counter = loop_counter
proc addLoop(self: var Lsg, l: ref SimpleLoop) =
self.loops.add l
proc newLsg(): Lsg =
result.loops = newSeq[ref SimpleLoop]()
result.root = result.createNewLoop()
result.root.setNestingLevel(0)
result.addLoop(result.root)
proc getNumLoops(self: Lsg): int =
self.loops.len
type
UnionFindNode = object
parent: ref UnionFindNode
bb: ref BasicBlock
l: ref SimpleLoop
dfsNumber: int
proc newUnionFindNode(): ref UnionFindNode =
new(result)
when false:
result.parent = nil
result.bb = nil
result.l = nil
result.dfsNumber = 0
proc initNode(self: ref UnionFindNode, bb: ref BasicBlock, dfsNumber: int) =
self.parent = self
self.bb = bb
self.dfsNumber = dfsNumber
proc findSet(self: ref UnionFindNode): ref UnionFindNode =
var nodeList = newSeq[ref UnionFindNode]()
result = self
while result != result.parent:
var parent = result.parent
if parent != parent.parent: nodeList.add result
result = parent
for iter in nodeList: iter.parent = result.parent
proc union(self: ref UnionFindNode, unionFindNode: ref UnionFindNode) =
self.parent = unionFindNode
const
BB_TOP = 0 # uninitialized
BB_NONHEADER = 1 # a regular BB
BB_REDUCIBLE = 2 # reducible loop
BB_SELF = 3 # single BB loop
BB_IRREDUCIBLE = 4 # irreducible loop
BB_DEAD = 5 # a dead BB
BB_LAST = 6 # Sentinel
# # Marker for uninitialized nodes.
UNVISITED = -1
# # Safeguard against pathologic algorithm behavior.
MAXNONBACKPREDS = (32 * 1024)
type
HavlakLoopFinder = object
cfg: Cfg
lsg: Lsg
proc newHavlakLoopFinder(cfg: Cfg, lsg: Lsg): HavlakLoopFinder =
result.cfg = cfg
result.lsg = lsg
proc isAncestor(w: int, v: int, last: seq[int]): bool =
w <= v and v <= last[w]
proc dfs(currentNode: ref BasicBlock, nodes: var seq[ref UnionFindNode], number: var Table[ref BasicBlock, int], last: var seq[int], current: int): int =
var stack = @[(currentNode, current)]
while stack.len > 0:
let (currentNode, current) = stack.pop()
nodes[current].initNode(currentNode, current)
number[currentNode] = current
result = current
for target in currentNode.outEdges:
if number[target] == UNVISITED:
stack.add((target, result+1))
#result = dfs(target, nodes, number, last, result + 1)
last[number[currentNode]] = result
proc findLoops(self: var HavlakLoopFinder): int =
var startNode = self.cfg.startNode
if startNode == nil: return 0
var size = self.cfg.getNumNodes
var nonBackPreds = newSeq[HashSet[int]]()
var backPreds = newSeq[seq[int]]()
var number = initTable[ref BasicBlock, int]()
var header = newSeq[int](size)
var types = newSeq[int](size)
var last = newSeq[int](size)
var nodes = newSeq[ref UnionFindNode]()
for i in 1..size:
nonBackPreds.add initSet[int](1)
backPreds.add newSeq[int]()
nodes.add newUnionFindNode()
# Step a:
# - initialize all nodes as unvisited.
# - depth-first traversal and numbering.
# - unreached BB's are marked as dead.
#
for v in self.cfg.basicBlockMap.values: number[v] = UNVISITED
var res = dfs(startNode, nodes, number, last, 0)
# Step b:
# - iterate over all nodes.
#
# A backedge comes from a descendant in the DFS tree, and non-backedges
# from non-descendants (following Tarjan).
#
# - check incoming edges 'v' and add them to either
# - the list of backedges (backPreds) or
# - the list of non-backedges (nonBackPreds)
#
for w in 0 .. <size:
header[w] = 0
types[w] = BB_NONHEADER
var nodeW = nodes[w].bb
if nodeW != nil:
for nodeV in nodeW.inEdges:
var v = number[nodeV]
if v != UNVISITED:
if isAncestor(w, v, last):
backPreds[w].add v
else:
nonBackPreds[w].incl v
else:
types[w] = BB_DEAD
# Start node is root of all other loops.
header[0] = 0
# Step c:
#
# The outer loop, unchanged from Tarjan. It does nothing except
# for those nodes which are the destinations of backedges.
# For a header node w, we chase backward from the sources of the
# backedges adding nodes to the set P, representing the body of
# the loop headed by w.
#
# By running through the nodes in reverse of the DFST preorder,
# we ensure that inner loop headers will be processed before the
# headers for surrounding loops.
for w in countdown(size - 1, 0):
# this is 'P' in Havlak's paper
var nodePool = newSeq[ref UnionFindNode]()
var nodeW = nodes[w].bb
if nodeW != nil: # dead BB
# Step d:
for v in backPreds[w]:
if v != w:
nodePool.add nodes[v].findSet
else:
types[w] = BB_SELF
# Copy nodePool to workList.
#
var workList = newSeq[ref UnionFindNode]()
for x in nodePool: workList.add x
if nodePool.len != 0: types[w] = BB_REDUCIBLE
# work the list...
#
while workList.len > 0:
var x = workList[0]
workList.del(0)
# Step e:
#
# Step e represents the main difference from Tarjan's method.
# Chasing upwards from the sources of a node w's backedges. If
# there is a node y' that is not a descendant of w, w is marked
# the header of an irreducible loop, there is another entry
# into this loop that avoids w.
#
# The algorithm has degenerated. Break and
# return in this case.
#
var nonBackSize = nonBackPreds[x.dfsNumber].len
if nonBackSize > MAXNONBACKPREDS: return 0
for iter in nonBackPreds[x.dfsNumber]:
var y = nodes[iter]
var ydash = y.findSet
if not isAncestor(w, ydash.dfsNumber, last):
types[w] = BB_IRREDUCIBLE
nonBackPreds[w].incl ydash.dfsNumber
else:
if ydash.dfsNumber != w and not nodePool.contains(ydash):
workList.add ydash
nodePool.add ydash
# Collapse/Unionize nodes in a SCC to a single node
# For every SCC found, create a loop descriptor and link it in.
#
if (nodePool.len > 0) or (types[w] == BB_SELF):
var l = self.lsg.createNewLoop
l.setHeader(nodeW)
l.isReducible = types[w] != BB_IRREDUCIBLE
# At this point, one can set attributes to the loop, such as:
#
# the bottom node:
# iter = backPreds(w).begin();
# loop bottom is: nodes(iter).node;
#
# the number of backedges:
# backPreds(w).size()
#
# whether this loop is reducible:
# types(w) != BB_IRREDUCIBLE
#
nodes[w].l = l
for node in nodePool:
# Add nodes to loop descriptor.
header[node.dfsNumber] = w
node.union(nodes[w])
# Nested loops are not added, but linked together.
var node_l = node.l
if node_l != nil:
node_l.setParent(l)
else:
l.addNode(node.bb)
self.lsg.addLoop(l)
result = self.lsg.getNumLoops
type
LoopTesterApp = object
cfg: Cfg
lsg: Lsg
proc newLoopTesterApp(): LoopTesterApp =
result.cfg = newCfg()
result.lsg = newLsg()
proc buildDiamond(self: var LoopTesterApp, start: int): int =
var bb0 = start
var x1 = newBasicBlockEdge(self.cfg, bb0, bb0 + 1)
var x2 = newBasicBlockEdge(self.cfg, bb0, bb0 + 2)
var x3 = newBasicBlockEdge(self.cfg, bb0 + 1, bb0 + 3)
var x4 = newBasicBlockEdge(self.cfg, bb0 + 2, bb0 + 3)
result = bb0 + 3
proc buildConnect(self: var LoopTesterApp, start1: int, end1: int) =
var x1 = newBasicBlockEdge(self.cfg, start1, end1)
proc buildStraight(self: var LoopTesterApp, start: int, n: int): int =
for i in 0..n-1:
self.buildConnect(start + i, start + i + 1)
result = start + n
proc buildBaseLoop(self: var LoopTesterApp, from1: int): int =
var header = self.buildStraight(from1, 1)
var diamond1 = self.buildDiamond(header)
var d11 = self.buildStraight(diamond1, 1)
var diamond2 = self.buildDiamond(d11)
var footer = self.buildStraight(diamond2, 1)
self.buildConnect(diamond2, d11)
self.buildConnect(diamond1, header)
self.buildConnect(footer, from1)
result = self.buildStraight(footer, 1)
proc run(self: var LoopTesterApp) =
echo "Welcome to LoopTesterApp, Nim edition"
echo "Constructing Simple CFG..."
var x1 = self.cfg.createNode(0)
var x2 = self.buildBaseLoop(0)
var x3 = self.cfg.createNode(1)
self.buildConnect(0, 2)
echo "15000 dummy loops"
for i in 1..15000:
var h = newHavlakLoopFinder(self.cfg, newLsg())
var res = h.findLoops
echo "Constructing CFG..."
var n = 2
for parlooptrees in 1..10:
var x6 = self.cfg.createNode(n + 1)
self.buildConnect(2, n + 1)
n += 1
for i in 1..100:
var top = n
n = self.buildStraight(n, 1)
for j in 1..25: n = self.buildBaseLoop(n)
var bottom = self.buildStraight(n, 1)
self.buildConnect n, top
n = bottom
self.buildConnect(n, 1)
echo "Performing Loop Recognition\n1 Iteration"
var h = newHavlakLoopFinder(self.cfg, newLsg())
var loops = h.findLoops
echo "Another 50 iterations..."
var sum = 0
for i in 1..50:
write stdout, "."
flushFile(stdout)
var hlf = newHavlakLoopFinder(self.cfg, newLsg())
sum += hlf.findLoops
#echo getOccupiedMem()
echo "\nFound ", loops, " loops (including artificial root node) (", sum, ")"
var l = newLoopTesterApp()
l.run

View File

@@ -153,6 +153,7 @@ proc gcTests(r: var TResults, cat: Category, options: string) =
testWithoutMs "refarrayleak"
testWithoutBoehm "tlists"
testWithoutBoehm "thavlak"
test "stackrefleak"
test "cyclecollector"