control flow graphs: introduce 'join' points for easy analyses based on abstract interpretation

This commit is contained in:
Andreas Rumpf
2019-01-21 14:35:49 +01:00
parent 2fb8b1d132
commit 11022fea1b
3 changed files with 395 additions and 154 deletions

View File

@@ -184,7 +184,7 @@ proc isHarmlessVar*(s: PSym; c: Con): bool =
inc usages
#of useWithinCall:
# if c.g[i].sym == s: return false
of goto, fork:
of goto, fork, InstrKind.join:
discard "we do not perform an abstract interpretation yet"
result = usages <= 1
@@ -246,6 +246,8 @@ proc isLastRead(n: PNode; c: var Con): bool =
if not takenForks.containsOrIncl(pc):
pcs.add pc + c.g[pc].dest
inc pc
of InstrKind.join:
inc pc
#echo c.graph.config $ n.info, " last read here!"
return true

View File

@@ -34,12 +34,12 @@ import ast, astalgo, types, intsets, tables, msgs, options, lineinfos
type
InstrKind* = enum
goto, fork, def, use
goto, fork, join, def, use
Instr* = object
n*: PNode
case kind*: InstrKind
of def, use: sym*: PSym
of goto, fork: dest*: int
of goto, fork, join: dest*: int
ControlFlowGraph* = seq[Instr]
@@ -56,6 +56,7 @@ type
inCall, inTryStmt: int
blocks: seq[TBlock]
tryStmtFixups: seq[TPosition]
forks: seq[TPosition]
owner: PSym
proc debugInfo(info: TLineInfo): string =
@@ -67,7 +68,7 @@ proc codeListing(c: ControlFlowGraph, result: var string, start=0; last = -1) =
var jumpTargets = initIntSet()
let last = if last < 0: c.len-1 else: min(last, c.len-1)
for i in start..last:
if c[i].kind in {goto, fork}:
if c[i].kind in {goto, fork, join}:
jumpTargets.incl(i+c[i].dest)
var i = start
while i <= last:
@@ -78,7 +79,7 @@ proc codeListing(c: ControlFlowGraph, result: var string, start=0; last = -1) =
case c[i].kind
of def, use:
result.add c[i].sym.name.s
of goto, fork:
of goto, fork, join:
result.add "L"
result.add c[i].dest+i
result.add("\t#")
@@ -98,11 +99,166 @@ proc echoCfg*(c: ControlFlowGraph; start=0; last = -1) {.deprecated.} =
proc forkI(c: var Con; n: PNode): TPosition =
result = TPosition(c.code.len)
c.code.add Instr(n: n, kind: fork, dest: 0)
c.forks.add result
proc gotoI(c: var Con; n: PNode): TPosition =
result = TPosition(c.code.len)
c.code.add Instr(n: n, kind: goto, dest: 0)
#[
Design of join
==============
block:
if cond: break
def(x)
use(x)
Generates:
L0: fork L1
join L0 # patched.
goto Louter
L1:
def x
join L0
Louter:
use x
block outer:
while a:
while b:
if foo:
if bar:
break outer # --> we need to 'join' every pushed 'fork' here
This works and then our abstract interpretation needs to deal with 'fork'
differently. It really causes a split in execution. Two threads are
"spawned" and both need to reach the 'join L' instruction. Afterwards
the abstract interpretations are joined and execution resumes single
threaded.
Abstract Interpretation
-----------------------
proc interpret(pc, state, comesFrom): state =
result = state
# we need an explicit 'create' instruction (an explicit heap), in order
# to deal with 'var x = create(); var y = x; var z = y; destroy(z)'
while true:
case pc
of fork:
let a = interpret(pc+1, result, pc)
let b = interpret(forkTarget, result, pc)
result = a ++ b # ++ is a union operation
inc pc
of join:
if joinTarget == comesFrom: return result
else: inc pc
of use X:
if not result.contains(x):
error "variable not initialized " & x
inc pc
of def X:
if not result.contains(x):
result.incl X
else:
error "overwrite of variable causes memory leak " & x
inc pc
of destroy X:
result.excl X
This is correct but still can lead to false positives:
proc p(cond: bool) =
if cond:
new(x)
otherThings()
if cond:
destroy x
Is not a leak. We should find a way to model *data* flow, not just
control flow. One solution is to rewrite the 'if' without a fork
instruction. The unstructured aspect can now be easily dealt with
the 'goto' and 'join' instructions.
proc p(cond: bool) =
L0: fork Lend
new(x)
# do not 'join' here!
Lend:
otherThings()
join L0 # SKIP THIS FOR new(x) SOMEHOW
destroy x
join L0 # but here.
But if we follow 'goto Louter' we will never come to the join point.
We restore the bindings after popping pc from the stack then there
"no" problem?!
while cond:
prelude()
if not condB: break
postlude()
--->
var setFlag = true
while cond and not setFlag:
prelude()
if not condB:
setFlag = true # BUT: Dependency
if not setFlag: # HERE
postlude()
--->
var setFlag = true
while cond and not setFlag:
prelude()
if not condB:
postlude()
setFlag = true
-------------------------------------------------
while cond:
prelude()
if more:
if not condB: break
stuffHere()
postlude()
-->
var setFlag = true
while cond and not setFlag:
prelude()
if more:
if not condB:
setFlag = false
else:
stuffHere()
postlude()
else:
postlude()
This is getting complicated. Instead we keep the whole 'join' idea but
duplicate the 'join' instructions on breaks and return exits!
]#
proc joinI(c: var Con; fromFork: TPosition; n: PNode) =
let dist = fromFork.int - c.code.len
c.code.add Instr(n: n, kind: join, dest: dist)
proc genLabel(c: Con): TPosition =
result = TPosition(c.code.len)
@@ -135,30 +291,97 @@ proc isTrue(n: PNode): bool =
proc gen(c: var Con; n: PNode) # {.noSideEffect.}
proc genWhile(c: var Con; n: PNode) =
# L1:
# cond, tmp
# fork tmp, L2
# body
# jmp L1
# L2:
let L1 = c.genLabel
withBlock(nil):
when true:
proc genWhile(c: var Con; n: PNode) =
# We unroll every loop 3 times. We emulate 0, 1, 2 iterations
# through the loop. We need to prove this is correct for our
# purposes. But Herb Sutter claims it is. (Proof by authority.)
#[
while cond:
body
Becomes:
if cond:
body
if cond:
body
if cond:
body
We still need to ensure 'break' resolves properly, so an AST to AST
translation is impossible.
So the code to generate is:
cond
fork L4 # F1
body
cond
fork L5 # F2
body
cond
fork L6 # F3
body
L6:
join F3
L5:
join F2
L4:
join F1
]#
if isTrue(n.sons[0]):
c.gen(n.sons[1])
c.jmpBack(n, L1)
# 'while true' is an idiom in Nim and so we produce
# better code for it:
for i in 0..2:
withBlock(nil):
c.gen(n.sons[1])
else:
c.gen(n.sons[0])
let L2 = c.forkI(n)
c.gen(n.sons[1])
c.jmpBack(n, L1)
c.patch(L2)
let oldForksLen = c.forks.len
var endings: array[3, TPosition]
for i in 0..2:
withBlock(nil):
c.gen(n.sons[0])
endings[i] = c.forkI(n)
c.gen(n.sons[1])
for i in countdown(endings.high, 0):
let endPos = endings[i]
c.patch(endPos)
c.joinI(c.forks.pop(), n)
doAssert(c.forks.len == oldForksLen)
else:
proc genWhile(c: var Con; n: PNode) =
# L1:
# cond, tmp
# fork tmp, L2
# body
# jmp L1
# L2:
let oldForksLen = c.forks.len
let L1 = c.genLabel
withBlock(nil):
if isTrue(n.sons[0]):
c.gen(n.sons[1])
c.jmpBack(n, L1)
else:
c.gen(n.sons[0])
let L2 = c.forkI(n)
c.gen(n.sons[1])
c.jmpBack(n, L1)
c.patch(L2)
setLen(c.forks, oldForksLen)
proc genBlock(c: var Con; n: PNode) =
withBlock(n.sons[0].sym):
c.gen(n.sons[1])
proc genJoins(c: var Con; n: PNode) =
for i in countdown(c.forks.high, 0): joinI(c, c.forks[i], n)
proc genBreak(c: var Con; n: PNode) =
genJoins(c, n)
let L1 = c.gotoI(n)
if n.sons[0].kind == nkSym:
#echo cast[int](n.sons[0].sym)
@@ -170,28 +393,76 @@ proc genBreak(c: var Con; n: PNode) =
else:
c.blocks[c.blocks.high].fixups.add L1
template forkT(n, body) =
let oldLen = c.forks.len
let L1 = c.forkI(n)
body
c.patch(L1)
c.joinI(L1, n)
setLen(c.forks, oldLen)
proc genIf(c: var Con, n: PNode) =
#[
if cond:
A
elif condB:
B
elif condC:
C
else:
D
cond
fork L1
A
goto Lend
L1:
condB
fork L2
B
goto Lend2
L2:
condC
fork L3
C
goto Lend3
L3:
D
goto Lend3 # not eliminated to simplify the join generation
Lend3:
join F3
Lend2:
join F2
Lend:
join F1
]#
let oldLen = c.forks.len
var endings: seq[TPosition] = @[]
for i in countup(0, len(n) - 1):
var it = n.sons[i]
c.gen(it.sons[0])
if it.len == 2:
let elsePos = c.forkI(it.sons[1])
let elsePos = forkI(c, it[1])
c.gen(it.sons[1])
if i < sonsLen(n)-1:
endings.add(c.gotoI(it.sons[1]))
endings.add(c.gotoI(it.sons[1]))
c.patch(elsePos)
for endPos in endings: c.patch(endPos)
for i in countdown(endings.high, 0):
let endPos = endings[i]
c.patch(endPos)
c.joinI(c.forks.pop(), n)
doAssert(c.forks.len == oldLen)
proc genAndOr(c: var Con; n: PNode) =
# asgn dest, a
# fork L1
# asgn dest, b
# L1:
# join F1
c.gen(n.sons[1])
let L1 = c.forkI(n)
c.gen(n.sons[2])
c.patch(L1)
forkT(n):
c.gen(n.sons[2])
proc genCase(c: var Con; n: PNode) =
# if (!expr1) goto L1;
@@ -204,56 +475,73 @@ proc genCase(c: var Con; n: PNode) =
# L2:
# elsePart
# Lend:
when false:
# XXX Exhaustiveness is not yet mapped to the control flow graph as
# it seems to offer no benefits for the 'last read of' question.
let isExhaustive = skipTypes(n.sons[0].typ,
abstractVarRange-{tyTypeDesc}).kind in {tyFloat..tyFloat128, tyString} or
lastSon(n).kind == nkElse
let isExhaustive = skipTypes(n.sons[0].typ,
abstractVarRange-{tyTypeDesc}).kind notin {tyFloat..tyFloat128, tyString}
var endings: seq[TPosition] = @[]
let oldLen = c.forks.len
c.gen(n.sons[0])
for i in 1 ..< n.len:
let it = n.sons[i]
if it.len == 1:
c.gen(it.sons[0])
elif i == n.len-1 and isExhaustive:
# treat the last branch as 'else' if this is an exhaustive case statement.
c.gen(it.lastSon)
else:
let elsePos = c.forkI(it.lastSon)
c.gen(it.lastSon)
if i < sonsLen(n)-1:
endings.add(c.gotoI(it.lastSon))
endings.add(c.gotoI(it.lastSon))
c.patch(elsePos)
for endPos in endings: c.patch(endPos)
for i in countdown(endings.high, 0):
let endPos = endings[i]
c.patch(endPos)
c.joinI(c.forks.pop(), n)
doAssert(c.forks.len == oldLen)
proc genTry(c: var Con; n: PNode) =
let oldLen = c.forks.len
var endings: seq[TPosition] = @[]
inc c.inTryStmt
var newFixups: seq[TPosition]
swap(newFixups, c.tryStmtFixups)
let oldFixups = c.tryStmtFixups.len
let elsePos = c.forkI(n)
#let elsePos = c.forkI(n)
c.gen(n.sons[0])
dec c.inTryStmt
for f in newFixups:
for i in oldFixups..c.tryStmtFixups.high:
let f = c.tryStmtFixups[i]
c.patch(f)
swap(newFixups, c.tryStmtFixups)
# we also need to produce join instructions
# for the 'fork' that might preceed the goto instruction
if f.int-1 >= 0 and c.code[f.int-1].kind == fork:
c.joinI(TPosition(f.int-1), n)
c.patch(elsePos)
setLen(c.tryStmtFixups, oldFixups)
#c.patch(elsePos)
for i in 1 ..< n.len:
let it = n.sons[i]
if it.kind != nkFinally:
var blen = len(it)
let endExcept = c.forkI(it)
c.gen(it.lastSon)
if i < sonsLen(n)-1:
endings.add(c.gotoI(it))
endings.add(c.gotoI(it))
c.patch(endExcept)
for endPos in endings: c.patch(endPos)
for i in countdown(endings.high, 0):
let endPos = endings[i]
c.patch(endPos)
c.joinI(c.forks.pop(), n)
# join the 'elsePos' forkI instruction:
#c.joinI(c.forks.pop(), n)
let fin = lastSon(n)
if fin.kind == nkFinally:
c.gen(fin.sons[0])
doAssert(c.forks.len == oldLen)
proc genRaise(c: var Con; n: PNode) =
genJoins(c, n)
gen(c, n.sons[0])
if c.inTryStmt > 0:
c.tryStmtFixups.add c.gotoI(n)
@@ -265,6 +553,7 @@ proc genImplicitReturn(c: var Con) =
gen(c, c.owner.ast.sons[resultPos])
proc genReturn(c: var Con; n: PNode) =
genJoins(c, n)
if n.sons[0].kind != nkEmpty:
gen(c, n.sons[0])
else:
@@ -287,6 +576,14 @@ proc genDef(c: var Con; n: PNode) =
if n.kind == nkSym and n.sym.kind in InterestingSyms:
c.code.add Instr(n: n, kind: def, sym: n.sym)
proc canRaise(fn: PNode): bool =
const magicsThatCanRaise = {
mNone, mSlurp, mStaticExec, mParseExprToAst, mParseStmtToAst}
if fn.kind == nkSym and fn.sym.magic notin magicsThatCanRaise:
result = false
else:
result = true
proc genCall(c: var Con; n: PNode) =
gen(c, n[0])
var t = n[0].typ
@@ -297,8 +594,16 @@ proc genCall(c: var Con; n: PNode) =
if t != nil and i < t.len and t.sons[i].kind == tyVar:
genDef(c, n[i])
# every call can potentially raise:
if c.inTryStmt > 0:
c.tryStmtFixups.add c.forkI(n)
if c.inTryStmt > 0 and canRaise(n[0]):
# we generate the instruction sequence:
# fork L1
# goto exceptionHandler (except or finally)
# L1:
# join F1
let endGoto = c.forkI(n)
c.tryStmtFixups.add c.gotoI(n)
c.patch(endGoto)
c.joinI(c.forks.pop(), n)
dec c.inCall
proc genMagic(c: var Con; n: PNode; m: TMagic) =
@@ -368,114 +673,48 @@ proc gen(c: var Con; n: PNode) =
doAssert false, "dfa construction pass requires the elimination of 'defer'"
else: discard
proc dfa(code: seq[Instr]; conf: ConfigRef) =
var u = newSeq[IntSet](code.len) # usages
var d = newSeq[IntSet](code.len) # defs
var c = newSeq[IntSet](code.len) # consumed
var backrefs = initTable[int, int]()
for i in 0..<code.len:
u[i] = initIntSet()
d[i] = initIntSet()
c[i] = initIntSet()
case code[i].kind
of use: u[i].incl(code[i].sym.id)
of def: d[i].incl(code[i].sym.id)
of fork, goto:
let d = i+code[i].dest
backrefs.add(d, i)
var w = @[0]
var maxIters = 50
var someChange = true
var takenGotos = initIntSet()
var consuming = -1
while w.len > 0 and maxIters > 0: # and someChange:
dec maxIters
var pc = w.pop() # w[^1]
var prevPc = -1
# this simulates a single linear control flow execution:
while pc < code.len:
if prevPc >= 0:
someChange = false
# merge step and test for changes (we compute the fixpoints here):
# 'u' needs to be the union of prevPc, pc
# 'd' needs to be the intersection of 'pc'
for id in u[prevPc]:
if not u[pc].containsOrIncl(id):
someChange = true
# in (a; b) if ``a`` sets ``v`` so does ``b``. The intersection
# is only interesting on merge points:
for id in d[prevPc]:
if not d[pc].containsOrIncl(id):
someChange = true
# if this is a merge point, we take the intersection of the 'd' sets:
if backrefs.hasKey(pc):
var intersect = initIntSet()
assign(intersect, d[pc])
var first = true
for prevPc in backrefs.allValues(pc):
for def in d[pc]:
if def notin d[prevPc]:
excl(intersect, def)
someChange = true
when defined(debugDfa):
echo "Excluding ", pc, " prev ", prevPc
assign d[pc], intersect
if consuming >= 0:
if not c[pc].containsOrIncl(consuming):
someChange = true
consuming = -1
# our interpretation ![I!]:
prevPc = pc
case code[pc].kind
of goto:
# we must leave endless loops eventually:
if not takenGotos.containsOrIncl(pc) or someChange:
pc = pc + code[pc].dest
else:
inc pc
of fork:
# we follow the next instruction but push the dest onto our "work" stack:
#if someChange:
w.add pc + code[pc].dest
inc pc
of use:
#if not d[prevPc].missingOrExcl():
# someChange = true
consuming = code[pc].sym.id
inc pc
of def:
if not d[pc].containsOrIncl(code[pc].sym.id):
someChange = true
inc pc
when defined(useDfa) and defined(debugDfa):
for i in 0..<code.len:
echo "PC ", i, ": defs: ", d[i], "; uses ", u[i], "; consumes ", c[i]
# now check the condition we're interested in:
for i in 0..<code.len:
case code[i].kind
of use:
let s = code[i].sym
if s.id notin d[i]:
localError(conf, code[i].n.info, "usage of uninitialized variable: " & s.name.s)
if s.id in c[i]:
localError(conf, code[i].n.info, "usage of an already consumed variable: " & s.name.s)
else: discard
proc dataflowAnalysis*(s: PSym; body: PNode; conf: ConfigRef) =
var c = Con(code: @[], blocks: @[])
gen(c, body)
genImplicitReturn(c)
when defined(useDfa) and defined(debugDfa): echoCfg(c.code)
dfa(c.code, conf)
proc constructCfg*(s: PSym; body: PNode): ControlFlowGraph =
## constructs a control flow graph for ``body``.
var c = Con(code: @[], blocks: @[], owner: s)
gen(c, body)
genImplicitReturn(c)
shallowCopy(result, c.code)
proc interpret(code: ControlFlowGraph; pc: int, state: seq[PSym], comesFrom: int; threadId: int): (seq[PSym], int) =
var res = state
var pc = pc
while pc < code.len:
#echo threadId, " ", code[pc].kind
case code[pc].kind
of goto:
pc = pc + code[pc].dest
of fork:
let target = pc + code[pc].dest
let (branchA, pcA) = interpret(code, pc+1, res, pc, threadId+1)
let (branchB, _) = interpret(code, target, res, pc, threadId+2)
# we add vars if they are in both branches:
for v in branchB:
if v in branchA:
if v notin res:
res.add v
pc = pcA+1
of join:
let target = pc + code[pc].dest
if comesFrom == target: return (res, pc)
inc pc
of use:
let v = code[pc].sym
if v notin res and v.kind != skParam:
echo "attempt to read uninitialized variable ", v.name.s
inc pc
of def:
let v = code[pc].sym
if v notin res:
res.add v
inc pc
return (res, pc)
proc dataflowAnalysis*(s: PSym; body: PNode) =
let c = constructCfg(s, body)
#echoCfg c
discard interpret(c, 0, @[], -1, 1)

View File

@@ -1013,7 +1013,7 @@ proc trackProc*(g: ModuleGraph; s: PSym, body: PNode) =
"declared lock level is $1, but real lock level is $2" %
[$s.typ.lockLevel, $t.maxLockLevel])
when defined(useDfa):
if s.kind == skFunc:
if s.name.s == "testp":
dataflowAnalysis(s, body)
when false: trackWrites(s, body)