mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-17 21:12:42 +00:00
work in progress: a dataflow architecture for Nim
This commit is contained in:
412
compiler/dfa.nim
Normal file
412
compiler/dfa.nim
Normal file
@@ -0,0 +1,412 @@
|
||||
#
|
||||
#
|
||||
# The Nim Compiler
|
||||
# (c) Copyright 2017 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## Data flow analysis for Nim. For now the task is to prove that every
|
||||
## usage of a local variable 'v' is covered by an initialization to 'v'
|
||||
## first.
|
||||
## We transform the AST into a linear list of instructions first to
|
||||
## make this easier to handle: There are only 2 different branching
|
||||
## instructions: 'goto X' is an unconditional goto, 'fork X'
|
||||
## is a conditional goto (either the next instruction or 'X' can be
|
||||
## taken). Exhaustive case statements are translated
|
||||
## so that the last branch is transformed into an 'else' branch.
|
||||
## ``return`` and ``break`` are all covered by 'goto'.
|
||||
## The case to detect is ``use v`` that is not dominated by
|
||||
## a ``def v``.
|
||||
## The data structures and algorithms used here are inspired by
|
||||
## "A Graph–Free Approach to Data–Flow Analysis" by Markus Mohnen.
|
||||
## https://link.springer.com/content/pdf/10.1007/3-540-45937-5_6.pdf
|
||||
|
||||
import ast, astalgo, types, intsets, tables, msgs
|
||||
|
||||
type
  InstrKind = enum
    # 'goto' and 'fork' are the two branching instructions; 'def' and 'use'
    # carry the symbol of a tracked variable.
    goto
    fork
    def
    use

  Instr = object
    # One instruction of the linearized program.
    n: PNode                # node the instruction was generated for
    case kind: InstrKind
    of def, use:
      sym: PSym             # the variable being defined / used
    of goto, fork:
      dest: int             # jump distance relative to this instruction

  # Index of an instruction inside 'Con.code'.
  TPosition = distinct int

  TBlock = object
    # An open block or loop; 'fixups' are the jumps that get patched to
    # the block's end once the block is popped.
    label: PSym
    fixups: seq[TPosition]

  # Only referenced from disabled ('when false') code in this module.
  ValueKind = enum
    undef
    value
    valueOrUndef

  Con = object
    # Code generation context: the emitted instructions plus the stack of
    # currently open blocks.
    code: seq[Instr]
    blocks: seq[TBlock]
|
||||
|
||||
proc debugInfo(info: TLineInfo): string =
  ## Renders `info` as ``filename:line`` for the debug listing.
  info.toFilename & ":" & $info.line
|
||||
|
||||
proc listingTarget(c: Con; i: int): int =
  ## Absolute target of the jump at index `i`. Jumps that leave the routine
  ## are encoded with a huge distance, so the sum is clamped to `high(int)`
  ## instead of being allowed to overflow.
  let d = c.code[i].dest
  if d > high(int) - i: high(int) else: i + d

proc codeListing(c: Con, result: var string, start=0; last = -1) =
  ## Appends a textual listing of the instructions `start..last` to `result`
  ## (for debugging purposes). A negative `last` means "up to the final
  ## instruction"; a too large `last` is clamped. Every instruction that is
  ## the target of a listed jump is preceded by an ``L<index>:`` label.
  let last = if last < 0: c.code.len-1 else: min(last, c.code.len-1)

  # first pass: collect all indices that need a label
  var jumpTargets = initIntSet()
  for i in start..last:
    if c.code[i].kind in {goto, fork}:
      jumpTargets.incl(listingTarget(c, i))

  # second pass: emit one line per instruction
  var i = start
  while i <= last:
    if i in jumpTargets: result.add("L" & $i & ":\n")
    result.add "\t"
    result.add $c.code[i].kind
    result.add "\t"
    case c.code[i].kind
    of def, use:
      result.add c.code[i].sym.name.s
    of goto, fork:
      result.add "L"
      result.add $listingTarget(c, i)
    result.add("\t#")
    result.add(debugInfo(c.code[i].n.info))
    result.add("\n")
    inc i
  # a jump to just behind the listed range gets an explicit end label
  if i in jumpTargets: result.add("L" & $i & ": End\n")
|
||||
|
||||
|
||||
proc echoCfg*(c: Con; start=0; last = -1) {.deprecated.} =
  ## Debugging aid: prints the listing produced by `codeListing` for the
  ## instructions `start..last` to stdout.
  var listing = ""
  c.codeListing(listing, start, last)
  echo listing
|
||||
|
||||
proc forkI(c: var Con; n: PNode): TPosition =
  ## Appends a 'fork' whose target is not known yet (`dest` is 0) and
  ## returns its position so that it can be fixed up with `patch`.
  c.code.add Instr(n: n, kind: fork, dest: 0)
  result = TPosition(c.code.high)
|
||||
|
||||
proc gotoI(c: var Con; n: PNode): TPosition =
  ## Appends a 'goto' whose target is not known yet (`dest` is 0) and
  ## returns its position so that it can be fixed up with `patch`.
  c.code.add Instr(n: n, kind: goto, dest: 0)
  result = TPosition(c.code.high)
|
||||
|
||||
proc genLabel(c: Con): TPosition =
  ## Returns the position the next emitted instruction will get.
  TPosition(c.code.len)
|
||||
|
||||
proc jmpBack(c: var Con, n: PNode, p = TPosition(0)) =
  ## Appends a 'goto' that jumps to the already known position `p`.
  let here = c.code.len
  let dist = int(p) - here
  internalAssert(-0x7fff < dist and dist < 0x7fff)
  c.code.add Instr(n: n, kind: goto, dest: dist)
|
||||
|
||||
proc patch(c: var Con, p: TPosition) =
  ## Makes the jump stored at position `p` point to the current end of the
  ## code, i.e. to the instruction that is emitted next.
  let idx = int(p)
  let diff = c.code.len - idx
  internalAssert(-0x7fff < diff and diff < 0x7fff)
  c.code[idx].dest = diff
|
||||
|
||||
proc popBlock(c: var Con; oldLen: int) =
  ## Patches all pending jumps of the block at index `oldLen` to the current
  ## end of the code and shrinks the block stack back to `oldLen` entries.
  for i in 0 ..< c.blocks[oldLen].fixups.len:
    c.patch(c.blocks[oldLen].fixups[i])
  c.blocks.setLen(oldLen)
|
||||
|
||||
template withBlock(labl: PSym; body: untyped) {.dirty.} =
  ## Runs `body` with a fresh block labelled `labl` pushed onto `c.blocks`
  ## and pops (and patches) it afterwards. Expects a variable `c` of type
  ## `Con` to be in scope at the call site.
  let oldLen {.gensym.} = c.blocks.len
  c.blocks.add TBlock(label: labl, fixups: @[])
  body
  c.popBlock(oldLen)
|
||||
|
||||
proc isTrue(n: PNode): bool =
  ## True if `n` is an enum field symbol with a non-zero position or an
  ## integer literal with a non-zero value.
  case n.kind
  of nkSym:
    n.sym.kind == skEnumField and n.sym.position != 0
  of nkIntLit:
    n.intVal != 0
  else:
    false
|
||||
|
||||
# Forward declaration: the gen* helpers below call 'gen' before its body is
# defined further down in this file.
proc gen(c: var Con; n: PNode) # {.noSideEffect.}
|
||||
|
||||
proc genWhile(c: var Con; n: PNode) =
  ## Emits the code for a while loop:
  ##
  ##   L1:
  ##     cond
  ##     fork L2
  ##     body
  ##     goto L1
  ##   L2:
  ##
  ## For a condition that `isTrue` recognizes as constant true neither the
  ## condition nor the fork is emitted.
  let loopHead = c.genLabel
  withBlock(nil):
    if not isTrue(n.sons[0]):
      c.gen(n.sons[0])
      let exitFork = c.forkI(n)
      c.gen(n.sons[1])
      c.jmpBack(n, loopHead)
      c.patch(exitFork)
    else:
      c.gen(n.sons[1])
      c.jmpBack(n, loopHead)
|
||||
|
||||
proc genBlock(c: var Con; n: PNode) =
  ## Emits the body of a block statement/expression inside a new entry on
  ## the block stack so that 'break' statements can be patched to its end.
  # A block without a label has no symbol node in slot 0; reading '.sym'
  # of such a node is invalid, so fall back to a nil label (the same
  # convention genWhile uses for loops).
  let labl = if n.sons[0].kind == nkSym: n.sons[0].sym else: nil
  withBlock(labl):
    c.gen(n.sons[1])
|
||||
|
||||
proc genBreak(c: var Con; n: PNode) =
  ## Emits a 'goto' for a break statement and registers it as a fixup of the
  ## block it leaves: the innermost open block for an unlabelled break,
  ## otherwise the innermost open block carrying the given label.
  let jump = c.gotoI(n)
  if n.sons[0].kind != nkSym:
    c.blocks[c.blocks.high].fixups.add jump
    return
  var i = c.blocks.high
  while i >= 0:
    if c.blocks[i].label == n.sons[0].sym:
      c.blocks[i].fixups.add jump
      return
    dec i
  globalError(n.info, errGenerated, "VM problem: cannot find 'break' target")
|
||||
|
||||
proc genIf(c: var Con, n: PNode) =
  ## Emits the code for an if statement/expression:
  ##
  ##     cond1
  ##     fork L1
  ##     thenPart1
  ##     goto LEnd
  ##   L1:
  ##     cond2
  ##     fork L2
  ##     ...
  ##   LEnd:
  var endings: seq[TPosition] = @[]
  for i in 0 ..< n.len:
    let it = n.sons[i]
    if it.len == 2:
      # 'it' is a (condition, body) branch. The condition is the branch's
      # first son itself, not a child of that son.
      c.gen(it.sons[0])
      let elsePos = c.forkI(it.sons[0])
      c.gen(it.sons[1])
      if i < n.len-1:
        # jump over the remaining branches
        endings.add(c.gotoI(it.sons[1]))
      c.patch(elsePos)
    else:
      # else branch: only a body
      c.gen(it.sons[0])
  for endPos in endings: c.patch(endPos)
|
||||
|
||||
proc genAndOr(c: var Con; n: PNode) =
  ## Emits the code for the short-circuiting 'and' / 'or':
  ##
  ##     first operand
  ##     fork L1
  ##     second operand
  ##   L1:
  gen(c, n.sons[1])
  let skipSecond = forkI(c, n)
  gen(c, n.sons[2])
  patch(c, skipSecond)
|
||||
|
||||
proc genCase(c: var Con; n: PNode) =
  ## Emits the code for a case statement:
  ##
  ##     selector
  ##     fork L1
  ##     branch1
  ##     goto LEnd
  ##   L1:
  ##     fork L2
  ##     branch2
  ##     goto LEnd
  ##   L2:
  ##     elsePart
  ##   LEnd:
  ##
  ## If the case statement has no 'else' but is exhaustive, its last 'of'
  ## branch is emitted like an 'else' branch (without a fork), because one of
  ## the branches is always taken.
  var endings: seq[TPosition] = @[]
  # Only case statements over float or string selectors are treated as
  # possibly non-exhaustive.
  let selTyp = n.sons[0].typ
  let isExhaustive = selTyp != nil and
    skipTypes(selTyp, abstractVarRange-{tyTypeDesc}).kind notin
      {tyFloat..tyFloat128, tyString}
  c.gen(n.sons[0])
  for i in 1 ..< n.len:
    let it = n.sons[i]
    if it.len == 1:
      # 'else' branch
      c.gen(it.sons[0])
    elif i == n.len-1 and isExhaustive and it.kind == nkOfBranch:
      # last branch of an exhaustive case: cannot be skipped
      c.gen(it.lastSon)
    else:
      let elsePos = c.forkI(it.lastSon)
      c.gen(it.lastSon)
      if i < n.len-1:
        endings.add(c.gotoI(it.lastSon))
      c.patch(elsePos)
  for endPos in endings: c.patch(endPos)
|
||||
|
||||
proc genTry(c: var Con; n: PNode) =
  ## Emits the code for a try statement. The try body and every non-finally
  ## handler are each guarded by a 'fork', so each of them may be skipped;
  ## the body of a trailing 'finally' is emitted unconditionally at the end.
  var endings: seq[TPosition] = @[]
  let elsePos = c.forkI(n)
  c.gen(n.sons[0])
  c.patch(elsePos)
  for i in 1 ..< n.len:
    let it = n.sons[i]
    if it.kind != nkFinally:
      let endExcept = c.forkI(it)
      c.gen(it.lastSon)
      if i < n.len-1:
        # jump over the handlers that follow
        endings.add(c.gotoI(it))
      c.patch(endExcept)
  for endPos in endings: c.patch(endPos)
  let fin = lastSon(n)
  if fin.kind == nkFinally:
    c.gen(fin.sons[0])
|
||||
|
||||
proc genRaise(c: var Con; n: PNode) =
  ## Emits the raised expression followed by a 'goto' that leaves the
  ## routine.
  gen(c, n.sons[0])
  # 'dest' is relative to the instruction's own index. Subtracting that
  # index keeps 'index + dest' at exactly high(int) instead of overflowing.
  c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
|
||||
|
||||
proc genReturn(c: var Con; n: PNode) =
  ## Emits the returned expression (if any) followed by a 'goto' that leaves
  ## the routine.
  if n.sons[0].kind != nkEmpty: gen(c, n.sons[0])
  # 'dest' is relative to the instruction's own index. Subtracting that
  # index keeps 'index + dest' at exactly high(int) instead of overflowing.
  c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
|
||||
|
||||
# Symbol kinds for which genDef/genUse record 'def' and 'use' instructions.
const InterestingSyms = {skVar, skResult}
|
||||
|
||||
proc genUse(c: var Con; n: PNode) =
  ## Strips field, index, dereference and address-of wrappers from `n` and,
  ## if what remains is a symbol of an interesting kind, emits a 'use' for it.
  const wrappers = {nkDotExpr, nkCheckedFieldExpr, nkBracketExpr,
                    nkDerefExpr, nkHiddenDeref, nkAddr, nkHiddenAddr}
  var root = n
  while root.kind in wrappers:
    root = root[0]
  if root.kind == nkSym and root.sym.kind in InterestingSyms:
    c.code.add Instr(n: root, kind: use, sym: root.sym)
|
||||
|
||||
proc genDef(c: var Con; n: PNode) =
  ## Emits a 'def' for `n` if it is a symbol of an interesting kind;
  ## anything else is ignored.
  if n.kind != nkSym: return
  if n.sym.kind notin InterestingSyms: return
  c.code.add Instr(n: n, kind: def, sym: n.sym)
|
||||
|
||||
proc genCall(c: var Con; n: PNode) =
  ## Emits the callee and all arguments of a call. An argument that is
  ## passed to a 'var' parameter is additionally recorded as a 'def'.
  gen(c, n[0])
  var calleeType = n[0].typ
  if calleeType != nil:
    calleeType = calleeType.skipTypes(abstractInst)
  for i in 1..<n.len:
    let arg = n[i]
    gen(c, arg)
    let isVarParam = calleeType != nil and i < calleeType.len and
                     calleeType.sons[i].kind == tyVar
    if isVarParam:
      genDef(c, arg)
|
||||
|
||||
proc genMagic(c: var Con; n: PNode; m: TMagic) =
  ## Emits the code for a call to the magic `m`. 'and'/'or' get their
  ## short-circuit encoding, 'new' defines its first argument, 'exit' is
  ## followed by a jump that leaves the routine; every other magic is
  ## treated like an ordinary call.
  case m
  of mAnd, mOr: c.genAndOr(n)
  of mNew, mNewFinalize:
    genDef(c, n[1])
    for i in 2..<n.len: gen(c, n[i])
  of mExit:
    genCall(c, n)
    # 'dest' is relative to the instruction's own index. Subtracting that
    # index keeps 'index + dest' at exactly high(int) instead of
    # overflowing.
    c.code.add Instr(n: n, kind: goto, dest: high(int) - c.code.len)
  else:
    genCall(c, n)
|
||||
|
||||
proc genVarSection(c: var Con; n: PNode) =
  ## Emits the code for a var/let section: the initializer of every entry
  ## first, then the 'def's. A tuple unpacking defines all of its variables;
  ## a plain entry only counts as a 'def' if it has an initializer.
  for a in n:
    case a.kind
    of nkCommentStmt:
      discard
    of nkVarTuple:
      gen(c, a.lastSon)
      for i in 0 .. a.len-3: genDef(c, a[i])
    else:
      let init = a.lastSon
      gen(c, init)
      if init.kind != nkEmpty:
        genDef(c, a.sons[0])
|
||||
|
||||
proc gen(c: var Con; n: PNode) =
  ## Translates the AST `n` into instructions appended to `c.code` by
  ## dispatching on the node kind. Node kinds that are not listed produce
  ## no instructions.
  case n.kind
  of nkCharLit..nkNilLit:
    discard
  of nkSym:
    genUse(c, n)
  of nkCallKinds:
    let callee = n.sons[0]
    if callee.kind == nkSym and callee.sym.magic != mNone:
      genMagic(c, n, callee.sym.magic)
    else:
      genCall(c, n)
  of nkAsgn, nkFastAsgn:
    # right-hand side first, then the assignment target becomes defined
    gen(c, n[1])
    genDef(c, n[0])
  of nkVarSection, nkLetSection:
    genVarSection(c, n)
  of nkIfStmt, nkIfExpr:
    genIf(c, n)
  of nkCaseStmt:
    genCase(c, n)
  of nkWhileStmt:
    genWhile(c, n)
  of nkTryStmt:
    genTry(c, n)
  of nkBlockExpr, nkBlockStmt:
    genBlock(c, n)
  of nkBreakStmt:
    genBreak(c, n)
  of nkReturnStmt:
    genReturn(c, n)
  of nkRaiseStmt:
    genRaise(c, n)
  of nkWhenStmt:
    # This is a "when nimvm" node. Choose the first branch.
    gen(c, n.sons[0].sons[1])
  of nkPragmaBlock:
    gen(c, n.lastSon)
  of nkDotExpr, nkCheckedFieldExpr, nkBracketExpr,
     nkDerefExpr, nkHiddenDeref, nkAddr, nkHiddenAddr,
     nkDiscardStmt,
     nkObjDownConv, nkStringToCString, nkCStringToString:
    # only the first son is of interest
    gen(c, n.sons[0])
  of nkHiddenStdConv, nkHiddenSubConv, nkConv, nkExprColonExpr,
     nkExprEqExpr, nkCast:
    # only the second son is of interest
    gen(c, n.sons[1])
  of nkStmtList, nkStmtListExpr, nkChckRangeF, nkChckRange64, nkChckRange,
     nkBracket, nkCurly, nkPar, nkClosure, nkObjConstr:
    for child in n: gen(c, child)
  else:
    discard
|
||||
|
||||
proc mergeInto(dest: var IntSet; src: IntSet): bool =
  ## Adds every element of `src` to `dest`. Returns true if `dest` grew.
  for id in src:
    if not dest.containsOrIncl(id): result = true

proc jumpTarget(code: seq[Instr]; pc: int): int =
  ## Absolute target of the goto/fork at `pc`. Every target at or behind the
  ## end of the code is reported as `code.len`; comparing before adding
  ## avoids an overflow for the huge distances used to leave the routine.
  let dist = code[pc].dest
  if dist >= code.len - pc: code.len
  else: pc + dist

proc dfa(code: seq[Instr]) =
  ## Reports every 'use v' that can be reached along some path on which no
  ## 'def v' was executed before.
  ##
  ## For each instruction the set of variables that may still be undefined
  ## when the instruction is entered is computed with a worklist: at the
  ## entry every used variable is undefined, a 'def' removes its variable,
  ## and the sets of several predecessors are united. A set can only grow
  ## and is bounded by the number of used variables, so the iteration
  ## terminates also for loops.
  if code.len == 0: return   # nothing to analyse

  # all variables that are read anywhere start out as undefined
  var undef = initIntSet()
  for i in 0..<code.len:
    if code[i].kind == use: undef.incl(code[i].sym.id)

  # s[pc]: ids of the variables that may be undefined on entry to 'pc'
  var s = newSeq[IntSet](code.len)
  for i in 0..<code.len:
    s[i] = initIntSet()
  assign(s[0], undef)

  # positions of 'use' instructions that were reported already; an
  # instruction can be visited several times while the sets still grow
  var reported = initIntSet()

  # Only instructions that are reachable from the entry ever enter the
  # worklist; unreachable code is not analysed.
  var w = @[0]
  while w.len > 0:
    let pc = w.pop()
    var state: IntSet
    assign(state, s[pc])

    var next = pc + 1   # fall-through successor, -1 if there is none
    var jump = -1       # jump successor, -1 if there is none
    case code[pc].kind
    of use:
      let v = code[pc].sym
      if state.contains(v.id):
        if not reported.containsOrIncl(pc):
          localError(code[pc].n.info,
                     "variable read before initialized: " & v.name.s)
        # do not report the same variable again further down this path
        state.excl(v.id)
    of def:
      state.excl(code[pc].sym.id)
    of fork:
      # either the next instruction or the target can be taken
      jump = jumpTarget(code, pc)
    of goto:
      jump = jumpTarget(code, pc)
      next = -1

    for succ in [next, jump]:
      # a successor needs to be (re)visited only if it learned something new
      if succ >= 0 and succ < code.len and mergeInto(s[succ], state):
        w.add succ
|
||||
|
||||
proc dataflowAnalysis*(s: PSym; body: PNode) =
  ## Translates `body` into the linear instruction list and runs the
  ## "read before initialized" analysis on it. `s` is currently unused.
  var c = Con(code: @[], blocks: @[])
  gen(c, body)
  # The listing is a debugging aid only; printing it for every analysed
  # routine would clutter the compiler's output.
  when defined(nimDebugDfa):
    echoCfg(c)
  dfa(c.code)
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
import
|
||||
intsets, ast, astalgo, msgs, renderer, magicsys, types, idents, trees,
|
||||
wordrecg, strutils, options, guards, writetracking
|
||||
wordrecg, strutils, options, guards, writetracking, dfa
|
||||
|
||||
# Second semantic checking pass over the AST. Necessary because the old
|
||||
# way had some inherent problems. Performs:
|
||||
@@ -979,7 +979,9 @@ proc trackProc*(s: PSym, body: PNode) =
|
||||
message(s.info, warnLockLevel,
|
||||
"declared lock level is $1, but real lock level is $2" %
|
||||
[$s.typ.lockLevel, $t.maxLockLevel])
|
||||
if s.kind == skFunc: trackWrites(s, body)
|
||||
if s.kind == skFunc:
|
||||
dataflowAnalysis(s, body)
|
||||
trackWrites(s, body)
|
||||
|
||||
proc trackTopLevelStmt*(module: PSym; n: PNode) =
|
||||
if n.kind in {nkPragma, nkMacroDef, nkTemplateDef, nkProcDef, nkFuncDef,
|
||||
|
||||
Reference in New Issue
Block a user