diff --git a/compiler/aliasanalysis.nim b/compiler/aliasanalysis.nim new file mode 100644 index 0000000000..22a66e4df9 --- /dev/null +++ b/compiler/aliasanalysis.nim @@ -0,0 +1,124 @@ + +import ast + +import std / assertions + +const + PathKinds0* = {nkDotExpr, nkCheckedFieldExpr, + nkBracketExpr, nkDerefExpr, nkHiddenDeref, + nkAddr, nkHiddenAddr, + nkObjDownConv, nkObjUpConv} + PathKinds1* = {nkHiddenStdConv, nkHiddenSubConv} + +proc skipConvDfa*(n: PNode): PNode = + result = n + while true: + case result.kind + of nkObjDownConv, nkObjUpConv: + result = result[0] + of PathKinds1: + result = result[1] + else: break + +proc isAnalysableFieldAccess*(orig: PNode; owner: PSym): bool = + var n = orig + while true: + case n.kind + of PathKinds0 - {nkHiddenDeref, nkDerefExpr}: + n = n[0] + of PathKinds1: + n = n[1] + of nkHiddenDeref, nkDerefExpr: + # We "own" sinkparam[].loc but not ourVar[].location as it is a nasty + # pointer indirection. + # bug #14159, we cannot reason about sinkParam[].location as it can + # still be shared for tyRef. + n = n[0] + return n.kind == nkSym and n.sym.owner == owner and + (n.sym.typ.skipTypes(abstractInst-{tyOwned}).kind in {tyOwned}) + else: break + # XXX Allow closure deref operations here if we know + # the owner controlled the closure allocation? + result = n.kind == nkSym and n.sym.owner == owner and + {sfGlobal, sfThread, sfCursor} * n.sym.flags == {} and + (n.sym.kind != skParam or isSinkParam(n.sym)) # or n.sym.typ.kind == tyVar) + # Note: There is a different move analyzer possible that checks for + # consume(param.key); param.key = newValue for all paths. Then code like + # + # let splited = split(move self.root, x) + # self.root = merge(splited.lower, splited.greater) + # + # could be written without the ``move self.root``. However, this would be + # wrong! Then the write barrier for the ``self.root`` assignment would + # free the old data and all is lost! Lesson: Don't be too smart, trust the + # lower level C++ optimizer to specialize this code. + +type AliasKind* = enum + yes, no, maybe + +proc aliases*(obj, field: PNode): AliasKind = + # obj -> field: + # x -> x: true + # x -> x.f: true + # x.f -> x: false + # x.f -> x.f: true + # x.f -> x.v: false + # x -> x[0]: true + # x[0] -> x: false + # x[0] -> x[0]: true + # x[0] -> x[1]: false + # x -> x[i]: true + # x[i] -> x: false + # x[i] -> x[i]: maybe; Further analysis could make this return true when i is a runtime-constant + # x[i] -> x[j]: maybe; also returns maybe if only one of i or j is a compiletime-constant + template collectImportantNodes(result, n) = + var result: seq[PNode] + var n = n + while true: + case n.kind + of PathKinds0 - {nkDotExpr, nkBracketExpr}: + n = n[0] + of PathKinds1: + n = n[1] + of nkDotExpr, nkBracketExpr: + result.add n + n = n[0] + of nkSym: + result.add n + break + else: return no + + collectImportantNodes(objImportantNodes, obj) + collectImportantNodes(fieldImportantNodes, field) + + # If field is less nested than obj, then it cannot be part of/aliased by obj + if fieldImportantNodes.len < objImportantNodes.len: return no + + result = yes + for i in 1..objImportantNodes.len: + # We compare the nodes leading to the location of obj and field + # with each other. + # We continue until they diverge, in which case we return no, or + # until we reach the location of obj, in which case we do not need + # to look further, since field must be part of/aliased by obj now. 
+ # If we encounter an element access using an index which is a runtime value, + # we simply return maybe instead of yes; should further nodes not diverge. + let currFieldPath = fieldImportantNodes[^i] + let currObjPath = objImportantNodes[^i] + + if currFieldPath.kind != currObjPath.kind: + return no + + case currFieldPath.kind + of nkSym: + if currFieldPath.sym != currObjPath.sym: return no + of nkDotExpr: + if currFieldPath[1].sym != currObjPath[1].sym: return no + of nkBracketExpr: + if currFieldPath[1].kind in nkLiterals and currObjPath[1].kind in nkLiterals: + if currFieldPath[1].intVal != currObjPath[1].intVal: + return no + else: + result = maybe + else: assert false # unreachable + diff --git a/compiler/ast.nim b/compiler/ast.nim index 399d7e5613..e1d982a434 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -508,7 +508,8 @@ type # the flag is applied to proc default values and to calls nfExecuteOnReload # A top-level statement that will be executed during reloads nfLastRead # this node is a last read - nfFirstWrite# this node is a first write + nfFirstWrite # this node is a first write + nfFirstWrite2 # alternative first write implementation nfHasComment # node has a comment TNodeFlags* = set[TNodeFlag] @@ -1075,7 +1076,8 @@ const nfDotSetter, nfDotField, nfIsRef, nfIsPtr, nfPreventCg, nfLL, nfFromTemplate, nfDefaultRefsParam, - nfExecuteOnReload, nfLastRead, nfFirstWrite} + nfExecuteOnReload, nfLastRead, + nfFirstWrite, nfFirstWrite2} namePos* = 0 patternPos* = 1 # empty except for term rewriting macros genericParamsPos* = 2 diff --git a/compiler/ccgcalls.nim b/compiler/ccgcalls.nim index ed5a5b0797..81e74d089f 100644 --- a/compiler/ccgcalls.nim +++ b/compiler/ccgcalls.nim @@ -313,7 +313,7 @@ proc genArgNoParam(p: BProc, n: PNode; result: var Rope; needsTmp = false) = initLocExprSingleUse(p, n, a) addRdLoc(withTmpIfNeeded(p, a, needsTmp), result) -from dfa import aliases, AliasKind +import aliasanalysis proc potentialAlias(n: PNode, potentialWrites: seq[PNode]): bool = for p in potentialWrites: diff --git a/compiler/dfa.nim b/compiler/dfa.nim index 669207151a..57fd3ae2b4 100644 --- a/compiler/dfa.nim +++ b/compiler/dfa.nim @@ -9,27 +9,20 @@ ## Data flow analysis for Nim. ## We transform the AST into a linear list of instructions first to -## make this easier to handle: There are only 2 different branching +## make this easier to handle: There are only 3 different branching ## instructions: 'goto X' is an unconditional goto, 'fork X' ## is a conditional goto (either the next instruction or 'X' can be -## taken). Exhaustive case statements are translated +## taken), 'loop X' is the only jump that jumps back. +## +## Exhaustive case statements are translated ## so that the last branch is transformed into an 'else' branch. ## ``return`` and ``break`` are all covered by 'goto'. ## -## Control flow through exception handling: -## Contrary to popular belief, exception handling doesn't cause -## many problems for this DFA representation, ``raise`` is a statement -## that ``goes to`` the outer ``finally`` or ``except`` if there is one, -## otherwise it is the same as ``return``. Every call is treated as -## a call that can potentially ``raise``. However, without a surrounding -## ``try`` we don't emit these ``fork ReturnLabel`` instructions in order -## to speed up the dataflow analysis passes. -## ## The data structures and algorithms used here are inspired by ## "A Graph–Free Approach to Data–Flow Analysis" by Markus Mohnen. 
## https://link.springer.com/content/pdf/10.1007/3-540-45937-5_6.pdf -import ast, intsets, lineinfos, renderer +import ast, intsets, lineinfos, renderer, aliasanalysis import std/private/asciitables when defined(nimPreviewSlimSystem): @@ -37,12 +30,12 @@ when defined(nimPreviewSlimSystem): type InstrKind* = enum - goto, fork, def, use + goto, loop, fork, def, use Instr* = object - n*: PNode # contains the def/use location. case kind*: InstrKind - of goto, fork: dest*: int - else: discard + of goto, fork, loop: dest*: int + of def, use: + n*: PNode # contains the def/use location. ControlFlowGraph* = seq[Instr] @@ -59,9 +52,10 @@ type Con = object code: ControlFlowGraph - inTryStmt: int + inTryStmt, interestingInstructions: int blocks: seq[TBlock] owner: PSym + root: PSym proc codeListing(c: ControlFlowGraph, start = 0; last = -1): string = # for debugging purposes @@ -69,7 +63,7 @@ proc codeListing(c: ControlFlowGraph, start = 0; last = -1): string = var jumpTargets = initIntSet() let last = if last < 0: c.len-1 else: min(last, c.len-1) for i in start..last: - if c[i].kind in {goto, fork}: + if c[i].kind in {goto, fork, loop}: jumpTargets.incl(i+c[i].dest) var i = start while i <= last: @@ -80,12 +74,12 @@ proc codeListing(c: ControlFlowGraph, start = 0; last = -1): string = case c[i].kind of def, use: result.add renderTree(c[i].n) - of goto, fork: + result.add("\t#") + result.add($c[i].n.info.line) + result.add("\n") + of goto, fork, loop: result.add "L" result.addInt c[i].dest+i - result.add("\t#") - result.add($c[i].n.info.line) - result.add("\n") inc i if i in jumpTargets: result.add("L" & $i & ": End\n") @@ -93,181 +87,13 @@ proc echoCfg*(c: ControlFlowGraph; start = 0; last = -1) {.deprecated.} = ## echos the ControlFlowGraph for debugging purposes. echo codeListing(c, start, last).alignTable -proc forkI(c: var Con; n: PNode): TPosition = +proc forkI(c: var Con): TPosition = result = TPosition(c.code.len) - c.code.add Instr(n: n, kind: fork, dest: 0) + c.code.add Instr(kind: fork, dest: 0) -proc gotoI(c: var Con; n: PNode): TPosition = +proc gotoI(c: var Con): TPosition = result = TPosition(c.code.len) - c.code.add Instr(n: n, kind: goto, dest: 0) - -#[ - -Join is no more -=============== -Instead of generating join instructions we adapt our traversal of the CFG. - -When encountering a fork we split into two paths, we follow the path -starting at "pc + 1" until it encounters the joinpoint: "pc + forkInstr.dest". -If we encounter gotos that would jump further than the current joinpoint, -as can happen with gotos generated by unstructured controlflow such as break, raise or return, -we simply suspend following the current path, and follow the other path until the new joinpoint -which is simply the instruction pointer returned to us by the now suspended path. -If the path we are following now, also encounters a goto that exceeds the joinpoint -we repeat the process; suspending the current path and evaluating the other one with a new joinpoint. -If we eventually reach a common joinpoint we join the two paths. -This new "ping-pong" approach has the obvious advantage of not requiring join instructions, as such -cutting down on the CFG size but is also mandatory for correctly handling complicated cases -of unstructured controlflow. - - -Design of join -============== - -block: - if cond: break - def(x) - -use(x) - -Generates: - -L0: fork lab1 - join L0 # patched. 
- goto Louter -lab1: - def x - join L0 -Louter: - use x - - -block outer: - while a: - while b: - if foo: - if bar: - break outer # --> we need to 'join' every pushed 'fork' here - - -This works and then our abstract interpretation needs to deal with 'fork' -differently. It really causes a split in execution. Two threads are -"spawned" and both need to reach the 'join L' instruction. Afterwards -the abstract interpretations are joined and execution resumes single -threaded. - - -Abstract Interpretation ------------------------ - -proc interpret(pc, state, comesFrom): state = - result = state - # we need an explicit 'create' instruction (an explicit heap), in order - # to deal with 'var x = create(); var y = x; var z = y; destroy(z)' - while true: - case pc - of fork: - let a = interpret(pc+1, result, pc) - let b = interpret(forkTarget, result, pc) - result = a ++ b # ++ is a union operation - inc pc - of join: - if joinTarget == comesFrom: return result - else: inc pc - of use X: - if not result.contains(x): - error "variable not initialized " & x - inc pc - of def X: - if not result.contains(x): - result.incl X - else: - error "overwrite of variable causes memory leak " & x - inc pc - of destroy X: - result.excl X - -This is correct but still can lead to false positives: - -proc p(cond: bool) = - if cond: - new(x) - otherThings() - if cond: - destroy x - -Is not a leak. We should find a way to model *data* flow, not just -control flow. One solution is to rewrite the 'if' without a fork -instruction. The unstructured aspect can now be easily dealt with -the 'goto' and 'join' instructions. - -proc p(cond: bool) = - L0: fork Lend - new(x) - # do not 'join' here! - - Lend: - otherThings() - join L0 # SKIP THIS FOR new(x) SOMEHOW - destroy x - join L0 # but here. - - - -But if we follow 'goto Louter' we will never come to the join point. -We restore the bindings after popping pc from the stack then there -"no" problem?! - - -while cond: - prelude() - if not condB: break - postlude() - ----> -var setFlag = true -while cond and not setFlag: - prelude() - if not condB: - setFlag = true # BUT: Dependency - if not setFlag: # HERE - postlude() - ----> -var setFlag = true -while cond and not setFlag: - prelude() - if not condB: - postlude() - setFlag = true - - -------------------------------------------------- - -while cond: - prelude() - if more: - if not condB: break - stuffHere() - postlude() - ---> -var setFlag = true -while cond and not setFlag: - prelude() - if more: - if not condB: - setFlag = false - else: - stuffHere() - postlude() - else: - postlude() - -This is getting complicated. Instead we keep the whole 'join' idea but -duplicate the 'join' instructions on breaks and return exits! 
- -]# + c.code.add Instr(kind: goto, dest: 0) proc genLabel(c: Con): TPosition = TPosition(c.code.len) @@ -275,8 +101,8 @@ template checkedDistance(dist): int = doAssert low(int) div 2 + 1 < dist and dist < high(int) div 2 dist -proc jmpBack(c: var Con, n: PNode, p = TPosition(0)) = - c.code.add Instr(n: n, kind: goto, dest: checkedDistance(p.int - c.code.len)) +proc jmpBack(c: var Con, p = TPosition(0)) = + c.code.add Instr(kind: loop, dest: checkedDistance(p.int - c.code.len)) proc patch(c: var Con, p: TPosition) = # patch with current index @@ -286,12 +112,12 @@ proc gen(c: var Con; n: PNode) proc popBlock(c: var Con; oldLen: int) = var exits: seq[TPosition] - exits.add c.gotoI(newNode(nkEmpty)) + exits.add c.gotoI() for f in c.blocks[oldLen].breakFixups: c.patch(f[0]) for finale in f[1]: c.gen(finale) - exits.add c.gotoI(newNode(nkEmpty)) + exits.add c.gotoI() for e in exits: c.patch e c.blocks.setLen(oldLen) @@ -306,91 +132,29 @@ proc isTrue(n: PNode): bool = n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or n.kind == nkIntLit and n.intVal != 0 -when true: - proc genWhile(c: var Con; n: PNode) = - # We unroll every loop 3 times. We emulate 0, 1, 2 iterations - # through the loop. We need to prove this is correct for our - # purposes. But Herb Sutter claims it is. (Proof by authority.) - # - # EDIT: Actually, we only need to unroll 2 times - # because Nim doesn't have a way of breaking/goto-ing into - # a loop iteration. Unrolling 2 times is much better for compile - # times of nested loops than 3 times, so we do that here. - #[ - while cond: - body - - Becomes: - - block: - if cond: - body - if cond: - body - if cond: - body - - We still need to ensure 'break' resolves properly, so an AST to AST - translation is impossible. - - So the code to generate is: - - cond - fork L4 # F1 - body - cond - fork L5 # F2 - body - cond - fork L6 # F3 - body - L6: - join F3 - L5: - join F2 - L4: - join F1 - ]# - if isTrue(n[0]): - # 'while true' is an idiom in Nim and so we produce - # better code for it: - withBlock(nil): - for i in 0..1: - c.gen(n[1]) - else: - withBlock(nil): - var endings: array[2, TPosition] - for i in 0..1: - c.gen(n[0]) - endings[i] = c.forkI(n) - c.gen(n[1]) - for i in countdown(endings.high, 0): - c.patch(endings[i]) - -else: - proc genWhile(c: var Con; n: PNode) = - # lab1: - # cond, tmp - # fork tmp, lab2 - # body - # jmp lab1 - # lab2: - let lab1 = c.genLabel - withBlock(nil): - if isTrue(n[0]): - c.gen(n[1]) - c.jmpBack(n, lab1) - else: - c.gen(n[0]) - forkT(n): - c.gen(n[1]) - c.jmpBack(n, lab1) - -template forkT(n, body) = - let lab1 = c.forkI(n) +template forkT(body) = + let lab1 = c.forkI() body c.patch(lab1) +proc genWhile(c: var Con; n: PNode) = + # lab1: + # cond, tmp + # fork tmp, lab2 + # body + # jmp lab1 + # lab2: + let lab1 = c.genLabel + withBlock(nil): + if isTrue(n[0]): + c.gen(n[1]) + c.jmpBack(lab1) + else: + c.gen(n[0]) + forkT: + c.gen(n[1]) + c.jmpBack(lab1) + proc genIf(c: var Con, n: PNode) = #[ @@ -429,15 +193,22 @@ proc genIf(c: var Con, n: PNode) = ]# var endings: seq[TPosition] = @[] + let oldInteresting = c.interestingInstructions + let oldLen = c.code.len + for i in 0.. 
0: for i in countdown(c.blocks.high, 0): @@ -554,13 +327,14 @@ proc genRaise(c: var Con; n: PNode) = return assert false #Unreachable else: - genNoReturn(c, n) + genNoReturn(c) proc genImplicitReturn(c: var Con) = if c.owner.kind in {skProc, skFunc, skMethod, skIterator, skConverter} and resultPos < c.owner.ast.len: gen(c, c.owner.ast[resultPos]) proc genReturn(c: var Con; n: PNode) = + inc c.interestingInstructions if n[0].kind != nkEmpty: gen(c, n[0]) else: @@ -569,122 +343,6 @@ proc genReturn(c: var Con; n: PNode) = const InterestingSyms = {skVar, skResult, skLet, skParam, skForVar, skTemp} - PathKinds0 = {nkDotExpr, nkCheckedFieldExpr, - nkBracketExpr, nkDerefExpr, nkHiddenDeref, - nkAddr, nkHiddenAddr, - nkObjDownConv, nkObjUpConv} - PathKinds1 = {nkHiddenStdConv, nkHiddenSubConv} - -proc skipConvDfa*(n: PNode): PNode = - result = n - while true: - case result.kind - of nkObjDownConv, nkObjUpConv: - result = result[0] - of PathKinds1: - result = result[1] - else: break - -type AliasKind* = enum - yes, no, maybe - -proc aliases*(obj, field: PNode): AliasKind = - # obj -> field: - # x -> x: true - # x -> x.f: true - # x.f -> x: false - # x.f -> x.f: true - # x.f -> x.v: false - # x -> x[0]: true - # x[0] -> x: false - # x[0] -> x[0]: true - # x[0] -> x[1]: false - # x -> x[i]: true - # x[i] -> x: false - # x[i] -> x[i]: maybe; Further analysis could make this return true when i is a runtime-constant - # x[i] -> x[j]: maybe; also returns maybe if only one of i or j is a compiletime-constant - template collectImportantNodes(result, n) = - var result: seq[PNode] - var n = n - while true: - case n.kind - of PathKinds0 - {nkDotExpr, nkBracketExpr}: - n = n[0] - of PathKinds1: - n = n[1] - of nkDotExpr, nkBracketExpr: - result.add n - n = n[0] - of nkSym: - result.add n; break - else: return no - - collectImportantNodes(objImportantNodes, obj) - collectImportantNodes(fieldImportantNodes, field) - - # If field is less nested than obj, then it cannot be part of/aliased by obj - if fieldImportantNodes.len < objImportantNodes.len: return no - - result = yes - for i in 1..objImportantNodes.len: - # We compare the nodes leading to the location of obj and field - # with each other. - # We continue until they diverge, in which case we return no, or - # until we reach the location of obj, in which case we do not need - # to look further, since field must be part of/aliased by obj now. - # If we encounter an element access using an index which is a runtime value, - # we simply return maybe instead of yes; should further nodes not diverge. - let currFieldPath = fieldImportantNodes[^i] - let currObjPath = objImportantNodes[^i] - - if currFieldPath.kind != currObjPath.kind: - return no - - case currFieldPath.kind - of nkSym: - if currFieldPath.sym != currObjPath.sym: return no - of nkDotExpr: - if currFieldPath[1].sym != currObjPath[1].sym: return no - of nkBracketExpr: - if currFieldPath[1].kind in nkLiterals and currObjPath[1].kind in nkLiterals: - if currFieldPath[1].intVal != currObjPath[1].intVal: - return no - else: - result = maybe - else: assert false # unreachable - -proc isAnalysableFieldAccess*(orig: PNode; owner: PSym): bool = - var n = orig - while true: - case n.kind - of PathKinds0 - {nkHiddenDeref, nkDerefExpr}: - n = n[0] - of PathKinds1: - n = n[1] - of nkHiddenDeref, nkDerefExpr: - # We "own" sinkparam[].loc but not ourVar[].location as it is a nasty - # pointer indirection. - # bug #14159, we cannot reason about sinkParam[].location as it can - # still be shared for tyRef. 
- n = n[0] - return n.kind == nkSym and n.sym.owner == owner and - (n.sym.typ.skipTypes(abstractInst-{tyOwned}).kind in {tyOwned}) - else: break - # XXX Allow closure deref operations here if we know - # the owner controlled the closure allocation? - result = n.kind == nkSym and n.sym.owner == owner and - {sfGlobal, sfThread, sfCursor} * n.sym.flags == {} and - (n.sym.kind != skParam or isSinkParam(n.sym)) # or n.sym.typ.kind == tyVar) - # Note: There is a different move analyzer possible that checks for - # consume(param.key); param.key = newValue for all paths. Then code like - # - # let splited = split(move self.root, x) - # self.root = merge(splited.lower, splited.greater) - # - # could be written without the ``move self.root``. However, this would be - # wrong! Then the write barrier for the ``self.root`` assignment would - # free the old data and all is lost! Lesson: Don't be too smart, trust the - # lower level C++ optimizer to specialize this code. proc skipTrivials(c: var Con, n: PNode): PNode = result = n @@ -703,8 +361,9 @@ proc genUse(c: var Con; orig: PNode) = let n = c.skipTrivials(orig) if n.kind == nkSym: - if n.sym.kind in InterestingSyms: - c.code.add Instr(n: orig, kind: use) + if n.sym.kind in InterestingSyms and n.sym == c.root: + c.code.add Instr(kind: use, n: orig) + inc c.interestingInstructions else: gen(c, n) @@ -712,7 +371,9 @@ proc genDef(c: var Con; orig: PNode) = let n = c.skipTrivials(orig) if n.kind == nkSym and n.sym.kind in InterestingSyms: - c.code.add Instr(n: orig, kind: def) + if n.sym == c.root: + c.code.add Instr(kind: def, n: orig) + inc c.interestingInstructions proc genCall(c: var Con; n: PNode) = gen(c, n[0]) @@ -725,13 +386,13 @@ proc genCall(c: var Con; n: PNode) = # Pass by 'out' is a 'must def'. Good enough for a move optimizer. genDef(c, n[i]) # every call can potentially raise: - if c.inTryStmt > 0 and canRaiseConservative(n[0]): + if false: # c.inTryStmt > 0 and canRaiseConservative(n[0]): # we generate the instruction sequence: # fork lab1 # goto exceptionHandler (except or finally) # lab1: # join F1 - forkT(n): + forkT: for i in countdown(c.blocks.high, 0): if c.blocks[i].isTryBlock: genBreakOrRaiseAux(c, i, n) @@ -769,7 +430,7 @@ proc gen(c: var Con; n: PNode) = else: genCall(c, n) if sfNoReturn in n[0].sym.flags: - genNoReturn(c, n) + genNoReturn(c) else: genCall(c, n) of nkCharLit..nkNilLit: discard @@ -810,13 +471,32 @@ proc gen(c: var Con; n: PNode) = of nkDefer: doAssert false, "dfa construction pass requires the elimination of 'defer'" else: discard -proc constructCfg*(s: PSym; body: PNode): ControlFlowGraph = +when false: + proc optimizeJumps(c: var ControlFlowGraph) = + for i in 0.. pc: + pc = newPc + else: + break + c[i].dest = pc - i + of loop, def, use: discard + +proc constructCfg*(s: PSym; body: PNode; root: PSym): ControlFlowGraph = ## constructs a control flow graph for ``body``. 
- var c = Con(code: @[], blocks: @[], owner: s) + var c = Con(code: @[], blocks: @[], owner: s, root: root) withBlock(s): gen(c, body) - genImplicitReturn(c) + if root.kind == skResult: + genImplicitReturn(c) when defined(gcArc) or defined(gcOrc): result = c.code # will move else: shallowCopy(result, c.code) + when false: + optimizeJumps result diff --git a/compiler/injectdestructors.nim b/compiler/injectdestructors.nim index 63cfbbd9f4..298bc77116 100644 --- a/compiler/injectdestructors.nim +++ b/compiler/injectdestructors.nim @@ -15,9 +15,9 @@ import intsets, strtabs, ast, astalgo, msgs, renderer, magicsys, types, idents, - strutils, options, dfa, lowerings, tables, modulegraphs, + strutils, options, lowerings, tables, modulegraphs, lineinfos, parampatterns, sighashes, liftdestructors, optimizer, - varpartitions + varpartitions, aliasanalysis, dfa when defined(nimPreviewSlimSystem): import std/assertions @@ -27,12 +27,14 @@ from trees import exprStructuralEquivalent, getRoot type Con = object owner: PSym - g: ControlFlowGraph + when true: + g: ControlFlowGraph graph: ModuleGraph inLoop, inSpawn, inLoopCond: int uninit: IntSet # set of uninit'ed vars - uninitComputed: bool idgen: IdGenerator + body: PNode + otherUsage: TLineInfo Scope = object # we do scope-based memory management. # a scope is comparable to an nkStmtListExpr like @@ -40,6 +42,8 @@ type vars: seq[PSym] wasMoved: seq[PNode] final: seq[PNode] # finally section + locals: seq[PSym] + body: PNode needsTry: bool parent: ptr Scope @@ -70,160 +74,95 @@ proc getTemp(c: var Con; s: var Scope; typ: PType; info: TLineInfo): PNode = s.vars.add(sym) result = newSymNode(sym) -proc nestedScope(parent: var Scope): Scope = - Scope(vars: @[], wasMoved: @[], final: @[], needsTry: false, parent: addr(parent)) +proc nestedScope(parent: var Scope; body: PNode): Scope = + Scope(vars: @[], locals: @[], wasMoved: @[], final: @[], body: body, needsTry: false, parent: addr(parent)) proc p(n: PNode; c: var Con; s: var Scope; mode: ProcessMode): PNode proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope; isDecl = false): PNode -import sets, hashes +when false: + var + perfCounters: array[InstrKind, int] -proc hash(n: PNode): Hash = hash(cast[pointer](n)) + proc showCounters*() = + for i in low(InstrKind)..high(InstrKind): + echo "INSTR ", i, " ", perfCounters[i] -type - State = ref object - lastReads: IntSet - potentialLastReads: IntSet - notLastReads: IntSet - alreadySeen: HashSet[PNode] +proc isLastReadImpl(n: PNode; c: var Con; scope: var Scope): bool = + let root = parampatterns.exprRoot(n, allowCalls=false) + if root == nil: return false -proc preprocessCfg(cfg: var ControlFlowGraph) = - for i in 0.. 
cfg.len: - cfg[i].dest = cfg.len - i + var s = addr(scope) + while s != nil: + if s.locals.contains(root): break + s = s.parent -proc mergeStates(a: var State, b: sink State) = - # Inplace for performance: - # lastReads = a.lastReads + b.lastReads - # potentialLastReads = (a.potentialLastReads + b.potentialLastReads) - (a.notLastReads + b.notLastReads) - # notLastReads = a.notLastReads + b.notLastReads - # alreadySeen = a.alreadySeen + b.alreadySeen - # b is never nil - if a == nil: - a = b + c.g = constructCfg(c.owner, if s != nil: s.body else: c.body, root) + dbg: + echo "\n### ", c.owner.name.s, ":\nCFG:" + echoCfg(c.g) + #echo c.body + + var j = 0 + while j < c.g.len: + if c.g[j].kind == use and c.g[j].n == n: break + inc j + c.otherUsage = unknownLineInfo + if j < c.g.len: + var pcs = @[j+1] + var marked = initIntSet() + result = true + while pcs.len > 0: + var pc = pcs.pop() + if not marked.contains(pc): + let oldPc = pc + while pc < c.g.len: + dbg: + echo "EXEC ", c.g[pc].kind, " ", pc, " ", n + when false: + inc perfCounters[c.g[pc].kind] + case c.g[pc].kind + of loop: + let back = pc + c.g[pc].dest + if not marked.containsOrIncl(back): + pc = back + else: + break + of goto: + pc = pc + c.g[pc].dest + of fork: + if not marked.contains(pc+1): + pcs.add pc + 1 + pc = pc + c.g[pc].dest + of use: + if c.g[pc].n.aliases(n) != no or n.aliases(c.g[pc].n) != no: + c.otherUsage = c.g[pc].n.info + return false + inc pc + of def: + if c.g[pc].n.aliases(n) == yes: + # the path leads to a redefinition of 's' --> sink 's'. + break + elif n.aliases(c.g[pc].n) != no: + # only partially writes to 's' --> can't sink 's', so this def reads 's' + # or maybe writes to 's' --> can't sink 's' + c.otherUsage = c.g[pc].n.info + return false + inc pc + marked.incl oldPc else: - a.lastReads.incl b.lastReads - a.potentialLastReads.incl b.potentialLastReads - a.potentialLastReads.excl a.notLastReads - a.potentialLastReads.excl b.notLastReads - a.notLastReads.incl b.notLastReads - a.alreadySeen.incl b.alreadySeen + result = false -proc computeLastReadsAndFirstWrites(cfg: ControlFlowGraph) = - template aliasesCached(obj, field: PNode): AliasKind = - aliases(obj, field) +proc isLastRead(n: PNode; c: var Con; s: var Scope): bool = + if not hasDestructor(c, n.typ): return true - var cfg = cfg - preprocessCfg(cfg) - - var states = newSeq[State](cfg.len + 1) - states[0] = State() - - for pc in 0.. sink 's'. 
- state.lastReads.incl r - state.potentialLastReads.excl r - elif cfg[r].n.aliasesCached(cfg[pc].n) != no: - # only partially writes to 's' --> can't sink 's', so this def reads 's' - # or maybe writes to 's' --> can't sink 's' - cfg[r].n.comment = '\n' & $pc - state.potentialLastReads.excl r - state.notLastReads.incl r - - var alreadySeenThisNode = false - for s in state.alreadySeen: - if cfg[pc].n.aliasesCached(s) != no or s.aliasesCached(cfg[pc].n) != no: - alreadySeenThisNode = true; break - if alreadySeenThisNode: cfg[pc].n.flags.excl nfFirstWrite - else: cfg[pc].n.flags.incl nfFirstWrite - - state.alreadySeen.incl cfg[pc].n - - mergeStates(states[pc + 1], move(states[pc])) - of use: - var potentialLastReadsCopy = state.potentialLastReads - for r in potentialLastReadsCopy: - if cfg[pc].n.aliasesCached(cfg[r].n) != no or cfg[r].n.aliasesCached(cfg[pc].n) != no: - cfg[r].n.comment = '\n' & $pc - state.potentialLastReads.excl r - state.notLastReads.incl r - - state.potentialLastReads.incl pc - - state.alreadySeen.incl cfg[pc].n - - mergeStates(states[pc + 1], move(states[pc])) - of goto: - mergeStates(states[pc + cfg[pc].dest], move(states[pc])) - of fork: - var copy = State() - copy[] = states[pc][] - mergeStates(states[pc + cfg[pc].dest], copy) - mergeStates(states[pc + 1], move(states[pc])) - - let lastReads = (states[^1].lastReads + states[^1].potentialLastReads) - states[^1].notLastReads - var lastReadTable: Table[PNode, seq[int]] - for position, node in cfg: - if node.kind == use: - lastReadTable.mgetOrPut(node.n, @[]).add position - for node, positions in lastReadTable: - block checkIfAllPosLastRead: - for p in positions: - if p notin lastReads: break checkIfAllPosLastRead - node.flags.incl nfLastRead - -proc isLastRead(n: PNode; c: var Con): bool = - let m = dfa.skipConvDfa(n) - (m.kind == nkSym and sfSingleUsedTemp in m.sym.flags) or nfLastRead in m.flags + let m = skipConvDfa(n) + result = (m.kind == nkSym and sfSingleUsedTemp in m.sym.flags) or + isLastReadImpl(n, c, s) proc isFirstWrite(n: PNode; c: var Con): bool = - let m = dfa.skipConvDfa(n) - nfFirstWrite in m.flags - -proc initialized(code: ControlFlowGraph; pc: int, - init, uninit: var IntSet; until: int): int = - ## Computes the set of definitely initialized variables across all code paths - ## as an IntSet of IDs. 
- var pc = pc - while pc < code.len: - case code[pc].kind - of goto: - pc += code[pc].dest - of fork: - var initA = initIntSet() - var initB = initIntSet() - var variantA = pc + 1 - var variantB = pc + code[pc].dest - while variantA != variantB: - if max(variantA, variantB) > until: - break - if variantA < variantB: - variantA = initialized(code, variantA, initA, uninit, min(variantB, until)) - else: - variantB = initialized(code, variantB, initB, uninit, min(variantA, until)) - pc = min(variantA, variantB) - # we add vars if they are in both branches: - for v in initA: - if v in initB: - init.incl v - of use: - let v = code[pc].n.sym - if v.kind != skParam and v.id notin init: - # attempt to read an uninit'ed variable - uninit.incl v.id - inc pc - of def: - let v = code[pc].n.sym - init.incl v.id - inc pc - return pc + let m = skipConvDfa(n) + result = nfFirstWrite2 in m.flags proc isCursor(n: PNode): bool = case n.kind @@ -247,9 +186,11 @@ proc checkForErrorPragma(c: Con; t: PType; ri: PNode; opname: string) = m.add "; requires a copy because it's not the last read of '" m.add renderTree(ri) m.add '\'' - if ri.comment.startsWith('\n'): + if c.otherUsage != unknownLineInfo: + # ri.comment.startsWith('\n'): m.add "; another read is done here: " - m.add c.graph.config $ c.g[parseInt(ri.comment[1..^1])].n.info + m.add c.graph.config $ c.otherUsage + #m.add c.graph.config $ c.g[parseInt(ri.comment[1..^1])].n.info elif ri.kind == nkSym and ri.sym.kind == skParam and not isSinkType(ri.sym.typ): m.add "; try to make " m.add renderTree(ri) @@ -625,7 +566,7 @@ template handleNestedTempl(n, processCall: untyped, willProduceStmt = false) = var branch = shallowCopy(it) for j in 0 ..< it.len-1: branch[j] = copyTree(it[j]) - var ofScope = nestedScope(s) + var ofScope = nestedScope(s, it.lastSon) branch[^1] = if it[^1].typ.isEmptyType or willProduceStmt: processScope(c, ofScope, maybeVoid(it[^1], ofScope)) else: @@ -638,7 +579,7 @@ template handleNestedTempl(n, processCall: untyped, willProduceStmt = false) = result = copyNode(n) result.add p(n[0], c, s, normal) dec c.inLoopCond - var bodyScope = nestedScope(s) + var bodyScope = nestedScope(s, n[1]) let bodyResult = p(n[1], c, bodyScope, normal) result.add processScope(c, bodyScope, bodyResult) dec c.inLoop @@ -650,7 +591,7 @@ template handleNestedTempl(n, processCall: untyped, willProduceStmt = false) = for i in 0.. 
0: shouldDebug = toDebug == owner.name.s or toDebug == "always" if sfGeneratedOp in owner.flags or (owner.kind == skIterator and isInlineIterator(owner.typ)): return n - var c = Con(owner: owner, graph: g, g: constructCfg(owner, n), idgen: idgen) - dbg: - echo "\n### ", owner.name.s, ":\nCFG:" - echoCfg(c.g) - echo n + var c = Con(owner: owner, graph: g, idgen: idgen, body: n, otherUsage: unknownLineInfo) if optCursorInference in g.config.options: computeCursors(owner, n, g) - computeLastReadsAndFirstWrites(c.g) - - var scope: Scope + var scope = Scope(body: n) let body = p(n, c, scope, normal) if owner.kind in {skProc, skFunc, skMethod, skIterator, skConverter}: diff --git a/compiler/int128.nim b/compiler/int128.nim index afa07094b3..d85f960840 100644 --- a/compiler/int128.nim +++ b/compiler/int128.nim @@ -57,15 +57,9 @@ proc toInt128*[T: SomeInteger | bool](arg: T): Int128 = template isNegative(arg: Int128): bool = arg.sdata(3) < 0 -template isNegative(arg: int32): bool = - arg < 0 - proc bitconcat(a, b: uint32): uint64 = (uint64(a) shl 32) or uint64(b) -proc bitsplit(a: uint64): (uint32, uint32) = - (cast[uint32](a shr 32), cast[uint32](a)) - proc toInt64*(arg: Int128): int64 = if isNegative(arg): assert(arg.sdata(3) == -1, "out of range") @@ -211,12 +205,6 @@ proc `==`*(a, b: Int128): bool = if a.udata[3] != b.udata[3]: return false return true -proc inplaceBitnot(a: var Int128) = - a.udata[0] = not a.udata[0] - a.udata[1] = not a.udata[1] - a.udata[2] = not a.udata[2] - a.udata[3] = not a.udata[3] - proc bitnot*(a: Int128): Int128 = result.udata[0] = not a.udata[0] result.udata[1] = not a.udata[1] @@ -357,18 +345,6 @@ proc low64(a: Int128): uint64 = bitconcat(a.udata[1], a.udata[0]) proc `*`*(lhs, rhs: Int128): Int128 = - let - a = cast[uint64](lhs.udata[0]) - b = cast[uint64](lhs.udata[1]) - c = cast[uint64](lhs.udata[2]) - d = cast[uint64](lhs.udata[3]) - - e = cast[uint64](rhs.udata[0]) - f = cast[uint64](rhs.udata[1]) - g = cast[uint64](rhs.udata[2]) - h = cast[uint64](rhs.udata[3]) - - let a32 = cast[uint64](lhs.udata[1]) let a00 = cast[uint64](lhs.udata[0]) let b32 = cast[uint64](rhs.udata[1]) @@ -444,11 +420,11 @@ proc divMod*(dividend, divisor: Int128): tuple[quotient, remainder: Int128] = result.remainder = dividend proc `div`*(a, b: Int128): Int128 = - let (a, b) = divMod(a, b) + let (a, _) = divMod(a, b) return a proc `mod`*(a, b: Int128): Int128 = - let (a, b) = divMod(a, b) + let (_, b) = divMod(a, b) return b proc addInt128*(result: var string; value: Int128) = diff --git a/compiler/lambdalifting.nim b/compiler/lambdalifting.nim index 96edba8c88..986879f8f1 100644 --- a/compiler/lambdalifting.nim +++ b/compiler/lambdalifting.nim @@ -245,6 +245,13 @@ proc createTypeBoundOpsLL(g: ModuleGraph; refType: PType; info: TLineInfo; idgen if tfHasAsgn in refType.flags or optSeqDestructors in g.config.globalOptions: owner.flags.incl sfInjectDestructors +proc genCreateEnv(env: PNode): PNode = + var c = newNodeIT(nkObjConstr, env.info, env.typ) + c.add newNodeIT(nkType, env.info, env.typ) + let e = copyTree(env) + e.flags.incl nfFirstWrite2 + result = newAsgnStmt(e, c) + proc liftIterSym*(g: ModuleGraph; n: PNode; idgen: IdGenerator; owner: PSym): PNode = # transforms (iter) to (let env = newClosure[iter](); (iter, env)) if liftingHarmful(g.config, owner): return n @@ -268,7 +275,8 @@ proc liftIterSym*(g: ModuleGraph; n: PNode; idgen: IdGenerator; owner: PSym): PN addVar(v, env) result.add(v) # add 'new' statement: - result.add newCall(getSysSym(g, n.info, "internalNew"), env) + 
#result.add newCall(getSysSym(g, n.info, "internalNew"), env) + result.add genCreateEnv(env) createTypeBoundOpsLL(g, env.typ, n.info, idgen, owner) result.add makeClosure(g, idgen, iter, env, n.info) @@ -596,7 +604,7 @@ proc rawClosureCreation(owner: PSym; addVar(v, unowned) # add 'new' statement: - result.add(newCall(getSysSym(d.graph, env.info, "internalNew"), env)) + result.add genCreateEnv(env) if optOwnedRefs in d.graph.config.globalOptions: let unowned = c.unownedEnvVars[owner.id] assert unowned != nil @@ -658,7 +666,7 @@ proc closureCreationForIter(iter: PNode; var vs = newNodeI(nkVarSection, iter.info) addVar(vs, vnode) result.add(vs) - result.add(newCall(getSysSym(d.graph, iter.info, "internalNew"), vnode)) + result.add genCreateEnv(vnode) createTypeBoundOpsLL(d.graph, vnode.typ, iter.info, d.idgen, owner) let upField = lookupInRecord(v.typ.skipTypes({tyOwned, tyRef, tyPtr}).n, getIdent(d.graph.cache, upName)) @@ -943,7 +951,7 @@ proc liftForLoop*(g: ModuleGraph; body: PNode; idgen: IdGenerator; owner: PSym): addVar(v, newSymNode(env)) result.add(v) # add 'new' statement: - result.add(newCall(getSysSym(g, env.info, "internalNew"), env.newSymNode)) + result.add genCreateEnv(env.newSymNode) createTypeBoundOpsLL(g, env.typ, body.info, idgen, owner) elif op.kind == nkStmtListExpr: diff --git a/compiler/parampatterns.nim b/compiler/parampatterns.nim index 38b7faa074..534c3b5d18 100644 --- a/compiler/parampatterns.nim +++ b/compiler/parampatterns.nim @@ -183,7 +183,7 @@ type arLentValue, # lent value arStrange # it is a strange beast like 'typedesc[var T]' -proc exprRoot*(n: PNode): PSym = +proc exprRoot*(n: PNode; allowCalls = true): PSym = var it = n while true: case it.kind @@ -204,7 +204,7 @@ proc exprRoot*(n: PNode): PSym = if it.len > 0 and it.typ != nil: it = it.lastSon else: break of nkCallKinds: - if it.typ != nil and it.typ.kind in {tyVar, tyLent} and it.len > 1: + if allowCalls and it.typ != nil and it.typ.kind in {tyVar, tyLent} and it.len > 1: # See RFC #7373, calls returning 'var T' are assumed to # return a view into the first argument (if there is one): it = it[1] diff --git a/compiler/sempass2.nim b/compiler/sempass2.nim index 5d701c4961..94b3f4fbd6 100644 --- a/compiler/sempass2.nim +++ b/compiler/sempass2.nim @@ -67,10 +67,11 @@ type exc: PNode # stack of exceptions tags: PNode # list of tags forbids: PNode # list of tags - bottom, inTryStmt, inExceptOrFinallyStmt, leftPartOfAsgn, inIfStmt: int + bottom, inTryStmt, inExceptOrFinallyStmt, leftPartOfAsgn, inIfStmt, currentBlock: int owner: PSym ownerModule: PSym init: seq[int] # list of initialized variables + scopes: Table[int, int] # maps var-id to its scope (see also `currentBlock`). 
guards: TModel # nested guards locked: seq[PNode] # locked locations gcUnsafe, isRecursive, isTopLevel, hasSideEffect, inEnforcedGcSafe: bool @@ -189,6 +190,10 @@ proc makeVolatile(a: PEffects; s: PSym) {.inline.} = if a.inTryStmt > 0 and a.config.exc == excSetjmp: incl(s.flags, sfVolatile) +proc varDecl(a: PEffects; n: PNode) {.inline.} = + if n.kind == nkSym: + a.scopes[n.sym.id] = a.currentBlock + proc initVar(a: PEffects, n: PNode; volatileCheck: bool) = if n.kind != nkSym: return let s = n.sym @@ -197,6 +202,23 @@ proc initVar(a: PEffects, n: PNode; volatileCheck: bool) = for x in a.init: if x == s.id: return a.init.add s.id + if a.scopes.getOrDefault(s.id) == a.currentBlock: + #[ Consider this case: + + var x: T + while true: + if cond: + x = T() #1 + else: + x = T() #2 + use x + + Even though both #1 and #2 are first writes we must use the `=copy` + here so that the old value is destroyed because `x`'s destructor is + run outside of the while loop. This is why we need the check here that + the assignment is done in the same logical block as `x` was declared in. + ]# + n.flags.incl nfFirstWrite2 proc initVarViaNew(a: PEffects, n: PNode) = if n.kind != nkSym: return @@ -1102,18 +1124,22 @@ proc track(tracked: PEffects, n: PNode) = createTypeBoundOps(tracked, child[i].typ, child.info) else: createTypeBoundOps(tracked, skipPragmaExpr(child[0]).typ, child.info) - if child.kind == nkIdentDefs and last.kind != nkEmpty: + if child.kind == nkIdentDefs: for i in 0..; requires a copy because it's not the last read of ':envAlt.b1'; another read is done here: tuse_ownedref_after_move.nim(52, 4)" + errormsg: "'=copy' is not available for type ; requires a copy because it's not the last read of ':envAlt.b1'; routine: main" line: 48 """
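
Note (illustration only, not part of the patch): the core of the new `isLastReadImpl` in injectdestructors.nim is a work-list walk over the per-root CFG built by `constructCfg`, where `fork` pushes the alternative branch, `goto` jumps forward, and the new `loop` instruction is the only back edge and is cut via a `marked` set. The following is a minimal standalone sketch of that traversal with hypothetical, simplified types (a plain `string` stands in for the `PNode` location and no alias analysis is done); the names `Instr`, `InstrKind` mirror dfa.nim, while `readAfter` and its demo CFG are invented for this note and are not APIs of the patch.

import std/intsets

type
  InstrKind = enum goto, loop, fork, def, use
  Instr = object
    kind: InstrKind
    dest: int       # relative jump distance for goto/fork/loop
    sym: string     # touched location for def/use (stands in for the real PNode)

proc readAfter(cfg: seq[Instr]; start: int; sym: string): bool =
  ## True if some path starting at `start` reaches a `use` of `sym`
  ## before a `def` of it. Back edges (`loop`) are cut with the `marked`
  ## set; `fork` pushes the alternative branch onto a work list.
  var pcs = @[start]
  var marked = initIntSet()
  while pcs.len > 0:
    var pc = pcs.pop()
    if marked.contains(pc): continue
    let oldPc = pc
    while pc < cfg.len:
      case cfg[pc].kind
      of loop:
        let back = pc + cfg[pc].dest
        if marked.containsOrIncl(back): break   # already visited: stop this path
        pc = back
      of goto:
        pc += cfg[pc].dest
      of fork:
        if not marked.contains(pc + 1): pcs.add pc + 1
        pc += cfg[pc].dest
      of use:
        if cfg[pc].sym == sym: return true      # another read is reachable
        inc pc
      of def:
        if cfg[pc].sym == sym: break            # redefinition kills this path
        inc pc
    marked.incl oldPc
  result = false

when isMainModule:
  # Roughly models `while cond: use x` followed by `x = ...`:
  #   0: fork +3   (exit the loop or run the body)
  #   1: use x
  #   2: loop -2   (back edge to the fork)
  #   3: def x
  let cfg = @[
    Instr(kind: fork, dest: 3),
    Instr(kind: use, sym: "x"),
    Instr(kind: loop, dest: -2),
    Instr(kind: def, sym: "x")]
  echo readAfter(cfg, 2, "x")  # true: the loop may run `use x` again, so pc 1 is not a last read

In the real pass the `use`/`def` comparisons are replaced by `aliases`/`isAnalysableFieldAccess` queries from the new aliasanalysis.nim module, and a reachable read records `c.otherUsage` so sempass2/injectdestructors can point the `=copy` error message at the conflicting location.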