ARC: Analysis in one pass v2 (#17000)

* Analyse last reads all at once * Integrate firstWrite analysis * Small cleanup * Use sets instead of seqs * Remove instrTargets * Reap the benefits * Implement error diagnostics * Operate on DFA index for lastRead analysis * Use mgetOrPut * Cache alias results This improves performance by a lot, since many CFG locations map to a single PNode * Improve performance * Improve performance * Cleanup
2026-02-19 01:18:32 +00:00 · 2021-02-10 15:03:57 +01:00
parent 3189c0a2a9
commit 216be4060a
7 changed files with 147 additions and 130 deletions
--- a/compiler/ast.nim
+++ b/compiler/ast.nim
@@ -491,6 +491,8 @@ type
    nfDefaultRefsParam # a default param value references another parameter
                       # the flag is applied to proc default values and to calls
    nfExecuteOnReload  # A top-level statement that will be executed during reloads
+    nfLastRead  # this node is a last read
+    nfFirstWrite# this node is a first write

  TNodeFlags* = set[TNodeFlag]
  TTypeFlag* = enum   # keep below 32 for efficiency reasons (now: ~40)
@@ -1014,7 +1016,7 @@ const
                                      nfDotSetter, nfDotField,
                                      nfIsRef, nfIsPtr, nfPreventCg, nfLL,
                                      nfFromTemplate, nfDefaultRefsParam,
-                                      nfExecuteOnReload}
+                                      nfExecuteOnReload, nfLastRead, nfFirstWrite}
  namePos* = 0
  patternPos* = 1    # empty except for term rewriting macros
  genericParamsPos* = 2
--- a/compiler/ccgcalls.nim
+++ b/compiler/ccgcalls.nim
@@ -298,11 +298,11 @@ proc genArgNoParam(p: BProc, n: PNode, needsTmp = false): Rope =
    initLocExprSingleUse(p, n, a)
    result = rdLoc(withTmpIfNeeded(p, a, needsTmp))

-from dfa import instrTargets, InstrTargetKind
+from dfa import aliases, AliasKind

 proc potentialAlias(n: PNode, potentialWrites: seq[PNode]): bool =
  for p in potentialWrites:
-    if instrTargets(p, n) != None:
+    if p.aliases(n) != no or n.aliases(p) != no:
      return true

 proc skipTrivialIndirections(n: PNode): PNode =
--- a/compiler/dfa.nim
+++ b/compiler/dfa.nim
@@ -632,8 +632,10 @@ proc aliases*(obj, field: PNode): AliasKind =
    case currFieldPath.kind
    of nkSym:
      if currFieldPath.sym != currObjPath.sym: return no
-    of nkDotExpr, nkCheckedFieldExpr:
+    of nkDotExpr:
      if currFieldPath[1].sym != currObjPath[1].sym: return no
+    of nkCheckedFieldExpr:
+      if currFieldPath[0][1].sym != currObjPath[0][1].sym: return no
    of nkBracketExpr:
      if currFieldPath[1].kind in nkLiterals and currObjPath[1].kind in nkLiterals:
        if currFieldPath[1].intVal != currObjPath[1].intVal:
@@ -642,19 +644,6 @@ proc aliases*(obj, field: PNode): AliasKind =
        result = maybe
    else: assert false # unreachable

-type InstrTargetKind* = enum
-  None, Full, Partial
-
-proc instrTargets*(insloc, loc: PNode): InstrTargetKind =
-  case insloc.aliases(loc)
-  of yes:
-    Full    # x -> x; x -> x.f
-  of maybe:
-    Partial # We treat this like a partial write/read
-  elif loc.aliases(insloc) != no:
-    Partial # x.f -> x
-  else: None
-
 proc isAnalysableFieldAccess*(orig: PNode; owner: PSym): bool =
  var n = orig
  while true:
--- a/compiler/injectdestructors.nim
+++ b/compiler/injectdestructors.nim
@@ -26,7 +26,6 @@ type
    owner: PSym
    g: ControlFlowGraph
    graph: ModuleGraph
-    otherRead: PNode
    inLoop, inSpawn, inLoopCond: int
    uninit: IntSet # set of uninit'ed vars
    uninitComputed: bool
@@ -72,23 +71,56 @@ proc nestedScope(parent: var Scope): Scope =
 proc p(n: PNode; c: var Con; s: var Scope; mode: ProcessMode): PNode
 proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope; isDecl = false): PNode

-proc isLastRead(location: PNode; cfg: ControlFlowGraph; otherRead: var PNode; pc, until: int): int =
-  var pc = pc
-  while pc < cfg.len and pc < until:
+import sets, hashes, tables
+
+proc hash(n: PNode): Hash = hash(cast[pointer](n))
+
+type AliasCache = Table[(PNode, PNode), AliasKind]
+proc aliasesCached(cache: var AliasCache, obj, field: PNode): AliasKind =
+  let key = (obj, field)
+  if not cache.hasKey(key):
+    cache[key] = aliases(obj, field)
+  cache[key]
+
+proc collectLastReads(cfg: ControlFlowGraph; cache: var AliasCache, alreadySeen: var HashSet[PNode], lastReads, potLastReads: var IntSet; pc: var int, until: int) =
+  template aliasesCached(obj, field: PNode): untyped =
+    aliasesCached(cache, obj, field)
+  while pc < until:
    case cfg[pc].kind
    of def:
-      if instrTargets(cfg[pc].n, location) == Full:
-        # the path leads to a redefinition of 's' --> abandon it.
-        return high(int)
-      elif instrTargets(cfg[pc].n, location) == Partial:
-        # only partially writes to 's' --> can't sink 's', so this def reads 's'
-        otherRead = cfg[pc].n
-        return -1
+      let potLastReadsCopy = potLastReads
+      for r in potLastReadsCopy:
+        if cfg[pc].n.aliasesCached(cfg[r].n) == yes:
+          # the path leads to a redefinition of 's' --> sink 's'.
+          lastReads.incl r
+          potLastReads.excl r
+        elif cfg[r].n.aliasesCached(cfg[pc].n) != no:
+          # only partially writes to 's' --> can't sink 's', so this def reads 's'
+          # or maybe writes to 's' --> can't sink 's'
+          cfg[r].n.comment = '\n' & $pc
+          potLastReads.excl r
+
+      var alreadySeenThisNode = false
+      for s in alreadySeen:
+        if cfg[pc].n.aliases(s) != no or s.aliases(cfg[pc].n) != no:
+          alreadySeenThisNode = true; break
+      if alreadySeenThisNode: cfg[pc].n.flags.excl nfFirstWrite
+      else: cfg[pc].n.flags.incl nfFirstWrite
+
+      alreadySeen.incl cfg[pc].n
+
      inc pc
    of use:
-      if instrTargets(cfg[pc].n, location) != None:
-        otherRead = cfg[pc].n
-        return -1
+      let potLastReadsCopy = potLastReads
+      for r in potLastReadsCopy:
+        if cfg[pc].n.aliasesCached(cfg[r].n) != no or cfg[r].n.aliasesCached(cfg[pc].n) != no:
+          cfg[r].n.comment = '\n' & $pc
+          potLastReads.excl r
+
+      potLastReads.incl pc
+
+      alreadySeen.incl cfg[pc].n
+
      inc pc
    of goto:
      pc += cfg[pc].dest
@@ -96,99 +128,37 @@ proc isLastRead(location: PNode; cfg: ControlFlowGraph; otherRead: var PNode; pc
      # every branch must lead to the last read of the location:
      var variantA = pc + 1
      var variantB = pc + cfg[pc].dest
-      while variantA != variantB:
-        if min(variantA, variantB) < 0: return -1
-        if max(variantA, variantB) >= cfg.len or min(variantA, variantB) >= until:
-          break
+      var potLastReadsA, potLastReadsB = potLastReads
+      var lastReadsA, lastReadsB: IntSet
+      var alreadySeenA, alreadySeenB = alreadySeen
+      while variantA != variantB and max(variantA, variantB) < cfg.len and min(variantA, variantB) < until:
        if variantA < variantB:
-          variantA = isLastRead(location, cfg, otherRead, variantA, min(variantB, until))
+          collectLastReads(cfg, cache, alreadySeenA, lastReadsA, potLastReadsA, variantA, min(variantB, until))
        else:
-          variantB = isLastRead(location, cfg, otherRead, variantB, min(variantA, until))
-      pc = min(variantA, variantB)
-  return pc
+          collectLastReads(cfg, cache, alreadySeenB, lastReadsB, potLastReadsB, variantB, min(variantA, until))

-proc isCursor(n: PNode; c: Con): bool =
-  case n.kind
-  of nkSym:
-    sfCursor in n.sym.flags
-  of nkDotExpr:
-    isCursor(n[1], c)
-  of nkCheckedFieldExpr:
-    isCursor(n[0], c)
-  else:
-    false
+      alreadySeen.incl alreadySeenA + alreadySeenB
+
+      lastReads.incl lastReadsA * lastReadsB
+      lastReads.incl (lastReadsA + lastReadsB) - potLastReads
+
+      let oldPotLastReads = potLastReads
+      potLastReads = initIntSet()
+
+      potLastReads.incl (lastReadsA + lastReadsB) - lastReads
+
+      potLastReads.incl potLastReadsA * potLastReadsB
+      potLastReads.incl (potLastReadsA + potLastReadsB) - oldPotLastReads
+
+      pc = min(variantA, variantB)

 proc isLastRead(n: PNode; c: var Con): bool =
-  # first we need to search for the instruction that belongs to 'n':
-  var instr = -1
  let m = dfa.skipConvDfa(n)
-  if m.kind == nkSym and sfSingleUsedTemp in m.sym.flags: return true
-
-  for i in 0..<c.g.len:
-    # This comparison is correct and MUST not be ``instrTargets``:
-    if c.g[i].kind == use and c.g[i].n == m:
-      if instr < 0:
-        instr = i
-        break
-
-  dbg: echo "starting point for ", n, " is ", instr, " ", n.kind
-
-  if instr < 0: return false
-  # we go through all paths beginning from 'instr+1' and need to
-  # ensure that we don't find another 'use X' instruction.
-  if instr+1 >= c.g.len: return true
-
-  c.otherRead = nil
-  result = isLastRead(n, c.g, c.otherRead, instr+1, int.high) >= 0
-  dbg: echo "ugh ", c.otherRead.isNil, " ", result
-
-proc isFirstWrite(location: PNode; cfg: ControlFlowGraph; pc, until: int): int =
-  var pc = pc
-  while pc < until:
-    case cfg[pc].kind
-    of def:
-      if instrTargets(cfg[pc].n, location) != None:
-        # a definition of 's' before ours makes ours not the first write
-        return -1
-      inc pc
-    of use:
-      if instrTargets(cfg[pc].n, location) != None:
-        return -1
-      inc pc
-    of goto:
-      pc += cfg[pc].dest
-    of fork:
-      # every branch must not contain a def/use of our location:
-      var variantA = pc + 1
-      var variantB = pc + cfg[pc].dest
-      while variantA != variantB:
-        if min(variantA, variantB) < 0: return -1
-        if max(variantA, variantB) > until:
-          break
-        if variantA < variantB:
-          variantA = isFirstWrite(location, cfg, variantA, min(variantB, until))
-        else:
-          variantB = isFirstWrite(location, cfg, variantB, min(variantA, until))
-      pc = min(variantA, variantB)
-  return pc
+  (m.kind == nkSym and sfSingleUsedTemp in m.sym.flags) or nfLastRead in m.flags

 proc isFirstWrite(n: PNode; c: var Con): bool =
-  # first we need to search for the instruction that belongs to 'n':
-  var instr = -1
  let m = dfa.skipConvDfa(n)
-
-  for i in countdown(c.g.len-1, 0): # We search backwards here to treat loops correctly
-    if c.g[i].kind == def and c.g[i].n == m:
-      if instr < 0:
-        instr = i
-        break
-
-  if instr < 0: return false
-  # we go through all paths going to 'instr' and need to
-  # ensure that we don't find another 'def/use X' instruction.
-  if instr == 0: return true
-
-  result = isFirstWrite(n, c.g, 0, instr) >= 0
+  nfFirstWrite in m.flags

 proc initialized(code: ControlFlowGraph; pc: int,
                 init, uninit: var IntSet; until: int): int =
@@ -228,20 +198,33 @@ proc initialized(code: ControlFlowGraph; pc: int,
      inc pc
  return pc

+proc isCursor(n: PNode; c: Con): bool =
+  case n.kind
+  of nkSym:
+    sfCursor in n.sym.flags
+  of nkDotExpr:
+    isCursor(n[1], c)
+  of nkCheckedFieldExpr:
+    isCursor(n[0], c)
+  else:
+    false
+
 template isUnpackedTuple(n: PNode): bool =
  ## we move out all elements of unpacked tuples,
  ## hence unpacked tuples themselves don't need to be destroyed
  (n.kind == nkSym and n.sym.kind == skTemp and n.sym.typ.kind == tyTuple)

+from strutils import parseInt
+
 proc checkForErrorPragma(c: Con; t: PType; ri: PNode; opname: string) =
  var m = "'" & opname & "' is not available for type <" & typeToString(t) & ">"
  if (opname == "=" or opname == "=copy") and ri != nil:
    m.add "; requires a copy because it's not the last read of '"
    m.add renderTree(ri)
    m.add '\''
-    if c.otherRead != nil:
+    if ri.comment.startsWith('\n'):
      m.add "; another read is done here: "
-      m.add c.graph.config $ c.otherRead.info
+      m.add c.graph.config $ c.g[parseInt(ri.comment[1..^1])].n.info
    elif ri.kind == nkSym and ri.sym.kind == skParam and not isSinkType(ri.sym.typ):
      m.add "; try to make "
      m.add renderTree(ri)
@@ -355,7 +338,6 @@ proc genCopy(c: var Con; dest, ri: PNode): PNode =
  let t = dest.typ
  if tfHasOwned in t.flags and ri.kind != nkNilLit:
    # try to improve the error message here:
-    if c.otherRead == nil: discard isLastRead(ri, c)
    c.checkForErrorPragma(t, ri, "=copy")
  result = c.genCopyNoCheck(dest, ri)

@@ -1097,6 +1079,24 @@ proc injectDestructorCalls*(g: ModuleGraph; idgen: IdGenerator; owner: PSym; n:
  if optCursorInference in g.config.options:
    computeCursors(owner, n, g)

+  block:
+    var cache = initTable[(PNode, PNode), AliasKind]()
+    var alreadySeen: HashSet[PNode]
+    var lastReads, potLastReads: IntSet
+    var pc = 0
+    collectLastReads(c.g, cache, alreadySeen, lastReads, potLastReads, pc, c.g.len)
+    lastReads.incl potLastReads
+    var lastReadTable: Table[PNode, seq[int]]
+    for position, node in c.g:
+      if node.kind == use:
+        lastReadTable.mgetOrPut(node.n, @[]).add position
+    for node, positions in lastReadTable:
+      var allPositionsLastRead = true
+      for p in positions:
+        if p notin lastReads: allPositionsLastRead = false; break
+      if allPositionsLastRead:
+        node.flags.incl nfLastRead
+
  var scope: Scope
  let body = p(n, c, scope, normal)

--- a/compiler/renderer.nim
+++ b/compiler/renderer.nim
@@ -296,11 +296,11 @@ proc popAllComs(g: var TSrcGen) =
 const
  Space = " "

-proc shouldRenderComment(g: var TSrcGen, n: PNode): bool =
-  result = false
-  if n.comment.len > 0:
-    result = (renderNoComments notin g.flags) or
-        (renderDocComments in g.flags)
+proc shouldRenderComment(g: TSrcGen): bool {.inline.} =
+  (renderNoComments notin g.flags or renderDocComments in g.flags)
+
+proc shouldRenderComment(g: TSrcGen, n: PNode): bool {.inline.} =
+  shouldRenderComment(g) and n.comment.len > 0

 proc gcom(g: var TSrcGen, n: PNode) =
  assert(n != nil)
@@ -430,7 +430,7 @@ proc lsons(g: TSrcGen; n: PNode, start: int = 0, theEnd: int = - 1): int =
 proc lsub(g: TSrcGen; n: PNode): int =
  # computes the length of a tree
  if isNil(n): return 0
-  if n.comment.len > 0: return MaxLineLen + 1
+  if shouldRenderComment(g, n): return MaxLineLen + 1
  case n.kind
  of nkEmpty: result = 0
  of nkTripleStrLit:
@@ -598,7 +598,7 @@ proc gcommaAux(g: var TSrcGen, n: PNode, ind: int, start: int = 0,
    if c:
      if g.tokens.len > oldLen:
        putWithSpace(g, separator, $separator)
-      if hasCom(n[i]):
+      if shouldRenderComment(g) and hasCom(n[i]):
        gcoms(g)
        optNL(g, ind)

@@ -639,7 +639,7 @@ proc gsection(g: var TSrcGen, n: PNode, c: TContext, kind: TokType,
  dedent(g)

 proc longMode(g: TSrcGen; n: PNode, start: int = 0, theEnd: int = - 1): bool =
-  result = n.comment.len > 0
+  result = shouldRenderComment(g, n)
  if not result:
    # check further
    for i in start..n.len + theEnd:
@@ -980,7 +980,7 @@ proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
  if isNil(n): return
  var
    a: TContext
-  if n.comment.len > 0: pushCom(g, n)
+  if shouldRenderComment(g, n): pushCom(g, n)
  case n.kind                 # atoms:
  of nkTripleStrLit: put(g, tkTripleStrLit, atom(g, n))
  of nkEmpty: discard
--- a/tests/arc/tmovebug.nim
+++ b/tests/arc/tmovebug.nim
@@ -93,7 +93,6 @@ destroy
 destroy
 destroy
 sink
-sink
 destroy
 copy
 (f: 1)
@@ -687,7 +686,7 @@ caseNotAConstant()

 proc potentialSelfAssign(i: var int) =
  var a: array[2, OO]
-  a[i] = OO(f: 1)
+  a[i] = OO(f: 1) # turned into a memcopy
  a[1] = OO(f: 2)
  a[i+1] = a[i] # This must not =sink, but =copy
  inc i
@@ -744,3 +743,30 @@ proc partToWholeUnownedRef =

 partToWholeUnownedRef()

+
+#--------------------------------------------------------------------
+# test that nodes that get copied during the transformation
+# (like dot exprs) don't loose their firstWrite/lastRead property
+
+type
+  OOO = object
+    initialized: bool
+
+  C = object
+    o: OOO
+
+proc `=destroy`(o: var OOO) =
+  doAssert o.initialized, "OOO was destroyed before initialization!"
+
+proc initO(): OOO =
+  OOO(initialized: true)
+
+proc initC(): C =
+  C(o: initO())
+
+proc pair(): tuple[a: C, b: C] =
+  result.a = initC() # <- when firstWrite tries to find this node to start its analysis it fails, because injectdestructors uses copyTree/shallowCopy
+  result.b = initC()
+
+discard pair()
+
--- a/tests/arc/topt_no_cursor.nim
+++ b/tests/arc/topt_no_cursor.nim
@@ -52,7 +52,7 @@ _ = (
  blitTmp, ";")
 lvalue = _[0]
 lnext = _[1]
-`=sink`(result.value, move lvalue)
+result.value = move lvalue
 `=destroy`(lnext)
 `=destroy_1`(lvalue)
 -- end of expandArc ------------------------