first implementation of write tracking and escape analysis; still disabled

2026-02-13 14:53:46 +00:00 · 2015-08-31 11:09:12 +02:00
parent 0e2a798af4
commit e733debec7
5 changed files with 319 additions and 21 deletions
--- a/compiler/ast.nim
+++ b/compiler/ast.nim
@@ -291,12 +291,12 @@ const
  sfNoForward* = sfRegister
    # forward declarations are not required (per module)

-  sfNoRoot* = sfBorrow # a local variable is provably no root so it doesn't
-                       # require RC ops
  sfCompileToCpp* = sfInfixCall       # compile the module as C++ code
  sfCompileToObjc* = sfNamedParamCall # compile the module as Objective-C code
  sfExperimental* = sfOverriden       # module uses the .experimental switch
  sfGoto* = sfOverriden               # var is used for 'goto' code generation
+  sfWrittenTo* = sfBorrow             # param is assigned to
+  sfEscapes* = sfProcvar              # param escapes

 const
  # getting ready for the future expr/stmt merge
@@ -527,6 +527,7 @@ const
    # deprecated and this mess can be cleaned up.
  tfVoid* = tfVarargs # for historical reasons we conflated 'void' with
                      # 'empty' ('@[]' has the type 'seq[empty]').
+  tfReturnsNew* = tfInheritable
  skError* = skUnknown

  # type flags that are essential for type equality:
--- a/compiler/options.nim
+++ b/compiler/options.nim
@@ -13,6 +13,7 @@ import
 const
  hasTinyCBackend* = defined(tinyc)
  useEffectSystem* = true
+  useWriteTracking* = false
  hasFFI* = defined(useFFI)
  newScopeForIf* = true
  useCaas* = not defined(noCaas)
--- a/compiler/sempass2.nim
+++ b/compiler/sempass2.nim
@@ -9,7 +9,7 @@

 import
  intsets, ast, astalgo, msgs, renderer, magicsys, types, idents, trees,
-  wordrecg, strutils, options, guards
+  wordrecg, strutils, options, guards, writetracking

 # Second semantic checking pass over the AST. Necessary because the old
 # way had some inherent problems. Performs:
@@ -17,7 +17,7 @@ import
 # * effect+exception tracking
 # * "usage before definition" checking
 # * checks for invalid usages of compiletime magics (not implemented)
-# * checks for invalid usages of PNimNode (not implemented)
+# * checks for invalid usages of NimNode (not implemented)
 # * later: will do an escape analysis for closures at least

 # Predefined effects:
@@ -29,21 +29,6 @@ import
 #   --> a TR macro can annotate the proc with user defined annotations
 #   --> the effect system can access these

-# Load&Store analysis is performed on *paths*. A path is an access like
-# obj.x.y[i].z; splitting paths up causes some problems:
-#
-# var x = obj.x
-# var z = x.y[i].z
-#
-# Alias analysis is affected by this too! A good solution is *type splitting*:
-# T becomes T1 and T2 if it's known that T1 and T2 can't alias.
-#
-# An aliasing problem and a race condition are effectively the same problem.
-# Type based alias analysis is nice but not sufficient; especially splitting
-# an array and filling it in parallel should be supported but is not easily
-# done: It essentially requires a built-in 'indexSplit' operation and dependent
-# typing.
-
 # ------------------------ exception and tag tracking -------------------------

 discard """
@@ -438,17 +423,41 @@ proc documentEffect(n, x: PNode, effectType: TSpecialWord, idx: int): PNode =
    result = newNode(nkExprColonExpr, n.info, @[
      newIdentNode(getIdent(specialWords[effectType]), n.info), effects])

+proc documentWriteEffect(n: PNode; flag: TSymFlag; pragmaName: string): PNode =
+  let s = n.sons[namePos].sym
+  let params = s.typ.n
+
+  var effects = newNodeI(nkBracket, n.info)
+  for i in 1 ..< params.len:
+    if params[i].kind == nkSym and flag in params[i].sym.flags:
+      effects.add params[i]
+
+  if effects.len > 0:
+    result = newNode(nkExprColonExpr, n.info, @[
+      newIdentNode(getIdent(pragmaName), n.info), effects])
+
+proc documentNewEffect(n: PNode): PNode =
+  let s = n.sons[namePos].sym
+  if tfReturnsNew in s.typ.flags:
+    result = newIdentNode(getIdent("new"), n.info)
+
 proc documentRaises*(n: PNode) =
  if n.sons[namePos].kind != nkSym: return
  let pragmas = n.sons[pragmasPos]
  let p1 = documentEffect(n, pragmas, wRaises, exceptionEffects)
  let p2 = documentEffect(n, pragmas, wTags, tagEffects)
+  let p3 = documentWriteEffect(n, sfWrittenTo, "writes")
+  let p4 = documentNewEffect(n)
+  let p5 = documentWriteEffect(n, sfEscapes, "escapes")

-  if p1 != nil or p2 != nil:
+  if p1 != nil or p2 != nil or p3 != nil or p4 != nil or p5 != nil:
    if pragmas.kind == nkEmpty:
      n.sons[pragmasPos] = newNodeI(nkPragma, n.info)
    if p1 != nil: n.sons[pragmasPos].add p1
    if p2 != nil: n.sons[pragmasPos].add p2
+    if p3 != nil: n.sons[pragmasPos].add p3
+    if p4 != nil: n.sons[pragmasPos].add p4
+    if p5 != nil: n.sons[pragmasPos].add p5

 template notGcSafe(t): expr = {tfGcSafe, tfNoSideEffect} * t.flags == {}

@@ -900,6 +909,7 @@ proc trackProc*(s: PSym, body: PNode) =
    message(s.info, warnLockLevel,
      "declared lock level is $1, but real lock level is $2" %
        [$s.typ.lockLevel, $t.maxLockLevel])
+  when useWriteTracking: trackWrites(s, body)

 proc trackTopLevelStmt*(module: PSym; n: PNode) =
  if n.kind in {nkPragma, nkMacroDef, nkTemplateDef, nkProcDef,
--- a/compiler/transf.nim
+++ b/compiler/transf.nim
@@ -115,7 +115,7 @@ proc transformSymAux(c: PTransf, n: PNode): PNode =
  #  return liftIterSym(n)
  var b: PNode
  var tc = c.transCon
-  if sfBorrow in n.sym.flags:
+  if sfBorrow in n.sym.flags and n.sym.kind in routineKinds:
    # simply exchange the symbol:
    b = n.sym.getBody
    if b.kind != nkSym: internalError(n.info, "wrong AST for borrowed symbol")
--- a/compiler/writetracking.nim
+++ b/compiler/writetracking.nim
@@ -0,0 +1,286 @@
+#
+#
+#           The Nim Compiler
+#        (c) Copyright 2015 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module implements the write tracking analysis. Read my block post for
+## a basic description of the algorithm and ideas.
+
+import idents, ast, astalgo, trees, renderer, msgs, types
+
+const
+  debug = false
+
+type
+  AssignToResult = enum
+    asgnNil,   # 'nil' is fine
+    asgnNew,   # 'new(result)'
+    asgnOther  # result = fooBar # not a 'new' --> 'result' might not 'new'
+  NewLocation = enum
+    newNone,
+    newLit,
+    newCall
+  W = object # WriteTrackContext
+    owner: PSym
+    returnsNew: AssignToResult # assignments to 'result'
+    markAsWrittenTo, markAsEscaping: PNode
+    assignments: seq[(PNode, PNode)] # list of all assignments in this proc
+
+proc returnsNewExpr*(n: PNode): NewLocation =
+  case n.kind
+  of nkCharLit..nkInt64Lit, nkStrLit..nkTripleStrLit,
+      nkFloatLit..nkFloat64Lit, nkNilLit:
+    result = newLit
+  of nkExprEqExpr, nkExprColonExpr, nkHiddenStdConv, nkHiddenSubConv,
+      nkStmtList, nkStmtListExpr, nkBlockStmt, nkBlockExpr, nkOfBranch,
+      nkElifBranch, nkElse, nkExceptBranch, nkFinally, nkCast:
+    result = returnsNewExpr(n.lastSon)
+  of nkCurly, nkBracket, nkPar, nkObjConstr, nkClosure,
+      nkIfExpr, nkIfStmt, nkWhenStmt, nkCaseStmt, nkTryStmt:
+    result = newLit
+    for i in 0 .. <n.len:
+      let x = returnsNewExpr(n.sons[i])
+      case x
+      of newNone: return newNone
+      of newLit: discard
+      of newCall: result = newCall
+  of nkCallKinds:
+    if n.sons[0].typ != nil and tfReturnsNew in n.sons[0].typ.flags:
+      result = newCall
+  else:
+    result = newNone
+
+proc root(owner: PSym; n: PNode; heapAccess: var bool): PSym =
+  ## returns 'nil' if the 'root' could not be detected.
+  case n.kind
+  of nkSym:
+    result = n.sym
+  of nkHiddenDeref, nkDerefExpr:
+    result = root(owner, n.sons[0], heapAccess)
+    heapAccess = true
+  of nkHiddenAddr, nkObjUpConv, nkObjDownConv,
+      nkDotExpr, nkBracketExpr, nkCheckedFieldExpr:
+    result = root(owner, n.sons[0], heapAccess)
+  of nkHiddenStdConv, nkHiddenSubConv, nkConv, nkStmtList, nkStmtListExpr,
+      nkBlockStmt, nkBlockExpr, nkCast:
+    result = root(owner, n.lastSon, heapAccess)
+  of nkCallKinds:
+    # builtin slice keeps lvalue-ness:
+    if getMagic(n) == mSlice:
+      result = root(owner, n.sons[1], heapAccess)
+      heapAccess = true
+    else:
+      # 'p().foo = x' --> treat as  'let tmp = p(); tmp.foo = x'
+      result = newSym(skTemp, getIdent(":tmp"), owner, n.sons[0].info)
+      #result.typ = n.sons[0].typ.sons[0]
+      #result.ast = n.sons[0]
+      # XXX
+  of nkPar:
+    localError(n.info, "writeSetAnalysis: too implement")
+  else:
+    discard
+
+proc deps(w: var W; dest, src: PNode) =
+  # let x = (localA, localB)
+  # compute 'returnsNew' property:
+  let retNew = returnsNewExpr(src)
+  if dest.kind == nkSym and dest.sym.kind == skResult:
+    if retNew != newNone:
+      if w.returnsNew != asgnOther: w.returnsNew = asgnNew
+    else:
+      w.returnsNew = asgnOther
+  # mark the dependency, but
+  # rule out obviously innocent assignments like 'somebool = true'
+  if dest.kind == nkSym and retNew == newLit: discard
+  else: w.assignments.add((dest, src))
+
+proc depsArgs(w: var W; n: PNode) =
+  if n.sons[0].typ.isNil: return
+  var typ = skipTypes(n.sons[0].typ, abstractInst)
+  if typ.kind != tyProc: return
+  # echo n.info, " ", n, " ", w.owner.name.s, " ", typeToString(typ)
+  assert(sonsLen(typ) == sonsLen(typ.n))
+  for i in 1 ..< n.len:
+    let it = n.sons[i]
+    if i < sonsLen(typ):
+      assert(typ.n.sons[i].kind == nkSym)
+      let paramType = typ.n.sons[i]
+      if paramType.typ.isCompileTimeOnly: continue
+      if sfWrittenTo in paramType.sym.flags or paramType.typ.kind == tyVar:
+        # p(f(x, y), X, g(h, z))
+        deps(w, it, w.markAsWrittenTo)
+      if sfEscapes in paramType.sym.flags or paramType.typ.kind == tyVar:
+        deps(w, it, w.markAsEscaping)
+
+proc deps(w: var W; n: PNode) =
+  case n.kind
+  of nkLetSection, nkVarSection:
+    for child in n:
+      let last = lastSon(child)
+      if last.kind == nkEmpty: continue
+      if child.kind == nkVarTuple and last.kind == nkPar:
+        internalAssert child.len-2 == last.len
+        for i in 0 .. child.len-3:
+          deps(w, child.sons[i], last.sons[i])
+      else:
+        for i in 0 .. child.len-3:
+          deps(w, child.sons[i], last)
+  of nkAsgn, nkFastAsgn:
+    deps(w, n.sons[0], n.sons[1])
+  else:
+    for i in 0 ..< n.safeLen:
+      deps(w, n.sons[i])
+    if n.kind in nkCallKinds:
+      if getMagic(n) in {mNew, mNewFinalize, mNewSeq}:
+        # may not look like an assignment, but it is:
+        deps(w, n.sons[1], newNodeIT(nkObjConstr, n.info, n.sons[1].typ))
+      else:
+        depsArgs(w, n)
+
+proc allRoots(n: PNode; result: var seq[PSym]) =
+  case n.kind
+  of nkSym:
+    if n.sym notin result: result.add n.sym
+  of nkDotExpr, nkBracketExpr, nkHiddenDeref, nkDerefExpr, nkCheckedFieldExpr,
+      nkHiddenAddr, nkObjUpConv, nkObjDownConv:
+    allRoots(n.sons[0], result)
+  of nkExprEqExpr, nkExprColonExpr, nkHiddenStdConv, nkHiddenSubConv, nkConv,
+      nkStmtList, nkStmtListExpr, nkBlockStmt, nkBlockExpr, nkOfBranch,
+      nkElifBranch, nkElse, nkExceptBranch, nkFinally, nkCast:
+    allRoots(n.lastSon, result)
+  of nkCallKinds:
+    if getMagic(n) == mSlice:
+      allRoots(n.sons[1], result)
+    else:
+      # we do significantly better here by using the available escape
+      # information:
+      if n.sons[0].typ.isNil: return
+      var typ = n.sons[0].typ
+      if typ != nil:
+        typ = skipTypes(typ, abstractInst)
+        if typ.kind != tyProc: typ = nil
+        else: assert(sonsLen(typ) == sonsLen(typ.n))
+
+      for i in 1 ..< n.len:
+        let it = n.sons[i]
+        if typ != nil and i < sonsLen(typ):
+          assert(typ.n.sons[i].kind == nkSym)
+          let paramType = typ.n.sons[i]
+          if paramType.typ.isCompileTimeOnly: continue
+          if sfEscapes in paramType.sym.flags or paramType.typ.kind == tyVar:
+            allRoots(it, result)
+        else:
+          allRoots(it, result)
+  else:
+    for i in 0..<n.safeLen:
+      allRoots(n.sons[i], result)
+
+proc hasSym(n: PNode; x: PSym): bool =
+  when false:
+    if n.kind == nkSym:
+      result = n.sym == x
+    else:
+      for i in 0..safeLen(n)-1:
+        if hasSym(n.sons[i], x): return true
+  else:
+    var tmp: seq[PSym] = @[]
+    allRoots(n, tmp)
+    result = x in tmp
+
+when debug:
+  proc `$`*(x: PSym): string = x.name.s
+
+proc possibleAliases(w: W; result: var seq[PSym]) =
+  var todo = 0
+  # this is an expensive fixpoint iteration. We could speed up this analysis
+  # by a smarter data-structure but we wait until prolifing shows us it's
+  # expensive. Usually 'w.assignments' is small enough.
+  while todo < result.len:
+    let x = result[todo]
+    inc todo
+    when debug:
+      if w.owner.name.s == "m3": echo "select ", x, " ", todo, " ", result.len
+    var dummy = false
+    for dest, src in items(w.assignments):
+      if src.hasSym(x):
+        # dest = f(..., s, ...)
+        let r = root(w.owner, dest, dummy)
+        if r != nil and r notin result:
+          result.add r
+          when debug:
+            if w.owner.name.s == "m3": echo "A ", result
+      elif dest.kind == nkSym and dest.sym == x:
+        # s = f(..., x, ....)
+        allRoots(src, result)
+        when debug:
+          if w.owner.name.s == "m3": echo "B ", result
+      else:
+        when debug:
+          if w.owner.name.s == "m3": echo "C ", x, " ", todo, " ", result.len
+
+proc possibleAliases(w: W; s: PSym): seq[PSym] =
+  result = @[s]
+  possibleAliases(w, result)
+
+proc markDirty(w: W) =
+  for dest, src in items(w.assignments):
+    var heapAccess = src == w.markAsWrittenTo
+    let r = root(w.owner, dest, heapAccess)
+    when debug:
+      if w.owner.info ?? "temp18":
+        echo "ASGN ", dest,  " = ", src, " |", heapAccess, " ", r.name.s
+    if heapAccess and r != nil:
+      if r.kind in {skParam, skVar, skTemp, skLet, skResult, skForVar}:
+        # we have an assignment like:
+        # local.foo = bar
+        # --> check which parameter it may alias and mark these parameters
+        # as dirty:
+        let aliases = possibleAliases(w, r)
+        for a in aliases:
+          if a.kind == skParam and a.owner == w.owner:
+            incl(a.flags, sfWrittenTo)
+      else:
+        internalError(dest.info, "dunno what to do " & $r.kind)
+
+proc markEscaping(w: W) =
+  # let p1 = p
+  # let p2 = q
+  # p2.x = call(..., p1, ...)
+  for dest, src in items(w.assignments):
+    var heapAccess = src == w.markAsEscaping
+    let r = root(w.owner, dest, heapAccess)
+    if r != nil and (heapAccess or r.kind == skResult):
+      if r.kind in {skParam, skVar, skTemp, skLet, skResult, skForVar}:
+        let aliases = possibleAliases(w, r)
+        var destIsParam = false
+        for a in aliases:
+          if a.kind in {skResult, skParam} and a.owner == w.owner:
+            destIsParam = true
+            break
+        if destIsParam:
+          var victims: seq[PSym] = @[]
+          allRoots(src, victims)
+          possibleAliases(w, victims)
+          for v in victims:
+            if v.kind == skParam and v.owner == w.owner:
+              incl(v.flags, sfEscapes)
+      else:
+        internalError(dest.info, "dunno what to do " & $r.kind)
+
+proc trackWrites*(owner: PSym; body: PNode) =
+  var w: W
+  w.owner = owner
+  w.markAsWrittenTo = newNodeI(nkArgList, body.info)
+  w.markAsEscaping = newNodeI(nkArgList, body.info)
+  w.assignments = @[]
+  deps(w, body)
+  markDirty(w)
+  markEscaping(w)
+  if w.returnsNew != asgnOther and not isEmptyType(owner.typ.sons[0]) and
+      containsGarbageCollectedRef(owner.typ.sons[0]):
+    incl(owner.typ.flags, tfReturnsNew)
+