explicit default values for a leaner codegen, .cursor variables for optimization

2026-02-17 16:38:33 +00:00 · 2019-03-07 00:37:23 +01:00
parent 2b5e48d807
commit f8bf24623c
7 changed files with 172 additions and 46 deletions
--- a/compiler/ast.nim
+++ b/compiler/ast.nim
@@ -287,6 +287,11 @@ const
  sfImmediate* = sfDispatcher
    # macro or template is immediately expanded
    # without considering any possible overloads
+
+  sfCursor* = sfDispatcher
+    # local variable has been computed to be a "cursor".
+    # see cursors.nim for details about what that means.
+
  sfAllUntyped* = sfVolatile # macro or template is immediately expanded \
    # in a generic context

--- a/compiler/cursors.nim
+++ b/compiler/cursors.nim
@@ -0,0 +1,72 @@
+#
+#
+#           The Nim Compiler
+#        (c) Copyright 2019 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+import
+  intsets, ast, astalgo, msgs, renderer, magicsys, types, idents, trees,
+  strutils, options, dfa, lowerings, tables, modulegraphs, msgs,
+  lineinfos, parampatterns
+
+##[
+This module implements "cursor" detection. A cursor is a local variable
+that is used for navigation in a data structure; it does not "own" the
+data it aliases, but it might update the underlying data structure.
+
+Two primary examples for cursors that I have in mind and that are critical
+for optimization:
+
+1. Local string variable introduced by ``for x in a``::
+
+  var i = 0
+  while i < a.len:
+    let cursor = a[i]
+    use cursor
+    inc i
+
+2. Local ``ref`` variable for navigation::
+
+  var cursor = listHead
+  while cursor != nil:
+    use cursor
+    cursor = cursor.next
+
+Cursors are very interesting for the optimizer because they can be copyMem'ed
+and don't need a destructor.
+
+More formally, a cursor is a variable that is set on all paths to
+a *location* or a proc call that produced a ``lent/var`` type. All statements
+that come after these assignments MUST NOT mutate what the cursor aliases.
+
+Mutations *through* the cursor are allowed if the cursor has ref semantics.
+
+Look at this complex real-world example taken from the compiler itself:
+
+.. code-block:: Nim
+
+  proc getTypeName(m: BModule; typ: PType; sig: SigHash): Rope =
+    var t = typ
+    while true:
+      if t.sym != nil and {sfImportc, sfExportc} * t.sym.flags != {}:
+        return t.sym.loc.r
+
+      if t.kind in irrelevantForBackend:
+        t = t.lastSon
+      else:
+        break
+    let typ = if typ.kind in {tyAlias, tySink, tyOwned}: typ.lastSon else: typ
+    if typ.loc.r == nil:
+      typ.loc.r = typ.typeName & $sig
+    result = typ.loc.r
+    if result == nil: internalError(m.config, "getTypeName: " & $typ.kind)
+
+Here `t` is a cursor but without a control flow based analysis we are unlikely
+to detect it.
+
+]##
+
+# Araq: I owe you an implementation. For now use the .cursor pragma. :-/
--- a/compiler/destroyer.nim
+++ b/compiler/destroyer.nim
@@ -161,6 +161,8 @@ type
    graph: ModuleGraph
    emptyNode: PNode
    otherRead: PNode
+    uninit: IntSet # set of uninit'ed vars
+    uninitComputed: bool

 proc isLastRead(s: PSym; c: var Con; pc, comesFrom: int): int =
  var pc = pc
@@ -247,6 +249,42 @@ proc isLastRead(n: PNode; c: var Con): bool =
    #echo c.graph.config $ n.info, " last read here!"
    return true

+proc initialized(code: ControlFlowGraph; pc: int,
+                 init, uninit: var IntSet; comesFrom: int): int =
+  ## Walks `code` from `pc`: `init` receives the IDs of symbols defined on all
+  ## paths, `uninit` those of non-params read before that. Returns the final pc.
+  var pc = pc
+  while pc < code.len:
+    case code[pc].kind
+    of goto:
+      pc = pc + code[pc].dest # `dest` is relative to the current instruction
+    of fork:
+      let target = pc + code[pc].dest
+      # each branch starts with what is already initialized before the fork:
+      var initA, initB: IntSet
+      assign(initA, init); assign(initB, init)
+      let pcA = initialized(code, pc+1, initA, uninit, pc)
+      discard initialized(code, target, initB, uninit, pc)
+      for v in initA: # we add vars if they are in both branches:
+        if v in initB:
+          init.incl v
+      pc = pcA+1 # continue after the position where branch A stopped
+    of InstrKind.join:
+      let target = pc + code[pc].dest
+      if comesFrom == target: return pc # this join points back at our fork
+      inc pc
+    of use:
+      let v = code[pc].sym
+      if v.kind != skParam and v.id notin init:
+        # attempt to read an uninit'ed variable
+        uninit.incl v.id
+      inc pc
+    of def:
+      let v = code[pc].sym
+      init.incl v.id
+      inc pc
+  return pc
+
 template interestingSym(s: PSym): bool =
  s.owner == c.owner and s.kind in InterestingSyms and hasDestructor(s.typ)

@@ -353,6 +391,11 @@ proc genWasMoved(n: PNode; c: var Con): PNode =
  # The mWasMoved builtin does not take the address.
  result = genMagicCall(n, c, "wasMoved", mWasMoved)

+proc genDefaultCall(t: PType; c: Con; info: TLineInfo): PNode =
+  # Builds the call node `default()` at `info` and gives it the type `t`.
+  result = newNodeI(nkCall, info); result.typ = t
+  result.add newSymNode(createMagic(c.graph, "default", mDefault))
+
 proc destructiveMoveVar(n: PNode; c: var Con): PNode =
  # generate: (let tmp = v; reset(v); tmp)
  # XXX: Strictly speaking we can only move if there is a ``=sink`` defined
@@ -572,6 +615,38 @@ proc moveOrCopy(dest, ri: PNode; c: var Con): PNode =
    result = genCopy(c, dest.typ, dest, ri)
    result.add p(ri, c)

+proc computeUninit(c: var Con) =
+  # Fills `c.uninit` once by running `initialized` over `c.g`; later calls no-op.
+  if c.uninitComputed: return
+  c.uninitComputed = true; c.uninit = initIntSet()
+  var definitelyInit = initIntSet()
+  discard initialized(c.g, 0, definitelyInit, c.uninit, -1)
+
+proc injectDefaultCalls(n: PNode, c: var Con) =
+  # Adds a `default` call to initializer-less var/let entries listed in `c.uninit`.
+  case n.kind
+  of nkVarSection, nkLetSection:
+    for idx in 0 ..< n.len:
+      let entry = n.sons[idx]
+      let last = entry.len - 1 # index of the initializer slot
+      let initExpr = entry.sons[last]
+      if entry.kind != nkIdentDefs or initExpr.kind != nkEmpty: continue
+      computeUninit(c)
+      for k in 0 .. last-2:
+        let symNode = entry.sons[k]
+        doAssert symNode.kind == nkSym
+        if symNode.sym.id in c.uninit:
+          entry.sons[last] = genDefaultCall(symNode.sym.typ, c, symNode.info)
+          break # the entry has a single initializer slot, so one hit suffices
+  of nkNone..nkNilLit, nkTypeSection, nkProcDef, nkConverterDef, nkMethodDef,
+      nkIteratorDef, nkMacroDef, nkTemplateDef, nkLambda, nkDo, nkFuncDef:
+    discard # these node kinds are not entered
+  else:
+    for idx in 0 ..< safeLen(n): injectDefaultCalls(n.sons[idx], c)
+
+proc isCursor(n: PNode): bool {.inline.} =
+  n.kind == nkSym and n.sym.flags.contains(sfCursor) # a symbol flagged sfCursor
+
 proc p(n: PNode; c: var Con): PNode =
  case n.kind
  of nkVarSection, nkLetSection:
@@ -585,7 +660,7 @@ proc p(n: PNode; c: var Con): PNode =
      if it.kind == nkVarTuple and hasDestructor(ri.typ):
        let x = lowerTupleUnpacking(c.graph, it, c.owner)
        result.add p(x, c)
-      elif it.kind == nkIdentDefs and hasDestructor(it[0].typ):
+      elif it.kind == nkIdentDefs and hasDestructor(it[0].typ) and not isCursor(it[0]):
        for j in 0..L-2:
          let v = it[j]
          doAssert v.kind == nkSym
@@ -669,6 +744,8 @@ proc injectDestructorCalls*(g: ModuleGraph; owner: PSym; n: PNode): PNode =
      if param.typ.kind == tySink and hasDestructor(param.typ.sons[0]):
        c.destroys.add genDestroy(c, param.typ.skipTypes({tyGenericInst, tyAlias, tySink}), params[i])

+  if optNimV2 in c.graph.config.globalOptions:
+    injectDefaultCalls(n, c)
  let body = p(n, c)
  result = newNodeI(nkStmtList, n.info)
  if c.topLevelVars.len > 0:
--- a/compiler/dfa.nim
+++ b/compiler/dfa.nim
@@ -686,42 +686,3 @@ proc constructCfg*(s: PSym; body: PNode): ControlFlowGraph =
  gen(c, body)
  genImplicitReturn(c)
  shallowCopy(result, c.code)
-
-proc interpret(code: ControlFlowGraph; pc: int, state: seq[PSym], comesFrom: int; threadId: int): (seq[PSym], int) =
-  var res = state
-  var pc = pc
-  while pc < code.len:
-    #echo threadId, " ", code[pc].kind
-    case code[pc].kind
-    of goto:
-      pc = pc + code[pc].dest
-    of fork:
-      let target = pc + code[pc].dest
-      let (branchA, pcA) = interpret(code, pc+1, res, pc, threadId+1)
-      let (branchB, _) = interpret(code, target, res, pc, threadId+2)
-      # we add vars if they are in both branches:
-      for v in branchB:
-        if v in branchA:
-          if v notin res:
-            res.add v
-      pc = pcA+1
-    of join:
-      let target = pc + code[pc].dest
-      if comesFrom == target: return (res, pc)
-      inc pc
-    of use:
-      let v = code[pc].sym
-      if v notin res and v.kind != skParam:
-        echo "attempt to read uninitialized variable ", v.name.s
-      inc pc
-    of def:
-      let v = code[pc].sym
-      if v notin res:
-        res.add v
-      inc pc
-  return (res, pc)
-
-proc dataflowAnalysis*(s: PSym; body: PNode) =
-  let c = constructCfg(s, body)
-  #echoCfg c
-  discard interpret(c, 0, @[], -1, 1)
--- a/compiler/pragmas.nim
+++ b/compiler/pragmas.nim
@@ -24,7 +24,7 @@ const
    wCompilerProc, wNonReloadable, wCore, wProcVar, wDeprecated, wVarargs, wCompileTime, wMerge,
    wBorrow, wExtern, wImportCompilerProc, wThread, wImportCpp, wImportObjC,
    wAsmNoStackFrame, wError, wDiscardable, wNoInit, wCodegenDecl,
-    wGensym, wInject, wRaises, wTags, wLocks, wDelegator, wGcSafe, wOverride,
+    wGensym, wInject, wRaises, wTags, wLocks, wDelegator, wGcSafe,
    wConstructor, wExportNims, wUsed, wLiftLocals, wStacktrace, wLinetrace}
  converterPragmas* = procPragmas
  methodPragmas* = procPragmas+{wBase}-{wImportCpp}
@@ -64,7 +64,7 @@ const
  varPragmas* = {wImportc, wExportc, wVolatile, wRegister, wThreadVar, wNodecl,
    wMagic, wHeader, wDeprecated, wCompilerProc, wCore, wDynlib, wExtern,
    wImportCpp, wImportObjC, wError, wNoInit, wCompileTime, wGlobal,
-    wGensym, wInject, wCodegenDecl, wGuard, wGoto, wExportNims, wUsed}
+    wGensym, wInject, wCodegenDecl, wGuard, wGoto, wExportNims, wUsed, wCursor}
  constPragmas* = {wImportc, wExportc, wHeader, wDeprecated, wMagic, wNodecl,
    wExtern, wImportCpp, wImportObjC, wError, wGensym, wInject, wExportNims,
    wIntDefine, wStrDefine, wBoolDefine, wUsed, wCompilerProc, wCore}
@@ -863,8 +863,6 @@ proc singlePragma(c: PContext, sym: PSym, n: PNode, i: var int,
        incl(sym.loc.flags, lfNoDecl)
        # implies nodecl, because otherwise header would not make sense
        if sym.loc.r == nil: sym.loc.r = rope(sym.name.s)
-      of wOverride:
-        sym.flags.incl sfOverriden
      of wNosideeffect:
        noVal(c, it)
        if sym != nil:
@@ -1091,6 +1089,11 @@ proc singlePragma(c: PContext, sym: PSym, n: PNode, i: var int,
          invalidPragma(c, it)
        else:
          sym.flags.incl sfGoto
+      of wCursor:
+        if sym == nil or sym.kind notin {skVar, skLet}:
+          invalidPragma(c, it)
+        else:
+          sym.flags.incl sfCursor
      of wExportNims:
        if sym == nil: invalidPragma(c, it)
        else: magicsys.registerNimScriptSymbol(c.graph, sym)
--- a/compiler/wordrecg.nim
+++ b/compiler/wordrecg.nim
@@ -37,7 +37,7 @@ type
    wMagic, wThread, wFinal, wProfiler, wMemTracker, wObjChecks,
    wIntDefine, wStrDefine, wBoolDefine

-    wDestroy,
+    wCursor,

    wImmediate, wConstructor, wDestructor, wDelegator, wOverride,
    wImportCpp, wImportObjC,
@@ -126,7 +126,7 @@ const
    "magic", "thread", "final", "profiler", "memtracker", "objchecks",
    "intdefine", "strdefine", "booldefine",

-    "destroy",
+    "cursor",

    "immediate", "constructor", "destructor", "delegator", "override",
    "importcpp", "importobjc",
--- a/doc/nimc.rst
+++ b/doc/nimc.rst
@@ -517,6 +517,14 @@ on Linux::
  nim c --dynlibOverride:lua --passL:liblua.lib program.nim


+Cursor pragma
+=============
+
+The ``.cursor`` pragma is a temporary tool for optimization purposes
+and this property will be computed by Nim's optimizer eventually. Thus it
+remains undocumented.
+
+
 Backend language options
 ========================