first (non working) implementation of global thread analysis

2026-02-14 23:33:28 +00:00 · 2011-06-13 01:40:22 +02:00
parent 9365cb710e
commit c019d17561
9 changed files with 338 additions and 97 deletions
--- a/compiler/ast.nim
+++ b/compiler/ast.nim
@@ -309,7 +309,8 @@ type
  TSymKinds* = set[TSymKind]

  TMagic* = enum # symbols that require compiler magic:
-    mNone, mDefined, mDefinedInScope, mLow, mHigh, mSizeOf, mIs, mEcho, 
+    mNone, mDefined, mDefinedInScope, mLow, mHigh, mSizeOf, mIs, 
+    mEcho, mCreateThread,
    mUnaryLt, mSucc, 
    mPred, mInc, mDec, mOrd, mNew, mNewFinalize, mNewSeq, mLengthOpenArray, 
    mLengthStr, mLengthArray, mLengthSeq, mIncl, mExcl, mCard, mChr, mGCref, 
--- a/compiler/msgs.nim
+++ b/compiler/msgs.nim
@@ -86,12 +86,14 @@ type
    errCannotInterpretNodeX, errFieldXNotFound, errInvalidConversionFromTypeX, 
    errAssertionFailed, errCannotGenerateCodeForX, errXRequiresOneArgument, 
    errUnhandledExceptionX, errCyclicTree, errXisNoMacroOrTemplate, 
-    errXhasSideEffects, errIteratorExpected, errUser, warnCannotOpenFile, 
+    errXhasSideEffects, errIteratorExpected, errDifferentHeaps, 
+    errUser, 
+    warnCannotOpenFile, 
    warnOctalEscape, warnXIsNeverRead, warnXmightNotBeenInit, 
    warnCannotWriteMO2, warnCannotReadMO2, warnDeprecated, 
    warnSmallLshouldNotBeUsed, warnUnknownMagic, warnRedefinitionOfLabel, 
    warnUnknownSubstitutionX, warnLanguageXNotSupported, warnCommentXIgnored, 
-    warnXisPassedToProcVar, warnDerefDeprecated,
+    warnXisPassedToProcVar, warnDerefDeprecated, warnAnalysisLoophole,
    warnUser, 
    hintSuccess, hintSuccessX, 
    hintLineTooLong, hintXDeclaredButNotUsed, hintConvToBaseNotNeeded, 
@@ -308,6 +310,7 @@ const
    errXisNoMacroOrTemplate: "\'$1\' is no macro or template",
    errXhasSideEffects: "\'$1\' can have side effects", 
    errIteratorExpected: "iterator within for loop context expected",
+    errDifferentHeaps: "possible inconsistency of thread local heaps", 
    errUser: "$1", 
    warnCannotOpenFile: "cannot open \'$1\' [CannotOpenFile]",
    warnOctalEscape: "octal escape sequences do not exist; leading zero is ignored [OctalEscape]", 
@@ -324,6 +327,7 @@ const
    warnCommentXIgnored: "comment \'$1\' ignored [CommentXIgnored]", 
    warnXisPassedToProcVar: "\'$1\' is passed to a procvar; deprecated [XisPassedToProcVar]", 
    warnDerefDeprecated: "p^ is deprecated; use p[] instead [DerefDeprecated]",
+    warnAnalysisLoophole: "thread analysis incomplete due to indirect call '$1' [AnalysisLoophole]",
    warnUser: "$1 [User]", 
    hintSuccess: "operation successful [Success]", 
    hintSuccessX: "operation successful ($1 lines compiled; $2 sec total) [SuccessX]", 
@@ -341,11 +345,12 @@ const
    hintUser: "$1 [User]"]

 const 
-  WarningsToStr*: array[0..15, string] = ["CannotOpenFile", "OctalEscape", 
+  WarningsToStr*: array[0..16, string] = ["CannotOpenFile", "OctalEscape", 
    "XIsNeverRead", "XmightNotBeenInit", "CannotWriteMO2", "CannotReadMO2", 
    "Deprecated", "SmallLshouldNotBeUsed", "UnknownMagic", 
    "RedefinitionOfLabel", "UnknownSubstitutionX", "LanguageXNotSupported", 
-    "CommentXIgnored", "XisPassedToProcVar", "DerefDeprecated", "User"]
+    "CommentXIgnored", "XisPassedToProcVar", "DerefDeprecated",
+    "AnalysisLoophole", "User"]

  HintsToStr*: array[0..13, string] = ["Success", "SuccessX", "LineTooLong", 
    "XDeclaredButNotUsed", "ConvToBaseNotNeeded", "ConvFromXtoItselfNotNeeded", 
--- a/compiler/sem.nim
+++ b/compiler/sem.nim
@@ -13,7 +13,8 @@ import
  strutils, hashes, lists, options, lexer, ast, astalgo, trees, treetab, 
  wordrecg, ropes, msgs, os, condsyms, idents, renderer, types, platform, math, 
  magicsys, parser, nversion, nimsets, semdata, evals, semfold, importer, 
-  procfind, lookups, rodread, pragmas, passes, semtypinst, sigmatch, suggest
+  procfind, lookups, rodread, pragmas, passes, semtypinst, sigmatch, suggest,
+  semthreads

 proc semPass*(): TPass
 # implementation
--- a/compiler/semexprs.nim
+++ b/compiler/semexprs.nim
@@ -566,6 +566,12 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode =
  of mSizeOf: result = semSizeof(c, setMs(n, s))
  of mIs: result = semIs(c, setMs(n, s))
  of mEcho: result = semEcho(c, setMs(n, s))
+  of mCreateThread: 
+    result = semDirectOp(c, n, flags)
+    if optThreads in gGlobalOptions:
+      # XXX This analysis should be done as late as possible
+      # (forward references!)
+      semthreads.AnalyseThread(result)
  else: result = semDirectOp(c, n, flags)
  
 proc isTypeExpr(n: PNode): bool = 
--- a/compiler/semthreads.nim
+++ b/compiler/semthreads.nim
@@ -0,0 +1,316 @@
+#
+#
+#           The Nimrod Compiler
+#        (c) Copyright 2011 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Semantic analysis that deals with threads: Possible race conditions should
+## be reported some day.
+##
+## 
+## ========================
+## No heap sharing analysis
+## ========================
+##
+## The only crucial operation that can violate the heap invariants is the
+## write access. The analysis needs to distinguish between 'unknown', 'mine',
+## and 'theirs' memory and pointers. Assignments 'whatever <- unknown' are 
+## invalid, and so are 'theirs <- mine' but not 'mine <- theirs'. Since
+## strings and sequences are heap allocated they are affected too:
+##
+## .. code-block:: nimrod
+##   proc p() = 
+##     global = "alloc this string" # ugh!
+##
+## Thus the analysis is concerned with any type that contains a GC'ed
+## reference...
+## If the type system would distinguish between 'ref' and '!ref' and threads
+## could not have '!ref' as input parameters the analysis could simply need to
+## reject any write access to a global variable which contains GC'ed data.
+## However, '!ref' is not implemented yet and this scheme would be too
+## restrictive anyway.
+##
+## The assignment target is essential for the algorithm: only 
+## write access to heap locations and global variables are critical and need
+## to be checked. Access via 'var' parameters is no problem to analyse since
+## we need the arguments' locations in the analysis.
+##
+## However, this is tricky: 
+##  
+##  var x = globalVar     # 'x' points to 'theirs'
+##  while true:
+##    globalVar = x       # OK: 'theirs <- theirs'
+##    x = "new string"    # ugh: 'x is toUnknown'!
+##
+##  --> Solution: toUnknown is never allowed anywhere!
+##
+##
+## Beware that the same proc might need to be
+## analysed multiple times! Oh and watch out for recursion! Recursion is handled
+## by a stack of symbols that we are processing, if we come back to the same
+## symbol, we have to skip this check (assume no error in the recursive case).
+## However this is wrong. We need to check for the particular combination
+## of (procsym, threadOwner(arg1), threadOwner(arg2), ...)!
+
+import
+  ast, astalgo, strutils, hashes, options, msgs, idents, types, os,
+  renderer, tables
+
+type
+  TThreadOwner = enum
+    toUndefined, # not computed yet 
+    toVoid,      # no return type
+    toNil,       # cycle in computation or nil: can be overwritten
+    toTheirs,    # some other heap
+    toMine       # mine heap
+
+  TCall = object {.pure.}
+    callee: PSym              # what if callee is an indirect call?
+    args: seq[TThreadOwner]
+
+  PProcCtx = ref TProcCtx
+  TProcCtx = object {.pure.}
+    nxt: PProcCtx             # can be stacked
+    mapping: tables.TTable[int, TThreadOwner] # int = symbol ID
+    owner: PSym               # current owner
+
+var
+  computed = tables.initTable[TCall, TThreadOwner]()
+
+proc hash(c: TCall): THash =
+  result = hash(c.callee.id)
+  for a in items(c.args): result = result !& hash(ord(a))
+  result = !$result
+
+proc `==`(a, b: TCall): bool =
+  if a.callee != b.callee: return
+  if a.args.len != b.args.len: return
+  for i in 0..a.args.len-1:
+    if a.args[i] != b.args[i]: return
+  result = true
+
+proc newProcCtx(owner: PSym): PProcCtx =
+  assert owner != nil
+  new(result)
+  result.mapping = tables.InitTable[int, TThreadOwner]()
+  result.owner = owner
+
+proc analyse(c: PProcCtx, n: PNode): TThreadOwner
+
+proc analyseSym(c: PProcCtx, n: PNode): TThreadOwner =
+  var v = n.sym
+  result = c.mapping[v.id]
+  if result != toUndefined: return
+  case v.kind
+  of skVar:
+    if sfGlobal in v.flags:
+      result = if sfThreadVar in v.flags: toMine else: toTheirs
+    else:
+      result = toNil
+  of skTemp, skForVar: result = toNil
+  of skConst: result = toMine
+  of skParam: 
+    result = c.mapping[v.id]
+    if result == toUndefined:
+      InternalError(n.info, "param not set: " & v.name.s)
+  else:
+    result = toNil
+  c.mapping[v.id] = result
+
+proc lvalueSym(n: PNode): PNode =
+  result = n
+  while result.kind in {nkDotExpr, nkBracketExpr, nkDerefExpr, nkHiddenDeref}:
+    result = result.sons[0]
+
+proc writeAccess(c: PProcCtx, n: PNode, owner: TThreadOwner) =
+  if owner notin {toNil, toMine, toTheirs}:
+    InternalError(n.info, "writeAccess: " & $owner)
+  var a = lvalueSym(n)
+  if a.kind == nkSym: 
+    var v = a.sym
+    var lastOwner = analyseSym(c, a)
+    case lastOwner
+    of toNil:
+      c.mapping[v.id] = owner # fine, toNil can be overwritten
+    of toVoid, toUndefined: InternalError(n.info, "writeAccess")
+    of toTheirs, toMine:
+      if lastOwner != owner and owner != toNil:
+        LocalError(n.info, errDifferentHeaps)
+  else:
+    # we could not backtrack to a concrete symbol, but that's fine:
+    var lastOwner = analyseSym(c, n)
+    case lastOwner
+    of toNil: nil # fine, toNil can be overwritten
+    of toVoid, toUndefined: InternalError(n.info, "writeAccess")
+    of toTheirs, toMine:
+      if lastOwner != owner and owner != toNil:
+        LocalError(n.info, errDifferentHeaps)
+
+proc analyseAssign(c: PProcCtx, le, ri: PNode) =
+  var y = analyse(c, ri) # read access; ok
+  writeAccess(c, le, y)
+
+proc analyseAssign(c: PProcCtx, n: PNode) =
+  analyseAssign(c, n.sons[0], n.sons[1])
+
+proc analyseCall(c: PProcCtx, n: PNode): TThreadOwner =
+  var prc = n[0].sym
+  var newCtx = newProcCtx(prc)
+  var call: TCall
+  call.callee = prc
+  newSeq(call.args, n.len-1)
+  for i in 1..n.len-1:
+    call.args[i-1] = analyse(c, n[i])
+  if not computed.hasKey(call):
+    computed[call] = toUndefined # we are computing it
+    for i in 1..n.len-1: 
+      var formal = skipTypes(prc.typ, abstractInst).n.sons[i].sym 
+      newCtx.mapping[formal.id] = call.args[i-1]
+    pushInfoContext(n.info)
+    computed[call] = analyse(newCtx, prc.ast.sons[codePos])
+    popInfoContext()
+  else:
+    # ugh, cycle! We are already computing it but don't know the outcome yet...
+    if prc.typ.sons[0] == nil: result = toVoid
+    else: result = toNil
+
+proc analyseVarTuple(c: PProcCtx, n: PNode) =
+  if n.kind != nkVarTuple: InternalError(n.info, "analyseVarTuple")
+  var L = n.len
+  for i in countup(0, L-3): AnalyseAssign(c, n.sons[i], n.sons[L-1])
+
+proc analyseSingleVar(c: PProcCtx, a: PNode) =
+  if a.sons[2].kind != nkEmpty: AnalyseAssign(c, a.sons[0], a.sons[2])
+
+proc analyseVarSection(c: PProcCtx, n: PNode): TThreadOwner = 
+  for i in countup(0, sonsLen(n) - 1): 
+    var a = n.sons[i]
+    if a.kind == nkCommentStmt: continue 
+    if a.kind == nkIdentDefs: 
+      assert(a.sons[0].kind == nkSym)
+      analyseSingleVar(c, a)
+    else:
+      analyseVarTuple(c, a)
+  result = toVoid
+
+proc analyseConstSection(c: PProcCtx, t: PNode): TThreadOwner =
+  for i in countup(0, sonsLen(t) - 1): 
+    var it = t.sons[i]
+    if it.kind == nkCommentStmt: continue 
+    if it.kind != nkConstDef: InternalError(t.info, "analyseConstSection")
+    if sfFakeConst in it.sons[0].sym.flags: analyseSingleVar(c, it)
+  result = toVoid
+
+template aggregateOwner(result, ana: expr) =
+  var a = ana # eval once
+  if result != a:
+    if result == toNil: result = a
+    else: localError(n.info, errDifferentHeaps)
+
+proc analyse(c: PProcCtx, n: PNode): TThreadOwner =
+  case n.kind
+  of nkCall, nkInfix, nkPrefix, nkPostfix, nkCommand, 
+     nkCallStrLit, nkHiddenCallConv:
+    if n[0].kind != nkSym or n[0].sym.kind != skProc:
+      Message(n.info, warnAnalysisLoophole, renderTree(n))
+      result = toNil
+    else:
+      var prc = n[0].sym
+      # XXX create thread!?
+      case prc.magic
+      of mNew, mNewFinalize, mNewSeq, mSetLengthStr, mSetLengthSeq,
+          mAppendSeqElem, mReset, mAppendStrCh, mAppendStrStr:
+        writeAccess(c, n[1], toMine)
+        result = toVoid
+      of mSwap:
+        var a = analyse(c, n[2])
+        writeAccess(c, n[1], a)
+        writeAccess(c, n[2], a)
+        result = toVoid
+      else:
+        result = analyseCall(c, n)
+  of nkAsgn, nkFastAsgn:
+    analyseAssign(c, n)
+    result = toVoid
+  of nkSym: result = analyseSym(c, n)
+  of nkEmpty, nkNone: result = toVoid
+  of nkNilLit, nkCharLit..nkFloat64Lit: result = toNil
+  of nkStrLit..nkTripleStrLit: result = toMine
+  of nkDotExpr, nkBracketExpr, nkDerefExpr, nkHiddenDeref:
+    # field access:
+    # pointer deref or array access:
+    result = analyse(c, n.sons[0])    
+  of nkBind: result = analyse(c, n.sons[0])
+  of nkPar, nkCurly, nkBracket:
+    # container construction:
+    result = toNil # nothing until later
+    for i in 0..n.len-1: aggregateOwner(result, analyse(c, n[i]))
+  of nkAddr, nkHiddenAddr:
+    var a = lvalueSym(n)
+    if a.kind == nkSym:
+      result = analyseSym(c, a)
+      assert result in {toNil, toMine, toTheirs}
+      if result == toNil:
+        # assume toMine here for consistency:
+        c.mapping[a.sym.id] = toMine
+        result = toMine
+    else:
+      # should never really happen:
+      result = analyse(c, n.sons[0])
+  of nkIfExpr: 
+    result = toNil
+    for i in countup(0, sonsLen(n) - 1):
+      var it = n.sons[i]
+      case it.kind
+      of nkElifExpr:
+        discard analyse(c, it.sons[0])
+        aggregateOwner(result, analyse(c, it.sons[1]))
+      of nkElseExpr:
+        aggregateOwner(result, analyse(c, it.sons[0]))
+      else: internalError(n.info, "analyseIfExpr()")
+  of nkStmtListExpr, nkBlockExpr:
+    var n = if n.kind == nkBlockExpr: n.sons[1] else: n
+    var L = sonsLen(n)
+    for i in countup(0, L-2): discard analyse(c, n.sons[i])
+    if L > 0: result = analyse(c, n.sons[L-1])
+    else: result = toVoid
+  of nkHiddenStdConv, nkHiddenSubConv, nkConv, nkCast: 
+    result = analyse(c, n.sons[1])
+  of nkStringToCString, nkCStringToString, nkChckRangeF, nkChckRange64,
+     nkChckRange, nkCheckedFieldExpr, nkPassAsOpenArray, nkObjDownConv, 
+     nkObjUpConv:
+    result = analyse(c, n.sons[0])
+  of nkRaiseStmt:
+    var a = analyse(c, n.sons[0])
+    if a != toMine: LocalError(n.info, errDifferentHeaps)
+    result = toVoid
+  of nkVarSection: result = analyseVarSection(c, n)
+  of nkConstSection: result = analyseConstSection(c, n)
+  of nkTypeSection, nkCommentStmt: result = toVoid
+  of nkIfStmt, nkWhileStmt, nkTryStmt, nkCaseStmt, nkStmtList, nkBlockStmt:
+    for i in 0 .. <n.len: discard analyse(c, n[i])
+    result = toVoid
+  of nkBreakStmt, nkContinueStmt: result = toVoid
+  of nkReturnStmt, nkDiscardStmt: 
+    if n.sons[0].kind != nkEmpty: result = analyse(c, n.sons[0])
+    else: result = toVoid
+  of nkAsmStmt, nkPragma, nkIteratorDef, nkProcDef, nkMethodDef,
+     nkConverterDef, nkMacroDef, nkTemplateDef: 
+      result = toVoid
+  else: InternalError(n.info, "analysis not implemented for: " & $n.kind)
+
+proc AnalyseThread*(threadCreation: PNode) =
+  var n = threadCreation
+  # thread proc is second param of ``createThread``:
+  if n[2].kind != nkSym or n[2].sym.kind != skProc:
+    Message(n.info, warnAnalysisLoophole, renderTree(n))
+    return
+  var prc = n[2].sym
+  var c = newProcCtx(prc)
+  var formal = skipTypes(prc.typ, abstractInst).n.sons[1].sym 
+  c.mapping[formal.id] = toTheirs # thread receives foreign data!
+  discard analyse(c, prc.ast.sons[codePos])
+
--- a/gpl.html
+++ b/gpl.html
@@ -403,91 +403,4 @@ POSSIBILITY OF SUCH DAMAGES.


 <h2>END OF TERMS AND CONDITIONS</h2>
-
-
-
-<h2><a name="SEC4" href="gpl.html#TOC4">How to Apply These Terms to Your New Programs</a></h2>
-
-<p>
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-</p>
-<p>
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-</p>
-
-<pre>
-<var>one line to give the program's name and an idea of what it does.</var>
-Copyright (C) <var>yyyy</var>  <var>name of author</var>
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-</pre>
-
-<p>
-Also add information on how to contact you by electronic and paper mail.
-
-</p>
-<p>
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
-</p>
-
-<pre>
-Gnomovision version 69, Copyright (C) <var>year</var> <var>name of author</var>
-Gnomovision comes with ABSOLUTELY NO WARRANTY; for details
-type `show w'.  This is free software, and you are welcome
-to redistribute it under certain conditions; type `show c' 
-for details.
-</pre>
-
-<p>
-The hypothetical commands <samp>`show w'</samp> and <samp>`show c'</samp> should show
-the appropriate parts of the General Public License.  Of course, the
-commands you use may be called something other than <samp>`show w'</samp> and
-<samp>`show c'</samp>; they could even be mouse-clicks or menu items--whatever
-suits your program.
-
-</p>
-<p>
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary.  Here is a sample; alter the names:
-
-</p>
-
-<pre>
-Yoyodyne, Inc., hereby disclaims all copyright
-interest in the program `Gnomovision'
-(which makes passes at compilers) written 
-by James Hacker.
-
-<var>signature of Ty Coon</var>, 1 April 1989
-Ty Coon, President of Vice
-</pre>
-
-<p>
-This General Public License does not permit incorporating your program into
-proprietary programs.  If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library.  If this is what you want to do, use the GNU Library General
-Public License instead of this License.
 </body></html>
--- a/lib/system/threads.nim
+++ b/lib/system/threads.nim
@@ -347,7 +347,8 @@ proc destroyThread*[TParam](t: var TThread[TParam]) {.inline.} =
 proc createThread*[TParam](t: var TThread[TParam], 
                           tp: proc (param: TParam), 
                           param: TParam,
-                           stackSize = 1024*256*sizeof(int)) = 
+                           stackSize = 1024*256*sizeof(int)) {.
+                           magic: "CreateThread".} = 
  ## creates a new thread `t` and starts its execution. Entry point is the
  ## proc `tp`. `param` is passed to `tp`.
  t.data = param
--- a/start.bat
+++ b/start.bat
@@ -3,3 +3,4 @@ REM COLOR 0A
 SET NIMRODPATH=.
 SET PATH=%NIMRODPATH%\bin;%NIMRODPATH%\dist\mingw\bin;%PATH%
 cmd
+
--- a/todo.txt
+++ b/todo.txt
@@ -1,7 +1,4 @@
 * codegen for threadvars
-* two issues for thread local heaps:
-  - must prevent to construct a data structure that contains memory 
-    from different heaps:  n.next = otherHeapPtr

 * add --deadlock_prevention:on|off switch? timeout for locks?
 * test the sort implementation again