progress for the 'parallel' statement

2026-06-05 03:14:08 +00:00 · 2014-05-14 01:51:44 +02:00
parent 6195dbe491
commit c43e8df90c
10 changed files with 122 additions and 94 deletions
--- a/compiler/ast.nim
+++ b/compiler/ast.nim
@@ -605,9 +605,9 @@ const
  # thus cannot be overloaded (also documented in the spec!):
  SpecialSemMagics* = {
    mDefined, mDefinedInScope, mCompiles, mLow, mHigh, mSizeOf, mIs, mOf, 
-    mEcho, mShallowCopy, mExpandToAst}
+    mEcho, mShallowCopy, mExpandToAst, mParallel}

-type 
+type
  PNode* = ref TNode
  TNodeSeq* = seq[PNode]
  PType* = ref TType
--- a/compiler/ccgexprs.nim
+++ b/compiler/ccgexprs.nim
@@ -1638,6 +1638,9 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) =
  of mSpawn:
    let n = lowerings.wrapProcForSpawn(p.module.module, e.sons[1])
    expr(p, n, d)
+  of mParallel:
+    let n = semparallel.liftParallel(p.module.module, e)
+    expr(p, n, d)
  else: internalError(e.info, "genMagicExpr: " & $op)

 proc genConstExpr(p: BProc, n: PNode): PRope
--- a/compiler/cgen.nim
+++ b/compiler/cgen.nim
@@ -14,7 +14,8 @@ import
  options, intsets,
  nversion, nimsets, msgs, crc, bitsets, idents, lists, types, ccgutils, os,
  times, ropes, math, passes, rodread, wordrecg, treetab, cgmeth,
-  rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings
+  rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings,
+  semparallel

 when options.hasTinyCBackend:
  import tccgen
--- a/compiler/guards.nim
+++ b/compiler/guards.nim
@@ -160,13 +160,13 @@ proc buildCall(op: PSym; a: PNode): PNode =
  result.sons[1] = a

 proc buildCall(op: PSym; a, b: PNode): PNode =
-  result = newNodeI(nkCall, a.info, 3)
+  result = newNodeI(nkInfix, a.info, 3)
  result.sons[0] = newSymNode(op)
  result.sons[1] = a
  result.sons[2] = b

 proc `+@`*(a: PNode; b: BiggestInt): PNode =
-  opAdd.buildCall(a, nkIntLit.newIntNode(b))
+  (if b != 0: opAdd.buildCall(a, nkIntLit.newIntNode(b)) else: a)

 proc `|+|`(a, b: PNode): PNode =
  result = copyNode(a)
--- a/compiler/lowerings.nim
+++ b/compiler/lowerings.nim
@@ -68,6 +68,7 @@ proc addField*(obj: PType; s: PSym) =
  var field = newSym(skField, getIdent(s.name.s & $s.id), s.owner, s.info)
  let t = skipIntLit(s.typ)
  field.typ = t
+  assert t.kind != tyStmt
  field.position = sonsLen(obj.n)
  addSon(obj.n, newSymNode(field))

--- a/compiler/sem.nim
+++ b/compiler/sem.nim
@@ -15,7 +15,8 @@ import
  magicsys, parser, nversion, nimsets, semfold, importer,
  procfind, lookups, rodread, pragmas, passes, semdata, semtypinst, sigmatch,
  intsets, transf, vmdef, vm, idgen, aliases, cgmeth, lambdalifting,
-  evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity
+  evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity,
+  semparallel

 # implementation

--- a/compiler/semexprs.nim
+++ b/compiler/semexprs.nim
@@ -1388,11 +1388,6 @@ proc semDefined(c: PContext, n: PNode, onlyCurrentScope: bool): PNode =
  result.info = n.info
  result.typ = getSysType(tyBool)

-proc setMs(n: PNode, s: PSym): PNode = 
-  result = n
-  n.sons[0] = newSymNode(s)
-  n.sons[0].info = n.info
-
 proc expectMacroOrTemplateCall(c: PContext, n: PNode): PSym =
  ## The argument to the proc should be nkCall(...) or similar
  ## Returns the macro/template symbol
@@ -1584,6 +1579,11 @@ proc semShallowCopy(c: PContext, n: PNode, flags: TExprFlags): PNode =
  else:
    result = semDirectOp(c, n, flags)

+proc setMs(n: PNode, s: PSym): PNode = 
+  result = n
+  n.sons[0] = newSymNode(s)
+  n.sons[0].info = n.info
+
 proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = 
  # this is a hotspot in the compiler!
  # DON'T forget to update ast.SpecialSemMagics if you add a magic here!
@@ -1605,6 +1605,11 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode =
    checkSonsLen(n, 2)
    result = newStrNodeT(renderTree(n[1], {renderNoComments}), n)
    result.typ = getSysType(tyString)
+  of mParallel:
+    result = setMs(n, s)
+    var x = n.lastSon
+    if x.kind == nkDo: x = x.sons[bodyPos]
+    result.sons[1] = semStmt(c, x)
  else: result = semDirectOp(c, n, flags)

 proc semWhen(c: PContext, n: PNode, semCheck = true): PNode =
--- a/compiler/semmagic.nim
+++ b/compiler/semmagic.nim
@@ -1,7 +1,7 @@
 #
 #
 #           The Nimrod Compiler
-#        (c) Copyright 2013 Andreas Rumpf
+#        (c) Copyright 2014 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
--- a/compiler/semparallel.nim
+++ b/compiler/semparallel.nim
@@ -19,7 +19,11 @@
 # - passed slices need to be ensured to be disjoint (+)
 # - output slices need special logic

-import lowerings, guards, sempass2
+import
+  ast, astalgo, idents, lowerings, magicsys, guards, sempass2, msgs,
+  renderer
+from trees import getMagic
+from strutils import `%`

 discard """

@@ -75,12 +79,17 @@ proc initAnalysisCtx(): AnalysisCtx =
  result.args = @[]
  result.guards = @[]

-proc getSlot(c: var AnalysisCtx; s: PSym): ptr MonotonicVar =
-  var L = c.locals.len
-  for i in 0.. <L:
-    if c.locals[i].v == s: return addr(c.locals[i])
+proc lookupSlot(c: AnalysisCtx; s: PSym): int =
+  for i in 0.. <c.locals.len:
+    if c.locals[i].v == s: return i
+  return -1
+
+proc getSlot(c: var AnalysisCtx; v: PSym): ptr MonotonicVar =
+  let s = lookupSlot(c, v)
+  if s >= 0: return addr(c.locals[s])
+  let L = c.locals.len
  c.locals.setLen(L+1)
-  c.locals[L].v = s
+  c.locals[L].v = v
  return addr(c.locals[L])

 proc getRoot(n: PNode): PSym =
@@ -110,25 +119,28 @@ proc gatherArgs(c: var AnalysisCtx; n: PNode) =
        c.args.add root
    gatherArgs(c, n[i])

-proc isLocal(s: PSym): bool = 
-  s.kind in {skResult, skTemp, skForVar, skVar, skLet} and
-        {sfAddrTaken, sfGlobal} * s.flags == {}
+proc isLocal(n: PNode): bool =
+  n.kind == nkSym and (let s = n.sym;
+    s.kind in {skResult, skTemp, skForVar, skVar, skLet} and
+          {sfAddrTaken, sfGlobal} * s.flags == {})

-proc checkLocal(c: var AnalysisCtx; n: PNode) =
-  if n.kind == nkSym and isLocal(n.sym):
-    let slot = c.getSlot(n[1].sym)
-    if slot.stride != nil:
+proc checkLocal(c: AnalysisCtx; n: PNode) =
+  if isLocal(n):
+    let s = c.lookupSlot(n.sym)
+    if s >= 0 and c.locals[s].stride != nil:
      localError(n.info, "invalid usage of counter after increment")
  else:
    for i in 0 .. <n.safeLen: checkLocal(c, n.sons[i])

+template `?`(x): expr = x.renderTree
+
 proc checkLe(c: AnalysisCtx; a, b: PNode) =
  case proveLe(c.guards, a, b)
-  of impUnkown:
-    localError(n.info, "cannot prove: " & a.renderTree & " <= " & b.renderTree)
+  of impUnknown:
+    localError(a.info, "cannot prove: " & ?a & " <= " & ?b)
  of impYes: discard
  of impNo:
-    localError(n.info, "can prove: " & a.renderTree & " > " & b.renderTree)
+    localError(a.info, "can prove: " & ?a & " > " & ?b)

 proc checkBounds(c: AnalysisCtx; arr, idx: PNode) =
  checkLe(c, arr.lowBound, idx)
@@ -139,11 +151,8 @@ proc addLowerBoundAsFacts(c: var AnalysisCtx) =
    if not v.blacklisted:
      c.guards.addFactLe(v.lower, newSymNode(v.v))

-proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: int) =
+proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: PNode) =
  checkLocal(c, n)
-  let le = n.sons[le]
-  let ri = n.sons[ri]
-  let x = n.sons[x]
  # perform static bounds checking here; and not later!
  let oldState = c.guards.len
  addLowerBoundAsFacts(c)
@@ -152,17 +161,15 @@ proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: int) =
  c.guards.setLen(oldState)
  c.slices.add((x, le, ri, c.currentSpawnId, c.inLoop > 0))

-template `?`(x): expr = x.renderTree
-
 proc overlap(m: TModel; x,y,c,d: PNode) =
-  #  X..Y and C..D overlap iff (X <= D and Y >= C)
+  #  X..Y and C..D overlap iff (X <= D and C <= Y)
  case proveLe(m, x, d)
-  of impUnkown:
+  of impUnknown:
    localError(x.info,
      "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" %
        [?x, ?d, ?x, ?y, ?c, ?d])
  of impYes:
-    case proveLe(m, y, c)
+    case proveLe(m, c, y)
    of impUnknown:
      localError(x.info,
        "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" %
@@ -175,12 +182,12 @@ proc overlap(m: TModel; x,y,c,d: PNode) =
 proc stride(c: AnalysisCtx; n: PNode): BiggestInt =
  # note: 0 if it cannot be determined is just right because then
  # we analyse 'i..i' and 'i+0 .. i+0' and these are not disjoint!
-  if n.kind == nkSym and isLocal(n.sym):
-    let slot = c.getSlot(n[1].sym)
-    if slot.stride != nil:
-      result = slot.stride.intVal
+  if isLocal(n):
+    let s = c.lookupSlot(n.sym)
+    if s >= 0 and c.locals[s].stride != nil:
+      result = c.locals[s].stride.intVal
  else:
-    for i in 0 .. <n.safeLen: inc(result, stride(c, n.sons[i]))
+    for i in 0 .. <n.safeLen: result += stride(c, n.sons[i])

 proc checkSlicesAreDisjoint(c: var AnalysisCtx) =
  # this is the only thing that we need to perform after we have traversed
@@ -209,10 +216,10 @@ proc checkSlicesAreDisjoint(c: var AnalysisCtx) =
  # be feasible for many useful examples. Instead we attach the slice to
  # a spawn and if the attached spawns differ, we bail out:
  for i in 0 .. high(c.slices):
-    for j in 0 .. high(c.slices):
+    for j in i+1 .. high(c.slices):
      let x = c.slices[i]
      let y = c.slices[j]
-      if i != j and x.spawnId != y.spawnId and guards.sameTree(x.x, y.x):
+      if x.spawnId != y.spawnId and guards.sameTree(x.x, y.x):
        if not x.inLoop and not y.inLoop:
          overlap(c.guards, x.a, x.b, y.a, y.b)
        else:
@@ -233,6 +240,8 @@ proc min(a, b: PNode): PNode =
  elif a.intVal < b.intVal: result = a
  else: result = b

+proc fromSystem(op: PSym): bool = sfSystemModule in getModule(op).flags
+
 proc analyseCall(c: var AnalysisCtx; n: PNode; op: PSym) =
  if op.magic == mSpawn:
    inc c.spawns
@@ -241,18 +250,18 @@ proc analyseCall(c: var AnalysisCtx; n: PNode; op: PSym) =
    gatherArgs(c, n[1])
    analyseSons(c, n)
    c.currentSpawnId = oldSpawnId
-  elif op.magic == mInc or (op.name.s == "+=" and sfSystemModule in op.owner.flags):
-    if n[1].kind == nkSym and n[1].isLocal:
-      let incr = n[1].skipConv
+  elif op.magic == mInc or (op.name.s == "+=" and op.fromSystem):
+    if n[1].isLocal:
+      let incr = n[2].skipConv
      if incr.kind in {nkCharLit..nkUInt32Lit} and incr.intVal > 0:
        let slot = c.getSlot(n[1].sym)
        slot.stride = min(slot.stride, incr)
    analyseSons(c, n)
-  elif op.name.s == "[]" and sfSystemModule in op.owner.flags:
-    c.addSlice(n, 1, 2, 3)
+  elif op.name.s == "[]" and op.fromSystem:
+    c.addSlice(n, n[1], n[2][1], n[2][2])
    analyseSons(c, n)
-  elif op.name.s == "[]=" and sfSystemModule in op.owner.flags:
-    c.addSlice(n, 1, 2, 3)
+  elif op.name.s == "[]=" and op.fromSystem:
+    c.addSlice(n, n[1], n[2][1], n[2][2])
    analyseSons(c, n)
  else:
    analyseSons(c, n)
@@ -296,18 +305,18 @@ proc analyse(c: var AnalysisCtx; n: PNode) =
  of nkAsgn, nkFastAsgn:
    # since we already ensure sfAddrTaken is not in s.flags, we only need to
    # prevent direct assignments to the monotonic variable:
-    if n[0].kind == nkSym and n[0].isLocal:
-      let slot = c.getSlot(it[j].sym)
+    if n[0].isLocal:
+      let slot = c.getSlot(n[0].sym)
      slot.blackListed = true
-    invalidateFacts(c.guards, n.sons[0])
+    invalidateFacts(c.guards, n[0])
    analyseSons(c, n)
-    addAsgnFact(c.guards, n.sons[0], n.sons[1])
+    addAsgnFact(c.guards, n[0], n[1])
  of nkCallKinds:
    # direct call:
    if n[0].kind == nkSym: analyseCall(c, n, n[0].sym)
    else: analyseSons(c, n)
-  of nkBracket:
-    c.addSlice(n, 0, 1, 1)
+  of nkBracketExpr:
+    c.addSlice(n, n[0], n[1], n[1])
    analyseSons(c, n)
  of nkReturnStmt, nkRaiseStmt, nkTryStmt:
    localError(n.info, "invalid control flow for 'parallel'")
@@ -315,14 +324,14 @@ proc analyse(c: var AnalysisCtx; n: PNode) =
    # or maybe we should generate a 'try' XXX
  of nkVarSection:
    for it in n:
-      if it.sons[it.len-1].kind != nkEmpty:
+      let value = it.lastSon
+      if value.kind != nkEmpty:
        for j in 0 .. it.len-3:
-          if it[j].kind == nkSym and it[j].isLocal:
+          if it[j].isLocal:
            let slot = c.getSlot(it[j].sym)
-            if slot.lower.isNil: slot.lower = it.sons[it.len-1]
+            if slot.lower.isNil: slot.lower = value
            else: internalError(it.info, "slot already has a lower bound")
-    analyseSons(c, n)
-
+        analyse(c, value)
  of nkCaseStmt: analyseCase(c, n)
  of nkIfStmt, nkIfExpr: analyseIf(c, n)
  of nkWhileStmt:
@@ -340,7 +349,7 @@ proc analyse(c: var AnalysisCtx; n: PNode) =
      setLen(c.locals, oldState)
      setLen(c.guards, oldFacts)
      # we know after the loop the negation holds:
-      if not containsNode(n.sons[1], nkBreakStmt):
+      if not hasSubnodeWith(n.sons[1], nkBreakStmt):
        addFactNeg(c.guards, n.sons[0])
    dec c.inLoop
  of nkTypeSection, nkProcDef, nkConverterDef, nkMethodDef, nkIteratorDef,
@@ -350,33 +359,42 @@ proc analyse(c: var AnalysisCtx; n: PNode) =
    analyseSons(c, n)

 proc transformSlices(n: PNode): PNode =
-  if n.kind in nkCalls and n[0].kind == nkSym:
+  if n.kind in nkCallKinds and n[0].kind == nkSym:
    let op = n[0].sym
-    if op.name.s == "[]" and sfSystemModule in op.owner.flags:
-      result = copyTree(n)
-      result.sons[0] = opSlice
+    if op.name.s == "[]" and op.fromSystem:
+      result = copyNode(n)
+      result.add opSlice.newSymNode
+      result.add n[1]
+      result.add n[2][1]
+      result.add n[2][2]
      return result
  if n.safeLen > 0:
-    result = copyNode(n.kind, n.info, n.len)
+    result = copyNode(n)
    for i in 0 .. < n.len:
-      result.sons[i] = transformSlices(n.sons[i])
+      result.add transformSlices(n.sons[i])
  else:
    result = n

 proc transformSpawn(owner: PSym; n, barrier: PNode): PNode =
-  if n.kind in nkCalls:
+  if n.kind in nkCallKinds:
    if n[0].kind == nkSym:
      let op = n[0].sym
      if op.magic == mSpawn:
        result = transformSlices(n)
-        return wrapProcForSpawn(owner, result, barrier)
+        return wrapProcForSpawn(owner, result[1], barrier)
  elif n.safeLen > 0:
-    result = copyNode(n.kind, n.info, n.len)
+    result = copyNode(n)
    for i in 0 .. < n.len:
-      result.sons[i] = transformSpawn(owner, n.sons[i], barrier)
+      result.add transformSpawn(owner, n.sons[i], barrier)
  else:
    result = n

+proc checkArgs(a: var AnalysisCtx; n: PNode) =
+  discard "too implement"
+
+proc generateAliasChecks(a: AnalysisCtx; result: PNode) =
+  discard "too implement"
+
 proc liftParallel*(owner: PSym; n: PNode): PNode =
  # this needs to be called after the 'for' loop elimination

@@ -390,22 +408,17 @@ proc liftParallel*(owner: PSym; n: PNode): PNode =
  analyse(a, body)
  if a.spawns == 0:
    localError(n.info, "'parallel' section without 'spawn'")
-  checkSlices(a)
+  checkSlicesAreDisjoint(a)
  checkArgs(a, body)

  var varSection = newNodeI(nkVarSection, n.info)
-  var temp = newSym(skTemp, "barrier", owner, n.info)
+  var temp = newSym(skTemp, getIdent"barrier", owner, n.info)
  temp.typ = magicsys.getCompilerProc("Barrier").typ
  incl(temp.flags, sfFromGeneric)
+  let tempNode = newSymNode(temp)
+  varSection.addVar tempNode

-  var vpart = newNodeI(nkIdentDefs, n.info, 3)
-  vpart.sons[0] = newSymNode(temp)
-  vpart.sons[1] = ast.emptyNode
-  vpart.sons[2] = indirectAccess(castExpr, field, n.info)
-  varSection.add vpart
-
-  barrier = genAddrOf(vpart[0])
-
+  let barrier = genAddrOf(tempNode)
  result = newNodeI(nkStmtList, n.info)
  generateAliasChecks(a, result)
  result.add varSection
--- a/lib/pure/concurrency/threadpool.nim
+++ b/lib/pure/concurrency/threadpool.nim
@@ -74,12 +74,20 @@ type
    data: pointer
    ready: bool # put it here for correct alignment!
    initialized: bool # whether it has even been initialized
+    shutdown: bool # the pool requests to shut down this worker thread

 proc nimArgsPassingDone(p: pointer) {.compilerProc.} =
  let w = cast[ptr Worker](p)
  signal(w.taskStarted)

+const
+  MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 256 threads
+                           ## should be good enough for anybody ;-)
+
 var
+  currentPoolSize: int
+  maxPoolSize = MaxThreadPoolSize
+  minPoolSize = 4
  gSomeReady = createCondVar()
  readyWorker: ptr Worker

@@ -91,15 +99,9 @@ proc slave(w: ptr Worker) {.thread.} =
    await(w.taskArrived)
    assert(not w.ready)
    w.f(w, w.data)
-
-const
-  MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 256 threads
-                           ## should be good enough for anybody ;-)
-
-var
-  currentPoolSize: int
-  maxPoolSize = MaxThreadPoolSize
-  minPoolSize = 4
+    if w.shutdown:
+      w.shutdown = false
+      atomicDec currentPoolSize

 proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) =
  ## sets the minimal thread pool size. The default value of this is 4.
@@ -183,13 +185,15 @@ proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} =
            if not workersData[currentPoolSize].initialized:
              activateThread(currentPoolSize)
            let w = addr(workersData[currentPoolSize])
-            inc currentPoolSize
+            atomicInc currentPoolSize
            if selectWorker(w, fn, data):
              release(stateLock)
              return
            # else we didn't succeed but some other thread, so do nothing.
        of doShutdownThread:
-          if currentPoolSize > minPoolSize: dec currentPoolSize
+          if currentPoolSize > minPoolSize:
+            let w = addr(workersData[currentPoolSize-1])
+            w.shutdown = true
          # we don't free anything here. Too dangerous.
        release(stateLock)
      # else the acquire failed, but this means some