From 6195dbe491ccd864c5dcb59f87826291ac1f1ff4 Mon Sep 17 00:00:00 2001 From: Araq Date: Mon, 12 May 2014 11:12:37 +0200 Subject: [PATCH 01/13] initial non-compiling version of 'parallel' --- compiler/guards.nim | 191 +++++++++++-- compiler/lowerings.nim | 22 +- compiler/semparallel.nim | 414 ++++++++++++++++++++++++++++ compiler/sempass2.nim | 4 +- compiler/vm.nim | 5 +- config/nimrod.cfg | 1 + lib/pure/concurrency/cpuinfo.nim | 58 ++++ lib/pure/concurrency/cpuload.nim | 96 +++++++ lib/pure/concurrency/threadpool.nim | 210 ++++++++++++++ lib/pure/osproc.nim | 38 +-- lib/system.nim | 3 - lib/system/atomics.nim | 31 ++- lib/system/sysspawn.nim | 47 ++-- tests/system/tsysspawn.nim | 10 +- tests/system/tsysspawnbadarg.nim | 2 + web/news.txt | 17 ++ 16 files changed, 1058 insertions(+), 91 deletions(-) create mode 100644 compiler/semparallel.nim create mode 100644 lib/pure/concurrency/cpuinfo.nim create mode 100644 lib/pure/concurrency/cpuload.nim create mode 100644 lib/pure/concurrency/threadpool.nim diff --git a/compiler/guards.nim b/compiler/guards.nim index f475f50680..57cd73b11a 100644 --- a/compiler/guards.nim +++ b/compiler/guards.nim @@ -9,7 +9,8 @@ ## This module implements the 'implies' relation for guards. 
-import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents +import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents, + saturate const someEq = {mEqI, mEqI64, mEqF64, mEqEnum, mEqCh, mEqB, mEqRef, mEqProc, @@ -25,6 +26,17 @@ const someIn = {mInRange, mInSet} + someHigh = {mHigh} + # we don't list unsigned here because wrap around semantics suck for + # proving anything: + someAdd = {mAddI, mAddI64, mAddF64, mSucc} + someSub = {mSubI, mSubI64, mSubF64, mPred} + someMul = {mMulI, mMulI64, mMulF64} + someDiv = {mDivI, mDivI64, mDivF64} + someMod = {mModI, mModI64} + someMax = {mMaxI, mMaxI64, mMaxF64} + someMin = {mMinI, mMinI64, mMinF64} + proc isValue(n: PNode): bool = n.kind in {nkCharLit..nkNilLit} proc isLocation(n: PNode): bool = not n.isValue @@ -69,19 +81,24 @@ proc isLetLocation(m: PNode, isApprox: bool): bool = proc interestingCaseExpr*(m: PNode): bool = isLetLocation(m, true) -proc getMagicOp(name: string, m: TMagic): PSym = +proc createMagic*(name: string, m: TMagic): PSym = result = newSym(skProc, getIdent(name), nil, unknownLineInfo()) result.magic = m let - opLe = getMagicOp("<=", mLeI) - opLt = getMagicOp("<", mLtI) - opAnd = getMagicOp("and", mAnd) - opOr = getMagicOp("or", mOr) - opNot = getMagicOp("not", mNot) - opIsNil = getMagicOp("isnil", mIsNil) - opContains = getMagicOp("contains", mInSet) - opEq = getMagicOp("==", mEqI) + opLe = createMagic("<=", mLeI) + opLt = createMagic("<", mLtI) + opAnd = createMagic("and", mAnd) + opOr = createMagic("or", mOr) + opNot = createMagic("not", mNot) + opIsNil = createMagic("isnil", mIsNil) + opContains = createMagic("contains", mInSet) + opEq = createMagic("==", mEqI) + opAdd = createMagic("+", mAddI) + opSub = createMagic("-", mSubI) + opMul = createMagic("*", mMulI) + opDiv = createMagic("div", mDivI) + opLen = createMagic("len", mLengthSeq) proc swapArgs(fact: PNode, newOp: PSym): PNode = result = newNodeI(nkCall, fact.info, 3) @@ -137,17 +154,118 @@ proc neg(n: 
PNode): PNode = result.sons[0] = newSymNode(opNot) result.sons[1] = n -proc buildIsNil(arg: PNode): PNode = - result = newNodeI(nkCall, arg.info, 2) - result.sons[0] = newSymNode(opIsNil) - result.sons[1] = arg +proc buildCall(op: PSym; a: PNode): PNode = + result = newNodeI(nkCall, a.info, 2) + result.sons[0] = newSymNode(op) + result.sons[1] = a + +proc buildCall(op: PSym; a, b: PNode): PNode = + result = newNodeI(nkCall, a.info, 3) + result.sons[0] = newSymNode(op) + result.sons[1] = a + result.sons[2] = b + +proc `+@`*(a: PNode; b: BiggestInt): PNode = + opAdd.buildCall(a, nkIntLit.newIntNode(b)) + +proc `|+|`(a, b: PNode): PNode = + result = copyNode(a) + if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |+| b.intVal + else: result.floatVal = a.floatVal + b.floatVal + +proc `|*|`(a, b: PNode): PNode = + result = copyNode(a) + if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |*| b.intVal + else: result.floatVal = a.floatVal * b.floatVal + +proc zero(): PNode = nkIntLit.newIntNode(0) +proc one(): PNode = nkIntLit.newIntNode(1) +proc minusOne(): PNode = nkIntLit.newIntNode(-1) + +proc lowBound*(x: PNode): PNode = nkIntLit.newIntNode(firstOrd(x.typ)) +proc highBound*(x: PNode): PNode = + if x.typ.skipTypes(abstractInst).kind == tyArray: + nkIntLit.newIntNode(lastOrd(x.typ)) + else: + opAdd.buildCall(opLen.buildCall(x), minusOne()) + +proc canon*(n: PNode): PNode = + # XXX for now only the new code in 'semparallel' uses this + if n.safeLen >= 1: + result = newNodeI(n.kind, n.info, n.len) + for i in 0 .. 
< n.safeLen: + result.sons[i] = canon(n.sons[i]) + else: + result = n + case result.getMagic + of someEq, someAdd, someMul, someMin, someMax: + # these are symmetric; put value as last: + if result.sons[1].isValue and not result.sons[2].isValue: + result = swapArgs(result, result.sons[0].sym) + # (4 + foo) + 2 --> (foo + 4) + 2 + of someHigh: + # high == len+(-1) + result = opAdd.buildCall(opLen.buildCall(result[1]), minusOne()) + of mUnaryMinusI, mUnaryMinusI64: + result = buildCall(opAdd, result[1], newIntNode(nkIntLit, -1)) + of someSub: + # x - 4 --> x + (-4) + var b = result[2] + if b.kind in {nkCharLit..nkUInt64Lit} and b.intVal != low(BiggestInt): + b = copyNode(b) + b.intVal = -b.intVal + result = buildCall(opAdd, result[1], b) + elif b.kind in {nkFloatLit..nkFloat64Lit}: + b = copyNode(b) + b.floatVal = -b.floatVal + result = buildCall(opAdd, result[1], b) + of someLen: + result.sons[0] = opLen.newSymNode + else: discard + + # re-association: + # (foo+5)+5 --> foo+10; same for '*' + case result.getMagic + of someAdd: + if result[2].isValue and + result[1].getMagic in someAdd and result[1][2].isValue: + result = opAdd.buildCall(result[1][1], result[1][2] |+| result[2]) + of someMul: + if result[2].isValue and + result[1].getMagic in someMul and result[1][2].isValue: + result = opAdd.buildCall(result[1][1], result[1][2] |*| result[2]) + else: discard + + # most important rule: (x-4) < a.len --> x < a.len+4 + case result.getMagic + of someLe, someLt: + let x = result[1] + let y = result[2] + if x.kind in nkCallKinds and x.len == 3 and x[2].isValue and + isLetLocation(x[1], true): + case x.getMagic + of someSub: + result = buildCall(result[0].sym, x[1], opAdd.buildCall(y, x[2])) + of someAdd: + result = buildCall(result[0].sym, x[1], opSub.buildCall(y, x[2])) + else: discard + elif y.kind in nkCallKinds and y.len == 3 and y[2].isValue and + isLetLocation(y[1], true): + # a.len < x-3 + case y.getMagic + of someSub: + result = buildCall(result[0].sym, y[1], 
opAdd.buildCall(x, y[2])) + of someAdd: + result = buildCall(result[0].sym, y[1], opSub.buildCall(x, y[2])) + else: discard + else: discard proc usefulFact(n: PNode): PNode = case n.getMagic of someEq: if skipConv(n.sons[2]).kind == nkNilLit and ( isLetLocation(n.sons[1], false) or isVar(n.sons[1])): - result = buildIsNil(n.sons[1]) + result = opIsNil.buildCall(n.sons[1]) else: if isLetLocation(n.sons[1], true) or isLetLocation(n.sons[2], true): # XXX algebraic simplifications! 'i-1 < a.len' --> 'i < a.len+1' @@ -217,7 +335,7 @@ proc addFactNeg*(m: var TModel, n: PNode) = let n = n.neg if n != nil: addFact(m, n) -proc sameTree(a, b: PNode): bool = +proc sameTree*(a, b: PNode): bool = result = false if a == b: result = true @@ -519,7 +637,46 @@ proc doesImply*(facts: TModel, prop: PNode): TImplication = if result != impUnknown: return proc impliesNotNil*(facts: TModel, arg: PNode): TImplication = - result = doesImply(facts, buildIsNil(arg).neg) + result = doesImply(facts, opIsNil.buildCall(arg).neg) + +proc proveLe*(m: TModel; a, b: PNode): TImplication = + let res = canon(opLe.buildCall(a, b)) + # we hardcode lots of axioms here: + let a = res[1] + let b = res[2] + # 0 <= 3 + if a.isValue and b.isValue: + return if leValue(a, b): impYes else: impNo + + # use type information too: x <= 4 iff high(x) <= 4 + if b.isValue and a.typ != nil and a.typ.isOrdinalType: + if lastOrd(a.typ) <= b.intVal: return impYes + # 3 <= x iff low(x) <= 3 + if a.isValue and b.typ != nil and b.typ.isOrdinalType: + if firstOrd(b.typ) <= a.intVal: return impYes + + # x <= x + if sameTree(a, b): return impYes + + # x <= x+c iff 0 <= c + if b.getMagic in someAdd and sameTree(a, b[1]): + return proveLe(m, zero(), b[2]) + + # x <= x*c if 1 <= c and 0 <= x: + if b.getMagic in someMul and sameTree(a, b[1]): + if proveLe(m, one(), b[2]) == impYes and proveLe(m, zero(), a) == impYes: + return impYes + + # x div c <= x if 1 <= c and 0 <= x: + if a.getMagic in someDiv and sameTree(a[1], b): + if 
proveLe(m, one(), a[2]) == impYes and proveLe(m, zero(), b) == impYes: + return impYes + + # use the knowledge base: + return doesImply(m, res) + +proc addFactLe*(m: var TModel; a, b: PNode) = + m.add canon(opLe.buildCall(a, b)) proc settype(n: PNode): PType = result = newType(tySet, n.typ.owner) diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 1b9e5fe0f4..93bfd84257 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -114,11 +114,15 @@ proc callCodegenProc*(name: string, arg1: PNode; if arg3 != nil: result.add arg3 proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; - varSection, call: PNode): PSym = + varSection, call, barrier: PNode): PSym = var body = newNodeI(nkStmtList, f.info) body.add varSection + if barrier != nil: + body.add callCodeGenProc("barrierEnter", barrier) body.add callCodeGenProc("nimArgsPassingDone", newSymNode(threadParam)) body.add call + if barrier != nil: + body.add callCodeGenProc("barrierLeave", barrier) var params = newNodeI(nkFormalParams, f.info) params.add emptyNode @@ -146,7 +150,7 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode = result.typ = newType(tyPtr, objType.owner) result.typ.rawAddSon(objType) -proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = +proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode = result = newNodeI(nkStmtList, n.info) if n.kind notin nkCallKinds or not n.typ.isEmptyType: localError(n.info, "'spawn' takes a call expression of type void") @@ -162,6 +166,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = threadParam.typ = ptrType argsParam.typ = ptrType argsParam.position = 1 + var objType = createObj(owner, n.info) incl(objType.flags, tfFinal) let castExpr = createCastExpr(argsParam, objType) @@ -223,6 +228,17 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode = call.add(newSymNode(temp)) - let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call) + var barrierAsExpr: PNode = nil + if 
barrier != nil: + let typ = newType(tyPtr, owner) + typ.rawAddSon(magicsys.getCompilerProc("Barrier").typ) + var field = newSym(skField, getIdent"barrier", owner, n.info) + field.typ = typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), barrier) + barrierAsExpr = indirectAccess(castExpr, field, n.info) + + let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call, + barrierAsExpr) result.add callCodeGenProc("nimSpawn", wrapper.newSymNode, genAddrOf(scratchObj.newSymNode)) diff --git a/compiler/semparallel.nim b/compiler/semparallel.nim new file mode 100644 index 0000000000..34a1f3af82 --- /dev/null +++ b/compiler/semparallel.nim @@ -0,0 +1,414 @@ +# +# +# The Nimrod Compiler +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Semantic checking for 'parallel'. + +# - slices should become "nocopy" to openArray (+) +# - need to perform bound checks (+) +# +# - parallel needs to insert a barrier (+) +# - passed arguments need to be ensured to be "const" +# - what about 'f(a)'? --> f shouldn't have side effects anyway +# - passed arrays need to be ensured not to alias +# - passed slices need to be ensured to be disjoint (+) +# - output slices need special logic + +import lowerings, guards, sempass2 + +discard """ + +one major problem: + spawn f(a[i]) + inc i + spawn f(a[i]) +is valid, but + spawn f(a[i]) + spawn f(a[i]) + inc i +is not! However, + spawn f(a[i]) + if guard: inc i + spawn f(a[i]) +is not valid either! --> We need a flow dependent analysis here. + +However: + while foo: + spawn f(a[i]) + inc i + spawn f(a[i]) + +Is not valid either! --> We should really restrict 'inc' to loop endings? + +The heuristic that we implement here (that has no false positives) is: Usage +of 'i' in a slice *after* we determined the stride is invalid! 
+""" + +type + TDirection = enum + ascending, descending + MonotonicVar = object + v: PSym + lower, upper, stride: PNode + dir: TDirection + blacklisted: bool # blacklisted variables that are not monotonic + AnalysisCtx = object + locals: seq[MonotonicVar] + slices: seq[tuple[x,a,b: PNode, spawnId: int, inLoop: bool]] + guards: TModel # nested guards + args: seq[PSym] # args must be deeply immutable + spawns: int # we can check that at least 1 spawn is used in + # the 'parallel' section + currentSpawnId: int + inLoop: int + +let opSlice = createMagic("slice", mSlice) + +proc initAnalysisCtx(): AnalysisCtx = + result.locals = @[] + result.slices = @[] + result.args = @[] + result.guards = @[] + +proc getSlot(c: var AnalysisCtx; s: PSym): ptr MonotonicVar = + var L = c.locals.len + for i in 0.. " & b.renderTree) + +proc checkBounds(c: AnalysisCtx; arr, idx: PNode) = + checkLe(c, arr.lowBound, idx) + checkLe(c, idx, arr.highBound) + +proc addLowerBoundAsFacts(c: var AnalysisCtx) = + for v in c.locals: + if not v.blacklisted: + c.guards.addFactLe(v.lower, newSymNode(v.v)) + +proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: int) = + checkLocal(c, n) + let le = n.sons[le] + let ri = n.sons[ri] + let x = n.sons[x] + # perform static bounds checking here; and not later!
+ let oldState = c.guards.len + addLowerBoundAsFacts(c) + c.checkBounds(x, le) + c.checkBounds(x, ri) + c.guards.setLen(oldState) + c.slices.add((x, le, ri, c.currentSpawnId, c.inLoop > 0)) + +template `?`(x): expr = x.renderTree + +proc overlap(m: TModel; x,y,c,d: PNode) = + # X..Y and C..D overlap iff (X <= D and Y >= C) + case proveLe(m, x, d) + of impUnkown: + localError(x.info, + "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" % + [?x, ?d, ?x, ?y, ?c, ?d]) + of impYes: + case proveLe(m, y, c) + of impUnknown: + localError(x.info, + "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" % + [?y, ?d, ?x, ?y, ?c, ?d]) + of impYes: + localError(x.info, "$#..$# not disjoint from $#..$#" % [?x, ?y, ?c, ?d]) + of impNo: discard + of impNo: discard + +proc stride(c: AnalysisCtx; n: PNode): BiggestInt = + # note: 0 if it cannot be determined is just right because then + # we analyse 'i..i' and 'i+0 .. i+0' and these are not disjoint! + if n.kind == nkSym and isLocal(n.sym): + let slot = c.getSlot(n[1].sym) + if slot.stride != nil: + result = slot.stride.intVal + else: + for i in 0 .. 0: + result = copyNode(n.kind, n.info, n.len) + for i in 0 .. < n.len: + result.sons[i] = transformSlices(n.sons[i]) + else: + result = n + +proc transformSpawn(owner: PSym; n, barrier: PNode): PNode = + if n.kind in nkCalls: + if n[0].kind == nkSym: + let op = n[0].sym + if op.magic == mSpawn: + result = transformSlices(n) + return wrapProcForSpawn(owner, result, barrier) + elif n.safeLen > 0: + result = copyNode(n.kind, n.info, n.len) + for i in 0 .. 
< n.len: + result.sons[i] = transformSpawn(owner, n.sons[i], barrier) + else: + result = n + +proc liftParallel*(owner: PSym; n: PNode): PNode = + # this needs to be called after the 'for' loop elimination + + # first pass: + # - detect monotonic local integer variables + # - detect used slices + # - detect used arguments + + var a = initAnalysisCtx() + let body = n.lastSon + analyse(a, body) + if a.spawns == 0: + localError(n.info, "'parallel' section without 'spawn'") + checkSlices(a) + checkArgs(a, body) + + var varSection = newNodeI(nkVarSection, n.info) + var temp = newSym(skTemp, "barrier", owner, n.info) + temp.typ = magicsys.getCompilerProc("Barrier").typ + incl(temp.flags, sfFromGeneric) + + var vpart = newNodeI(nkIdentDefs, n.info, 3) + vpart.sons[0] = newSymNode(temp) + vpart.sons[1] = ast.emptyNode + vpart.sons[2] = indirectAccess(castExpr, field, n.info) + varSection.add vpart + + barrier = genAddrOf(vpart[0]) + + result = newNodeI(nkStmtList, n.info) + generateAliasChecks(a, result) + result.add varSection + result.add callCodeGenProc("openBarrier", barrier) + result.add transformSpawn(owner, body, barrier) + result.add callCodeGenProc("closeBarrier", barrier) diff --git a/compiler/sempass2.nim b/compiler/sempass2.nim index 6afde5f059..c8ce5e7875 100644 --- a/compiler/sempass2.nim +++ b/compiler/sempass2.nim @@ -89,7 +89,7 @@ proc initVarViaNew(a: PEffects, n: PNode) = if n.kind != nkSym: return let s = n.sym if {tfNeedsInit, tfNotNil} * s.typ.flags <= {tfNotNil}: - # 'x' is not nil, but that doesn't mean it's not nil children + # 'x' is not nil, but that doesn't mean its "not nil" children # are initialized: initVar(a, n) @@ -478,7 +478,7 @@ proc trackBlock(tracked: PEffects, n: PNode) = else: track(tracked, n) -proc isTrue(n: PNode): bool = +proc isTrue*(n: PNode): bool = n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or n.kind == nkIntLit and n.intVal != 0 diff --git a/compiler/vm.nim b/compiler/vm.nim index 
218369fa1b..0c2c23987b 100644 --- a/compiler/vm.nim +++ b/compiler/vm.nim @@ -131,8 +131,9 @@ proc createStrKeepNode(x: var TFullReg) = nfAllConst in x.node.flags: # XXX this is hacky; tests/txmlgen triggers it: x.node = newNode(nkStrLit) - # debug x.node - #assert x.node.kind in {nkStrLit..nkTripleStrLit} + # It is not only hacky, it is also wrong for tgentemplate. The primary + # cause of bugs like these is that the VM does not properly distinguish + # between variable definitions (var foo = e) and variable updates (foo = e). template createStr(x) = x.node = newNode(nkStrLit) diff --git a/config/nimrod.cfg b/config/nimrod.cfg index 2817eac559..df3835ace7 100644 --- a/config/nimrod.cfg +++ b/config/nimrod.cfg @@ -16,6 +16,7 @@ arm.linux.gcc.linkerexe = "arm-linux-gcc" path="$lib/core" path="$lib/pure" path="$lib/pure/collections" +path="$lib/pure/concurrency" path="$lib/impure" path="$lib/wrappers" # path="$lib/wrappers/cairo" diff --git a/lib/pure/concurrency/cpuinfo.nim b/lib/pure/concurrency/cpuinfo.nim new file mode 100644 index 0000000000..dfa819f646 --- /dev/null +++ b/lib/pure/concurrency/cpuinfo.nim @@ -0,0 +1,58 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements procs to determine the number of CPUs / cores. + +include "system/inclrtl" + +import strutils, os + +when not defined(windows): + import posix + +when defined(linux): + import linux + +when defined(macosx) or defined(bsd): + const + CTL_HW = 6 + HW_AVAILCPU = 25 + HW_NCPU = 3 + proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer, + a: var csize, b: pointer, c: int): cint {. + importc: "sysctl", header: "".} + +proc countProcessors*(): int {.rtl, extern: "ncpi$1".} = + ## returns the number of the processors/cores the machine has. + ## Returns 0 if it cannot be detected.
+ when defined(windows): + var x = getEnv("NUMBER_OF_PROCESSORS") + if x.len > 0: result = parseInt(x.string) + elif defined(macosx) or defined(bsd): + var + mib: array[0..3, cint] + numCPU: int + len: csize + mib[0] = CTL_HW + mib[1] = HW_AVAILCPU + len = sizeof(numCPU) + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + if numCPU < 1: + mib[1] = HW_NCPU + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + result = numCPU + elif defined(hpux): + result = mpctl(MPC_GETNUMSPUS, nil, nil) + elif defined(irix): + var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "".}: cint + result = sysconf(SC_NPROC_ONLN) + else: + result = sysconf(SC_NPROCESSORS_ONLN) + if result <= 0: result = 1 + diff --git a/lib/pure/concurrency/cpuload.nim b/lib/pure/concurrency/cpuload.nim new file mode 100644 index 0000000000..3cf6a73920 --- /dev/null +++ b/lib/pure/concurrency/cpuload.nim @@ -0,0 +1,96 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a helper for a thread pool to determine whether +## creating a thread is a good idea. 
+ +when defined(windows): + import winlean, os, strutils, math + + proc `-`(a, b: TFILETIME): int64 = a.rdFileTime - b.rdFileTime +elif defined(linux): + from cpuinfo import countProcessors + +type + ThreadPoolAdvice* = enum + doNothing, + doCreateThread, # create additional thread for throughput + doShutdownThread # too many threads are busy, shutdown one + + ThreadPoolState* = object + when defined(windows): + prevSysKernel, prevSysUser, prevProcKernel, prevProcUser: TFILETIME + calls*: int + +proc advice*(s: var ThreadPoolState): ThreadPoolAdvice = + when defined(windows): + var + sysIdle, sysKernel, sysUser, + procCreation, procExit, procKernel, procUser: TFILETIME + if getSystemTimes(sysIdle, sysKernel, sysUser) == 0 or + getProcessTimes(THandle(-1), procCreation, procExit, + procKernel, procUser) == 0: + return doNothing + if s.calls > 0: + let + sysKernelDiff = sysKernel - s.prevSysKernel + sysUserDiff = sysUser - s.prevSysUser + + procKernelDiff = procKernel - s.prevProcKernel + procUserDiff = procUser - s.prevProcUser + + sysTotal = int(sysKernelDiff + sysUserDiff) + procTotal = int(procKernelDiff + procUserDiff) + # total CPU usage < 85% --> create a new worker thread. + # Measurements show that 100% and often even 90% is not reached even + # if all my cores are busy. 
+ if sysTotal == 0 or procTotal / sysTotal < 0.85: + result = doCreateThread + s.prevSysKernel = sysKernel + s.prevSysUser = sysUser + s.prevProcKernel = procKernel + s.prevProcUser = procUser + elif defined(linux): + proc fscanf(c: TFile, frmt: cstring) {.varargs, importc, + header: "".} + + var f = open("/proc/loadavg") + var b: float + var busy, total: int + fscanf(f,"%lf %lf %lf %ld/%ld", + addr b, addr b, addr b, addr busy, addr total) + f.close() + let cpus = countProcessors() + if busy-1 < cpus: + result = doCreateThread + elif busy-1 >= cpus*2: + result = doShutdownThread + else: + result = doNothing + else: + # XXX implement this for other OSes + result = doNothing + inc s.calls + +when isMainModule: + proc busyLoop() = + while true: + discard random(80) + os.sleep(100) + + spawn busyLoop() + spawn busyLoop() + spawn busyLoop() + spawn busyLoop() + + var s: ThreadPoolState + + for i in 1 .. 70: + echo advice(s) + os.sleep(1000) diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim new file mode 100644 index 0000000000..856820c6e0 --- /dev/null +++ b/lib/pure/concurrency/threadpool.nim @@ -0,0 +1,210 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2014 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Implements Nimrod's 'spawn'. 
+ +import cpuinfo, cpuload, locks + +{.push stackTrace:off.} + +type + CondVar = object + c: TCond + L: TLock + counter: int + +proc createCondVar(): CondVar = + initCond(result.c) + initLock(result.L) + +proc destroyCondVar(cv: var CondVar) {.inline.} = + deinitCond(cv.c) + deinitLock(cv.L) + +proc await(cv: var CondVar) = + acquire(cv.L) + while cv.counter <= 0: + wait(cv.c, cv.L) + dec cv.counter + release(cv.L) + +proc signal(cv: var CondVar) = + acquire(cv.L) + inc cv.counter + release(cv.L) + signal(cv.c) + +type + Barrier* {.compilerProc.} = object + counter: int + cv: CondVar + +proc barrierEnter*(b: ptr Barrier) {.compilerProc.} = + atomicInc b.counter + +proc barrierLeave*(b: ptr Barrier) {.compilerProc.} = + atomicDec b.counter + if b.counter <= 0: signal(b.cv) + +proc openBarrier*(b: ptr Barrier) {.compilerProc.} = + b.counter = 0 + b.cv = createCondVar() + +proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = + await(b.cv) + destroyCondVar(b.cv) + +{.pop.} + +# ---------------------------------------------------------------------------- + +type + WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.} + Worker = object + taskArrived: CondVar + taskStarted: CondVar #\ + # task data: + f: WorkerProc + data: pointer + ready: bool # put it here for correct alignment! + initialized: bool # whether it has even been initialized + +proc nimArgsPassingDone(p: pointer) {.compilerProc.} = + let w = cast[ptr Worker](p) + signal(w.taskStarted) + +var + gSomeReady = createCondVar() + readyWorker: ptr Worker + +proc slave(w: ptr Worker) {.thread.} = + while true: + w.ready = true + readyWorker = w + signal(gSomeReady) + await(w.taskArrived) + assert(not w.ready) + w.f(w, w.data) + +const + MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 
256 threads + ## should be good enough for anybody ;-) + +var + currentPoolSize: int + maxPoolSize = MaxThreadPoolSize + minPoolSize = 4 + +proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) = + ## sets the minimal thread pool size. The default value of this is 4. + minPoolSize = size + +proc setMaxPoolSize*(size: range[1..MaxThreadPoolSize]) = + ## sets the maximal thread pool size. The default value of this + ## is ``MaxThreadPoolSize``. + maxPoolSize = size + +var + workers: array[MaxThreadPoolSize, TThread[ptr Worker]] + workersData: array[MaxThreadPoolSize, Worker] + +proc activateThread(i: int) {.noinline.} = + workersData[i].taskArrived = createCondVar() + workersData[i].taskStarted = createCondVar() + workersData[i].initialized = true + createThread(workers[i], slave, addr(workersData[i])) + +proc setup() = + currentPoolSize = min(countProcessors(), MaxThreadPoolSize) + readyWorker = addr(workersData[0]) + for i in 0.. 0 + +proc spawn*(call: stmt) {.magic: "Spawn".} + ## always spawns a new task, so that the 'call' is never executed on + ## the calling thread. 'call' has to be proc call 'p(...)' where 'p' + ## is gcsafe and has 'void' as the return type. + +template spawnX*(call: stmt) = + ## spawns a new task if a CPU core is ready, otherwise executes the + ## call in the calling thread. Usually it is advised to + ## use 'spawn' in order to not block the producer for an unknown + ## amount of time. 'call' has to be proc call 'p(...)' where 'p' + ## is gcsafe and has 'void' as the return type. + if preferSpawn(): spawn call + else: call + +proc parallel*(body: stmt) {.magic: "Parallel".} + ## a parallel section can be used to execute a block in parallel. ``body`` + ## has to be in a DSL that is a particular subset of the language. Please + ## refer to the manual for further information.
+ +var + state: ThreadPoolState + stateLock: TLock + +initLock stateLock + +proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool = + if cas(addr w.ready, true, false): + w.data = data + w.f = fn + signal(w.taskArrived) + await(w.taskStarted) + result = true + +proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} = + # implementation of 'spawn' that is used by the code generator. + while true: + if selectWorker(readyWorker, fn, data): return + for i in 0.. minPoolSize: dec currentPoolSize + # we don't free anything here. Too dangerous. + release(stateLock) + # else the acquire failed, but this means some + # other thread succeeded, so we don't need to do anything here. + await(gSomeReady) + +proc sync*() = + ## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate + ## waiting, you have to use an explicit barrier. + while true: + var allReady = true + for i in 0 .. ".} - proc countProcessors*(): int {.rtl, extern: "nosp$1".} = ## returns the numer of the processors/cores the machine has. ## Returns 0 if it cannot be detected. 
- when defined(windows): - var x = getEnv("NUMBER_OF_PROCESSORS") - if x.len > 0: result = parseInt(x.string) - elif defined(macosx) or defined(bsd): - var - mib: array[0..3, cint] - numCPU: int - len: csize - mib[0] = CTL_HW - mib[1] = HW_AVAILCPU - len = sizeof(numCPU) - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - if numCPU < 1: - mib[1] = HW_NCPU - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - result = numCPU - elif defined(hpux): - result = mpctl(MPC_GETNUMSPUS, nil, nil) - elif defined(irix): - var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "".}: cint - result = sysconf(SC_NPROC_ONLN) - else: - result = sysconf(SC_NPROCESSORS_ONLN) - if result <= 0: result = 1 + result = cpuinfo.countProcessors() proc execProcesses*(cmds: openArray[string], options = {poStdErrToStdOut, poParentStreams}, diff --git a/lib/system.nim b/lib/system.nim index ad98540a7c..fbd905afab 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -2934,6 +2934,3 @@ when not defined(booting): template isStatic*(x): expr = compiles(static(x)) # checks whether `x` is a value known at compile-time - -when hasThreadSupport: - when hostOS != "standalone": include "system/sysspawn" diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim index b1a96b2097..c6c603b19e 100644 --- a/lib/system/atomics.nim +++ b/lib/system/atomics.nim @@ -1,13 +1,14 @@ # # # Nimrod's Runtime Library -# (c) Copyright 2012 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## Atomic operations for Nimrod. 
+{.push stackTrace:off.} when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport: type @@ -203,3 +204,31 @@ proc atomicDec*(memLoc: var int, x: int = 1): int = else: dec(memLoc, x) result = memLoc + +when defined(windows) and not defined(gcc): + proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 + {.importc: "InterlockedCompareExchange", header: "", cdecl.} + + proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool = + interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 + +else: + # this is valid for GCC and Intel C++ + proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool + {.importc: "__sync_bool_compare_and_swap", nodecl.} + # XXX is this valid for 'int'? + + +when (defined(x86) or defined(amd64)) and defined(gcc): + proc cpuRelax {.inline.} = + {.emit: """asm volatile("pause" ::: "memory");""".} +elif (defined(x86) or defined(amd64)) and defined(vcc): + proc cpuRelax {.importc: "YieldProcessor", header: "".} +elif defined(intelc): + proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".} +elif false: + from os import sleep + + proc cpuRelax {.inline.} = os.sleep(1) + +{.pop.} diff --git a/lib/system/sysspawn.nim b/lib/system/sysspawn.nim index dabf35a3e5..95cdba65d1 100644 --- a/lib/system/sysspawn.nim +++ b/lib/system/sysspawn.nim @@ -14,30 +14,6 @@ when not defined(NimString): {.push stackTrace:off.} -when (defined(x86) or defined(amd64)) and defined(gcc): - proc cpuRelax {.inline.} = - {.emit: """asm volatile("pause" ::: "memory");""".} -elif (defined(x86) or defined(amd64)) and defined(vcc): - proc cpuRelax {.importc: "YieldProcessor", header: "".} -elif defined(intelc): - proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".} -elif false: - from os import sleep - - proc cpuRelax {.inline.} = os.sleep(1) - -when defined(windows) and not defined(gcc): - proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 - {.importc: "InterlockedCompareExchange", header: 
"", cdecl.} - - proc cas(p: ptr bool; oldValue, newValue: bool): bool = - interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 - -else: - # this is valid for GCC and Intel C++ - proc cas(p: ptr bool; oldValue, newValue: bool): bool - {.importc: "__sync_bool_compare_and_swap", nodecl.} - # We declare our own condition variables here to get rid of the dummy lock # on Windows: @@ -54,6 +30,9 @@ proc createCondVar(): CondVar = initSysLock(result.stupidLock) #acquireSys(result.stupidLock) +proc destroyCondVar(c: var CondVar) {.inline.} = + deinitSysCond(c.c) + proc await(cv: var CondVar) = when defined(posix): acquireSys(cv.stupidLock) @@ -100,6 +79,26 @@ proc signal(cv: var FastCondVar) = #if cas(addr cv.slowPath, true, false): signal(cv.slow) +type + Barrier* {.compilerProc.} = object + counter: int + cv: CondVar + +proc barrierEnter*(b: ptr Barrier) {.compilerProc.} = + atomicInc b.counter + +proc barrierLeave*(b: ptr Barrier) {.compilerProc.} = + atomicDec b.counter + if b.counter <= 0: signal(b.cv) + +proc openBarrier*(b: ptr Barrier) {.compilerProc.} = + b.counter = 0 + b.cv = createCondVar() + +proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = + await(b.cv) + destroyCondVar(b.cv) + {.pop.} # ---------------------------------------------------------------------------- diff --git a/tests/system/tsysspawn.nim b/tests/system/tsysspawn.nim index 0388918aa8..fc7921b0e1 100644 --- a/tests/system/tsysspawn.nim +++ b/tests/system/tsysspawn.nim @@ -4,20 +4,22 @@ discard """ cmd: "nimrod $target --threads:on $options $file" """ +import threadpool + var x, y = 0 proc p1 = - for i in 0 .. 1_000_000: + for i in 0 .. 10_000: discard - inc x + atomicInc x proc p2 = - for i in 0 .. 1_000_000: + for i in 0 .. 10_000: discard - inc y, 2 + atomicInc y, 2 for i in 0.. 
3: spawn(p1()) diff --git a/tests/system/tsysspawnbadarg.nim b/tests/system/tsysspawnbadarg.nim index ace074602c..ce3c5611b5 100644 --- a/tests/system/tsysspawnbadarg.nim +++ b/tests/system/tsysspawnbadarg.nim @@ -4,4 +4,6 @@ discard """ cmd: "nimrod $target --threads:on $options $file" """ +import threadpool + spawn(1) diff --git a/web/news.txt b/web/news.txt index 0bbae7b7b9..b7403a3c72 100644 --- a/web/news.txt +++ b/web/news.txt @@ -2,6 +2,23 @@ News ==== +.. + 2014-06-29 Version 0.9.6 released + ================================= + + Changes affecting backwards compatibility + ----------------------------------------- + + - ``spawn`` now uses an elaborate self-adapting thread pool and as such + has been moved into its own module. So to use it, you now have to import + ``threadpool``. + + + Library Additions + ----------------- + + - Added module ``cpuinfo``. + - Added module ``threadpool``. 2014-04-21 Version 0.9.4 released From c43e8df90cc5d52c6c57452a28f433075bf66236 Mon Sep 17 00:00:00 2001 From: Araq Date: Wed, 14 May 2014 01:51:44 +0200 Subject: [PATCH 02/13] progress for the 'parallel' statement --- compiler/ast.nim | 4 +- compiler/ccgexprs.nim | 3 + compiler/cgen.nim | 3 +- compiler/guards.nim | 4 +- compiler/lowerings.nim | 1 + compiler/sem.nim | 3 +- compiler/semexprs.nim | 15 ++- compiler/semmagic.nim | 2 +- compiler/semparallel.nim | 155 +++++++++++++++------------- lib/pure/concurrency/threadpool.nim | 26 +++-- 10 files changed, 122 insertions(+), 94 deletions(-) diff --git a/compiler/ast.nim b/compiler/ast.nim index 80b9e9bb27..58b01d5e8a 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -605,9 +605,9 @@ const # thus cannot be overloaded (also documented in the spec!): SpecialSemMagics* = { mDefined, mDefinedInScope, mCompiles, mLow, mHigh, mSizeOf, mIs, mOf, - mEcho, mShallowCopy, mExpandToAst} + mEcho, mShallowCopy, mExpandToAst, mParallel} -type +type PNode* = ref TNode TNodeSeq* = seq[PNode] PType* = ref TType diff --git 
a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 94a6f4781b..7fb6af8965 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -1638,6 +1638,9 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) = of mSpawn: let n = lowerings.wrapProcForSpawn(p.module.module, e.sons[1]) expr(p, n, d) + of mParallel: + let n = semparallel.liftParallel(p.module.module, e) + expr(p, n, d) else: internalError(e.info, "genMagicExpr: " & $op) proc genConstExpr(p: BProc, n: PNode): PRope diff --git a/compiler/cgen.nim b/compiler/cgen.nim index 8d66d7a3b1..3e5ac485b9 100644 --- a/compiler/cgen.nim +++ b/compiler/cgen.nim @@ -14,7 +14,8 @@ import options, intsets, nversion, nimsets, msgs, crc, bitsets, idents, lists, types, ccgutils, os, times, ropes, math, passes, rodread, wordrecg, treetab, cgmeth, - rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings + rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings, + semparallel when options.hasTinyCBackend: import tccgen diff --git a/compiler/guards.nim b/compiler/guards.nim index 57cd73b11a..551a112565 100644 --- a/compiler/guards.nim +++ b/compiler/guards.nim @@ -160,13 +160,13 @@ proc buildCall(op: PSym; a: PNode): PNode = result.sons[1] = a proc buildCall(op: PSym; a, b: PNode): PNode = - result = newNodeI(nkCall, a.info, 3) + result = newNodeI(nkInfix, a.info, 3) result.sons[0] = newSymNode(op) result.sons[1] = a result.sons[2] = b proc `+@`*(a: PNode; b: BiggestInt): PNode = - opAdd.buildCall(a, nkIntLit.newIntNode(b)) + (if b != 0: opAdd.buildCall(a, nkIntLit.newIntNode(b)) else: a) proc `|+|`(a, b: PNode): PNode = result = copyNode(a) diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 93bfd84257..704cfbcdd0 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -68,6 +68,7 @@ proc addField*(obj: PType; s: PSym) = var field = newSym(skField, getIdent(s.name.s & $s.id), s.owner, s.info) let t = skipIntLit(s.typ) field.typ = t + assert 
t.kind != tyStmt field.position = sonsLen(obj.n) addSon(obj.n, newSymNode(field)) diff --git a/compiler/sem.nim b/compiler/sem.nim index 7d129caf44..cf52d1cf0c 100644 --- a/compiler/sem.nim +++ b/compiler/sem.nim @@ -15,7 +15,8 @@ import magicsys, parser, nversion, nimsets, semfold, importer, procfind, lookups, rodread, pragmas, passes, semdata, semtypinst, sigmatch, intsets, transf, vmdef, vm, idgen, aliases, cgmeth, lambdalifting, - evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity + evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity, + semparallel # implementation diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim index 9ea93a15e9..505c289ea9 100644 --- a/compiler/semexprs.nim +++ b/compiler/semexprs.nim @@ -1388,11 +1388,6 @@ proc semDefined(c: PContext, n: PNode, onlyCurrentScope: bool): PNode = result.info = n.info result.typ = getSysType(tyBool) -proc setMs(n: PNode, s: PSym): PNode = - result = n - n.sons[0] = newSymNode(s) - n.sons[0].info = n.info - proc expectMacroOrTemplateCall(c: PContext, n: PNode): PSym = ## The argument to the proc should be nkCall(...) or similar ## Returns the macro/template symbol @@ -1584,6 +1579,11 @@ proc semShallowCopy(c: PContext, n: PNode, flags: TExprFlags): PNode = else: result = semDirectOp(c, n, flags) +proc setMs(n: PNode, s: PSym): PNode = + result = n + n.sons[0] = newSymNode(s) + n.sons[0].info = n.info + proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = # this is a hotspot in the compiler! # DON'T forget to update ast.SpecialSemMagics if you add a magic here! 
@@ -1605,6 +1605,11 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = checkSonsLen(n, 2) result = newStrNodeT(renderTree(n[1], {renderNoComments}), n) result.typ = getSysType(tyString) + of mParallel: + result = setMs(n, s) + var x = n.lastSon + if x.kind == nkDo: x = x.sons[bodyPos] + result.sons[1] = semStmt(c, x) else: result = semDirectOp(c, n, flags) proc semWhen(c: PContext, n: PNode, semCheck = true): PNode = diff --git a/compiler/semmagic.nim b/compiler/semmagic.nim index 4caf1fb8e2..80e70b8c0a 100644 --- a/compiler/semmagic.nim +++ b/compiler/semmagic.nim @@ -1,7 +1,7 @@ # # # The Nimrod Compiler -# (c) Copyright 2013 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. diff --git a/compiler/semparallel.nim b/compiler/semparallel.nim index 34a1f3af82..dd1584e7d0 100644 --- a/compiler/semparallel.nim +++ b/compiler/semparallel.nim @@ -19,7 +19,11 @@ # - passed slices need to be ensured to be disjoint (+) # - output slices need special logic -import lowerings, guards, sempass2 +import + ast, astalgo, idents, lowerings, magicsys, guards, sempass2, msgs, + renderer +from trees import getMagic +from strutils import `%` discard """ @@ -75,12 +79,17 @@ proc initAnalysisCtx(): AnalysisCtx = result.args = @[] result.guards = @[] -proc getSlot(c: var AnalysisCtx; s: PSym): ptr MonotonicVar = - var L = c.locals.len - for i in 0.. 
= 0: return addr(c.locals[s]) + let L = c.locals.len c.locals.setLen(L+1) - c.locals[L].v = s + c.locals[L].v = v return addr(c.locals[L]) proc getRoot(n: PNode): PSym = @@ -110,25 +119,28 @@ proc gatherArgs(c: var AnalysisCtx; n: PNode) = c.args.add root gatherArgs(c, n[i]) -proc isLocal(s: PSym): bool = - s.kind in {skResult, skTemp, skForVar, skVar, skLet} and - {sfAddrTaken, sfGlobal} * s.flags == {} +proc isLocal(n: PNode): bool = + n.kind == nkSym and (let s = n.sym; + s.kind in {skResult, skTemp, skForVar, skVar, skLet} and + {sfAddrTaken, sfGlobal} * s.flags == {}) -proc checkLocal(c: var AnalysisCtx; n: PNode) = - if n.kind == nkSym and isLocal(n.sym): - let slot = c.getSlot(n[1].sym) - if slot.stride != nil: +proc checkLocal(c: AnalysisCtx; n: PNode) = + if isLocal(n): + let s = c.lookupSlot(n.sym) + if s >= 0 and c.locals[s].stride != nil: localError(n.info, "invalid usage of counter after increment") else: for i in 0 .. " & b.renderTree) + localError(a.info, "can prove: " & ?a & " > " & ?b) proc checkBounds(c: AnalysisCtx; arr, idx: PNode) = checkLe(c, arr.lowBound, idx) @@ -139,11 +151,8 @@ proc addLowerBoundAsFacts(c: var AnalysisCtx) = if not v.blacklisted: c.guards.addFactLe(v.lower, newSymNode(v.v)) -proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: int) = +proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: PNode) = checkLocal(c, n) - let le = n.sons[le] - let ri = n.sons[ri] - let x = n.sons[x] # perform static bounds checking here; and not later! 
let oldState = c.guards.len addLowerBoundAsFacts(c) @@ -152,17 +161,15 @@ proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: int) = c.guards.setLen(oldState) c.slices.add((x, le, ri, c.currentSpawnId, c.inLoop > 0)) -template `?`(x): expr = x.renderTree - proc overlap(m: TModel; x,y,c,d: PNode) = - # X..Y and C..D overlap iff (X <= D and Y >= C) + # X..Y and C..D overlap iff (X <= D and C <= Y) case proveLe(m, x, d) - of impUnkown: + of impUnknown: localError(x.info, "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" % [?x, ?d, ?x, ?y, ?c, ?d]) of impYes: - case proveLe(m, y, c) + case proveLe(m, c, y) of impUnknown: localError(x.info, "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" % @@ -175,12 +182,12 @@ proc overlap(m: TModel; x,y,c,d: PNode) = proc stride(c: AnalysisCtx; n: PNode): BiggestInt = # note: 0 if it cannot be determined is just right because then # we analyse 'i..i' and 'i+0 .. i+0' and these are not disjoint! - if n.kind == nkSym and isLocal(n.sym): - let slot = c.getSlot(n[1].sym) - if slot.stride != nil: - result = slot.stride.intVal + if isLocal(n): + let s = c.lookupSlot(n.sym) + if s >= 0 and c.locals[s].stride != nil: + result = c.locals[s].stride.intVal else: - for i in 0 .. 0: - result = copyNode(n.kind, n.info, n.len) + result = copyNode(n) for i in 0 .. < n.len: - result.sons[i] = transformSlices(n.sons[i]) + result.add transformSlices(n.sons[i]) else: result = n proc transformSpawn(owner: PSym; n, barrier: PNode): PNode = - if n.kind in nkCalls: + if n.kind in nkCallKinds: if n[0].kind == nkSym: let op = n[0].sym if op.magic == mSpawn: result = transformSlices(n) - return wrapProcForSpawn(owner, result, barrier) + return wrapProcForSpawn(owner, result[1], barrier) elif n.safeLen > 0: - result = copyNode(n.kind, n.info, n.len) + result = copyNode(n) for i in 0 .. 
< n.len: - result.sons[i] = transformSpawn(owner, n.sons[i], barrier) + result.add transformSpawn(owner, n.sons[i], barrier) else: result = n +proc checkArgs(a: var AnalysisCtx; n: PNode) = + discard "too implement" + +proc generateAliasChecks(a: AnalysisCtx; result: PNode) = + discard "too implement" + proc liftParallel*(owner: PSym; n: PNode): PNode = # this needs to be called after the 'for' loop elimination @@ -390,22 +408,17 @@ proc liftParallel*(owner: PSym; n: PNode): PNode = analyse(a, body) if a.spawns == 0: localError(n.info, "'parallel' section without 'spawn'") - checkSlices(a) + checkSlicesAreDisjoint(a) checkArgs(a, body) var varSection = newNodeI(nkVarSection, n.info) - var temp = newSym(skTemp, "barrier", owner, n.info) + var temp = newSym(skTemp, getIdent"barrier", owner, n.info) temp.typ = magicsys.getCompilerProc("Barrier").typ incl(temp.flags, sfFromGeneric) + let tempNode = newSymNode(temp) + varSection.addVar tempNode - var vpart = newNodeI(nkIdentDefs, n.info, 3) - vpart.sons[0] = newSymNode(temp) - vpart.sons[1] = ast.emptyNode - vpart.sons[2] = indirectAccess(castExpr, field, n.info) - varSection.add vpart - - barrier = genAddrOf(vpart[0]) - + let barrier = genAddrOf(tempNode) result = newNodeI(nkStmtList, n.info) generateAliasChecks(a, result) result.add varSection diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim index 856820c6e0..86819d25a7 100644 --- a/lib/pure/concurrency/threadpool.nim +++ b/lib/pure/concurrency/threadpool.nim @@ -74,12 +74,20 @@ type data: pointer ready: bool # put it here for correct alignment! initialized: bool # whether it has even been initialized + shutdown: bool # the pool requests to shut down this worker thread proc nimArgsPassingDone(p: pointer) {.compilerProc.} = let w = cast[ptr Worker](p) signal(w.taskStarted) +const + MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 
256 threads + ## should be good enough for anybody ;-) + var + currentPoolSize: int + maxPoolSize = MaxThreadPoolSize + minPoolSize = 4 gSomeReady = createCondVar() readyWorker: ptr Worker @@ -91,15 +99,9 @@ proc slave(w: ptr Worker) {.thread.} = await(w.taskArrived) assert(not w.ready) w.f(w, w.data) - -const - MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 256 threads - ## should be good enough for anybody ;-) - -var - currentPoolSize: int - maxPoolSize = MaxThreadPoolSize - minPoolSize = 4 + if w.shutdown: + w.shutdown = false + atomicDec currentPoolSize proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) = ## sets the minimal thread pool size. The default value of this is 4. @@ -183,13 +185,15 @@ proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} = if not workersData[currentPoolSize].initialized: activateThread(currentPoolSize) let w = addr(workersData[currentPoolSize]) - inc currentPoolSize + atomicInc currentPoolSize if selectWorker(w, fn, data): release(stateLock) return # else we didn't succeed but some other thread, so do nothing. of doShutdownThread: - if currentPoolSize > minPoolSize: dec currentPoolSize + if currentPoolSize > minPoolSize: + let w = addr(workersData[currentPoolSize-1]) + w.shutdown = true # we don't free anything here. Too dangerous. 
release(stateLock) # else the acquire failed, but this means some From 31b8fd66b1bd54b665e52855909538a50d33d7c3 Mon Sep 17 00:00:00 2001 From: Araq Date: Wed, 14 May 2014 23:36:28 +0200 Subject: [PATCH 03/13] 'parallel' statement: next steps --- compiler/guards.nim | 112 +++++++++++++++------- compiler/semparallel.nim | 41 +++++--- tests/parallel/tdisjoint_slice1.nim | 21 ++++ tests/parallel/tdisjoint_slice2.nim | 21 ++++ tests/parallel/tinvalid_array_bounds.nim | 25 +++++ tests/parallel/tinvalid_counter_usage.nim | 26 +++++ tests/parallel/tnon_disjoint_slice1.nim | 25 +++++ 7 files changed, 221 insertions(+), 50 deletions(-) create mode 100644 tests/parallel/tdisjoint_slice1.nim create mode 100644 tests/parallel/tdisjoint_slice2.nim create mode 100644 tests/parallel/tinvalid_array_bounds.nim create mode 100644 tests/parallel/tinvalid_counter_usage.nim create mode 100644 tests/parallel/tnon_disjoint_slice1.nim diff --git a/compiler/guards.nim b/compiler/guards.nim index 551a112565..de0ce1dcc9 100644 --- a/compiler/guards.nim +++ b/compiler/guards.nim @@ -1,7 +1,7 @@ # # # The Nimrod Compiler -# (c) Copyright 2013 Andreas Rumpf +# (c) Copyright 2014 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. 
@@ -165,9 +165,6 @@ proc buildCall(op: PSym; a, b: PNode): PNode = result.sons[1] = a result.sons[2] = b -proc `+@`*(a: PNode; b: BiggestInt): PNode = - (if b != 0: opAdd.buildCall(a, nkIntLit.newIntNode(b)) else: a) - proc `|+|`(a, b: PNode): PNode = result = copyNode(a) if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |+| b.intVal @@ -178,22 +175,56 @@ proc `|*|`(a, b: PNode): PNode = if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |*| b.intVal else: result.floatVal = a.floatVal * b.floatVal +proc negate(a, b, res: PNode): PNode = + if b.kind in {nkCharLit..nkUInt64Lit} and b.intVal != low(BiggestInt): + var b = copyNode(b) + b.intVal = -b.intVal + if a.kind in {nkCharLit..nkUInt64Lit}: + b.intVal = b.intVal |+| a.intVal + result = b + else: + result = buildCall(opAdd, a, b) + elif b.kind in {nkFloatLit..nkFloat64Lit}: + var b = copyNode(b) + b.floatVal = -b.floatVal + result = buildCall(opAdd, a, b) + else: + result = res + proc zero(): PNode = nkIntLit.newIntNode(0) proc one(): PNode = nkIntLit.newIntNode(1) proc minusOne(): PNode = nkIntLit.newIntNode(-1) -proc lowBound*(x: PNode): PNode = nkIntLit.newIntNode(firstOrd(x.typ)) +proc lowBound*(x: PNode): PNode = + result = nkIntLit.newIntNode(firstOrd(x.typ)) + result.info = x.info + proc highBound*(x: PNode): PNode = - if x.typ.skipTypes(abstractInst).kind == tyArray: - nkIntLit.newIntNode(lastOrd(x.typ)) - else: - opAdd.buildCall(opLen.buildCall(x), minusOne()) + result = if x.typ.skipTypes(abstractInst).kind == tyArray: + nkIntLit.newIntNode(lastOrd(x.typ)) + else: + opAdd.buildCall(opLen.buildCall(x), minusOne()) + result.info = x.info + +proc reassociation(n: PNode): PNode = + result = n + # (foo+5)+5 --> foo+10; same for '*' + case result.getMagic + of someAdd: + if result[2].isValue and + result[1].getMagic in someAdd and result[1][2].isValue: + result = opAdd.buildCall(result[1][1], result[1][2] |+| result[2]) + of someMul: + if result[2].isValue and + result[1].getMagic in 
someMul and result[1][2].isValue: + result = opAdd.buildCall(result[1][1], result[1][2] |*| result[2]) + else: discard proc canon*(n: PNode): PNode = # XXX for now only the new code in 'semparallel' uses this if n.safeLen >= 1: - result = newNodeI(n.kind, n.info, n.len) - for i in 0 .. < n.safeLen: + result = shallowCopy(n) + for i in 0 .. < n.len: result.sons[i] = canon(n.sons[i]) else: result = n @@ -210,32 +241,12 @@ proc canon*(n: PNode): PNode = result = buildCall(opAdd, result[1], newIntNode(nkIntLit, -1)) of someSub: # x - 4 --> x + (-4) - var b = result[2] - if b.kind in {nkCharLit..nkUInt64Lit} and b.intVal != low(BiggestInt): - b = copyNode(b) - b.intVal = -b.intVal - result = buildCall(opAdd, result[1], b) - elif b.kind in {nkFloatLit..nkFloat64Lit}: - b = copyNode(b) - b.floatVal = -b.floatVal - result = buildCall(opAdd, result[1], b) + result = negate(result[1], result[2], result) of someLen: result.sons[0] = opLen.newSymNode else: discard - # re-association: - # (foo+5)+5 --> foo+10; same for '*' - case result.getMagic - of someAdd: - if result[2].isValue and - result[1].getMagic in someAdd and result[1][2].isValue: - result = opAdd.buildCall(result[1][1], result[1][2] |+| result[2]) - of someMul: - if result[2].isValue and - result[1].getMagic in someMul and result[1][2].isValue: - result = opAdd.buildCall(result[1][1], result[1][2] |*| result[2]) - else: discard - + result = reassociation(result) # most important rule: (x-4) < a.len --> x < a.len+4 case result.getMagic of someLe, someLt: @@ -245,21 +256,32 @@ proc canon*(n: PNode): PNode = isLetLocation(x[1], true): case x.getMagic of someSub: - result = buildCall(result[0].sym, x[1], opAdd.buildCall(y, x[2])) + result = buildCall(result[0].sym, x[1], + reassociation(opAdd.buildCall(y, x[2]))) of someAdd: - result = buildCall(result[0].sym, x[1], opSub.buildCall(y, x[2])) + # Rule A: + let plus = negate(y, x[2], nil).reassociation + if plus != nil: result = buildCall(result[0].sym, x[1], plus) else: 
discard elif y.kind in nkCallKinds and y.len == 3 and y[2].isValue and isLetLocation(y[1], true): # a.len < x-3 case y.getMagic of someSub: - result = buildCall(result[0].sym, y[1], opAdd.buildCall(x, y[2])) + result = buildCall(result[0].sym, y[1], + reassociation(opAdd.buildCall(x, y[2]))) of someAdd: - result = buildCall(result[0].sym, y[1], opSub.buildCall(x, y[2])) + let plus = negate(x, y[2], nil).reassociation + # ensure that Rule A will not trigger afterwards with the + # additional 'not isLetLocation' constraint: + if plus != nil and not isLetLocation(x, true): + result = buildCall(result[0].sym, plus, y[1]) else: discard else: discard +proc `+@`*(a: PNode; b: BiggestInt): PNode = + canon(if b != 0: opAdd.buildCall(a, nkIntLit.newIntNode(b)) else: a) + proc usefulFact(n: PNode): PNode = case n.getMagic of someEq: @@ -639,8 +661,20 @@ proc doesImply*(facts: TModel, prop: PNode): TImplication = proc impliesNotNil*(facts: TModel, arg: PNode): TImplication = result = doesImply(facts, opIsNil.buildCall(arg).neg) +proc simpleSlice*(a, b: PNode): BiggestInt = + # returns 'c' if a..b matches (i+c)..(i+c), -1 otherwise. (i)..(i) is matched + # as if it is (i+0)..(i+0). 
+ if guards.sameTree(a, b): + if a.getMagic in someAdd and a[2].kind in {nkCharLit..nkUInt64Lit}: + result = a[2].intVal + else: + result = 0 + else: + result = -1 + proc proveLe*(m: TModel; a, b: PNode): TImplication = let res = canon(opLe.buildCall(a, b)) + #echo renderTree(res) # we hardcode lots of axioms here: let a = res[1] let b = res[2] @@ -662,6 +696,10 @@ proc proveLe*(m: TModel; a, b: PNode): TImplication = if b.getMagic in someAdd and sameTree(a, b[1]): return proveLe(m, zero(), b[2]) + # x+c <= x iff c <= 0 + if a.getMagic in someAdd and sameTree(b, a[1]): + return proveLe(m, a[2], zero()) + # x <= x*c if 1 <= c and 0 <= x: if b.getMagic in someMul and sameTree(a, b[1]): if proveLe(m, one(), b[2]) == impYes and proveLe(m, zero(), a) == impYes: diff --git a/compiler/semparallel.nim b/compiler/semparallel.nim index dd1584e7d0..7917cab908 100644 --- a/compiler/semparallel.nim +++ b/compiler/semparallel.nim @@ -9,6 +9,8 @@ ## Semantic checking for 'parallel'. +# - codegen needs to support mSlice +# - lowerings must not perform unnecessary copies # - slices should become "nocopy" to openArray (+) # - need to perform bound checks (+) # @@ -153,6 +155,8 @@ proc addLowerBoundAsFacts(c: var AnalysisCtx) = proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: PNode) = checkLocal(c, n) + let le = le.canon + let ri = ri.canon # perform static bounds checking here; and not later! 
let oldState = c.guards.len addLowerBoundAsFacts(c) @@ -166,16 +170,16 @@ proc overlap(m: TModel; x,y,c,d: PNode) = case proveLe(m, x, d) of impUnknown: localError(x.info, - "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" % + "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" % [?x, ?d, ?x, ?y, ?c, ?d]) of impYes: case proveLe(m, c, y) of impUnknown: localError(x.info, - "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" % + "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" % [?y, ?d, ?x, ?y, ?c, ?d]) of impYes: - localError(x.info, "$#..$# not disjoint from $#..$#" % [?x, ?y, ?c, ?d]) + localError(x.info, "($#)..($#) not disjoint from ($#)..($#)" % [?x, ?y, ?c, ?d]) of impNo: discard of impNo: discard @@ -220,14 +224,25 @@ proc checkSlicesAreDisjoint(c: var AnalysisCtx) = let x = c.slices[i] let y = c.slices[j] if x.spawnId != y.spawnId and guards.sameTree(x.x, y.x): - if not x.inLoop and not y.inLoop: + if not x.inLoop or not y.inLoop: + # XXX strictly speaking, 'or' is not correct here and it needs to + # be 'and'. However this prevents too many obviously correct programs + # like f(a[0..x]); for i in x+1 .. 
a.high: f(a[i]) overlap(c.guards, x.a, x.b, y.a, y.b) + elif (let k = simpleSlice(x.a, x.b); let m = simpleSlice(y.a, y.b); + k >= 0 and m >= 0): + # ah I cannot resist the temptation and add another sweet heuristic: + # if both slices have the form (i+k)..(i+k) and (i+m)..(i+m) we + # check they are disjoint and k < stride and m < stride: + overlap(c.guards, x.a, x.b, y.a, y.b) + let stride = min(c.stride(x.a), c.stride(y.a)) + if k < stride and m < stride: + discard + else: + localError(x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" % + [?x.a, ?x.b, ?y.a, ?y.b]) else: - # ah I cannot resists the temptation and add another sweet heuristic: - # if both slices have the form (i+c)..(i+c) and (i+d)..(i+d) we - # check they are disjoint and c <= stride and d <= stride: - # XXX - localError(x.x.info, "cannot prove $#..$# disjoint from $#..$#" % + localError(x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" % [?x.a, ?x.b, ?y.a, ?y.b]) proc analyse(c: var AnalysisCtx; n: PNode) @@ -369,9 +384,9 @@ proc transformSlices(n: PNode): PNode = result.add n[2][2] return result if n.safeLen > 0: - result = copyNode(n) + result = shallowCopy(n) for i in 0 .. < n.len: - result.add transformSlices(n.sons[i]) + result.sons[i] = transformSlices(n.sons[i]) else: result = n @@ -383,9 +398,9 @@ proc transformSpawn(owner: PSym; n, barrier: PNode): PNode = result = transformSlices(n) return wrapProcForSpawn(owner, result[1], barrier) elif n.safeLen > 0: - result = copyNode(n) + result = shallowCopy(n) for i in 0 .. 
< n.len: - result.add transformSpawn(owner, n.sons[i], barrier) + result.sons[i] = transformSpawn(owner, n.sons[i], barrier) else: result = n diff --git a/tests/parallel/tdisjoint_slice1.nim b/tests/parallel/tdisjoint_slice1.nim new file mode 100644 index 0000000000..2ca96d6ae2 --- /dev/null +++ b/tests/parallel/tdisjoint_slice1.nim @@ -0,0 +1,21 @@ + +import threadpool + +proc f(a: openArray[int]) = + for x in a: echo x + +proc f(a: int) = echo a + +proc main() = + var a: array[0..30, int] + parallel: + #spawn f(a[0..15]) + #spawn f(a[16..30]) + var i = 0 + while i <= 29: + spawn f(a[i]) + spawn f(a[i+1]) + inc i, 2 + # is correct here + +main() diff --git a/tests/parallel/tdisjoint_slice2.nim b/tests/parallel/tdisjoint_slice2.nim new file mode 100644 index 0000000000..b26559fc21 --- /dev/null +++ b/tests/parallel/tdisjoint_slice2.nim @@ -0,0 +1,21 @@ + +import threadpool + +proc f(a: openArray[int]) = + for x in a: echo x + +proc f(a: int) = echo a + +proc main() = + var a: array[0..30, int] + parallel: + spawn f(a[0..15]) + #spawn f(a[16..30]) + var i = 16 + while i <= 29: + spawn f(a[i]) + spawn f(a[i+1]) + inc i, 2 + # is correct here + +main() diff --git a/tests/parallel/tinvalid_array_bounds.nim b/tests/parallel/tinvalid_array_bounds.nim new file mode 100644 index 0000000000..337fae7291 --- /dev/null +++ b/tests/parallel/tinvalid_array_bounds.nim @@ -0,0 +1,25 @@ +discard """ + errormsg: "cannot prove: i + 1 <= 30" + line: 21 +""" + +import threadpool + +proc f(a: openArray[int]) = + for x in a: echo x + +proc f(a: int) = echo a + +proc main() = + var a: array[0..30, int] + parallel: + spawn f(a[0..15]) + spawn f(a[16..30]) + var i = 0 + while i <= 30: + spawn f(a[i]) + spawn f(a[i+1]) + inc i + #inc i # inc i, 2 would be correct here + +main() diff --git a/tests/parallel/tinvalid_counter_usage.nim b/tests/parallel/tinvalid_counter_usage.nim new file mode 100644 index 0000000000..c6303c6517 --- /dev/null +++ b/tests/parallel/tinvalid_counter_usage.nim @@ 
-0,0 +1,26 @@ +discard """ + errormsg: "invalid usage of counter after increment" + line: 21 +""" + +import threadpool + +proc f(a: openArray[int]) = + for x in a: echo x + +proc f(a: int) = echo a + +proc main() = + var a: array[0..30, int] + parallel: + spawn f(a[0..15]) + spawn f(a[16..30]) + var i = 0 + while i <= 30: + inc i + spawn f(a[i]) + inc i + #spawn f(a[i+1]) + #inc i # inc i, 2 would be correct here + +main() diff --git a/tests/parallel/tnon_disjoint_slice1.nim b/tests/parallel/tnon_disjoint_slice1.nim new file mode 100644 index 0000000000..72d008bbdb --- /dev/null +++ b/tests/parallel/tnon_disjoint_slice1.nim @@ -0,0 +1,25 @@ +discard """ + errormsg: "cannot prove (i)..(i) disjoint from (i + 1)..(i + 1)" + line: 20 +""" + +import threadpool + +proc f(a: openArray[int]) = + for x in a: echo x + +proc f(a: int) = echo a + +proc main() = + var a: array[0..30, int] + parallel: + #spawn f(a[0..15]) + #spawn f(a[16..30]) + var i = 0 + while i <= 29: + spawn f(a[i]) + spawn f(a[i+1]) + inc i + #inc i # inc i, 2 would be correct here + +main() From 417b9f5a1d13f26842b1337395a0f5b57827cc12 Mon Sep 17 00:00:00 2001 From: Araq Date: Thu, 22 May 2014 08:41:50 +0200 Subject: [PATCH 04/13] 'parallel' statement almost working --- compiler/ccgexprs.nim | 2 +- compiler/guards.nim | 58 +++-- compiler/lowerings.nim | 286 +++++++++++++++++++---- compiler/semmagic.nim | 12 +- compiler/semparallel.nim | 89 ++++--- doc/manual.txt | 2 +- lib/pure/concurrency/threadpool.nim | 112 +++++++++ lib/system/atomics.nim | 6 +- tests/parallel/tdisjoint_slice1.nim | 16 +- tests/parallel/tinvalid_array_bounds.nim | 2 +- 10 files changed, 470 insertions(+), 115 deletions(-) diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 7fb6af8965..34fdf5bf17 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -1636,7 +1636,7 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) = of mSlurp..mQuoteAst: localError(e.info, errXMustBeCompileTime, 
e.sons[0].sym.name.s) of mSpawn: - let n = lowerings.wrapProcForSpawn(p.module.module, e.sons[1]) + let n = lowerings.wrapProcForSpawn(p.module.module, e[1], e.typ, nil, nil) expr(p, n, d) of mParallel: let n = semparallel.liftParallel(p.module.module, e) diff --git a/compiler/guards.nim b/compiler/guards.nim index de0ce1dcc9..3df3bd1a81 100644 --- a/compiler/guards.nim +++ b/compiler/guards.nim @@ -672,12 +672,8 @@ proc simpleSlice*(a, b: PNode): BiggestInt = else: result = -1 -proc proveLe*(m: TModel; a, b: PNode): TImplication = - let res = canon(opLe.buildCall(a, b)) - #echo renderTree(res) - # we hardcode lots of axioms here: - let a = res[1] - let b = res[2] +proc ple(m: TModel; a, b: PNode): TImplication = + template `<=?`(a,b): expr = ple(m,a,b) == impYes # 0 <= 3 if a.isValue and b.isValue: return if leValue(a, b): impYes else: impNo @@ -692,26 +688,46 @@ proc proveLe*(m: TModel; a, b: PNode): TImplication = # x <= x if sameTree(a, b): return impYes - # x <= x+c iff 0 <= c - if b.getMagic in someAdd and sameTree(a, b[1]): - return proveLe(m, zero(), b[2]) + # 0 <= x.len + if b.getMagic in someLen and a.isValue: + if a.intVal <= 0: return impYes - # x+c <= x iff c <= 0 - if a.getMagic in someAdd and sameTree(b, a[1]): - return proveLe(m, a[2], zero()) + # x <= y+c if 0 <= c and x <= y + if b.getMagic in someAdd and zero() <=? b[2] and a <=? b[1]: return impYes - # x <= x*c if 1 <= c and 0 <= x: - if b.getMagic in someMul and sameTree(a, b[1]): - if proveLe(m, one(), b[2]) == impYes and proveLe(m, zero(), a) == impYes: - return impYes + # x+c <= y if c <= 0 and x <= y + if a.getMagic in someAdd and a[2] <=? zero() and a[1] <=? b: return impYes - # x div c <= x if 1 <= c and 0 <= x: - if a.getMagic in someDiv and sameTree(a[1], b): - if proveLe(m, one(), a[2]) == impYes and proveLe(m, zero(), b) == impYes: - return impYes + # x <= y*c if 1 <= c and x <= y and 0 <= y + if b.getMagic in someMul: + if a <=? b[1] and one() <=? b[2] and zero() <=? 
b[1]: return impYes + + # x div c <= y if 1 <= c and 0 <= y and x <= y: + if a.getMagic in someDiv: + if one() <=? a[2] and zero() <=? b and a[1] <=? b: return impYes + + # slightly subtle: + # x <= max(y, z) iff x <= y or x <= z + # note that 'x <= max(x, z)' is a special case of the above rule + if b.getMagic in someMax: + if a <=? b[1] or a <=? b[2]: return impYes + + # min(x, y) <= z iff x <= z or y <= z + if a.getMagic in someMin: + if a[1] <=? b or a[2] <=? b: return impYes # use the knowledge base: - return doesImply(m, res) + return doesImply(m, opLe.buildCall(a, b)) + +proc proveLe*(m: TModel; a, b: PNode): TImplication = + #echo "ROOT ", renderTree(a), " <=? ", b.rendertree + let x = canon(opLe.buildCall(a, b)) + #echo renderTree(res) + result = ple(m, x[1], x[2]) + if result == impUnknown: + # try an alternative: a <= b iff not (b < a) iff not (b+1 <= a): + let y = canon(opLe.buildCall(opAdd.buildCall(b, one()), a)) + result = ~ple(m, y[1], y[2]) proc addFactLe*(m: var TModel; a, b: PNode) = m.add canon(opLe.buildCall(a, b)) diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 704cfbcdd0..2a1a8e577a 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -13,6 +13,8 @@ const genPrefix* = ":tmp" # prefix for generated names import ast, astalgo, types, idents, magicsys, msgs, options +from guards import createMagic +from trees import getMagic proc newTupleAccess*(tup: PNode, i: int): PNode = result = newNodeIT(nkBracketExpr, tup.info, tup.typ.skipTypes( @@ -80,19 +82,23 @@ proc newDotExpr(obj, b: PSym): PNode = addSon(result, newSymNode(field)) result.typ = field.typ -proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode = +proc indirectAccess*(a: PNode, b: string, info: TLineInfo): PNode = # returns a[].b as a node var deref = newNodeI(nkHiddenDeref, info) - deref.typ = a.typ.sons[0] + deref.typ = a.typ.skipTypes(abstractInst).sons[0] assert deref.typ.kind == tyObject - let field = getSymFromList(deref.typ.n, 
getIdent(b.name.s & $b.id)) - assert field != nil, b.name.s + let field = getSymFromList(deref.typ.n, getIdent(b)) + assert field != nil, b addSon(deref, a) result = newNodeI(nkDotExpr, info) addSon(result, deref) addSon(result, newSymNode(field)) result.typ = field.typ +proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode = + # returns a[].b as a node + result = indirectAccess(a, b.name.s & $b.id, info) + proc indirectAccess*(a, b: PSym, info: TLineInfo): PNode = result = indirectAccess(newSymNode(a), b, info) @@ -102,6 +108,11 @@ proc genAddrOf*(n: PNode): PNode = result.typ = newType(tyPtr, n.typ.owner) result.typ.rawAddSon(n.typ) +proc genDeref*(n: PNode): PNode = + result = newNodeIT(nkHiddenDeref, n.info, + n.typ.skipTypes(abstractInst).sons[0]) + result.add n + proc callCodegenProc*(name: string, arg1: PNode; arg2, arg3: PNode = nil): PNode = result = newNodeI(nkCall, arg1.info) @@ -114,14 +125,83 @@ proc callCodegenProc*(name: string, arg1: PNode; if arg2 != nil: result.add arg2 if arg3 != nil: result.add arg3 +# we have 4 cases to consider: +# - a void proc --> nothing to do +# - a proc returning GC'ed memory --> requires a future +# - a proc returning non GC'ed memory --> pass as hidden 'var' parameter +# - not in a parallel environment --> requires a future for memory safety +type + TSpawnResult = enum + srVoid, srFuture, srByVar + TFutureKind = enum + futInvalid # invalid type T for 'Future[T]' + futGC # Future of a GC'ed type + futBlob # Future of a blob type + +proc spawnResult(t: PType; inParallel: bool): TSpawnResult = + if t.isEmptyType: srVoid + elif inParallel and not containsGarbageCollectedRef(t): srByVar + else: srFuture + +proc futureKind(t: PType): TFutureKind = + if t.skipTypes(abstractInst).kind in {tyRef, tyString, tySequence}: futGC + elif containsGarbageCollectedRef(t): futInvalid + else: futBlob + +discard """ +We generate roughly this: + +proc f_wrapper(args) = + var a = args.a # copy strings/seqs; thread transfer; not 
generated for + # the 'parallel' statement + var b = args.b + + args.fut = createFuture(thread, sizeof(T)) # optional + nimArgsPassingDone() # signal parent that the work is done + args.fut.blob = f(a, b, ...) + # - or - + f(a, b, ...) + +stmtList: + var scratchObj + scratchObj.a = a + scratchObj.b = b + + nimSpawn(f_wrapper, addr scratchObj) + scratchObj.fut # optional + +""" + +proc createNimCreateFutureCall(fut, threadParam: PNode): PNode = + let size = newNodeIT(nkCall, fut.info, getSysType(tyInt)) + size.add newSymNode(createMagic("sizeof", mSizeOf)) + assert fut.typ.kind == tyGenericInst + size.add newNodeIT(nkType, fut.info, fut.typ.sons[1]) + + let castExpr = newNodeIT(nkCast, fut.info, fut.typ) + castExpr.add emptyNode + castExpr.add callCodeGenProc("nimCreateFuture", threadParam, size) + result = newFastAsgnStmt(fut, castExpr) + proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; - varSection, call, barrier: PNode): PSym = + varSection, call, barrier, fut: PNode): PSym = var body = newNodeI(nkStmtList, f.info) body.add varSection if barrier != nil: body.add callCodeGenProc("barrierEnter", barrier) - body.add callCodeGenProc("nimArgsPassingDone", newSymNode(threadParam)) - body.add call + if fut != nil: + body.add createNimCreateFutureCall(fut, threadParam.newSymNode) + if barrier == nil: + body.add callCodeGenProc("nimFutureCreateCondVar", fut) + + body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode) + if fut != nil: + body.add newAsgnStmt(indirectAccess(fut, + if fut.typ.futureKind==futGC: "data" else: "blob", fut.info), call) + if barrier == nil: + body.add callCodeGenProc("nimFutureSignal", fut) + else: + body.add call if barrier != nil: body.add callCodeGenProc("barrierLeave", barrier) @@ -151,10 +231,148 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode = result.typ = newType(tyPtr, objType.owner) result.typ.rawAddSon(objType) -proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode = - 
result = newNodeI(nkStmtList, n.info) - if n.kind notin nkCallKinds or not n.typ.isEmptyType: - localError(n.info, "'spawn' takes a call expression of type void") +proc setupArgsForConcurrency(n: PNode; objType: PType; scratchObj: PSym, + castExpr, call, varSection, result: PNode) = + let formals = n[0].typ.n + let tmpName = getIdent(genPrefix) + for i in 1 .. 16) and + n.getRoot != nil: + # it is more efficient to pass a pointer instead: + let a = genAddrOf(n) + field.typ = a.typ + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a) + call.add(genDeref(indirectAccess(castExpr, field, n.info))) + else: + # boring case + field.typ = argType + objType.addField(field) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n) + call.add(indirectAccess(castExpr, field, n.info)) + +proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; + barrier, dest: PNode = nil): PNode = + # if 'barrier' != nil, then it is in a 'parallel' section and we + # generate quite different code + let spawnKind = spawnResult(retType, barrier!=nil) + case spawnKind + of srVoid: + internalAssert dest == nil + result = newNodeI(nkStmtList, n.info) + of srFuture: + internalAssert dest == nil + result = newNodeIT(nkStmtListExpr, n.info, retType) + of srByVar: + if dest == nil: localError(n.info, "'spawn' must not be discarded") + result = newNodeI(nkStmtList, n.info) + + if n.kind notin nkCallKinds: + localError(n.info, "'spawn' takes a call expression") return if optThreadAnalysis in gGlobalOptions: if {tfThread, tfNoSideEffect} * n[0].typ.flags == {}: @@ -180,7 +398,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode = varSectionB.addVar(scratchObj.newSymNode) result.add varSectionB - var call = newNodeI(nkCall, n.info) + var call = newNodeIT(nkCall, n.info, n.typ) var fn = n.sons[0] # templates and macros are in fact valid here due to the nature of # the transformation: @@ -200,34 +418,10 @@ proc wrapProcForSpawn*(owner: 
PSym; n: PNode; barrier: PNode = nil): PNode = call.add(fn) var varSection = newNodeI(nkVarSection, n.info) - let formals = n[0].typ.n - let tmpName = getIdent(genPrefix) - for i in 1 .. f shouldn't have side effects anyway # - passed arrays need to be ensured not to alias # - passed slices need to be ensured to be disjoint (+) -# - output slices need special logic +# - output slices need special logic (+) import ast, astalgo, idents, lowerings, magicsys, guards, sempass2, msgs, @@ -94,23 +94,6 @@ proc getSlot(c: var AnalysisCtx; v: PSym): ptr MonotonicVar = c.locals[L].v = v return addr(c.locals[L]) -proc getRoot(n: PNode): PSym = - ## ``getRoot`` takes a *path* ``n``. A path is an lvalue expression - ## like ``obj.x[i].y``. The *root* of a path is the symbol that can be - ## determined as the owner; ``obj`` in the example. - case n.kind - of nkSym: - if n.sym.kind in {skVar, skResult, skTemp, skLet, skForVar}: - result = n.sym - of nkDotExpr, nkBracketExpr, nkHiddenDeref, nkDerefExpr, - nkObjUpConv, nkObjDownConv, nkCheckedFieldExpr: - result = getRoot(n.sons[0]) - of nkHiddenStdConv, nkHiddenSubConv, nkConv: - result = getRoot(n.sons[1]) - of nkCallKinds: - if getMagic(n) == mSlice: result = getRoot(n.sons[1]) - else: discard - proc gatherArgs(c: var AnalysisCtx; n: PNode) = for i in 0.. = 0 and c.locals[s].stride != nil: @@ -193,6 +174,20 @@ proc stride(c: AnalysisCtx; n: PNode): BiggestInt = else: for i in 0 .. = 0 and c.locals[s].stride != nil: + result = n +@ c.locals[s].stride.intVal + else: + result = n + elif n.safeLen > 0: + result = shallowCopy(n) + for i in 0 .. 1: addFact(c.guards, branch.sons[0]) - #setLen(c.locals, oldState) for i in 0 .. 0: - result = shallowCopy(n) - for i in 0 .. 
< n.len: - result.sons[i] = transformSpawn(owner, n.sons[i], barrier) + result = transformSpawnSons(owner, n, barrier) else: result = n @@ -440,3 +452,4 @@ proc liftParallel*(owner: PSym; n: PNode): PNode = result.add callCodeGenProc("openBarrier", barrier) result.add transformSpawn(owner, body, barrier) result.add callCodeGenProc("closeBarrier", barrier) + diff --git a/doc/manual.txt b/doc/manual.txt index 39e2bad2aa..b2e0089693 100644 --- a/doc/manual.txt +++ b/doc/manual.txt @@ -2748,7 +2748,7 @@ The following builtin procs cannot be overloaded for reasons of implementation simplicity (they require specialized semantic checking):: defined, definedInScope, compiles, low, high, sizeOf, - is, of, echo, shallowCopy, getAst + is, of, echo, shallowCopy, getAst, spawn Thus they act more like keywords than like ordinary identifiers; unlike a keyword however, a redefinition may `shadow`:idx: the definition in diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim index 86819d25a7..583c60c66d 100644 --- a/lib/pure/concurrency/threadpool.nim +++ b/lib/pure/concurrency/threadpool.nim @@ -65,6 +65,30 @@ proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = # ---------------------------------------------------------------------------- type + AwaitInfo = object + cv: CondVar + idx: int + + RawFuture* = ptr RawFutureObj ## untyped base class for 'Future[T]' + RawFutureObj {.inheritable.} = object # \ + # we allocate this with the thread local allocator; this + # is possible since we already need to do the GC_unref + # on the owning thread + ready, usesCondVar: bool + cv: CondVar #\ + # for 'awaitAny' support + ai: ptr AwaitInfo + idx: int + data: PObject # we incRef and unref it to keep it alive + owner: ptr Worker + next: RawFuture + align: float64 # a float for proper alignment + + Future* {.compilerProc.} [T] = ptr object of RawFutureObj + blob: T ## the underlying value, if available. 
Note that usually + ## you should not access this field directly! However it can + ## sometimes be more efficient than getting the value via ``^``. + WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.} Worker = object taskArrived: CondVar @@ -75,6 +99,92 @@ type ready: bool # put it here for correct alignment! initialized: bool # whether it has even been initialized shutdown: bool # the pool requests to shut down this worker thread + futureLock: TLock + head: RawFuture + +proc finished*(fut: RawFuture) = + ## This MUST be called for every created future to free its associated + ## resources. Note that the default reading operation ``^`` is destructive + ## and calls ``finished``. + doAssert fut.ai.isNil, "future is still attached to an 'awaitAny'" + assert fut.next == nil + let w = fut.owner + acquire(w.futureLock) + fut.next = w.head + w.head = fut + release(w.futureLock) + +proc cleanFutures(w: ptr Worker) = + var it = w.head + acquire(w.futureLock) + while it != nil: + let nxt = it.next + if it.usesCondVar: destroyCondVar(it.cv) + if it.data != nil: GC_unref(it.data) + dealloc(it) + it = nxt + w.head = nil + release(w.futureLock) + +proc nimCreateFuture(owner: pointer; blobSize: int): RawFuture {. + compilerProc.} = + result = cast[RawFuture](alloc0(RawFutureObj.sizeof + blobSize)) + result.owner = cast[ptr Worker](owner) + +proc nimFutureCreateCondVar(fut: RawFuture) {.compilerProc.} = + fut.cv = createCondVar() + fut.usesCondVar = true + +proc nimFutureSignal(fut: RawFuture) {.compilerProc.} = + assert fut.usesCondVar + signal(fut.cv) + +proc await*[T](fut: Future[T]) = + ## waits until the value for the future arrives. + if fut.usesCondVar: await(fut.cv) + +proc `^`*[T](fut: Future[T]): T = + ## blocks until the value is available and then returns this value. Note + ## this reading is destructive for reasons of efficiency and convenience. + ## This calls ``finished(fut)``. 
+ await(fut) + when T is string or T is seq or T is ref: + result = cast[T](fut.data) + else: + result = fut.payload + finished(fut) + +proc notify*(fut: RawFuture) {.compilerproc.} = + if fut.ai != nil: + acquire(fut.ai.cv.L) + fut.ai.idx = fut.idx + inc fut.ai.cv.counter + release(fut.ai.cv.L) + signal(fut.ai.cv.c) + if fut.usesCondVar: signal(fut.cv) + +proc awaitAny*(futures: openArray[RawFuture]): int = + # awaits any of the given futures. Returns the index of one future for which + ## a value arrived. A future only supports one call to 'awaitAny' at the + ## same time. That means if you await([a,b]) and await([b,c]) the second + ## call will only await 'c'. If there is no future left to be able to wait + ## on, -1 is returned. + var ai: AwaitInfo + ai.cv = createCondVar() + var conflicts = 0 + for i in 0 .. futures.high: + if cas(addr futures[i].ai, nil, addr ai): + futures[i].idx = i + else: + inc conflicts + if conflicts < futures.len: + await(ai.cv) + result = ai.idx + for i in 0 .. 
futures.high: + discard cas(addr futures[i].ai, addr ai, nil) + else: + result = -1 + destroyCondVar(ai.cv) proc nimArgsPassingDone(p: pointer) {.compilerProc.} = let w = cast[ptr Worker](p) @@ -99,6 +209,7 @@ proc slave(w: ptr Worker) {.thread.} = await(w.taskArrived) assert(not w.ready) w.f(w, w.data) + if w.head != nil: w.cleanFutures if w.shutdown: w.shutdown = false atomicDec currentPoolSize @@ -119,6 +230,7 @@ var proc activateThread(i: int) {.noinline.} = workersData[i].taskArrived = createCondVar() workersData[i].taskStarted = createCondVar() + initLock workersData[i].futureLock workersData[i].initialized = true createThread(workers[i], slave, addr(workersData[i])) diff --git a/lib/system/atomics.nim b/lib/system/atomics.nim index c6c603b19e..96246ba01d 100644 --- a/lib/system/atomics.nim +++ b/lib/system/atomics.nim @@ -209,12 +209,12 @@ when defined(windows) and not defined(gcc): proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32 {.importc: "InterlockedCompareExchange", header: "", cdecl.} - proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool = + proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool = interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0 - + # XXX fix for 64 bit build else: # this is valid for GCC and Intel C++ - proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool + proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool {.importc: "__sync_bool_compare_and_swap", nodecl.} # XXX is this valid for 'int'? 
diff --git a/tests/parallel/tdisjoint_slice1.nim b/tests/parallel/tdisjoint_slice1.nim index 2ca96d6ae2..c1d0e52f8f 100644 --- a/tests/parallel/tdisjoint_slice1.nim +++ b/tests/parallel/tdisjoint_slice1.nim @@ -1,20 +1,20 @@ +discard """ + outputsub: "EVEN 28" +""" import threadpool -proc f(a: openArray[int]) = - for x in a: echo x - -proc f(a: int) = echo a +proc odd(a: int) = echo "ODD ", a +proc even(a: int) = echo "EVEN ", a proc main() = var a: array[0..30, int] + for i in low(a)..high(a): a[i] = i parallel: - #spawn f(a[0..15]) - #spawn f(a[16..30]) var i = 0 while i <= 29: - spawn f(a[i]) - spawn f(a[i+1]) + spawn even(a[i]) + spawn odd(a[i+1]) inc i, 2 # is correct here diff --git a/tests/parallel/tinvalid_array_bounds.nim b/tests/parallel/tinvalid_array_bounds.nim index 337fae7291..4c6065fd6f 100644 --- a/tests/parallel/tinvalid_array_bounds.nim +++ b/tests/parallel/tinvalid_array_bounds.nim @@ -1,5 +1,5 @@ discard """ - errormsg: "cannot prove: i + 1 <= 30" + errormsg: "can prove: i + 1 > 30" line: 21 """ From d2dbcf2fa44aa76c6c7ed2c07641560640e6bc6b Mon Sep 17 00:00:00 2001 From: Araq Date: Fri, 23 May 2014 08:57:16 +0200 Subject: [PATCH 05/13] progress with futures --- compiler/ast.nim | 2 +- compiler/lowerings.nim | 27 ++++++++++++--- compiler/semexprs.nim | 12 +++++++ compiler/semmagic.nim | 11 ------- lib/pure/concurrency/threadpool.nim | 33 +++++++++---------- tests/parallel/tflowvar.nim | 17 ++++++++++ tests/{system => parallel}/tsysspawn.nim | 0 .../{system => parallel}/tsysspawnbadarg.nim | 2 +- 8 files changed, 68 insertions(+), 36 deletions(-) create mode 100644 tests/parallel/tflowvar.nim rename tests/{system => parallel}/tsysspawn.nim (100%) rename tests/{system => parallel}/tsysspawnbadarg.nim (64%) diff --git a/compiler/ast.nim b/compiler/ast.nim index 58b01d5e8a..c47407ee2b 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -605,7 +605,7 @@ const # thus cannot be overloaded (also documented in the spec!): SpecialSemMagics* = { 
mDefined, mDefinedInScope, mCompiles, mLow, mHigh, mSizeOf, mIs, mOf, - mEcho, mShallowCopy, mExpandToAst, mParallel} + mEcho, mShallowCopy, mExpandToAst, mParallel, mSpawn} type PNode* = ref TNode diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 2a1a8e577a..047bdf832c 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -86,8 +86,14 @@ proc indirectAccess*(a: PNode, b: string, info: TLineInfo): PNode = # returns a[].b as a node var deref = newNodeI(nkHiddenDeref, info) deref.typ = a.typ.skipTypes(abstractInst).sons[0] - assert deref.typ.kind == tyObject - let field = getSymFromList(deref.typ.n, getIdent(b)) + var t = deref.typ + var field: PSym + while true: + assert t.kind == tyObject + field = getSymFromList(t.n, getIdent(b)) + if field != nil: break + t = t.sons[0] + if t == nil: break assert field != nil, b addSon(deref, a) result = newNodeI(nkDotExpr, info) @@ -124,6 +130,7 @@ proc callCodegenProc*(name: string, arg1: PNode; result.add arg1 if arg2 != nil: result.add arg2 if arg3 != nil: result.add arg3 + result.typ = sym.typ.sons[0] # we have 4 cases to consider: # - a void proc --> nothing to do @@ -152,15 +159,21 @@ discard """ We generate roughly this: proc f_wrapper(args) = + barrierEnter(args.barrier) # for parallel statement var a = args.a # copy strings/seqs; thread transfer; not generated for # the 'parallel' statement var b = args.b - args.fut = createFuture(thread, sizeof(T)) # optional + args.fut = nimCreateFuture(thread, sizeof(T)) # optional + nimFutureCreateCondVar(args.fut) # optional nimArgsPassingDone() # signal parent that the work is done + # args.fut.blob = f(a, b, ...) + nimFutureSignal(args.fut) + # - or - f(a, b, ...) 
+ barrierLeave(args.barrier) # for parallel statement stmtList: var scratchObj @@ -196,8 +209,12 @@ proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode) if fut != nil: - body.add newAsgnStmt(indirectAccess(fut, - if fut.typ.futureKind==futGC: "data" else: "blob", fut.info), call) + let fk = fut.typ.sons[1].futureKind + if fk == futInvalid: + localError(f.info, "cannot create a future of type: " & + typeToString(fut.typ.sons[1])) + body.add newAsgnStmt(indirectAccess(fut, + if fk == futGC: "data" else: "blob", fut.info), call) if barrier == nil: body.add callCodeGenProc("nimFutureSignal", fut) else: diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim index 505c289ea9..4e3d2f3cef 100644 --- a/compiler/semexprs.nim +++ b/compiler/semexprs.nim @@ -1579,6 +1579,12 @@ proc semShallowCopy(c: PContext, n: PNode, flags: TExprFlags): PNode = else: result = semDirectOp(c, n, flags) +proc createFuture(c: PContext; t: PType; info: TLineInfo): PType = + result = newType(tyGenericInvokation, c.module) + addSonSkipIntLit(result, magicsys.getCompilerProc("Future").typ) + addSonSkipIntLit(result, t) + result = instGenericContainer(c, info, result, allowMetaTypes = false) + proc setMs(n: PNode, s: PSym): PNode = result = n n.sons[0] = newSymNode(s) @@ -1610,6 +1616,12 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = var x = n.lastSon if x.kind == nkDo: x = x.sons[bodyPos] result.sons[1] = semStmt(c, x) + of mSpawn: + result = setMs(n, s) + result.sons[1] = semExpr(c, n.sons[1]) + # later passes may transform the type 'Future[T]' back into 'T' + if not result[1].typ.isEmptyType: + result.typ = createFuture(c, result[1].typ, n.info) else: result = semDirectOp(c, n, flags) proc semWhen(c: PContext, n: PNode, semCheck = true): PNode = diff --git a/compiler/semmagic.nim b/compiler/semmagic.nim index 3a6bfcf676..f943e70061 100644 --- a/compiler/semmagic.nim +++ 
b/compiler/semmagic.nim @@ -115,12 +115,6 @@ proc semLocals(c: PContext, n: PNode): PNode = if it.typ.skipTypes({tyGenericInst}).kind == tyVar: a = newDeref(a) result.add(a) -proc createFuture(c: PContext; t: PType; info: TLineInfo): PType = - result = newType(tyGenericInvokation, c.module) - addSonSkipIntLit(result, magicsys.getCompilerProc("Future").typ) - addSonSkipIntLit(result, t) - result = instGenericContainer(c, info, result, allowMetaTypes = false) - proc semShallowCopy(c: PContext, n: PNode, flags: TExprFlags): PNode proc magicsAfterOverloadResolution(c: PContext, n: PNode, flags: TExprFlags): PNode = @@ -136,9 +130,4 @@ proc magicsAfterOverloadResolution(c: PContext, n: PNode, of mShallowCopy: result = semShallowCopy(c, n, flags) of mNBindSym: result = semBindSym(c, n) of mLocals: result = semLocals(c, n) - of mSpawn: - result = n - # later passes may transform the type 'Future[T]' back into 'T' - if not n[1].typ.isEmptyType: - result.typ = createFuture(c, n[1].typ, n.info) else: result = n diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim index 583c60c66d..41c1adca03 100644 --- a/lib/pure/concurrency/threadpool.nim +++ b/lib/pure/concurrency/threadpool.nim @@ -57,7 +57,7 @@ proc openBarrier*(b: ptr Barrier) {.compilerProc.} = b.cv = createCondVar() proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = - await(b.cv) + while b.counter > 0: await(b.cv) destroyCondVar(b.cv) {.pop.} @@ -136,8 +136,13 @@ proc nimFutureCreateCondVar(fut: RawFuture) {.compilerProc.} = fut.usesCondVar = true proc nimFutureSignal(fut: RawFuture) {.compilerProc.} = - assert fut.usesCondVar - signal(fut.cv) + if fut.ai != nil: + acquire(fut.ai.cv.L) + fut.ai.idx = fut.idx + inc fut.ai.cv.counter + release(fut.ai.cv.L) + signal(fut.ai.cv.c) + if fut.usesCondVar: signal(fut.cv) proc await*[T](fut: Future[T]) = ## waits until the value for the future arrives. 
@@ -147,28 +152,21 @@ proc `^`*[T](fut: Future[T]): T = ## blocks until the value is available and then returns this value. Note ## this reading is destructive for reasons of efficiency and convenience. ## This calls ``finished(fut)``. - await(fut) + if fut.usesCondVar: await(fut) when T is string or T is seq or T is ref: result = cast[T](fut.data) else: - result = fut.payload + result = fut.blob finished(fut) -proc notify*(fut: RawFuture) {.compilerproc.} = - if fut.ai != nil: - acquire(fut.ai.cv.L) - fut.ai.idx = fut.idx - inc fut.ai.cv.counter - release(fut.ai.cv.L) - signal(fut.ai.cv.c) - if fut.usesCondVar: signal(fut.cv) - proc awaitAny*(futures: openArray[RawFuture]): int = # awaits any of the given futures. Returns the index of one future for which ## a value arrived. A future only supports one call to 'awaitAny' at the ## same time. That means if you await([a,b]) and await([b,c]) the second ## call will only await 'c'. If there is no future left to be able to wait ## on, -1 is returned. + ## **Note**: This results in non-deterministic behaviour and so should be + ## avoided. var ai: AwaitInfo ai.cv = createCondVar() var conflicts = 0 @@ -245,19 +243,18 @@ proc preferSpawn*(): bool = ## it is not necessary to call this directly; use 'spawnX' instead. result = gSomeReady.counter > 0 -proc spawn*(call: stmt) {.magic: "Spawn".} +proc spawn*(call: expr): expr {.magic: "Spawn".} ## always spawns a new task, so that the 'call' is never executed on ## the calling thread. 'call' has to be proc call 'p(...)' where 'p' ## is gcsafe and has 'void' as the return type. -template spawnX*(call: stmt) = +template spawnX*(call: expr): expr = ## spawns a new task if a CPU core is ready, otherwise executes the ## call in the calling thread. Usually it is advised to ## use 'spawn' in order to not block the producer for an unknown ## amount of time. 'call' has to be proc call 'p(...)' where 'p' ## is gcsafe and has 'void' as the return type. 
- if preferSpawn(): spawn call - else: call + (if preferSpawn(): spawn call else: call) proc parallel*(body: stmt) {.magic: "Parallel".} ## a parallel section can be used to execute a block in parallel. ``body`` diff --git a/tests/parallel/tflowvar.nim b/tests/parallel/tflowvar.nim new file mode 100644 index 0000000000..77fab14b5c --- /dev/null +++ b/tests/parallel/tflowvar.nim @@ -0,0 +1,17 @@ +discard """ + output: '''foobarfoobarbazbearbazbear''' + cmd: "nimrod $target --threads:on $options $file" +""" + +import threadpool + +proc computeSomething(a, b: string): string = a & b & a & b + +proc main = + let fvA = spawn computeSomething("foo", "bar") + let fvB = spawn computeSomething("baz", "bear") + + echo(^fvA, ^fvB) + +main() +sync() diff --git a/tests/system/tsysspawn.nim b/tests/parallel/tsysspawn.nim similarity index 100% rename from tests/system/tsysspawn.nim rename to tests/parallel/tsysspawn.nim diff --git a/tests/system/tsysspawnbadarg.nim b/tests/parallel/tsysspawnbadarg.nim similarity index 64% rename from tests/system/tsysspawnbadarg.nim rename to tests/parallel/tsysspawnbadarg.nim index ce3c5611b5..120975ed54 100644 --- a/tests/system/tsysspawnbadarg.nim +++ b/tests/parallel/tsysspawnbadarg.nim @@ -1,6 +1,6 @@ discard """ line: 7 - errormsg: "'spawn' takes a call expression of type void" + errormsg: "'spawn' takes a call expression" cmd: "nimrod $target --threads:on $options $file" """ From 030eac86c05427792d3c3c00b56fbe764d783a40 Mon Sep 17 00:00:00 2001 From: Araq Date: Sun, 25 May 2014 15:19:46 +0200 Subject: [PATCH 06/13] bugfix: regionized pointers in a generic context; renamed 'Future' to 'Promise' --- compiler/ast.nim | 2 + compiler/lowerings.nim | 88 +++++++++---------- compiler/semexprs.nim | 8 +- compiler/semtypes.nim | 8 +- lib/pure/concurrency/threadpool.nim | 132 ++++++++++++++++------------ lib/system.nim | 4 +- lib/system/assign.nim | 3 +- 7 files changed, 137 insertions(+), 108 deletions(-) diff --git a/compiler/ast.nim 
b/compiler/ast.nim index c47407ee2b..c3cb63df46 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -885,6 +885,8 @@ const nkCallKinds* = {nkCall, nkInfix, nkPrefix, nkPostfix, nkCommand, nkCallStrLit, nkHiddenCallConv} + nkIdentKinds* = {nkIdent, nkSym, nkAccQuoted, nkOpenSymChoice, + nkClosedSymChoice} nkLiterals* = {nkCharLit..nkTripleStrLit} nkLambdaKinds* = {nkLambda, nkDo} diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 047bdf832c..13d4bf60ec 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -134,26 +134,26 @@ proc callCodegenProc*(name: string, arg1: PNode; # we have 4 cases to consider: # - a void proc --> nothing to do -# - a proc returning GC'ed memory --> requires a future +# - a proc returning GC'ed memory --> requires a promise # - a proc returning non GC'ed memory --> pass as hidden 'var' parameter -# - not in a parallel environment --> requires a future for memory safety +# - not in a parallel environment --> requires a promise for memory safety type TSpawnResult = enum - srVoid, srFuture, srByVar - TFutureKind = enum - futInvalid # invalid type T for 'Future[T]' - futGC # Future of a GC'ed type - futBlob # Future of a blob type + srVoid, srPromise, srByVar + TPromiseKind = enum + promInvalid # invalid type T for 'Promise[T]' + promGC # Promise of a GC'ed type + promBlob # Promise of a blob type proc spawnResult(t: PType; inParallel: bool): TSpawnResult = if t.isEmptyType: srVoid elif inParallel and not containsGarbageCollectedRef(t): srByVar - else: srFuture + else: srPromise -proc futureKind(t: PType): TFutureKind = - if t.skipTypes(abstractInst).kind in {tyRef, tyString, tySequence}: futGC - elif containsGarbageCollectedRef(t): futInvalid - else: futBlob +proc promiseKind(t: PType): TPromiseKind = + if t.skipTypes(abstractInst).kind in {tyRef, tyString, tySequence}: promGC + elif containsGarbageCollectedRef(t): promInvalid + else: promBlob discard """ We generate roughly this: @@ -164,12 +164,12 @@ proc 
f_wrapper(args) = # the 'parallel' statement var b = args.b - args.fut = nimCreateFuture(thread, sizeof(T)) # optional - nimFutureCreateCondVar(args.fut) # optional + args.prom = nimCreatePromise(thread, sizeof(T)) # optional + nimPromiseCreateCondVar(args.prom) # optional nimArgsPassingDone() # signal parent that the work is done # - args.fut.blob = f(a, b, ...) - nimFutureSignal(args.fut) + args.prom.blob = f(a, b, ...) + nimPromiseSignal(args.prom) # - or - f(a, b, ...) @@ -181,42 +181,42 @@ stmtList: scratchObj.b = b nimSpawn(f_wrapper, addr scratchObj) - scratchObj.fut # optional + scratchObj.prom # optional """ -proc createNimCreateFutureCall(fut, threadParam: PNode): PNode = - let size = newNodeIT(nkCall, fut.info, getSysType(tyInt)) +proc createNimCreatePromiseCall(prom, threadParam: PNode): PNode = + let size = newNodeIT(nkCall, prom.info, getSysType(tyInt)) size.add newSymNode(createMagic("sizeof", mSizeOf)) - assert fut.typ.kind == tyGenericInst - size.add newNodeIT(nkType, fut.info, fut.typ.sons[1]) + assert prom.typ.kind == tyGenericInst + size.add newNodeIT(nkType, prom.info, prom.typ.sons[1]) - let castExpr = newNodeIT(nkCast, fut.info, fut.typ) + let castExpr = newNodeIT(nkCast, prom.info, prom.typ) castExpr.add emptyNode - castExpr.add callCodeGenProc("nimCreateFuture", threadParam, size) - result = newFastAsgnStmt(fut, castExpr) + castExpr.add callCodeGenProc("nimCreatePromise", threadParam, size) + result = newFastAsgnStmt(prom, castExpr) proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; - varSection, call, barrier, fut: PNode): PSym = + varSection, call, barrier, prom: PNode): PSym = var body = newNodeI(nkStmtList, f.info) body.add varSection if barrier != nil: body.add callCodeGenProc("barrierEnter", barrier) - if fut != nil: - body.add createNimCreateFutureCall(fut, threadParam.newSymNode) + if prom != nil: + body.add createNimCreatePromiseCall(prom, threadParam.newSymNode) if barrier == nil: - body.add 
callCodeGenProc("nimFutureCreateCondVar", fut) + body.add callCodeGenProc("nimPromiseCreateCondVar", prom) body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode) - if fut != nil: - let fk = fut.typ.sons[1].futureKind - if fk == futInvalid: - localError(f.info, "cannot create a future of type: " & - typeToString(fut.typ.sons[1])) - body.add newAsgnStmt(indirectAccess(fut, - if fk == futGC: "data" else: "blob", fut.info), call) + if prom != nil: + let fk = prom.typ.sons[1].promiseKind + if fk == promInvalid: + localError(f.info, "cannot create a promise of type: " & + typeToString(prom.typ.sons[1])) + body.add newAsgnStmt(indirectAccess(prom, + if fk == promGC: "data" else: "blob", prom.info), call) if barrier == nil: - body.add callCodeGenProc("nimFutureSignal", fut) + body.add callCodeGenProc("nimPromiseSignal", prom) else: body.add call if barrier != nil: @@ -381,7 +381,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; of srVoid: internalAssert dest == nil result = newNodeI(nkStmtList, n.info) - of srFuture: + of srPromise: internalAssert dest == nil result = newNodeIT(nkStmtListExpr, n.info, retType) of srByVar: @@ -450,17 +450,17 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; result.add newFastAsgnStmt(newDotExpr(scratchObj, field), barrier) barrierAsExpr = indirectAccess(castExpr, field, n.info) - var futField, futAsExpr: PNode = nil - if spawnKind == srFuture: - var field = newSym(skField, getIdent"fut", owner, n.info) + var promField, promAsExpr: PNode = nil + if spawnKind == srPromise: + var field = newSym(skField, getIdent"prom", owner, n.info) field.typ = retType objType.addField(field) - futField = newDotExpr(scratchObj, field) - futAsExpr = indirectAccess(castExpr, field, n.info) + promField = newDotExpr(scratchObj, field) + promAsExpr = indirectAccess(castExpr, field, n.info) let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call, - barrierAsExpr, futAsExpr) + barrierAsExpr, 
promAsExpr) result.add callCodeGenProc("nimSpawn", wrapper.newSymNode, genAddrOf(scratchObj.newSymNode)) - if spawnKind == srFuture: result.add futField + if spawnKind == srPromise: result.add promField diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim index 4e3d2f3cef..8f4cce547a 100644 --- a/compiler/semexprs.nim +++ b/compiler/semexprs.nim @@ -1579,9 +1579,9 @@ proc semShallowCopy(c: PContext, n: PNode, flags: TExprFlags): PNode = else: result = semDirectOp(c, n, flags) -proc createFuture(c: PContext; t: PType; info: TLineInfo): PType = +proc createPromise(c: PContext; t: PType; info: TLineInfo): PType = result = newType(tyGenericInvokation, c.module) - addSonSkipIntLit(result, magicsys.getCompilerProc("Future").typ) + addSonSkipIntLit(result, magicsys.getCompilerProc("Promise").typ) addSonSkipIntLit(result, t) result = instGenericContainer(c, info, result, allowMetaTypes = false) @@ -1619,9 +1619,9 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = of mSpawn: result = setMs(n, s) result.sons[1] = semExpr(c, n.sons[1]) - # later passes may transform the type 'Future[T]' back into 'T' + # later passes may transform the type 'Promise[T]' back into 'T' if not result[1].typ.isEmptyType: - result.typ = createFuture(c, result[1].typ, n.info) + result.typ = createPromise(c, result[1].typ, n.info) else: result = semDirectOp(c, n, flags) proc semWhen(c: PContext, n: PNode, semCheck = true): PNode = diff --git a/compiler/semtypes.nim b/compiler/semtypes.nim index 8fcb6ea997..bb81cbe749 100644 --- a/compiler/semtypes.nim +++ b/compiler/semtypes.nim @@ -1084,8 +1084,10 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType = of nkCallKinds: if isRange(n): result = semRangeAux(c, n, prev) - elif n[0].kind == nkIdent: - let op = n.sons[0].ident + elif n[0].kind notin nkIdentKinds: + result = semTypeExpr(c, n) + else: + let op = considerAcc(n.sons[0]) if op.id in {ord(wAnd), ord(wOr)} or op.s == "|": checkSonsLen(n, 3) var @@ -1120,8 
+1122,6 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType = result = semAnyRef(c, n, tyRef, prev) else: result = semTypeExpr(c, n) - else: - result = semTypeExpr(c, n) of nkWhenStmt: var whenResult = semWhen(c, n, false) if whenResult.kind == nkStmtList: whenResult.kind = nkStmtListType diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim index 41c1adca03..24cb9ccdd2 100644 --- a/lib/pure/concurrency/threadpool.nim +++ b/lib/pure/concurrency/threadpool.nim @@ -65,12 +65,14 @@ proc closeBarrier*(b: ptr Barrier) {.compilerProc.} = # ---------------------------------------------------------------------------- type + foreign* = object ## a region that indicates the pointer comes from a + ## foreign thread heap. AwaitInfo = object cv: CondVar idx: int - RawFuture* = ptr RawFutureObj ## untyped base class for 'Future[T]' - RawFutureObj {.inheritable.} = object # \ + RawPromise* = ptr RawPromiseObj ## untyped base class for 'Promise[T]' + RawPromiseObj {.inheritable.} = object # \ # we allocate this with the thread local allocator; this # is possible since we already need to do the GC_unref # on the owning thread @@ -81,10 +83,10 @@ type idx: int data: PObject # we incRef and unref it to keep it alive owner: ptr Worker - next: RawFuture + next: RawPromise align: float64 # a float for proper alignment - Future* {.compilerProc.} [T] = ptr object of RawFutureObj + Promise* {.compilerProc.} [T] = ptr object of RawPromiseObj blob: T ## the underlying value, if available. Note that usually ## you should not access this field directly! However it can ## sometimes be more efficient than getting the value via ``^``. @@ -99,24 +101,24 @@ type ready: bool # put it here for correct alignment! 
initialized: bool # whether it has even been initialized shutdown: bool # the pool requests to shut down this worker thread - futureLock: TLock - head: RawFuture + promiseLock: TLock + head: RawPromise -proc finished*(fut: RawFuture) = - ## This MUST be called for every created future to free its associated +proc finished*(prom: RawPromise) = + ## This MUST be called for every created promise to free its associated ## resources. Note that the default reading operation ``^`` is destructive ## and calls ``finished``. - doAssert fut.ai.isNil, "future is still attached to an 'awaitAny'" - assert fut.next == nil - let w = fut.owner - acquire(w.futureLock) - fut.next = w.head - w.head = fut - release(w.futureLock) + doAssert prom.ai.isNil, "promise is still attached to an 'awaitAny'" + assert prom.next == nil + let w = prom.owner + acquire(w.promiseLock) + prom.next = w.head + w.head = prom + release(w.promiseLock) -proc cleanFutures(w: ptr Worker) = +proc cleanPromises(w: ptr Worker) = var it = w.head - acquire(w.futureLock) + acquire(w.promiseLock) while it != nil: let nxt = it.next if it.usesCondVar: destroyCondVar(it.cv) @@ -124,62 +126,84 @@ proc cleanFutures(w: ptr Worker) = dealloc(it) it = nxt w.head = nil - release(w.futureLock) + release(w.promiseLock) -proc nimCreateFuture(owner: pointer; blobSize: int): RawFuture {. +proc nimCreatePromise(owner: pointer; blobSize: int): RawPromise {. 
compilerProc.} = - result = cast[RawFuture](alloc0(RawFutureObj.sizeof + blobSize)) + result = cast[RawPromise](alloc0(RawPromiseObj.sizeof + blobSize)) result.owner = cast[ptr Worker](owner) -proc nimFutureCreateCondVar(fut: RawFuture) {.compilerProc.} = - fut.cv = createCondVar() - fut.usesCondVar = true +proc nimPromiseCreateCondVar(prom: RawPromise) {.compilerProc.} = + prom.cv = createCondVar() + prom.usesCondVar = true -proc nimFutureSignal(fut: RawFuture) {.compilerProc.} = - if fut.ai != nil: - acquire(fut.ai.cv.L) - fut.ai.idx = fut.idx - inc fut.ai.cv.counter - release(fut.ai.cv.L) - signal(fut.ai.cv.c) - if fut.usesCondVar: signal(fut.cv) +proc nimPromiseSignal(prom: RawPromise) {.compilerProc.} = + if prom.ai != nil: + acquire(prom.ai.cv.L) + prom.ai.idx = prom.idx + inc prom.ai.cv.counter + release(prom.ai.cv.L) + signal(prom.ai.cv.c) + if prom.usesCondVar: signal(prom.cv) -proc await*[T](fut: Future[T]) = - ## waits until the value for the future arrives. - if fut.usesCondVar: await(fut.cv) +proc await*[T](prom: Promise[T]) = + ## waits until the value for the promise arrives. + if prom.usesCondVar: await(prom.cv) -proc `^`*[T](fut: Future[T]): T = +proc awaitAndThen*[T](prom: Promise[T]; action: proc (x: T) {.closure.}) = + ## blocks until the value is available and then passes this value + ## to ``action``. Note that due to Nimrod's parameter passing semantics this + ## means that ``T`` doesn't need to be copied and so ``awaitAndThen`` can + ## sometimes be more efficient than ``^``. + if prom.usesCondVar: await(prom) + when T is string or T is seq: + action(cast[T](prom.data)) + elif T is ref: + {.error: "'awaitAndThen' not available for Promise[ref]".} + else: + action(prom.blob) + finished(prom) + +proc `^`*[T](prom: Promise[ref T]): foreign ptr T = ## blocks until the value is available and then returns this value. Note ## this reading is destructive for reasons of efficiency and convenience. - ## This calls ``finished(fut)``. 
- if fut.usesCondVar: await(fut) - when T is string or T is seq or T is ref: - result = cast[T](fut.data) - else: - result = fut.blob - finished(fut) + ## This calls ``finished(prom)``. + if prom.usesCondVar: await(prom) + result = cast[foreign ptr T](prom.data) + finished(prom) -proc awaitAny*(futures: openArray[RawFuture]): int = - # awaits any of the given futures. Returns the index of one future for which - ## a value arrived. A future only supports one call to 'awaitAny' at the +proc `^`*[T](prom: Promise[T]): T = + ## blocks until the value is available and then returns this value. Note + ## this reading is destructive for reasons of efficiency and convenience. + ## This calls ``finished(prom)``. + if prom.usesCondVar: await(prom) + when T is string or T is seq: + result = cast[T](prom.data) + else: + result = prom.blob + finished(prom) + +proc awaitAny*(promises: openArray[RawPromise]): int = + # awaits any of the given promises. Returns the index of one promise for which + ## a value arrived. A promise only supports one call to 'awaitAny' at the ## same time. That means if you await([a,b]) and await([b,c]) the second - ## call will only await 'c'. If there is no future left to be able to wait + ## call will only await 'c'. If there is no promise left to be able to wait ## on, -1 is returned. ## **Note**: This results in non-deterministic behaviour and so should be ## avoided. var ai: AwaitInfo ai.cv = createCondVar() var conflicts = 0 - for i in 0 .. futures.high: - if cas(addr futures[i].ai, nil, addr ai): - futures[i].idx = i + for i in 0 .. promises.high: + if cas(addr promises[i].ai, nil, addr ai): + promises[i].idx = i else: inc conflicts - if conflicts < futures.len: + if conflicts < promises.len: await(ai.cv) result = ai.idx - for i in 0 .. futures.high: - discard cas(addr futures[i].ai, addr ai, nil) + for i in 0 .. 
promises.high: + discard cas(addr promises[i].ai, addr ai, nil) else: result = -1 destroyCondVar(ai.cv) @@ -207,7 +231,7 @@ proc slave(w: ptr Worker) {.thread.} = await(w.taskArrived) assert(not w.ready) w.f(w, w.data) - if w.head != nil: w.cleanFutures + if w.head != nil: w.cleanPromises if w.shutdown: w.shutdown = false atomicDec currentPoolSize @@ -228,7 +252,7 @@ var proc activateThread(i: int) {.noinline.} = workersData[i].taskArrived = createCondVar() workersData[i].taskStarted = createCondVar() - initLock workersData[i].futureLock + initLock workersData[i].promiseLock workersData[i].initialized = true createThread(workers[i], slave, addr(workersData[i])) diff --git a/lib/system.nim b/lib/system.nim index fbd905afab..fc6f617a59 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -42,7 +42,6 @@ type cstring* {.magic: Cstring.} ## built-in cstring (*compatible string*) type pointer* {.magic: Pointer.} ## built-in pointer type, use the ``addr`` ## operator to get a pointer to a variable - const on* = true ## alias for ``true`` off* = false ## alias for ``false`` @@ -51,6 +50,9 @@ const type Ordinal* {.magic: Ordinal.}[T] + `ptr`* {.magic: Pointer.}[T] ## built-in generic untraced pointer type + `ref`* {.magic: Pointer.}[T] ## built-in generic traced pointer type + `nil` {.magic: "Nil".} expr* {.magic: Expr.} ## meta type to denote an expression (for templates) stmt* {.magic: Stmt.} ## meta type to denote a statement (for templates) diff --git a/lib/system/assign.nim b/lib/system/assign.nim index 75c7496331..2ae945fb1c 100644 --- a/lib/system/assign.nim +++ b/lib/system/assign.nim @@ -179,7 +179,8 @@ when not defined(nimmixin): # internal proc used for destroying sequences and arrays for i in countup(0, r.len - 1): destroy(r[i]) else: - # XXX Why is this exported and no compilerproc? + # XXX Why is this exported and no compilerproc? 
-> compilerprocs cannot be + # generic for now proc nimDestroyRange*[T](r: T) = # internal proc used for destroying sequences and arrays mixin destroy From f12a0820e0e7e5c32378bb56b8d0d2591fc71ae5 Mon Sep 17 00:00:00 2001 From: Araq Date: Thu, 29 May 2014 13:19:26 +0200 Subject: [PATCH 07/13] added 'sortoutput' option to make output deterministic for threading tests --- tests/testament/specs.nim | 4 +++- tests/testament/tester.nim | 12 ++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/testament/specs.nim b/tests/testament/specs.nim index 225ea18910..6e72f4b5e3 100644 --- a/tests/testament/specs.nim +++ b/tests/testament/specs.nim @@ -46,7 +46,7 @@ type msg*: string ccodeCheck*: string err*: TResultEnum - substr*: bool + substr*, sortoutput*: bool targets*: set[TTarget] const @@ -113,6 +113,8 @@ proc parseSpec*(filename: string): TSpec = result.action = actionRun result.outp = e.value result.substr = true + of "sortoutput": + result.sortoutput = parseCfgBool(e.value) of "exitcode": discard parseInt(e.value, result.exitCode) of "msg": diff --git a/tests/testament/tester.nim b/tests/testament/tester.nim index 50d0e6eac9..adf9785e0b 100644 --- a/tests/testament/tester.nim +++ b/tests/testament/tester.nim @@ -11,7 +11,8 @@ import parseutils, strutils, pegs, os, osproc, streams, parsecfg, json, - marshal, backend, parseopt, specs, htmlgen, browsers, terminal + marshal, backend, parseopt, specs, htmlgen, browsers, terminal, sequtils, + algorithm const resultsFile = "testresults.html" @@ -150,6 +151,11 @@ proc codegenCheck(test: TTest, check: string, given: var TSpec) = except EIO: given.err = reCodeNotFound +proc makeDeterministic(s: string): string = + var x = toSeq(s.lines) + sort(x, system.cmp) + result = join(x, "\n") + proc testSpec(r: var TResults, test: TTest) = # major entry point for a single test let tname = test.name.addFileExt(".nim") @@ -191,7 +197,9 @@ proc testSpec(r: var TResults, test: TTest) = r.addResult(test, "exitcode: 
" & $expected.exitCode, "exitcode: " & $exitCode, reExitCodesDiffer) else: - if strip(buf.string) != strip(expected.outp): + var bufB = strip(buf.string) + if expected.sortoutput: bufB = makeDeterministic(bufB) + if bufB != strip(expected.outp): if not (expected.substr and expected.outp in buf.string): given.err = reOutputsDiffer if given.err == reSuccess: From 6470bd8f87b860c555556a2a965f6c8077e993ad Mon Sep 17 00:00:00 2001 From: Araq Date: Thu, 29 May 2014 13:27:45 +0200 Subject: [PATCH 08/13] 'parallel' proves array bounds --- compiler/guards.nim | 67 ++++++++++++++++++++++++++++++++++--- compiler/semparallel.nim | 30 +++++++++++------ tests/parallel/tforstmt.nim | 24 +++++++++++++ 3 files changed, 107 insertions(+), 14 deletions(-) create mode 100644 tests/parallel/tforstmt.nim diff --git a/compiler/guards.nim b/compiler/guards.nim index 3df3bd1a81..ec5adb4da4 100644 --- a/compiler/guards.nim +++ b/compiler/guards.nim @@ -246,6 +246,7 @@ proc canon*(n: PNode): PNode = result.sons[0] = opLen.newSymNode else: discard + result = skipConv(result) result = reassociation(result) # most important rule: (x-4) < a.len --> x < a.len+4 case result.getMagic @@ -672,7 +673,9 @@ proc simpleSlice*(a, b: PNode): BiggestInt = else: result = -1 -proc ple(m: TModel; a, b: PNode): TImplication = +proc pleViaModel(model: TModel; aa, bb: PNode): TImplication + +proc ple(m: TModel; a, b: PNode): TImplication = template `<=?`(a,b): expr = ple(m,a,b) == impYes # 0 <= 3 if a.isValue and b.isValue: @@ -717,12 +720,68 @@ proc ple(m: TModel; a, b: PNode): TImplication = if a[1] <=? b or a[2] <=? b: return impYes # use the knowledge base: - return doesImply(m, opLe.buildCall(a, b)) + return pleViaModel(m, a, b) + #return doesImply(m, opLe.buildCall(a, b)) + +type TReplacements = seq[tuple[a,b: PNode]] + +proc replaceSubTree(n, x, by: PNode): PNode = + if sameTree(n, x): + result = by + elif hasSubTree(n, x): + result = shallowCopy(n) + for i in 0 .. 
safeLen(n)-1: + result.sons[i] = replaceSubTree(n.sons[i], x, by) + else: + result = n + +proc applyReplacements(n: PNode; rep: TReplacements): PNode = + result = n + for x in rep: result = result.replaceSubTree(x.a, x.b) + +proc pleViaModelRec(m: var TModel; a, b: PNode): TImplication = + # now check for inferrable facts: a <= b and b <= c implies a <= c + for i in 0..m.high: + let fact = m[i] + if fact != nil and fact.getMagic in someLe: + # x <= y implies a <= b if a <= x and y <= b + let x = fact[1] + let y = fact[2] + # mark as used: + m[i] = nil + if ple(m, a, x) == impYes: + if ple(m, y, b) == impYes: return impYes + #if pleViaModelRec(m, y, b): return impYes + +proc pleViaModel(model: TModel; aa, bb: PNode): TImplication = + # compute replacements: + var replacements: TReplacements = @[] + for fact in model: + if fact != nil and fact.getMagic in someEq: + let a = fact[1] + let b = fact[2] + if a.kind == nkSym: replacements.add((a,b)) + else: replacements.add((b,a)) + var m: TModel + var a = aa + var b = bb + if replacements.len > 0: + m = @[] + # make the other facts consistent: + for fact in model: + if fact != nil and fact.getMagic notin someEq: + # XXX 'canon' should not be necessary here, but it is + m.add applyReplacements(fact, replacements).canon + a = applyReplacements(aa, replacements) + b = applyReplacements(bb, replacements) + else: + # we have to make a copy here, because the model will be modified: + m = model + result = pleViaModelRec(m, a, b) proc proveLe*(m: TModel; a, b: PNode): TImplication = - #echo "ROOT ", renderTree(a), " <=? ", b.rendertree let x = canon(opLe.buildCall(a, b)) - #echo renderTree(res) + #echo "ROOT ", renderTree(x[1]), " <=? 
", renderTree(x[2]) result = ple(m, x[1], x[2]) if result == impUnknown: # try an alternative: a <= b iff not (b < a) iff not (b+1 <= a): diff --git a/compiler/semparallel.nim b/compiler/semparallel.nim index b135420382..678ffd2fbb 100644 --- a/compiler/semparallel.nim +++ b/compiler/semparallel.nim @@ -59,7 +59,8 @@ type TDirection = enum ascending, descending MonotonicVar = object - v: PSym + v, alias: PSym # to support the ordinary 'countup' iterator + # we need to detect aliases lower, upper, stride: PNode dir: TDirection blacklisted: bool # blacklisted variables that are not monotonic @@ -83,7 +84,7 @@ proc initAnalysisCtx(): AnalysisCtx = proc lookupSlot(c: AnalysisCtx; s: PSym): int = for i in 0.. 1: - addFact(c.guards, branch.sons[0]) + addFact(c.guards, canon(branch.sons[0])) for i in 0 .. Date: Fri, 30 May 2014 13:15:54 +0200 Subject: [PATCH 09/13] correct code generation for tforstmt --- compiler/lowerings.nim | 54 +++++++++++++++++++++++-------------- tests/parallel/tforstmt.nim | 5 ++-- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 13d4bf60ec..5636d423f2 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -160,8 +160,8 @@ We generate roughly this: proc f_wrapper(args) = barrierEnter(args.barrier) # for parallel statement - var a = args.a # copy strings/seqs; thread transfer; not generated for - # the 'parallel' statement + var a = args.a # thread transfer; deepCopy or shallowCopy or no copy + # depending on whether we're in a 'parallel' statement var b = args.b args.prom = nimCreatePromise(thread, sizeof(T)) # optional @@ -199,9 +199,9 @@ proc createNimCreatePromiseCall(prom, threadParam: PNode): PNode = proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; varSection, call, barrier, prom: PNode): PSym = var body = newNodeI(nkStmtList, f.info) - body.add varSection if barrier != nil: body.add callCodeGenProc("barrierEnter", barrier) + body.add varSection if 
prom != nil: body.add createNimCreatePromiseCall(prom, threadParam.newSymNode) if barrier == nil: @@ -248,6 +248,17 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode = result.typ = newType(tyPtr, objType.owner) result.typ.rawAddSon(objType) +proc addLocalVar(varSection: PNode; owner: PSym; typ: PType; v: PNode): PSym = + result = newSym(skTemp, getIdent(genPrefix), owner, varSection.info) + result.typ = typ + incl(result.flags, sfFromGeneric) + + var vpart = newNodeI(nkIdentDefs, varSection.info, 3) + vpart.sons[0] = newSymNode(result) + vpart.sons[1] = ast.emptyNode + vpart.sons[2] = v + varSection.add vpart + proc setupArgsForConcurrency(n: PNode; objType: PType; scratchObj: PSym, castExpr, call, varSection, result: PNode) = let formals = n[0].typ.n @@ -267,16 +278,8 @@ proc setupArgsForConcurrency(n: PNode; objType: PType; scratchObj: PSym, objType.addField(field) result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n[i]) - var temp = newSym(skTemp, tmpName, objType.owner, n.info) - temp.typ = argType - incl(temp.flags, sfFromGeneric) - - var vpart = newNodeI(nkIdentDefs, n.info, 3) - vpart.sons[0] = newSymNode(temp) - vpart.sons[1] = ast.emptyNode - vpart.sons[2] = indirectAccess(castExpr, field, n.info) - varSection.add vpart - + let temp = addLocalVar(varSection, objType.owner, argType, + indirectAccess(castExpr, field, n.info)) call.add(newSymNode(temp)) proc getRoot*(n: PNode): PSym = @@ -310,9 +313,11 @@ proc genHigh(n: PNode): PNode = result.sons[1] = n proc setupArgsForParallelism(n: PNode; objType: PType; scratchObj: PSym; - castExpr, call, result: PNode) = + castExpr, call, varSection, result: PNode) = let formals = n[0].typ.n let tmpName = getIdent(genPrefix) + # we need to copy the foreign scratch object fields into local variables + # for correctness: These are called 'threadLocal' here. for i in 1 .. 
16) and n.getRoot != nil: @@ -364,13 +374,17 @@ proc setupArgsForParallelism(n: PNode; objType: PType; scratchObj: PSym; field.typ = a.typ objType.addField(field) result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a) - call.add(genDeref(indirectAccess(castExpr, field, n.info))) + let threadLocal = addLocalVar(varSection, objType.owner, field.typ, + indirectAccess(castExpr, field, n.info)) + call.add(genDeref(threadLocal.newSymNode)) else: # boring case field.typ = argType objType.addField(field) result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n) - call.add(indirectAccess(castExpr, field, n.info)) + let threadLocal = addLocalVar(varSection, objType.owner, field.typ, + indirectAccess(castExpr, field, n.info)) + call.add(threadLocal.newSymNode) proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; barrier, dest: PNode = nil): PNode = @@ -438,7 +452,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; if barrier.isNil: setupArgsForConcurrency(n, objType, scratchObj, castExpr, call, varSection, result) else: - setupArgsForParallelism(n, objType, scratchObj, castExpr, call, result) + setupArgsForParallelism(n, objType, scratchObj, castExpr, call, varSection, result) var barrierAsExpr: PNode = nil if barrier != nil: diff --git a/tests/parallel/tforstmt.nim b/tests/parallel/tforstmt.nim index 35f28759e4..58de833f3e 100644 --- a/tests/parallel/tforstmt.nim +++ b/tests/parallel/tforstmt.nim @@ -7,14 +7,15 @@ discard """ sortoutput: true """ -import threadpool, math +import threadpool, os proc p(x: int) = + os.sleep(100 - x*10) echo x proc testFor(a, b: int; foo: var openArray[int]) = parallel: - for i in max(a, 0) .. min(b, foo.len-1): + for i in max(a, 0) .. 
min(b, foo.high): spawn p(foo[i]) var arr = [0, 1, 2, 3, 4, 5, 6, 7] From bea1761da1195acb883b34105ec9a834f2a10c2e Mon Sep 17 00:00:00 2001 From: Araq Date: Fri, 30 May 2014 17:04:39 +0200 Subject: [PATCH 10/13] tester works again --- tests/parallel/tsysspawnbadarg.nim | 4 ++-- tests/testament/tester.nim | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/parallel/tsysspawnbadarg.nim b/tests/parallel/tsysspawnbadarg.nim index 120975ed54..ad798a7d35 100644 --- a/tests/parallel/tsysspawnbadarg.nim +++ b/tests/parallel/tsysspawnbadarg.nim @@ -1,9 +1,9 @@ discard """ - line: 7 + line: 9 errormsg: "'spawn' takes a call expression" cmd: "nimrod $target --threads:on $options $file" """ import threadpool -spawn(1) +let foo = spawn(1) diff --git a/tests/testament/tester.nim b/tests/testament/tester.nim index adf9785e0b..fc6b4ff95c 100644 --- a/tests/testament/tester.nim +++ b/tests/testament/tester.nim @@ -11,7 +11,7 @@ import parseutils, strutils, pegs, os, osproc, streams, parsecfg, json, - marshal, backend, parseopt, specs, htmlgen, browsers, terminal, sequtils, + marshal, backend, parseopt, specs, htmlgen, browsers, terminal, algorithm const @@ -152,7 +152,7 @@ proc codegenCheck(test: TTest, check: string, given: var TSpec) = given.err = reCodeNotFound proc makeDeterministic(s: string): string = - var x = toSeq(s.lines) + var x = splitLines(s) sort(x, system.cmp) result = join(x, "\n") @@ -200,7 +200,7 @@ proc testSpec(r: var TResults, test: TTest) = var bufB = strip(buf.string) if expected.sortoutput: bufB = makeDeterministic(bufB) if bufB != strip(expected.outp): - if not (expected.substr and expected.outp in buf.string): + if not (expected.substr and expected.outp in bufB): given.err = reOutputsDiffer if given.err == reSuccess: codeGenCheck(test, expected.ccodeCheck, given) From 9953e0bbca92d81e41a5ca39981b02596027f236 Mon Sep 17 00:00:00 2001 From: Araq Date: Sat, 31 May 2014 01:16:16 +0200 Subject: [PATCH 11/13] tdisjoint_slice2 works --- 
compiler/ccgcalls.nim | 2 +- compiler/guards.nim | 8 ++++++++ compiler/lowerings.nim | 2 +- compiler/semparallel.nim | 2 +- tests/parallel/tdisjoint_slice2.nim | 20 ++++++++++++++++---- 5 files changed, 27 insertions(+), 7 deletions(-) diff --git a/compiler/ccgcalls.nim b/compiler/ccgcalls.nim index a7840305dd..71e23aa1dd 100644 --- a/compiler/ccgcalls.nim +++ b/compiler/ccgcalls.nim @@ -86,7 +86,7 @@ proc openArrayLoc(p: BProc, n: PNode): PRope = initLocExpr(p, q[2], b) initLocExpr(p, q[3], c) let fmt = - case skipTypes(a.t, abstractVar).kind + case skipTypes(a.t, abstractVar+{tyPtr}).kind of tyOpenArray, tyVarargs, tyArray, tyArrayConstr: "($1)+($2), ($3)-($2)+1" of tyString, tySequence: diff --git a/compiler/guards.nim b/compiler/guards.nim index ec5adb4da4..813a300146 100644 --- a/compiler/guards.nim +++ b/compiler/guards.nim @@ -752,6 +752,14 @@ proc pleViaModelRec(m: var TModel; a, b: PNode): TImplication = if ple(m, a, x) == impYes: if ple(m, y, b) == impYes: return impYes #if pleViaModelRec(m, y, b): return impYes + # fact: 16 <= i + # x y + # question: i <= 15? no! 
+ result = impliesLe(fact, a, b) + if result != impUnknown: return result + if sameTree(y, a): + result = ple(m, x, b) + if result != impUnknown: return result proc pleViaModel(model: TModel; aa, bb: PNode): TImplication = # compute replacements: diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index 5636d423f2..b159502dc6 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -338,7 +338,7 @@ proc setupArgsForParallelism(n: PNode; objType: PType; scratchObj: PSym; objType.addField(fieldB) if getMagic(n) == mSlice: - let a = genAddrOf(n[0]) + let a = genAddrOf(n[1]) field.typ = a.typ objType.addField(field) result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a) diff --git a/compiler/semparallel.nim b/compiler/semparallel.nim index 678ffd2fbb..72def1137b 100644 --- a/compiler/semparallel.nim +++ b/compiler/semparallel.nim @@ -166,7 +166,7 @@ proc overlap(m: TModel; x,y,c,d: PNode) = of impUnknown: localError(x.info, "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" % - [?y, ?d, ?x, ?y, ?c, ?d]) + [?c, ?y, ?x, ?y, ?c, ?d]) of impYes: localError(x.info, "($#)..($#) not disjoint from ($#)..($#)" % [?x, ?y, ?c, ?d]) of impNo: discard diff --git a/tests/parallel/tdisjoint_slice2.nim b/tests/parallel/tdisjoint_slice2.nim index b26559fc21..1e86ea644a 100644 --- a/tests/parallel/tdisjoint_slice2.nim +++ b/tests/parallel/tdisjoint_slice2.nim @@ -1,3 +1,15 @@ +discard """ + output: '''0 +1 +2 +3 +4 +5 +6 +7 +8''' + sortoutput: true +""" import threadpool @@ -7,12 +19,12 @@ proc f(a: openArray[int]) = proc f(a: int) = echo a proc main() = - var a: array[0..30, int] + var a: array[0..9, int] = [0,1,2,3,4,5,6,7,8,9] parallel: - spawn f(a[0..15]) + spawn f(a[0..2]) #spawn f(a[16..30]) - var i = 16 - while i <= 29: + var i = 3 + while i <= 8: spawn f(a[i]) spawn f(a[i+1]) inc i, 2 From 40baebebfe425f03fbdd41da4c6d2e4c6778d241 Mon Sep 17 00:00:00 2001 From: Araq Date: Sun, 1 Jun 2014 01:45:44 +0200 Subject: [PATCH 12/13] pi 
test compiles, but crashes randomly --- compiler/lowerings.nim | 18 ++++++++++++++---- compiler/semdata.nim | 1 + compiler/semexprs.nim | 8 ++++++-- tests/parallel/tpi.nim | 22 ++++++++++++++++++++++ 4 files changed, 43 insertions(+), 6 deletions(-) create mode 100644 tests/parallel/tpi.nim diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index b159502dc6..d370f21f05 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -197,18 +197,21 @@ proc createNimCreatePromiseCall(prom, threadParam: PNode): PNode = result = newFastAsgnStmt(prom, castExpr) proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; - varSection, call, barrier, prom: PNode): PSym = + varSection, call, barrier, prom: PNode; + spawnKind: TSpawnResult): PSym = var body = newNodeI(nkStmtList, f.info) if barrier != nil: body.add callCodeGenProc("barrierEnter", barrier) body.add varSection - if prom != nil: + if prom != nil and spawnKind != srByVar: body.add createNimCreatePromiseCall(prom, threadParam.newSymNode) if barrier == nil: body.add callCodeGenProc("nimPromiseCreateCondVar", prom) body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode) - if prom != nil: + if spawnKind == srByVar: + body.add newAsgnStmt(genDeref(prom), call) + elif prom != nil: let fk = prom.typ.sons[1].promiseKind if fk == promInvalid: localError(f.info, "cannot create a promise of type: " & @@ -471,9 +474,16 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType; objType.addField(field) promField = newDotExpr(scratchObj, field) promAsExpr = indirectAccess(castExpr, field, n.info) + elif spawnKind == srByVar: + var field = newSym(skField, getIdent"prom", owner, n.info) + field.typ = newType(tyPtr, objType.owner) + field.typ.rawAddSon(retType) + objType.addField(field) + promAsExpr = indirectAccess(castExpr, field, n.info) + result.add newFastAsgnStmt(newDotExpr(scratchObj, field), genAddrOf(dest)) let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, 
call, - barrierAsExpr, promAsExpr) + barrierAsExpr, promAsExpr, spawnKind) result.add callCodeGenProc("nimSpawn", wrapper.newSymNode, genAddrOf(scratchObj.newSymNode)) diff --git a/compiler/semdata.nim b/compiler/semdata.nim index 987a70a419..19181d98e0 100644 --- a/compiler/semdata.nim +++ b/compiler/semdata.nim @@ -91,6 +91,7 @@ type generics*: seq[TInstantiationPair] # pending list of instantiated generics to compile lastGenericIdx*: int # used for the generics stack hloLoopDetector*: int # used to prevent endless loops in the HLO + inParallelStmt*: int proc makeInstPair*(s: PSym, inst: PInstantiation): TInstantiationPair = result.genericSym = s diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim index 8f4cce547a..e507e711f3 100644 --- a/compiler/semexprs.nim +++ b/compiler/semexprs.nim @@ -1615,13 +1615,17 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode = result = setMs(n, s) var x = n.lastSon if x.kind == nkDo: x = x.sons[bodyPos] + inc c.inParallelStmt result.sons[1] = semStmt(c, x) + dec c.inParallelStmt of mSpawn: result = setMs(n, s) result.sons[1] = semExpr(c, n.sons[1]) - # later passes may transform the type 'Promise[T]' back into 'T' if not result[1].typ.isEmptyType: - result.typ = createPromise(c, result[1].typ, n.info) + if c.inParallelStmt > 0: + result.typ = result[1].typ + else: + result.typ = createPromise(c, result[1].typ, n.info) else: result = semDirectOp(c, n, flags) proc semWhen(c: PContext, n: PNode, semCheck = true): PNode = diff --git a/tests/parallel/tpi.nim b/tests/parallel/tpi.nim new file mode 100644 index 0000000000..de5aa9a514 --- /dev/null +++ b/tests/parallel/tpi.nim @@ -0,0 +1,22 @@ + +import strutils, math, threadpool + +proc term(k: float): float = 4 * math.pow(-1, k) / (2*k + 1) + +proc piU(n: int): float = + var ch = newSeq[Promise[float]](n+1) + for k in 0..n: + ch[k] = spawn term(float(k)) + for k in 0..n: + result += ^ch[k] + +proc piS(n: int): float = + var ch = newSeq[float](n+1) + 
parallel: + for k in 0..ch.high: + ch[k] = spawn term(float(k)) + for k in 0..ch.high: + result += ch[k] + +echo formatFloat(piU(5000)) +echo formatFloat(piS(5000)) From e6d12f3f6ee933f295dd83a64f5f0e6eba77e1d1 Mon Sep 17 00:00:00 2001 From: Araq Date: Sun, 1 Jun 2014 15:02:13 +0200 Subject: [PATCH 13/13] fixed codegen for return values --- compiler/lowerings.nim | 27 +++++++++++++++------------ tests/parallel/tpi.nim | 4 ++++ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/compiler/lowerings.nim b/compiler/lowerings.nim index d370f21f05..df2816a0e0 100644 --- a/compiler/lowerings.nim +++ b/compiler/lowerings.nim @@ -155,6 +155,17 @@ proc promiseKind(t: PType): TPromiseKind = elif containsGarbageCollectedRef(t): promInvalid else: promBlob +proc addLocalVar(varSection: PNode; owner: PSym; typ: PType; v: PNode): PSym = + result = newSym(skTemp, getIdent(genPrefix), owner, varSection.info) + result.typ = typ + incl(result.flags, sfFromGeneric) + + var vpart = newNodeI(nkIdentDefs, varSection.info, 3) + vpart.sons[0] = newSymNode(result) + vpart.sons[1] = ast.emptyNode + vpart.sons[2] = v + varSection.add vpart + discard """ We generate roughly this: @@ -202,6 +213,9 @@ proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; var body = newNodeI(nkStmtList, f.info) if barrier != nil: body.add callCodeGenProc("barrierEnter", barrier) + var threadLocalProm: PSym + if spawnKind == srByVar: + threadLocalProm = addLocalVar(varSection, argsParam.owner, prom.typ, prom) body.add varSection if prom != nil and spawnKind != srByVar: body.add createNimCreatePromiseCall(prom, threadParam.newSymNode) @@ -210,7 +224,7 @@ proc createWrapperProc(f: PNode; threadParam, argsParam: PSym; body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode) if spawnKind == srByVar: - body.add newAsgnStmt(genDeref(prom), call) + body.add newAsgnStmt(genDeref(threadLocalProm.newSymNode), call) elif prom != nil: let fk = prom.typ.sons[1].promiseKind if fk == 
promInvalid: @@ -251,17 +265,6 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode = result.typ = newType(tyPtr, objType.owner) result.typ.rawAddSon(objType) -proc addLocalVar(varSection: PNode; owner: PSym; typ: PType; v: PNode): PSym = - result = newSym(skTemp, getIdent(genPrefix), owner, varSection.info) - result.typ = typ - incl(result.flags, sfFromGeneric) - - var vpart = newNodeI(nkIdentDefs, varSection.info, 3) - vpart.sons[0] = newSymNode(result) - vpart.sons[1] = ast.emptyNode - vpart.sons[2] = v - varSection.add vpart - proc setupArgsForConcurrency(n: PNode; objType: PType; scratchObj: PSym, castExpr, call, varSection, result: PNode) = let formals = n[0].typ.n diff --git a/tests/parallel/tpi.nim b/tests/parallel/tpi.nim index de5aa9a514..1ef5c6aea0 100644 --- a/tests/parallel/tpi.nim +++ b/tests/parallel/tpi.nim @@ -1,3 +1,7 @@ +discard """ + output: '''3.141792613595791 +3.141792613595791''' +""" import strutils, math, threadpool