mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-19 14:00:35 +00:00
initial non-compiling version of 'parallel'
This commit is contained in:
@@ -9,7 +9,8 @@
|
||||
|
||||
## This module implements the 'implies' relation for guards.
|
||||
|
||||
import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents
|
||||
import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents,
|
||||
saturate
|
||||
|
||||
const
|
||||
someEq = {mEqI, mEqI64, mEqF64, mEqEnum, mEqCh, mEqB, mEqRef, mEqProc,
|
||||
@@ -25,6 +26,17 @@ const
|
||||
|
||||
someIn = {mInRange, mInSet}
|
||||
|
||||
someHigh = {mHigh}
|
||||
# we don't list unsigned here because wrap around semantics suck for
|
||||
# proving anything:
|
||||
someAdd = {mAddI, mAddI64, mAddF64, mSucc}
|
||||
someSub = {mSubI, mSubI64, mSubF64, mPred}
|
||||
someMul = {mMulI, mMulI64, mMulF64}
|
||||
someDiv = {mDivI, mDivI64, mDivF64}
|
||||
someMod = {mModI, mModI64}
|
||||
someMax = {mMaxI, mMaxI64, mMaxF64}
|
||||
someMin = {mMinI, mMinI64, mMinF64}
|
||||
|
||||
proc isValue(n: PNode): bool = n.kind in {nkCharLit..nkNilLit}
|
||||
proc isLocation(n: PNode): bool = not n.isValue
|
||||
|
||||
@@ -69,19 +81,24 @@ proc isLetLocation(m: PNode, isApprox: bool): bool =
|
||||
|
||||
proc interestingCaseExpr*(m: PNode): bool = isLetLocation(m, true)
|
||||
|
||||
proc getMagicOp(name: string, m: TMagic): PSym =
|
||||
proc createMagic*(name: string, m: TMagic): PSym =
|
||||
result = newSym(skProc, getIdent(name), nil, unknownLineInfo())
|
||||
result.magic = m
|
||||
|
||||
let
|
||||
opLe = getMagicOp("<=", mLeI)
|
||||
opLt = getMagicOp("<", mLtI)
|
||||
opAnd = getMagicOp("and", mAnd)
|
||||
opOr = getMagicOp("or", mOr)
|
||||
opNot = getMagicOp("not", mNot)
|
||||
opIsNil = getMagicOp("isnil", mIsNil)
|
||||
opContains = getMagicOp("contains", mInSet)
|
||||
opEq = getMagicOp("==", mEqI)
|
||||
opLe = createMagic("<=", mLeI)
|
||||
opLt = createMagic("<", mLtI)
|
||||
opAnd = createMagic("and", mAnd)
|
||||
opOr = createMagic("or", mOr)
|
||||
opNot = createMagic("not", mNot)
|
||||
opIsNil = createMagic("isnil", mIsNil)
|
||||
opContains = createMagic("contains", mInSet)
|
||||
opEq = createMagic("==", mEqI)
|
||||
opAdd = createMagic("+", mAddI)
|
||||
opSub = createMagic("-", mSubI)
|
||||
opMul = createMagic("*", mMulI)
|
||||
opDiv = createMagic("div", mDivI)
|
||||
opLen = createMagic("len", mLengthSeq)
|
||||
|
||||
proc swapArgs(fact: PNode, newOp: PSym): PNode =
|
||||
result = newNodeI(nkCall, fact.info, 3)
|
||||
@@ -137,17 +154,118 @@ proc neg(n: PNode): PNode =
|
||||
result.sons[0] = newSymNode(opNot)
|
||||
result.sons[1] = n
|
||||
|
||||
proc buildIsNil(arg: PNode): PNode =
|
||||
result = newNodeI(nkCall, arg.info, 2)
|
||||
result.sons[0] = newSymNode(opIsNil)
|
||||
result.sons[1] = arg
|
||||
proc buildCall(op: PSym; a: PNode): PNode =
|
||||
result = newNodeI(nkCall, a.info, 2)
|
||||
result.sons[0] = newSymNode(op)
|
||||
result.sons[1] = a
|
||||
|
||||
proc buildCall(op: PSym; a, b: PNode): PNode =
|
||||
result = newNodeI(nkCall, a.info, 3)
|
||||
result.sons[0] = newSymNode(op)
|
||||
result.sons[1] = a
|
||||
result.sons[2] = b
|
||||
|
||||
proc `+@`*(a: PNode; b: BiggestInt): PNode =
|
||||
opAdd.buildCall(a, nkIntLit.newIntNode(b))
|
||||
|
||||
proc `|+|`(a, b: PNode): PNode =
|
||||
result = copyNode(a)
|
||||
if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |+| b.intVal
|
||||
else: result.floatVal = a.floatVal + b.floatVal
|
||||
|
||||
proc `|*|`(a, b: PNode): PNode =
|
||||
result = copyNode(a)
|
||||
if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |*| b.intVal
|
||||
else: result.floatVal = a.floatVal * b.floatVal
|
||||
|
||||
proc zero(): PNode = nkIntLit.newIntNode(0)
|
||||
proc one(): PNode = nkIntLit.newIntNode(1)
|
||||
proc minusOne(): PNode = nkIntLit.newIntNode(-1)
|
||||
|
||||
proc lowBound*(x: PNode): PNode = nkIntLit.newIntNode(firstOrd(x.typ))
|
||||
proc highBound*(x: PNode): PNode =
|
||||
if x.typ.skipTypes(abstractInst).kind == tyArray:
|
||||
nkIntLit.newIntNode(lastOrd(x.typ))
|
||||
else:
|
||||
opAdd.buildCall(opLen.buildCall(x), minusOne())
|
||||
|
||||
proc canon*(n: PNode): PNode =
|
||||
# XXX for now only the new code in 'semparallel' uses this
|
||||
if n.safeLen >= 1:
|
||||
result = newNodeI(n.kind, n.info, n.len)
|
||||
for i in 0 .. < n.safeLen:
|
||||
result.sons[i] = canon(n.sons[i])
|
||||
else:
|
||||
result = n
|
||||
case result.getMagic
|
||||
of someEq, someAdd, someMul, someMin, someMax:
|
||||
# these are symmetric; put value as last:
|
||||
if result.sons[1].isValue and not result.sons[2].isValue:
|
||||
result = swapArgs(result, result.sons[0].sym)
|
||||
# (4 + foo) + 2 --> (foo + 4) + 2
|
||||
of someHigh:
|
||||
# high == len+(-1)
|
||||
result = opAdd.buildCall(opLen.buildCall(result[1]), minusOne())
|
||||
of mUnaryMinusI, mUnaryMinusI64:
|
||||
result = buildCall(opAdd, result[1], newIntNode(nkIntLit, -1))
|
||||
of someSub:
|
||||
# x - 4 --> x + (-4)
|
||||
var b = result[2]
|
||||
if b.kind in {nkCharLit..nkUInt64Lit} and b.intVal != low(BiggestInt):
|
||||
b = copyNode(b)
|
||||
b.intVal = -b.intVal
|
||||
result = buildCall(opAdd, result[1], b)
|
||||
elif b.kind in {nkFloatLit..nkFloat64Lit}:
|
||||
b = copyNode(b)
|
||||
b.floatVal = -b.floatVal
|
||||
result = buildCall(opAdd, result[1], b)
|
||||
of someLen:
|
||||
result.sons[0] = opLen.newSymNode
|
||||
else: discard
|
||||
|
||||
# re-association:
|
||||
# (foo+5)+5 --> foo+10; same for '*'
|
||||
case result.getMagic
|
||||
of someAdd:
|
||||
if result[2].isValue and
|
||||
result[1].getMagic in someAdd and result[1][2].isValue:
|
||||
result = opAdd.buildCall(result[1][1], result[1][2] |+| result[2])
|
||||
of someMul:
|
||||
if result[2].isValue and
|
||||
result[1].getMagic in someMul and result[1][2].isValue:
|
||||
result = opAdd.buildCall(result[1][1], result[1][2] |*| result[2])
|
||||
else: discard
|
||||
|
||||
# most important rule: (x-4) < a.len --> x < a.len+4
|
||||
case result.getMagic
|
||||
of someLe, someLt:
|
||||
let x = result[1]
|
||||
let y = result[2]
|
||||
if x.kind in nkCallKinds and x.len == 3 and x[2].isValue and
|
||||
isLetLocation(x[1], true):
|
||||
case x.getMagic
|
||||
of someSub:
|
||||
result = buildCall(result[0].sym, x[1], opAdd.buildCall(y, x[2]))
|
||||
of someAdd:
|
||||
result = buildCall(result[0].sym, x[1], opSub.buildCall(y, x[2]))
|
||||
else: discard
|
||||
elif y.kind in nkCallKinds and y.len == 3 and y[2].isValue and
|
||||
isLetLocation(y[1], true):
|
||||
# a.len < x-3
|
||||
case y.getMagic
|
||||
of someSub:
|
||||
result = buildCall(result[0].sym, y[1], opAdd.buildCall(x, y[2]))
|
||||
of someAdd:
|
||||
result = buildCall(result[0].sym, y[1], opSub.buildCall(x, y[2]))
|
||||
else: discard
|
||||
else: discard
|
||||
|
||||
proc usefulFact(n: PNode): PNode =
|
||||
case n.getMagic
|
||||
of someEq:
|
||||
if skipConv(n.sons[2]).kind == nkNilLit and (
|
||||
isLetLocation(n.sons[1], false) or isVar(n.sons[1])):
|
||||
result = buildIsNil(n.sons[1])
|
||||
result = opIsNil.buildCall(n.sons[1])
|
||||
else:
|
||||
if isLetLocation(n.sons[1], true) or isLetLocation(n.sons[2], true):
|
||||
# XXX algebraic simplifications! 'i-1 < a.len' --> 'i < a.len+1'
|
||||
@@ -217,7 +335,7 @@ proc addFactNeg*(m: var TModel, n: PNode) =
|
||||
let n = n.neg
|
||||
if n != nil: addFact(m, n)
|
||||
|
||||
proc sameTree(a, b: PNode): bool =
|
||||
proc sameTree*(a, b: PNode): bool =
|
||||
result = false
|
||||
if a == b:
|
||||
result = true
|
||||
@@ -519,7 +637,46 @@ proc doesImply*(facts: TModel, prop: PNode): TImplication =
|
||||
if result != impUnknown: return
|
||||
|
||||
proc impliesNotNil*(facts: TModel, arg: PNode): TImplication =
|
||||
result = doesImply(facts, buildIsNil(arg).neg)
|
||||
result = doesImply(facts, opIsNil.buildCall(arg).neg)
|
||||
|
||||
proc proveLe*(m: TModel; a, b: PNode): TImplication =
|
||||
let res = canon(opLe.buildCall(a, b))
|
||||
# we hardcode lots of axioms here:
|
||||
let a = res[1]
|
||||
let b = res[2]
|
||||
# 0 <= 3
|
||||
if a.isValue and b.isValue:
|
||||
return if leValue(a, b): impYes else: impNo
|
||||
|
||||
# use type information too: x <= 4 iff high(x) <= 4
|
||||
if b.isValue and a.typ != nil and a.typ.isOrdinalType:
|
||||
if lastOrd(a.typ) <= b.intVal: return impYes
|
||||
# 3 <= x iff low(x) <= 3
|
||||
if a.isValue and b.typ != nil and b.typ.isOrdinalType:
|
||||
if firstOrd(b.typ) <= a.intVal: return impYes
|
||||
|
||||
# x <= x
|
||||
if sameTree(a, b): return impYes
|
||||
|
||||
# x <= x+c iff 0 <= c
|
||||
if b.getMagic in someAdd and sameTree(a, b[1]):
|
||||
return proveLe(m, zero(), b[2])
|
||||
|
||||
# x <= x*c if 1 <= c and 0 <= x:
|
||||
if b.getMagic in someMul and sameTree(a, b[1]):
|
||||
if proveLe(m, one(), b[2]) == impYes and proveLe(m, zero(), a) == impYes:
|
||||
return impYes
|
||||
|
||||
# x div c <= x if 1 <= c and 0 <= x:
|
||||
if a.getMagic in someDiv and sameTree(a[1], b):
|
||||
if proveLe(m, one(), a[2]) == impYes and proveLe(m, zero(), b) == impYes:
|
||||
return impYes
|
||||
|
||||
# use the knowledge base:
|
||||
return doesImply(m, res)
|
||||
|
||||
proc addFactLe*(m: var TModel; a, b: PNode) =
|
||||
m.add canon(opLe.buildCall(a, b))
|
||||
|
||||
proc settype(n: PNode): PType =
|
||||
result = newType(tySet, n.typ.owner)
|
||||
|
||||
@@ -114,11 +114,15 @@ proc callCodegenProc*(name: string, arg1: PNode;
|
||||
if arg3 != nil: result.add arg3
|
||||
|
||||
proc createWrapperProc(f: PNode; threadParam, argsParam: PSym;
|
||||
varSection, call: PNode): PSym =
|
||||
varSection, call, barrier: PNode): PSym =
|
||||
var body = newNodeI(nkStmtList, f.info)
|
||||
body.add varSection
|
||||
if barrier != nil:
|
||||
body.add callCodeGenProc("barrierEnter", barrier)
|
||||
body.add callCodeGenProc("nimArgsPassingDone", newSymNode(threadParam))
|
||||
body.add call
|
||||
if barrier != nil:
|
||||
body.add callCodeGenProc("barrierLeave", barrier)
|
||||
|
||||
var params = newNodeI(nkFormalParams, f.info)
|
||||
params.add emptyNode
|
||||
@@ -146,7 +150,7 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode =
|
||||
result.typ = newType(tyPtr, objType.owner)
|
||||
result.typ.rawAddSon(objType)
|
||||
|
||||
proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
|
||||
proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode =
|
||||
result = newNodeI(nkStmtList, n.info)
|
||||
if n.kind notin nkCallKinds or not n.typ.isEmptyType:
|
||||
localError(n.info, "'spawn' takes a call expression of type void")
|
||||
@@ -162,6 +166,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
|
||||
threadParam.typ = ptrType
|
||||
argsParam.typ = ptrType
|
||||
argsParam.position = 1
|
||||
|
||||
var objType = createObj(owner, n.info)
|
||||
incl(objType.flags, tfFinal)
|
||||
let castExpr = createCastExpr(argsParam, objType)
|
||||
@@ -223,6 +228,17 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
|
||||
|
||||
call.add(newSymNode(temp))
|
||||
|
||||
let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call)
|
||||
var barrierAsExpr: PNode = nil
|
||||
if barrier != nil:
|
||||
let typ = newType(tyPtr, owner)
|
||||
typ.rawAddSon(magicsys.getCompilerProc("Barrier").typ)
|
||||
var field = newSym(skField, getIdent"barrier", owner, n.info)
|
||||
field.typ = typ
|
||||
objType.addField(field)
|
||||
result.add newFastAsgnStmt(newDotExpr(scratchObj, field), barrier)
|
||||
barrierAsExpr = indirectAccess(castExpr, field, n.info)
|
||||
|
||||
let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call,
|
||||
barrierAsExpr)
|
||||
result.add callCodeGenProc("nimSpawn", wrapper.newSymNode,
|
||||
genAddrOf(scratchObj.newSymNode))
|
||||
|
||||
414
compiler/semparallel.nim
Normal file
414
compiler/semparallel.nim
Normal file
@@ -0,0 +1,414 @@
|
||||
#
|
||||
#
|
||||
# The Nimrod Compiler
|
||||
# (c) Copyright 2014 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## Semantic checking for 'parallel'.
|
||||
|
||||
# - slices should become "nocopy" to openArray (+)
|
||||
# - need to perform bound checks (+)
|
||||
#
|
||||
# - parallel needs to insert a barrier (+)
|
||||
# - passed arguments need to be ensured to be "const"
|
||||
# - what about 'f(a)'? --> f shouldn't have side effects anyway
|
||||
# - passed arrays need to be ensured not to alias
|
||||
# - passed slices need to be ensured to be disjoint (+)
|
||||
# - output slices need special logic
|
||||
|
||||
import lowerings, guards, sempass2
|
||||
|
||||
discard """
|
||||
|
||||
one major problem:
|
||||
spawn f(a[i])
|
||||
inc i
|
||||
spawn f(a[i])
|
||||
is valid, but
|
||||
spawn f(a[i])
|
||||
spawn f(a[i])
|
||||
inc i
|
||||
is not! However,
|
||||
spawn f(a[i])
|
||||
if guard: inc i
|
||||
spawn f(a[i])
|
||||
is not valid either! --> We need a flow dependent analysis here.
|
||||
|
||||
However:
|
||||
while foo:
|
||||
spawn f(a[i])
|
||||
inc i
|
||||
spawn f(a[i])
|
||||
|
||||
Is not valid either! --> We should really restrict 'inc' to loop endings?
|
||||
|
||||
The heuristic that we implement here (that has no false positives) is: Usage
|
||||
of 'i' in a slice *after* we determined the stride is invalid!
|
||||
"""
|
||||
|
||||
type
|
||||
TDirection = enum
|
||||
ascending, descending
|
||||
MonotonicVar = object
|
||||
v: PSym
|
||||
lower, upper, stride: PNode
|
||||
dir: TDirection
|
||||
blacklisted: bool # blacklisted variables that are not monotonic
|
||||
AnalysisCtx = object
|
||||
locals: seq[MonotonicVar]
|
||||
slices: seq[tuple[x,a,b: PNode, spawnId: int, inLoop: bool]]
|
||||
guards: TModel # nested guards
|
||||
args: seq[PSym] # args must be deeply immutable
|
||||
spawns: int # we can check that at last 1 spawn is used in
|
||||
# the 'parallel' section
|
||||
currentSpawnId: int
|
||||
inLoop: int
|
||||
|
||||
let opSlice = createMagic("slice", mSlice)
|
||||
|
||||
proc initAnalysisCtx(): AnalysisCtx =
|
||||
result.locals = @[]
|
||||
result.slices = @[]
|
||||
result.args = @[]
|
||||
result.guards = @[]
|
||||
|
||||
proc getSlot(c: var AnalysisCtx; s: PSym): ptr MonotonicVar =
|
||||
var L = c.locals.len
|
||||
for i in 0.. <L:
|
||||
if c.locals[i].v == s: return addr(c.locals[i])
|
||||
c.locals.setLen(L+1)
|
||||
c.locals[L].v = s
|
||||
return addr(c.locals[L])
|
||||
|
||||
proc getRoot(n: PNode): PSym =
|
||||
## ``getRoot`` takes a *path* ``n``. A path is an lvalue expression
|
||||
## like ``obj.x[i].y``. The *root* of a path is the symbol that can be
|
||||
## determined as the owner; ``obj`` in the example.
|
||||
case n.kind
|
||||
of nkSym:
|
||||
if n.sym.kind in {skVar, skResult, skTemp, skLet, skForVar}:
|
||||
result = n.sym
|
||||
of nkDotExpr, nkBracketExpr, nkHiddenDeref, nkDerefExpr,
|
||||
nkObjUpConv, nkObjDownConv, nkCheckedFieldExpr:
|
||||
result = getRoot(n.sons[0])
|
||||
of nkHiddenStdConv, nkHiddenSubConv, nkConv:
|
||||
result = getRoot(n.sons[1])
|
||||
of nkCallKinds:
|
||||
if getMagic(n) == mSlice: result = getRoot(n.sons[1])
|
||||
else: discard
|
||||
|
||||
proc gatherArgs(c: var AnalysisCtx; n: PNode) =
|
||||
for i in 0.. <n.safeLen:
|
||||
let root = getRoot n[i]
|
||||
if root != nil:
|
||||
block addRoot:
|
||||
for r in items(c.args):
|
||||
if r == root: break addRoot
|
||||
c.args.add root
|
||||
gatherArgs(c, n[i])
|
||||
|
||||
proc isLocal(s: PSym): bool =
|
||||
s.kind in {skResult, skTemp, skForVar, skVar, skLet} and
|
||||
{sfAddrTaken, sfGlobal} * s.flags == {}
|
||||
|
||||
proc checkLocal(c: var AnalysisCtx; n: PNode) =
|
||||
if n.kind == nkSym and isLocal(n.sym):
|
||||
let slot = c.getSlot(n[1].sym)
|
||||
if slot.stride != nil:
|
||||
localError(n.info, "invalid usage of counter after increment")
|
||||
else:
|
||||
for i in 0 .. <n.safeLen: checkLocal(c, n.sons[i])
|
||||
|
||||
proc checkLe(c: AnalysisCtx; a, b: PNode) =
|
||||
case proveLe(c.guards, a, b)
|
||||
of impUnkown:
|
||||
localError(n.info, "cannot prove: " & a.renderTree & " <= " & b.renderTree)
|
||||
of impYes: discard
|
||||
of impNo:
|
||||
localError(n.info, "can prove: " & a.renderTree & " > " & b.renderTree)
|
||||
|
||||
proc checkBounds(c: AnalysisCtx; arr, idx: PNode) =
|
||||
checkLe(c, arr.lowBound, idx)
|
||||
checkLe(c, idx, arr.highBound)
|
||||
|
||||
proc addLowerBoundAsFacts(c: var AnalysisCtx) =
|
||||
for v in c.locals:
|
||||
if not v.blacklisted:
|
||||
c.guards.addFactLe(v.lower, newSymNode(v.v))
|
||||
|
||||
proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: int) =
|
||||
checkLocal(c, n)
|
||||
let le = n.sons[le]
|
||||
let ri = n.sons[ri]
|
||||
let x = n.sons[x]
|
||||
# perform static bounds checking here; and not later!
|
||||
let oldState = c.guards.len
|
||||
addLowerBoundAsFacts(c)
|
||||
c.checkBounds(x, le)
|
||||
c.checkBounds(x, ri)
|
||||
c.guards.setLen(oldState)
|
||||
c.slices.add((x, le, ri, c.currentSpawnId, c.inLoop > 0))
|
||||
|
||||
template `?`(x): expr = x.renderTree
|
||||
|
||||
proc overlap(m: TModel; x,y,c,d: PNode) =
|
||||
# X..Y and C..D overlap iff (X <= D and Y >= C)
|
||||
case proveLe(m, x, d)
|
||||
of impUnkown:
|
||||
localError(x.info,
|
||||
"cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" %
|
||||
[?x, ?d, ?x, ?y, ?c, ?d])
|
||||
of impYes:
|
||||
case proveLe(m, y, c)
|
||||
of impUnknown:
|
||||
localError(x.info,
|
||||
"cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" %
|
||||
[?y, ?d, ?x, ?y, ?c, ?d])
|
||||
of impYes:
|
||||
localError(x.info, "$#..$# not disjoint from $#..$#" % [?x, ?y, ?c, ?d])
|
||||
of impNo: discard
|
||||
of impNo: discard
|
||||
|
||||
proc stride(c: AnalysisCtx; n: PNode): BiggestInt =
|
||||
# note: 0 if it cannot be determined is just right because then
|
||||
# we analyse 'i..i' and 'i+0 .. i+0' and these are not disjoint!
|
||||
if n.kind == nkSym and isLocal(n.sym):
|
||||
let slot = c.getSlot(n[1].sym)
|
||||
if slot.stride != nil:
|
||||
result = slot.stride.intVal
|
||||
else:
|
||||
for i in 0 .. <n.safeLen: inc(result, stride(c, n.sons[i]))
|
||||
|
||||
proc checkSlicesAreDisjoint(c: var AnalysisCtx) =
|
||||
# this is the only thing that we need to perform after we have traversed
|
||||
# the whole tree so that the strides are available.
|
||||
# First we need to add all the computed lower bounds:
|
||||
addLowerBoundAsFacts(c)
|
||||
# Every slice used in a loop needs to be disjoint with itself:
|
||||
for x,a,b,id,inLoop in items(c.slices):
|
||||
if inLoop: overlap(c.guards, a,b, a+@c.stride(a), b+@c.stride(b))
|
||||
# Another tricky example is:
|
||||
# while true:
|
||||
# spawn f(a[i])
|
||||
# spawn f(a[i+1])
|
||||
# inc i # inc i, 2 would be correct here
|
||||
#
|
||||
# Or even worse:
|
||||
# while true:
|
||||
# spawn f(a[i+1 .. i+3])
|
||||
# spawn f(a[i+4 .. i+5])
|
||||
# inc i, 4
|
||||
# Prove that i*k*stride + 3 != i*k'*stride + 5
|
||||
# For the correct example this amounts to
|
||||
# i*k*2 != i*k'*2 + 1
|
||||
# which is true.
|
||||
# For now, we don't try to prove things like that at all, even though it'd
|
||||
# be feasible for many useful examples. Instead we attach the slice to
|
||||
# a spawn and if the attached spawns differ, we bail out:
|
||||
for i in 0 .. high(c.slices):
|
||||
for j in 0 .. high(c.slices):
|
||||
let x = c.slices[i]
|
||||
let y = c.slices[j]
|
||||
if i != j and x.spawnId != y.spawnId and guards.sameTree(x.x, y.x):
|
||||
if not x.inLoop and not y.inLoop:
|
||||
overlap(c.guards, x.a, x.b, y.a, y.b)
|
||||
else:
|
||||
# ah I cannot resists the temptation and add another sweet heuristic:
|
||||
# if both slices have the form (i+c)..(i+c) and (i+d)..(i+d) we
|
||||
# check they are disjoint and c <= stride and d <= stride:
|
||||
# XXX
|
||||
localError(x.x.info, "cannot prove $#..$# disjoint from $#..$#" %
|
||||
[?x.a, ?x.b, ?y.a, ?y.b])
|
||||
|
||||
proc analyse(c: var AnalysisCtx; n: PNode)
|
||||
|
||||
proc analyseSons(c: var AnalysisCtx; n: PNode) =
|
||||
for i in 0 .. <safeLen(n): analyse(c, n[i])
|
||||
|
||||
proc min(a, b: PNode): PNode =
|
||||
if a.isNil: result = b
|
||||
elif a.intVal < b.intVal: result = a
|
||||
else: result = b
|
||||
|
||||
proc analyseCall(c: var AnalysisCtx; n: PNode; op: PSym) =
|
||||
if op.magic == mSpawn:
|
||||
inc c.spawns
|
||||
let oldSpawnId = c.currentSpawnId
|
||||
c.currentSpawnId = c.spawns
|
||||
gatherArgs(c, n[1])
|
||||
analyseSons(c, n)
|
||||
c.currentSpawnId = oldSpawnId
|
||||
elif op.magic == mInc or (op.name.s == "+=" and sfSystemModule in op.owner.flags):
|
||||
if n[1].kind == nkSym and n[1].isLocal:
|
||||
let incr = n[1].skipConv
|
||||
if incr.kind in {nkCharLit..nkUInt32Lit} and incr.intVal > 0:
|
||||
let slot = c.getSlot(n[1].sym)
|
||||
slot.stride = min(slot.stride, incr)
|
||||
analyseSons(c, n)
|
||||
elif op.name.s == "[]" and sfSystemModule in op.owner.flags:
|
||||
c.addSlice(n, 1, 2, 3)
|
||||
analyseSons(c, n)
|
||||
elif op.name.s == "[]=" and sfSystemModule in op.owner.flags:
|
||||
c.addSlice(n, 1, 2, 3)
|
||||
analyseSons(c, n)
|
||||
else:
|
||||
analyseSons(c, n)
|
||||
|
||||
proc analyseCase(c: var AnalysisCtx; n: PNode) =
|
||||
analyse(c, n.sons[0])
|
||||
#let oldState = c.locals.len
|
||||
let oldFacts = c.guards.len
|
||||
for i in 1.. <n.len:
|
||||
let branch = n.sons[i]
|
||||
#setLen(c.locals, oldState)
|
||||
setLen(c.guards, oldFacts)
|
||||
addCaseBranchFacts(c.guards, n, i)
|
||||
for i in 0 .. <branch.len:
|
||||
analyse(c, branch.sons[i])
|
||||
#setLen(c.locals, oldState)
|
||||
setLen(c.guards, oldFacts)
|
||||
|
||||
proc analyseIf(c: var AnalysisCtx; n: PNode) =
|
||||
analyse(c, n.sons[0].sons[0])
|
||||
let oldFacts = c.guards.len
|
||||
addFact(c.guards, n.sons[0].sons[0])
|
||||
#let oldState = c.locals.len
|
||||
|
||||
analyse(c, n.sons[0].sons[1])
|
||||
for i in 1.. <n.len:
|
||||
let branch = n.sons[i]
|
||||
setLen(c.guards, oldFacts)
|
||||
for j in 0..i-1:
|
||||
addFactNeg(c.guards, n.sons[j].sons[0])
|
||||
if branch.len > 1:
|
||||
addFact(c.guards, branch.sons[0])
|
||||
#setLen(c.locals, oldState)
|
||||
for i in 0 .. <branch.len:
|
||||
analyse(c, branch.sons[i])
|
||||
#setLen(c.locals, oldState)
|
||||
setLen(c.guards, oldFacts)
|
||||
|
||||
proc analyse(c: var AnalysisCtx; n: PNode) =
|
||||
case n.kind
|
||||
of nkAsgn, nkFastAsgn:
|
||||
# since we already ensure sfAddrTaken is not in s.flags, we only need to
|
||||
# prevent direct assignments to the monotonic variable:
|
||||
if n[0].kind == nkSym and n[0].isLocal:
|
||||
let slot = c.getSlot(it[j].sym)
|
||||
slot.blackListed = true
|
||||
invalidateFacts(c.guards, n.sons[0])
|
||||
analyseSons(c, n)
|
||||
addAsgnFact(c.guards, n.sons[0], n.sons[1])
|
||||
of nkCallKinds:
|
||||
# direct call:
|
||||
if n[0].kind == nkSym: analyseCall(c, n, n[0].sym)
|
||||
else: analyseSons(c, n)
|
||||
of nkBracket:
|
||||
c.addSlice(n, 0, 1, 1)
|
||||
analyseSons(c, n)
|
||||
of nkReturnStmt, nkRaiseStmt, nkTryStmt:
|
||||
localError(n.info, "invalid control flow for 'parallel'")
|
||||
# 'break' that leaves the 'parallel' section is not valid either
|
||||
# or maybe we should generate a 'try' XXX
|
||||
of nkVarSection:
|
||||
for it in n:
|
||||
if it.sons[it.len-1].kind != nkEmpty:
|
||||
for j in 0 .. it.len-3:
|
||||
if it[j].kind == nkSym and it[j].isLocal:
|
||||
let slot = c.getSlot(it[j].sym)
|
||||
if slot.lower.isNil: slot.lower = it.sons[it.len-1]
|
||||
else: internalError(it.info, "slot already has a lower bound")
|
||||
analyseSons(c, n)
|
||||
|
||||
of nkCaseStmt: analyseCase(c, n)
|
||||
of nkIfStmt, nkIfExpr: analyseIf(c, n)
|
||||
of nkWhileStmt:
|
||||
analyse(c, n.sons[0])
|
||||
# 'while true' loop?
|
||||
inc c.inLoop
|
||||
if isTrue(n.sons[0]):
|
||||
analyseSons(c, n.sons[1])
|
||||
else:
|
||||
# loop may never execute:
|
||||
let oldState = c.locals.len
|
||||
let oldFacts = c.guards.len
|
||||
addFact(c.guards, n.sons[0])
|
||||
analyse(c, n.sons[1])
|
||||
setLen(c.locals, oldState)
|
||||
setLen(c.guards, oldFacts)
|
||||
# we know after the loop the negation holds:
|
||||
if not containsNode(n.sons[1], nkBreakStmt):
|
||||
addFactNeg(c.guards, n.sons[0])
|
||||
dec c.inLoop
|
||||
of nkTypeSection, nkProcDef, nkConverterDef, nkMethodDef, nkIteratorDef,
|
||||
nkMacroDef, nkTemplateDef, nkConstSection, nkPragma:
|
||||
discard
|
||||
else:
|
||||
analyseSons(c, n)
|
||||
|
||||
proc transformSlices(n: PNode): PNode =
|
||||
if n.kind in nkCalls and n[0].kind == nkSym:
|
||||
let op = n[0].sym
|
||||
if op.name.s == "[]" and sfSystemModule in op.owner.flags:
|
||||
result = copyTree(n)
|
||||
result.sons[0] = opSlice
|
||||
return result
|
||||
if n.safeLen > 0:
|
||||
result = copyNode(n.kind, n.info, n.len)
|
||||
for i in 0 .. < n.len:
|
||||
result.sons[i] = transformSlices(n.sons[i])
|
||||
else:
|
||||
result = n
|
||||
|
||||
proc transformSpawn(owner: PSym; n, barrier: PNode): PNode =
|
||||
if n.kind in nkCalls:
|
||||
if n[0].kind == nkSym:
|
||||
let op = n[0].sym
|
||||
if op.magic == mSpawn:
|
||||
result = transformSlices(n)
|
||||
return wrapProcForSpawn(owner, result, barrier)
|
||||
elif n.safeLen > 0:
|
||||
result = copyNode(n.kind, n.info, n.len)
|
||||
for i in 0 .. < n.len:
|
||||
result.sons[i] = transformSpawn(owner, n.sons[i], barrier)
|
||||
else:
|
||||
result = n
|
||||
|
||||
proc liftParallel*(owner: PSym; n: PNode): PNode =
|
||||
# this needs to be called after the 'for' loop elimination
|
||||
|
||||
# first pass:
|
||||
# - detect monotonic local integer variables
|
||||
# - detect used slices
|
||||
# - detect used arguments
|
||||
|
||||
var a = initAnalysisCtx()
|
||||
let body = n.lastSon
|
||||
analyse(a, body)
|
||||
if a.spawns == 0:
|
||||
localError(n.info, "'parallel' section without 'spawn'")
|
||||
checkSlices(a)
|
||||
checkArgs(a, body)
|
||||
|
||||
var varSection = newNodeI(nkVarSection, n.info)
|
||||
var temp = newSym(skTemp, "barrier", owner, n.info)
|
||||
temp.typ = magicsys.getCompilerProc("Barrier").typ
|
||||
incl(temp.flags, sfFromGeneric)
|
||||
|
||||
var vpart = newNodeI(nkIdentDefs, n.info, 3)
|
||||
vpart.sons[0] = newSymNode(temp)
|
||||
vpart.sons[1] = ast.emptyNode
|
||||
vpart.sons[2] = indirectAccess(castExpr, field, n.info)
|
||||
varSection.add vpart
|
||||
|
||||
barrier = genAddrOf(vpart[0])
|
||||
|
||||
result = newNodeI(nkStmtList, n.info)
|
||||
generateAliasChecks(a, result)
|
||||
result.add varSection
|
||||
result.add callCodeGenProc("openBarrier", barrier)
|
||||
result.add transformSpawn(owner, body, barrier)
|
||||
result.add callCodeGenProc("closeBarrier", barrier)
|
||||
@@ -89,7 +89,7 @@ proc initVarViaNew(a: PEffects, n: PNode) =
|
||||
if n.kind != nkSym: return
|
||||
let s = n.sym
|
||||
if {tfNeedsInit, tfNotNil} * s.typ.flags <= {tfNotNil}:
|
||||
# 'x' is not nil, but that doesn't mean it's not nil children
|
||||
# 'x' is not nil, but that doesn't mean its "not nil" children
|
||||
# are initialized:
|
||||
initVar(a, n)
|
||||
|
||||
@@ -478,7 +478,7 @@ proc trackBlock(tracked: PEffects, n: PNode) =
|
||||
else:
|
||||
track(tracked, n)
|
||||
|
||||
proc isTrue(n: PNode): bool =
|
||||
proc isTrue*(n: PNode): bool =
|
||||
n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or
|
||||
n.kind == nkIntLit and n.intVal != 0
|
||||
|
||||
|
||||
@@ -131,8 +131,9 @@ proc createStrKeepNode(x: var TFullReg) =
|
||||
nfAllConst in x.node.flags:
|
||||
# XXX this is hacky; tests/txmlgen triggers it:
|
||||
x.node = newNode(nkStrLit)
|
||||
# debug x.node
|
||||
#assert x.node.kind in {nkStrLit..nkTripleStrLit}
|
||||
# It not only hackey, it is also wrong for tgentemplate. The primary
|
||||
# cause of bugs like these is that the VM does not properly distinguish
|
||||
# between variable defintions (var foo = e) and variable updates (foo = e).
|
||||
|
||||
template createStr(x) =
|
||||
x.node = newNode(nkStrLit)
|
||||
|
||||
@@ -16,6 +16,7 @@ arm.linux.gcc.linkerexe = "arm-linux-gcc"
|
||||
path="$lib/core"
|
||||
path="$lib/pure"
|
||||
path="$lib/pure/collections"
|
||||
path="$lib/pure/concurrency"
|
||||
path="$lib/impure"
|
||||
path="$lib/wrappers"
|
||||
# path="$lib/wrappers/cairo"
|
||||
|
||||
58
lib/pure/concurrency/cpuinfo.nim
Normal file
58
lib/pure/concurrency/cpuinfo.nim
Normal file
@@ -0,0 +1,58 @@
|
||||
#
|
||||
#
|
||||
# Nimrod's Runtime Library
|
||||
# (c) Copyright 2014 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## This module implements procs to determine the number of CPUs / cores.
|
||||
|
||||
include "system/inclrtl"
|
||||
|
||||
import strutils, os
|
||||
|
||||
when not defined(windows):
|
||||
import posix
|
||||
|
||||
when defined(linux):
|
||||
import linux
|
||||
|
||||
when defined(macosx) or defined(bsd):
|
||||
const
|
||||
CTL_HW = 6
|
||||
HW_AVAILCPU = 25
|
||||
HW_NCPU = 3
|
||||
proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer,
|
||||
a: var csize, b: pointer, c: int): cint {.
|
||||
importc: "sysctl", header: "<sys/sysctl.h>".}
|
||||
|
||||
proc countProcessors*(): int {.rtl, extern: "ncpi$1".} =
|
||||
## returns the numer of the processors/cores the machine has.
|
||||
## Returns 0 if it cannot be detected.
|
||||
when defined(windows):
|
||||
var x = getEnv("NUMBER_OF_PROCESSORS")
|
||||
if x.len > 0: result = parseInt(x.string)
|
||||
elif defined(macosx) or defined(bsd):
|
||||
var
|
||||
mib: array[0..3, cint]
|
||||
numCPU: int
|
||||
len: csize
|
||||
mib[0] = CTL_HW
|
||||
mib[1] = HW_AVAILCPU
|
||||
len = sizeof(numCPU)
|
||||
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
|
||||
if numCPU < 1:
|
||||
mib[1] = HW_NCPU
|
||||
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
|
||||
result = numCPU
|
||||
elif defined(hpux):
|
||||
result = mpctl(MPC_GETNUMSPUS, nil, nil)
|
||||
elif defined(irix):
|
||||
var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint
|
||||
result = sysconf(SC_NPROC_ONLN)
|
||||
else:
|
||||
result = sysconf(SC_NPROCESSORS_ONLN)
|
||||
if result <= 0: result = 1
|
||||
|
||||
96
lib/pure/concurrency/cpuload.nim
Normal file
96
lib/pure/concurrency/cpuload.nim
Normal file
@@ -0,0 +1,96 @@
|
||||
#
|
||||
#
|
||||
# Nimrod's Runtime Library
|
||||
# (c) Copyright 2014 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## This module implements a helper for a thread pool to determine whether
|
||||
## creating a thread is a good idea.
|
||||
|
||||
when defined(windows):
|
||||
import winlean, os, strutils, math
|
||||
|
||||
proc `-`(a, b: TFILETIME): int64 = a.rdFileTime - b.rdFileTime
|
||||
elif defined(linux):
|
||||
from cpuinfo import countProcessors
|
||||
|
||||
type
|
||||
ThreadPoolAdvice* = enum
|
||||
doNothing,
|
||||
doCreateThread, # create additional thread for throughput
|
||||
doShutdownThread # too many threads are busy, shutdown one
|
||||
|
||||
ThreadPoolState* = object
|
||||
when defined(windows):
|
||||
prevSysKernel, prevSysUser, prevProcKernel, prevProcUser: TFILETIME
|
||||
calls*: int
|
||||
|
||||
proc advice*(s: var ThreadPoolState): ThreadPoolAdvice =
|
||||
when defined(windows):
|
||||
var
|
||||
sysIdle, sysKernel, sysUser,
|
||||
procCreation, procExit, procKernel, procUser: TFILETIME
|
||||
if getSystemTimes(sysIdle, sysKernel, sysUser) == 0 or
|
||||
getProcessTimes(THandle(-1), procCreation, procExit,
|
||||
procKernel, procUser) == 0:
|
||||
return doNothing
|
||||
if s.calls > 0:
|
||||
let
|
||||
sysKernelDiff = sysKernel - s.prevSysKernel
|
||||
sysUserDiff = sysUser - s.prevSysUser
|
||||
|
||||
procKernelDiff = procKernel - s.prevProcKernel
|
||||
procUserDiff = procUser - s.prevProcUser
|
||||
|
||||
sysTotal = int(sysKernelDiff + sysUserDiff)
|
||||
procTotal = int(procKernelDiff + procUserDiff)
|
||||
# total CPU usage < 85% --> create a new worker thread.
|
||||
# Measurements show that 100% and often even 90% is not reached even
|
||||
# if all my cores are busy.
|
||||
if sysTotal == 0 or procTotal / sysTotal < 0.85:
|
||||
result = doCreateThread
|
||||
s.prevSysKernel = sysKernel
|
||||
s.prevSysUser = sysUser
|
||||
s.prevProcKernel = procKernel
|
||||
s.prevProcUser = procUser
|
||||
elif defined(linux):
|
||||
proc fscanf(c: TFile, frmt: cstring) {.varargs, importc,
|
||||
header: "<stdio.h>".}
|
||||
|
||||
var f = open("/proc/loadavg")
|
||||
var b: float
|
||||
var busy, total: int
|
||||
fscanf(f,"%lf %lf %lf %ld/%ld",
|
||||
addr b, addr b, addr b, addr busy, addr total)
|
||||
f.close()
|
||||
let cpus = countProcessors()
|
||||
if busy-1 < cpus:
|
||||
result = doCreateThread
|
||||
elif busy-1 >= cpus*2:
|
||||
result = doShutdownThread
|
||||
else:
|
||||
result = doNothing
|
||||
else:
|
||||
# XXX implement this for other OSes
|
||||
result = doNothing
|
||||
inc s.calls
|
||||
|
||||
when isMainModule:
|
||||
proc busyLoop() =
|
||||
while true:
|
||||
discard random(80)
|
||||
os.sleep(100)
|
||||
|
||||
spawn busyLoop()
|
||||
spawn busyLoop()
|
||||
spawn busyLoop()
|
||||
spawn busyLoop()
|
||||
|
||||
var s: ThreadPoolState
|
||||
|
||||
for i in 1 .. 70:
|
||||
echo advice(s)
|
||||
os.sleep(1000)
|
||||
210
lib/pure/concurrency/threadpool.nim
Normal file
210
lib/pure/concurrency/threadpool.nim
Normal file
@@ -0,0 +1,210 @@
|
||||
#
|
||||
#
|
||||
# Nimrod's Runtime Library
|
||||
# (c) Copyright 2014 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## Implements Nimrod's 'spawn'.
|
||||
|
||||
import cpuinfo, cpuload, locks
|
||||
|
||||
{.push stackTrace:off.}
|
||||
|
||||
type
|
||||
CondVar = object
|
||||
c: TCond
|
||||
L: TLock
|
||||
counter: int
|
||||
|
||||
proc createCondVar(): CondVar =
|
||||
initCond(result.c)
|
||||
initLock(result.L)
|
||||
|
||||
proc destroyCondVar(cv: var CondVar) {.inline.} =
|
||||
deinitCond(cv.c)
|
||||
deinitLock(cv.L)
|
||||
|
||||
proc await(cv: var CondVar) =
|
||||
acquire(cv.L)
|
||||
while cv.counter <= 0:
|
||||
wait(cv.c, cv.L)
|
||||
dec cv.counter
|
||||
release(cv.L)
|
||||
|
||||
proc signal(cv: var CondVar) =
|
||||
acquire(cv.L)
|
||||
inc cv.counter
|
||||
release(cv.L)
|
||||
signal(cv.c)
|
||||
|
||||
type
|
||||
Barrier* {.compilerProc.} = object
|
||||
counter: int
|
||||
cv: CondVar
|
||||
|
||||
proc barrierEnter*(b: ptr Barrier) {.compilerProc.} =
|
||||
atomicInc b.counter
|
||||
|
||||
proc barrierLeave*(b: ptr Barrier) {.compilerProc.} =
|
||||
atomicDec b.counter
|
||||
if b.counter <= 0: signal(b.cv)
|
||||
|
||||
proc openBarrier*(b: ptr Barrier) {.compilerProc.} =
|
||||
b.counter = 0
|
||||
b.cv = createCondVar()
|
||||
|
||||
proc closeBarrier*(b: ptr Barrier) {.compilerProc.} =
|
||||
await(b.cv)
|
||||
destroyCondVar(b.cv)
|
||||
|
||||
{.pop.}
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
type
|
||||
WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.}
|
||||
Worker = object
|
||||
taskArrived: CondVar
|
||||
taskStarted: CondVar #\
|
||||
# task data:
|
||||
f: WorkerProc
|
||||
data: pointer
|
||||
ready: bool # put it here for correct alignment!
|
||||
initialized: bool # whether it has even been initialized
|
||||
|
||||
proc nimArgsPassingDone(p: pointer) {.compilerProc.} =
|
||||
let w = cast[ptr Worker](p)
|
||||
signal(w.taskStarted)
|
||||
|
||||
var
|
||||
gSomeReady = createCondVar()
|
||||
readyWorker: ptr Worker
|
||||
|
||||
proc slave(w: ptr Worker) {.thread.} =
|
||||
while true:
|
||||
w.ready = true
|
||||
readyWorker = w
|
||||
signal(gSomeReady)
|
||||
await(w.taskArrived)
|
||||
assert(not w.ready)
|
||||
w.f(w, w.data)
|
||||
|
||||
const
|
||||
MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 256 threads
|
||||
## should be good enough for anybody ;-)
|
||||
|
||||
var
|
||||
currentPoolSize: int
|
||||
maxPoolSize = MaxThreadPoolSize
|
||||
minPoolSize = 4
|
||||
|
||||
proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) =
|
||||
## sets the minimal thread pool size. The default value of this is 4.
|
||||
minPoolSize = size
|
||||
|
||||
proc setMaxPoolSize*(size: range[1..MaxThreadPoolSize]) =
|
||||
## sets the minimal thread pool size. The default value of this
|
||||
## is ``MaxThreadPoolSize``.
|
||||
maxPoolSize = size
|
||||
|
||||
var
|
||||
workers: array[MaxThreadPoolSize, TThread[ptr Worker]]
|
||||
workersData: array[MaxThreadPoolSize, Worker]
|
||||
|
||||
proc activateThread(i: int) {.noinline.} =
|
||||
workersData[i].taskArrived = createCondVar()
|
||||
workersData[i].taskStarted = createCondVar()
|
||||
workersData[i].initialized = true
|
||||
createThread(workers[i], slave, addr(workersData[i]))
|
||||
|
||||
proc setup() =
|
||||
currentPoolSize = min(countProcessors(), MaxThreadPoolSize)
|
||||
readyWorker = addr(workersData[0])
|
||||
for i in 0.. <currentPoolSize: activateThread(i)
|
||||
|
||||
proc preferSpawn*(): bool =
|
||||
## Use this proc to determine quickly if a 'spawn' or a direct call is
|
||||
## preferable. If it returns 'true' a 'spawn' may make sense. In general
|
||||
## it is not necessary to call this directly; use 'spawnX' instead.
|
||||
result = gSomeReady.counter > 0
|
||||
|
||||
proc spawn*(call: stmt) {.magic: "Spawn".}
|
||||
## always spawns a new task, so that the 'call' is never executed on
|
||||
## the calling thread. 'call' has to be proc call 'p(...)' where 'p'
|
||||
## is gcsafe and has 'void' as the return type.
|
||||
|
||||
template spawnX*(call: stmt) =
|
||||
## spawns a new task if a CPU core is ready, otherwise executes the
|
||||
## call in the calling thread. Usually it is advised to
|
||||
## use 'spawn' in order to not block the producer for an unknown
|
||||
## amount of time. 'call' has to be proc call 'p(...)' where 'p'
|
||||
## is gcsafe and has 'void' as the return type.
|
||||
if preferSpawn(): spawn call
|
||||
else: call
|
||||
|
||||
proc parallel*(body: stmt) {.magic: "Parallel".}
|
||||
## a parallel section can be used to execute a block in parallel. ``body``
|
||||
## has to be in a DSL that is a particular subset of the language. Please
|
||||
## refer to the manual for further information.
|
||||
|
||||
var
|
||||
state: ThreadPoolState
|
||||
stateLock: TLock
|
||||
|
||||
initLock stateLock
|
||||
|
||||
proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool =
|
||||
if cas(addr w.ready, true, false):
|
||||
w.data = data
|
||||
w.f = fn
|
||||
signal(w.taskArrived)
|
||||
await(w.taskStarted)
|
||||
result = true
|
||||
|
||||
proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} =
|
||||
# implementation of 'spawn' that is used by the code generator.
|
||||
while true:
|
||||
if selectWorker(readyWorker, fn, data): return
|
||||
for i in 0.. <currentPoolSize:
|
||||
if selectWorker(addr(workersData[i]), fn, data): return
|
||||
# determine what to do, but keep in mind this is expensive too:
|
||||
# state.calls < maxPoolSize: warmup phase
|
||||
# (state.calls and 127) == 0: periodic check
|
||||
if state.calls < maxPoolSize or (state.calls and 127) == 0:
|
||||
# ensure the call to 'advice' is atomic:
|
||||
if tryAcquire(stateLock):
|
||||
case advice(state)
|
||||
of doNothing: discard
|
||||
of doCreateThread:
|
||||
if currentPoolSize < maxPoolSize:
|
||||
if not workersData[currentPoolSize].initialized:
|
||||
activateThread(currentPoolSize)
|
||||
let w = addr(workersData[currentPoolSize])
|
||||
inc currentPoolSize
|
||||
if selectWorker(w, fn, data):
|
||||
release(stateLock)
|
||||
return
|
||||
# else we didn't succeed but some other thread, so do nothing.
|
||||
of doShutdownThread:
|
||||
if currentPoolSize > minPoolSize: dec currentPoolSize
|
||||
# we don't free anything here. Too dangerous.
|
||||
release(stateLock)
|
||||
# else the acquire failed, but this means some
|
||||
# other thread succeeded, so we don't need to do anything here.
|
||||
await(gSomeReady)
|
||||
|
||||
proc sync*() =
|
||||
## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate
|
||||
## waiting, you have to use an explicit barrier.
|
||||
while true:
|
||||
var allReady = true
|
||||
for i in 0 .. <currentPoolSize:
|
||||
if not allReady: break
|
||||
allReady = allReady and workersData[i].ready
|
||||
if allReady: break
|
||||
await(gSomeReady)
|
||||
|
||||
setup()
|
||||
@@ -1,7 +1,7 @@
|
||||
#
|
||||
#
|
||||
# Nimrod's Runtime Library
|
||||
# (c) Copyright 2013 Andreas Rumpf
|
||||
# (c) Copyright 2014 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
@@ -13,7 +13,7 @@
|
||||
include "system/inclrtl"
|
||||
|
||||
import
|
||||
strutils, os, strtabs, streams
|
||||
strutils, os, strtabs, streams, cpuinfo
|
||||
|
||||
when defined(windows):
|
||||
import winlean
|
||||
@@ -225,42 +225,10 @@ proc errorHandle*(p: PProcess): TFileHandle {.rtl, extern: "nosp$1",
|
||||
## it is closed when closing the PProcess ``p``.
|
||||
result = p.errHandle
|
||||
|
||||
when defined(macosx) or defined(bsd):
|
||||
const
|
||||
CTL_HW = 6
|
||||
HW_AVAILCPU = 25
|
||||
HW_NCPU = 3
|
||||
proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer,
|
||||
a: var csize, b: pointer, c: int): cint {.
|
||||
importc: "sysctl", header: "<sys/sysctl.h>".}
|
||||
|
||||
proc countProcessors*(): int {.rtl, extern: "nosp$1".} =
|
||||
## returns the numer of the processors/cores the machine has.
|
||||
## Returns 0 if it cannot be detected.
|
||||
when defined(windows):
|
||||
var x = getEnv("NUMBER_OF_PROCESSORS")
|
||||
if x.len > 0: result = parseInt(x.string)
|
||||
elif defined(macosx) or defined(bsd):
|
||||
var
|
||||
mib: array[0..3, cint]
|
||||
numCPU: int
|
||||
len: csize
|
||||
mib[0] = CTL_HW
|
||||
mib[1] = HW_AVAILCPU
|
||||
len = sizeof(numCPU)
|
||||
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
|
||||
if numCPU < 1:
|
||||
mib[1] = HW_NCPU
|
||||
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
|
||||
result = numCPU
|
||||
elif defined(hpux):
|
||||
result = mpctl(MPC_GETNUMSPUS, nil, nil)
|
||||
elif defined(irix):
|
||||
var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint
|
||||
result = sysconf(SC_NPROC_ONLN)
|
||||
else:
|
||||
result = sysconf(SC_NPROCESSORS_ONLN)
|
||||
if result <= 0: result = 1
|
||||
result = cpuinfo.countProcessors()
|
||||
|
||||
proc execProcesses*(cmds: openArray[string],
|
||||
options = {poStdErrToStdOut, poParentStreams},
|
||||
|
||||
@@ -2934,6 +2934,3 @@ when not defined(booting):
|
||||
|
||||
template isStatic*(x): expr = compiles(static(x))
|
||||
# checks whether `x` is a value known at compile-time
|
||||
|
||||
when hasThreadSupport:
|
||||
when hostOS != "standalone": include "system/sysspawn"
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
#
|
||||
#
|
||||
# Nimrod's Runtime Library
|
||||
# (c) Copyright 2012 Andreas Rumpf
|
||||
# (c) Copyright 2014 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## Atomic operations for Nimrod.
|
||||
{.push stackTrace:off.}
|
||||
|
||||
when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport:
|
||||
type
|
||||
@@ -203,3 +204,31 @@ proc atomicDec*(memLoc: var int, x: int = 1): int =
|
||||
else:
|
||||
dec(memLoc, x)
|
||||
result = memLoc
|
||||
|
||||
when defined(windows) and not defined(gcc):
|
||||
proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32
|
||||
{.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.}
|
||||
|
||||
proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool =
|
||||
interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0
|
||||
|
||||
else:
|
||||
# this is valid for GCC and Intel C++
|
||||
proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool
|
||||
{.importc: "__sync_bool_compare_and_swap", nodecl.}
|
||||
# XXX is this valid for 'int'?
|
||||
|
||||
|
||||
when (defined(x86) or defined(amd64)) and defined(gcc):
|
||||
proc cpuRelax {.inline.} =
|
||||
{.emit: """asm volatile("pause" ::: "memory");""".}
|
||||
elif (defined(x86) or defined(amd64)) and defined(vcc):
|
||||
proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".}
|
||||
elif defined(intelc):
|
||||
proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".}
|
||||
elif false:
|
||||
from os import sleep
|
||||
|
||||
proc cpuRelax {.inline.} = os.sleep(1)
|
||||
|
||||
{.pop.}
|
||||
|
||||
@@ -14,30 +14,6 @@ when not defined(NimString):
|
||||
|
||||
{.push stackTrace:off.}
|
||||
|
||||
when (defined(x86) or defined(amd64)) and defined(gcc):
|
||||
proc cpuRelax {.inline.} =
|
||||
{.emit: """asm volatile("pause" ::: "memory");""".}
|
||||
elif (defined(x86) or defined(amd64)) and defined(vcc):
|
||||
proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".}
|
||||
elif defined(intelc):
|
||||
proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".}
|
||||
elif false:
|
||||
from os import sleep
|
||||
|
||||
proc cpuRelax {.inline.} = os.sleep(1)
|
||||
|
||||
when defined(windows) and not defined(gcc):
|
||||
proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32
|
||||
{.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.}
|
||||
|
||||
proc cas(p: ptr bool; oldValue, newValue: bool): bool =
|
||||
interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0
|
||||
|
||||
else:
|
||||
# this is valid for GCC and Intel C++
|
||||
proc cas(p: ptr bool; oldValue, newValue: bool): bool
|
||||
{.importc: "__sync_bool_compare_and_swap", nodecl.}
|
||||
|
||||
# We declare our own condition variables here to get rid of the dummy lock
|
||||
# on Windows:
|
||||
|
||||
@@ -54,6 +30,9 @@ proc createCondVar(): CondVar =
|
||||
initSysLock(result.stupidLock)
|
||||
#acquireSys(result.stupidLock)
|
||||
|
||||
proc destroyCondVar(c: var CondVar) {.inline.} =
|
||||
deinitSysCond(c.c)
|
||||
|
||||
proc await(cv: var CondVar) =
|
||||
when defined(posix):
|
||||
acquireSys(cv.stupidLock)
|
||||
@@ -100,6 +79,26 @@ proc signal(cv: var FastCondVar) =
|
||||
#if cas(addr cv.slowPath, true, false):
|
||||
signal(cv.slow)
|
||||
|
||||
type
|
||||
Barrier* {.compilerProc.} = object
|
||||
counter: int
|
||||
cv: CondVar
|
||||
|
||||
proc barrierEnter*(b: ptr Barrier) {.compilerProc.} =
|
||||
atomicInc b.counter
|
||||
|
||||
proc barrierLeave*(b: ptr Barrier) {.compilerProc.} =
|
||||
atomicDec b.counter
|
||||
if b.counter <= 0: signal(b.cv)
|
||||
|
||||
proc openBarrier*(b: ptr Barrier) {.compilerProc.} =
|
||||
b.counter = 0
|
||||
b.cv = createCondVar()
|
||||
|
||||
proc closeBarrier*(b: ptr Barrier) {.compilerProc.} =
|
||||
await(b.cv)
|
||||
destroyCondVar(b.cv)
|
||||
|
||||
{.pop.}
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
@@ -4,20 +4,22 @@ discard """
|
||||
cmd: "nimrod $target --threads:on $options $file"
|
||||
"""
|
||||
|
||||
import threadpool
|
||||
|
||||
var
|
||||
x, y = 0
|
||||
|
||||
proc p1 =
|
||||
for i in 0 .. 1_000_000:
|
||||
for i in 0 .. 10_000:
|
||||
discard
|
||||
|
||||
inc x
|
||||
atomicInc x
|
||||
|
||||
proc p2 =
|
||||
for i in 0 .. 1_000_000:
|
||||
for i in 0 .. 10_000:
|
||||
discard
|
||||
|
||||
inc y, 2
|
||||
atomicInc y, 2
|
||||
|
||||
for i in 0.. 3:
|
||||
spawn(p1())
|
||||
|
||||
@@ -4,4 +4,6 @@ discard """
|
||||
cmd: "nimrod $target --threads:on $options $file"
|
||||
"""
|
||||
|
||||
import threadpool
|
||||
|
||||
spawn(1)
|
||||
|
||||
17
web/news.txt
17
web/news.txt
@@ -2,6 +2,23 @@
|
||||
News
|
||||
====
|
||||
|
||||
..
|
||||
2014-06-29 Version 0.9.6 released
|
||||
=================================
|
||||
|
||||
Changes affecting backwards compatibility
|
||||
-----------------------------------------
|
||||
|
||||
- ``spawn`` now uses an elaborate self-adapting thread pool and as such
|
||||
has been moved into its own module. So to use it, you now have to import
|
||||
``threadpool``.
|
||||
|
||||
|
||||
Library Additions
|
||||
-----------------
|
||||
|
||||
- Added module ``cpuinfo``.
|
||||
- Added module ``threadpool``.
|
||||
|
||||
|
||||
2014-04-21 Version 0.9.4 released
|
||||
|
||||
Reference in New Issue
Block a user