initial non-compiling version of 'parallel'

This commit is contained in:
Araq
2014-05-12 11:12:37 +02:00
parent bdb2d21f27
commit 6195dbe491
16 changed files with 1058 additions and 91 deletions

View File

@@ -9,7 +9,8 @@
## This module implements the 'implies' relation for guards.
import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents
import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents,
saturate
const
someEq = {mEqI, mEqI64, mEqF64, mEqEnum, mEqCh, mEqB, mEqRef, mEqProc,
@@ -25,6 +26,17 @@ const
someIn = {mInRange, mInSet}
someHigh = {mHigh}
# we don't list unsigned here because wrap around semantics suck for
# proving anything:
someAdd = {mAddI, mAddI64, mAddF64, mSucc}
someSub = {mSubI, mSubI64, mSubF64, mPred}
someMul = {mMulI, mMulI64, mMulF64}
someDiv = {mDivI, mDivI64, mDivF64}
someMod = {mModI, mModI64}
someMax = {mMaxI, mMaxI64, mMaxF64}
someMin = {mMinI, mMinI64, mMinF64}
proc isValue(n: PNode): bool = n.kind in {nkCharLit..nkNilLit}
proc isLocation(n: PNode): bool = not n.isValue
@@ -69,19 +81,24 @@ proc isLetLocation(m: PNode, isApprox: bool): bool =
proc interestingCaseExpr*(m: PNode): bool = isLetLocation(m, true)
proc getMagicOp(name: string, m: TMagic): PSym =
proc createMagic*(name: string, m: TMagic): PSym =
result = newSym(skProc, getIdent(name), nil, unknownLineInfo())
result.magic = m
let
opLe = getMagicOp("<=", mLeI)
opLt = getMagicOp("<", mLtI)
opAnd = getMagicOp("and", mAnd)
opOr = getMagicOp("or", mOr)
opNot = getMagicOp("not", mNot)
opIsNil = getMagicOp("isnil", mIsNil)
opContains = getMagicOp("contains", mInSet)
opEq = getMagicOp("==", mEqI)
opLe = createMagic("<=", mLeI)
opLt = createMagic("<", mLtI)
opAnd = createMagic("and", mAnd)
opOr = createMagic("or", mOr)
opNot = createMagic("not", mNot)
opIsNil = createMagic("isnil", mIsNil)
opContains = createMagic("contains", mInSet)
opEq = createMagic("==", mEqI)
opAdd = createMagic("+", mAddI)
opSub = createMagic("-", mSubI)
opMul = createMagic("*", mMulI)
opDiv = createMagic("div", mDivI)
opLen = createMagic("len", mLengthSeq)
proc swapArgs(fact: PNode, newOp: PSym): PNode =
result = newNodeI(nkCall, fact.info, 3)
@@ -137,17 +154,118 @@ proc neg(n: PNode): PNode =
result.sons[0] = newSymNode(opNot)
result.sons[1] = n
proc buildIsNil(arg: PNode): PNode =
result = newNodeI(nkCall, arg.info, 2)
result.sons[0] = newSymNode(opIsNil)
result.sons[1] = arg
proc buildCall(op: PSym; a: PNode): PNode =
result = newNodeI(nkCall, a.info, 2)
result.sons[0] = newSymNode(op)
result.sons[1] = a
proc buildCall(op: PSym; a, b: PNode): PNode =
result = newNodeI(nkCall, a.info, 3)
result.sons[0] = newSymNode(op)
result.sons[1] = a
result.sons[2] = b
proc `+@`*(a: PNode; b: BiggestInt): PNode =
opAdd.buildCall(a, nkIntLit.newIntNode(b))
proc `|+|`(a, b: PNode): PNode =
  ## Folds two literal nodes into their sum. The result is a copy of ``a``
  ## that carries the new value: integer-like literals are combined with the
  ## integer ``|+|`` operator on ``intVal``, everything else is added via
  ## ``floatVal``.
  let isIntegral = a.kind in {nkCharLit..nkUInt64Lit}
  result = copyNode(a)
  if isIntegral:
    result.intVal = a.intVal |+| b.intVal
  else:
    result.floatVal = a.floatVal + b.floatVal
proc `|*|`(a, b: PNode): PNode =
  ## Folds two literal nodes into their product. The result is a copy of ``a``
  ## that carries the new value: integer-like literals are combined with the
  ## integer ``|*|`` operator on ``intVal``, everything else is multiplied via
  ## ``floatVal``.
  let isIntegral = a.kind in {nkCharLit..nkUInt64Lit}
  result = copyNode(a)
  if isIntegral:
    result.intVal = a.intVal |*| b.intVal
  else:
    result.floatVal = a.floatVal * b.floatVal
proc zero(): PNode = nkIntLit.newIntNode(0)
proc one(): PNode = nkIntLit.newIntNode(1)
proc minusOne(): PNode = nkIntLit.newIntNode(-1)
proc lowBound*(x: PNode): PNode = nkIntLit.newIntNode(firstOrd(x.typ))
proc highBound*(x: PNode): PNode =
  ## Builds a node for the highest valid index of ``x``: the literal
  ## ``lastOrd`` of its type when ``x`` is an array, otherwise the
  ## expression ``len(x) + (-1)``.
  let isArray = x.typ.skipTypes(abstractInst).kind == tyArray
  if not isArray:
    result = opAdd.buildCall(opLen.buildCall(x), minusOne())
  else:
    result = nkIntLit.newIntNode(lastOrd(x.typ))
proc canon*(n: PNode): PNode =
  ## Rewrites the expression ``n`` into a canonical form so that
  ## syntactically different but equivalent expressions compare equal.
  ## The children are canonicalized first; afterwards three groups of
  ## rewrite rules are applied to the resulting node:
  ##
  ## 1. normalization of the operator itself (operand order of symmetric
  ##    operators, ``high`` --> ``len + (-1)``, ``-x`` --> ``x * (-1)``,
  ##    ``x - c`` --> ``x + (-c)``, every ``len`` magic --> ``opLen``),
  ## 2. re-association of nested ``+`` / ``*`` with literal operands,
  ## 3. moving a literal summand to the other side of ``<`` / ``<=``.
  # XXX for now only the new code in 'semparallel' uses this
  if n.safeLen >= 1:
    result = newNodeI(n.kind, n.info, n.len)
    for i in 0 .. < n.safeLen:
      result.sons[i] = canon(n.sons[i])
  else:
    result = n
  case result.getMagic
  of someEq, someAdd, someMul, someMin, someMax:
    # these are symmetric; put value as last:
    if result.sons[1].isValue and not result.sons[2].isValue:
      result = swapArgs(result, result.sons[0].sym)
      # (4 + foo) + 2 --> (foo + 4) + 2
  of someHigh:
    # high == len+(-1)
    result = opAdd.buildCall(opLen.buildCall(result[1]), minusOne())
  of mUnaryMinusI, mUnaryMinusI64:
    # -x --> x * (-1); rewriting it to an addition would change the value
    result = buildCall(opMul, result[1], minusOne())
  of someSub:
    # x - 4 --> x + (-4)
    var b = result[2]
    if b.kind in {nkCharLit..nkUInt64Lit} and b.intVal != low(BiggestInt):
      # low(BiggestInt) is excluded because it cannot be negated
      b = copyNode(b)
      b.intVal = -b.intVal
      result = buildCall(opAdd, result[1], b)
    elif b.kind in {nkFloatLit..nkFloat64Lit}:
      b = copyNode(b)
      b.floatVal = -b.floatVal
      result = buildCall(opAdd, result[1], b)
  of someLen:
    result.sons[0] = opLen.newSymNode
  else: discard

  # re-association:
  # (foo+5)+5 --> foo+10; (foo*5)*5 --> foo*25
  case result.getMagic
  of someAdd:
    if result[2].isValue and
        result[1].getMagic in someAdd and result[1][2].isValue:
      result = opAdd.buildCall(result[1][1], result[1][2] |+| result[2])
  of someMul:
    if result[2].isValue and
        result[1].getMagic in someMul and result[1][2].isValue:
      # the folded node has to stay a multiplication
      result = opMul.buildCall(result[1][1], result[1][2] |*| result[2])
  else: discard

  # most important rule: (x-4) < a.len --> x < a.len+4
  case result.getMagic
  of someLe, someLt:
    let x = result[1]
    let y = result[2]
    if x.kind in nkCallKinds and x.len == 3 and x[2].isValue and
        isLetLocation(x[1], true):
      case x.getMagic
      of someSub:
        # (v - c) < y --> v < y + c
        result = buildCall(result[0].sym, x[1], opAdd.buildCall(y, x[2]))
      of someAdd:
        # (v + c) < y --> v < y - c
        result = buildCall(result[0].sym, x[1], opSub.buildCall(y, x[2]))
      else: discard
    elif y.kind in nkCallKinds and y.len == 3 and y[2].isValue and
        isLetLocation(y[1], true):
      # a.len < x-3
      # The literal moves to the left-hand side; the operand order of the
      # comparison must be kept, otherwise the inequality is reversed.
      case y.getMagic
      of someSub:
        # x < (v - c) --> x + c < v
        result = buildCall(result[0].sym, opAdd.buildCall(x, y[2]), y[1])
      of someAdd:
        # x < (v + c) --> x - c < v
        result = buildCall(result[0].sym, opSub.buildCall(x, y[2]), y[1])
      else: discard
  else: discard
proc usefulFact(n: PNode): PNode =
case n.getMagic
of someEq:
if skipConv(n.sons[2]).kind == nkNilLit and (
isLetLocation(n.sons[1], false) or isVar(n.sons[1])):
result = buildIsNil(n.sons[1])
result = opIsNil.buildCall(n.sons[1])
else:
if isLetLocation(n.sons[1], true) or isLetLocation(n.sons[2], true):
# XXX algebraic simplifications! 'i-1 < a.len' --> 'i < a.len+1'
@@ -217,7 +335,7 @@ proc addFactNeg*(m: var TModel, n: PNode) =
let n = n.neg
if n != nil: addFact(m, n)
proc sameTree(a, b: PNode): bool =
proc sameTree*(a, b: PNode): bool =
result = false
if a == b:
result = true
@@ -519,7 +637,46 @@ proc doesImply*(facts: TModel, prop: PNode): TImplication =
if result != impUnknown: return
proc impliesNotNil*(facts: TModel, arg: PNode): TImplication =
result = doesImply(facts, buildIsNil(arg).neg)
result = doesImply(facts, opIsNil.buildCall(arg).neg)
proc proveLe*(m: TModel; a, b: PNode): TImplication =
  ## Tries to prove ``a <= b``. The comparison is canonicalized first, then a
  ## number of hard-coded axioms are tried; if none applies, the facts stored
  ## in ``m`` are consulted via ``doesImply``.
  let res = canon(opLe.buildCall(a, b))
  # we hardcode lots of axioms here:
  let a = res[1]
  let b = res[2]
  # 0 <= 3
  if a.isValue and b.isValue:
    return if leValue(a, b): impYes else: impNo

  # use type information too: x <= 4 if high(x) <= 4
  if b.isValue and a.typ != nil and a.typ.isOrdinalType:
    if lastOrd(a.typ) <= b.intVal: return impYes
  # 3 <= x if 3 <= low(x); note that 'low(x) <= 3' would prove nothing
  if a.isValue and b.typ != nil and b.typ.isOrdinalType:
    if a.intVal <= firstOrd(b.typ): return impYes

  # x <= x
  if sameTree(a, b): return impYes

  # x <= x+c iff 0 <= c
  if b.getMagic in someAdd and sameTree(a, b[1]):
    return proveLe(m, zero(), b[2])

  # x <= x*c if 1 <= c and 0 <= x:
  if b.getMagic in someMul and sameTree(a, b[1]):
    if proveLe(m, one(), b[2]) == impYes and proveLe(m, zero(), a) == impYes:
      return impYes

  # x div c <= x if 1 <= c and 0 <= x:
  if a.getMagic in someDiv and sameTree(a[1], b):
    if proveLe(m, one(), a[2]) == impYes and proveLe(m, zero(), b) == impYes:
      return impYes

  # use the knowledge base:
  return doesImply(m, res)
proc addFactLe*(m: var TModel; a, b: PNode) =
m.add canon(opLe.buildCall(a, b))
proc settype(n: PNode): PType =
result = newType(tySet, n.typ.owner)

View File

@@ -114,11 +114,15 @@ proc callCodegenProc*(name: string, arg1: PNode;
if arg3 != nil: result.add arg3
proc createWrapperProc(f: PNode; threadParam, argsParam: PSym;
varSection, call: PNode): PSym =
varSection, call, barrier: PNode): PSym =
var body = newNodeI(nkStmtList, f.info)
body.add varSection
if barrier != nil:
body.add callCodeGenProc("barrierEnter", barrier)
body.add callCodeGenProc("nimArgsPassingDone", newSymNode(threadParam))
body.add call
if barrier != nil:
body.add callCodeGenProc("barrierLeave", barrier)
var params = newNodeI(nkFormalParams, f.info)
params.add emptyNode
@@ -146,7 +150,7 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode =
result.typ = newType(tyPtr, objType.owner)
result.typ.rawAddSon(objType)
proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
proc wrapProcForSpawn*(owner: PSym; n: PNode; barrier: PNode = nil): PNode =
result = newNodeI(nkStmtList, n.info)
if n.kind notin nkCallKinds or not n.typ.isEmptyType:
localError(n.info, "'spawn' takes a call expression of type void")
@@ -162,6 +166,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
threadParam.typ = ptrType
argsParam.typ = ptrType
argsParam.position = 1
var objType = createObj(owner, n.info)
incl(objType.flags, tfFinal)
let castExpr = createCastExpr(argsParam, objType)
@@ -223,6 +228,17 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
call.add(newSymNode(temp))
let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call)
var barrierAsExpr: PNode = nil
if barrier != nil:
let typ = newType(tyPtr, owner)
typ.rawAddSon(magicsys.getCompilerProc("Barrier").typ)
var field = newSym(skField, getIdent"barrier", owner, n.info)
field.typ = typ
objType.addField(field)
result.add newFastAsgnStmt(newDotExpr(scratchObj, field), barrier)
barrierAsExpr = indirectAccess(castExpr, field, n.info)
let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call,
barrierAsExpr)
result.add callCodeGenProc("nimSpawn", wrapper.newSymNode,
genAddrOf(scratchObj.newSymNode))

414
compiler/semparallel.nim Normal file
View File

@@ -0,0 +1,414 @@
#
#
# The Nimrod Compiler
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Semantic checking for 'parallel'.
# - slices should become "nocopy" to openArray (+)
# - need to perform bound checks (+)
#
# - parallel needs to insert a barrier (+)
# - passed arguments need to be ensured to be "const"
# - what about 'f(a)'? --> f shouldn't have side effects anyway
# - passed arrays need to be ensured not to alias
# - passed slices need to be ensured to be disjoint (+)
# - output slices need special logic
import lowerings, guards, sempass2
discard """
one major problem:
spawn f(a[i])
inc i
spawn f(a[i])
is valid, but
spawn f(a[i])
spawn f(a[i])
inc i
is not! However,
spawn f(a[i])
if guard: inc i
spawn f(a[i])
is not valid either! --> We need a flow dependent analysis here.
However:
while foo:
spawn f(a[i])
inc i
spawn f(a[i])
Is not valid either! --> We should really restrict 'inc' to loop endings?
The heuristic that we implement here (that has no false positives) is: Usage
of 'i' in a slice *after* we determined the stride is invalid!
"""
# Data structures of the 'parallel' analysis: the per-variable bookkeeping
# (MonotonicVar) and the state collected while traversing the body
# (AnalysisCtx).
type
TDirection = enum
ascending, descending
MonotonicVar = object
v: PSym
lower, upper, stride: PNode
dir: TDirection
blacklisted: bool # blacklisted variables that are not monotonic
AnalysisCtx = object
locals: seq[MonotonicVar]
slices: seq[tuple[x,a,b: PNode, spawnId: int, inLoop: bool]]
guards: TModel # nested guards
args: seq[PSym] # args must be deeply immutable
spawns: int # we can check that at least 1 spawn is used in
# the 'parallel' section
currentSpawnId: int
inLoop: int
let opSlice = createMagic("slice", mSlice)
proc initAnalysisCtx(): AnalysisCtx =
result.locals = @[]
result.slices = @[]
result.args = @[]
result.guards = @[]
proc getSlot(c: var AnalysisCtx; s: PSym): ptr MonotonicVar =
  ## Returns a pointer to the entry of ``c.locals`` that tracks ``s``. If no
  ## entry exists yet, a fresh one is appended and returned.
  for i in 0 .. high(c.locals):
    if c.locals[i].v == s: return addr(c.locals[i])
  let idx = c.locals.len
  c.locals.setLen(idx+1)
  c.locals[idx].v = s
  result = addr(c.locals[idx])
proc getRoot(n: PNode): PSym =
## ``getRoot`` takes a *path* ``n``. A path is an lvalue expression
## like ``obj.x[i].y``. The *root* of a path is the symbol that can be
## determined as the owner; ``obj`` in the example.
case n.kind
of nkSym:
if n.sym.kind in {skVar, skResult, skTemp, skLet, skForVar}:
result = n.sym
of nkDotExpr, nkBracketExpr, nkHiddenDeref, nkDerefExpr,
nkObjUpConv, nkObjDownConv, nkCheckedFieldExpr:
result = getRoot(n.sons[0])
of nkHiddenStdConv, nkHiddenSubConv, nkConv:
result = getRoot(n.sons[1])
of nkCallKinds:
if getMagic(n) == mSlice: result = getRoot(n.sons[1])
else: discard
proc gatherArgs(c: var AnalysisCtx; n: PNode) =
  ## Walks all descendants of ``n`` and records the root symbol of every
  ## sub-expression in ``c.args``; each symbol is stored only once.
  for i in 0 .. <n.safeLen:
    let child = n[i]
    let root = getRoot(child)
    if root != nil:
      var known = false
      for r in items(c.args):
        if r == root:
          known = true
          break
      if not known: c.args.add root
    gatherArgs(c, child)
proc isLocal(s: PSym): bool =
s.kind in {skResult, skTemp, skForVar, skVar, skLet} and
{sfAddrTaken, sfGlobal} * s.flags == {}
proc checkLocal(c: var AnalysisCtx; n: PNode) =
  ## Reports an error for every local variable used inside ``n`` whose stride
  ## has already been determined, i.e. a counter that is used after it was
  ## incremented.
  if n.kind == nkSym and isLocal(n.sym):
    # 'n' is a symbol node and has no children; the symbol is 'n.sym' itself
    let slot = c.getSlot(n.sym)
    if slot.stride != nil:
      localError(n.info, "invalid usage of counter after increment")
  else:
    for i in 0 .. <n.safeLen: checkLocal(c, n.sons[i])
proc checkLe(c: AnalysisCtx; a, b: PNode) =
  ## Statically checks ``a <= b`` against the current guards and reports an
  ## error (located at ``a``) if the relation cannot be proven or is
  ## provably false.
  case proveLe(c.guards, a, b)
  of impUnknown:
    localError(a.info, "cannot prove: " & a.renderTree & " <= " & b.renderTree)
  of impYes: discard
  of impNo:
    localError(a.info, "can prove: " & a.renderTree & " > " & b.renderTree)
proc checkBounds(c: AnalysisCtx; arr, idx: PNode) =
checkLe(c, arr.lowBound, idx)
checkLe(c, idx, arr.highBound)
proc addLowerBoundAsFacts(c: var AnalysisCtx) =
for v in c.locals:
if not v.blacklisted:
c.guards.addFactLe(v.lower, newSymNode(v.v))
proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: int) =
  ## Registers the slice ``n[x][n[le] .. n[ri]]`` for the current spawn.
  ## ``x``, ``le`` and ``ri`` are child indexes into ``n``. Both slice ends
  ## are bounds checked statically right here.
  checkLocal(c, n)
  let
    arr = n.sons[x]
    lo = n.sons[le]
    hi = n.sons[ri]
  # the lower bound facts are only needed for the bounds checks; the guards
  # are restored afterwards
  let factCount = c.guards.len
  addLowerBoundAsFacts(c)
  c.checkBounds(arr, lo)
  c.checkBounds(arr, hi)
  c.guards.setLen(factCount)
  c.slices.add((arr, lo, hi, c.currentSpawnId, c.inLoop > 0))
template `?`(x): expr = x.renderTree
proc overlap(m: TModel; x,y,c,d: PNode) =
  ## Reports an error unless the slices ``x..y`` and ``c..d`` can be proven
  ## to be disjoint.
  # X..Y and C..D overlap iff (X <= D and C <= Y); so they are disjoint
  # iff X > D or C > Y.
  case proveLe(m, x, d)
  of impUnknown:
    localError(x.info,
      "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" %
        [?x, ?d, ?x, ?y, ?c, ?d])
  of impYes:
    # X <= D holds, so disjointness now depends on C > Y:
    case proveLe(m, c, y)
    of impUnknown:
      localError(x.info,
        "cannot prove: $# > $#; required for $#..$# disjoint from $#..$#" %
          [?c, ?y, ?x, ?y, ?c, ?d])
    of impYes:
      localError(x.info, "$#..$# not disjoint from $#..$#" % [?x, ?y, ?c, ?d])
    of impNo: discard
  of impNo: discard
proc stride(c: AnalysisCtx; n: PNode): BiggestInt =
  ## Returns the sum of the strides of all local variables occurring in
  ## ``n``. Variables without a known stride contribute 0.
  # note: 0 if it cannot be determined is just right because then
  # we analyse 'i..i' and 'i+0 .. i+0' and these are not disjoint!
  if n.kind == nkSym and isLocal(n.sym):
    # read-only lookup: 'c' is immutable here, so no slot may be created
    for slot in items(c.locals):
      if slot.v == n.sym:
        if slot.stride != nil:
          result = slot.stride.intVal
        break
  else:
    for i in 0 .. <n.safeLen: inc(result, stride(c, n.sons[i]))
proc checkSlicesAreDisjoint(c: var AnalysisCtx) =
  ## Checks the collected slices for overlaps. This has to run after the
  ## whole tree has been traversed, because only then are the strides
  ## available.
  # the computed lower bounds become facts first:
  addLowerBoundAsFacts(c)
  # a slice used in a loop must be disjoint from itself one stride later:
  for s in items(c.slices):
    if s.inLoop:
      overlap(c.guards, s.a, s.b, s.a +@ c.stride(s.a), s.b +@ c.stride(s.b))
  # A tricky example is:
  #   while true:
  #     spawn f(a[i])
  #     spawn f(a[i+1])
  #     inc i  # inc i, 2  would be correct here
  #
  # Or even worse:
  #   while true:
  #     spawn f(a[i+1 .. i+3])
  #     spawn f(a[i+4 .. i+5])
  #     inc i, 4
  # Prove that i*k*stride + 3 != i*k'*stride + 5
  # For the correct example this amounts to
  #   i*k*2 != i*k'*2 + 1
  # which is true.
  # Nothing like that is attempted for now, even though it would be feasible
  # for many useful examples. Instead every slice is attached to a spawn and
  # slices of the same array that belong to different spawns are compared:
  let last = high(c.slices)
  for i in 0 .. last:
    for j in 0 .. last:
      if i == j: continue
      let x = c.slices[i]
      let y = c.slices[j]
      if x.spawnId == y.spawnId or not guards.sameTree(x.x, y.x): continue
      if x.inLoop or y.inLoop:
        # a possible further heuristic: if both slices have the form
        # (i+c)..(i+c) and (i+d)..(i+d), check that they are disjoint and
        # c <= stride and d <= stride:
        # XXX
        localError(x.x.info, "cannot prove $#..$# disjoint from $#..$#" %
          [?x.a, ?x.b, ?y.a, ?y.b])
      else:
        overlap(c.guards, x.a, x.b, y.a, y.b)
proc analyse(c: var AnalysisCtx; n: PNode)
proc analyseSons(c: var AnalysisCtx; n: PNode) =
for i in 0 .. <safeLen(n): analyse(c, n[i])
proc min(a, b: PNode): PNode =
  ## Returns the literal node with the smaller ``intVal``. A nil ``a`` yields
  ## ``b``; on equal values ``b`` is returned.
  result = b
  if not a.isNil and a.intVal < b.intVal: result = a
proc analyseCall(c: var AnalysisCtx; n: PNode; op: PSym) =
  ## Analyses the direct call ``n`` of the routine ``op``:
  ## * ``spawn``: counts the spawn, gathers its arguments and analyses the
  ##   children under a fresh spawn id,
  ## * ``inc`` / system ``+=`` on a local with a positive literal increment:
  ##   records the (minimal) stride of the counter,
  ## * system ``[]`` / ``[]=``: registers the accessed slice,
  ## * anything else: just analyses the children.
  if op.magic == mSpawn:
    inc c.spawns
    let oldSpawnId = c.currentSpawnId
    c.currentSpawnId = c.spawns
    gatherArgs(c, n[1])
    analyseSons(c, n)
    c.currentSpawnId = oldSpawnId
  elif op.magic == mInc or (op.name.s == "+=" and sfSystemModule in op.owner.flags):
    # n[1] is the counter, n[2] the amount it is incremented by
    if n.len > 2 and n[1].kind == nkSym and n[1].sym.isLocal:
      let incr = n[2].skipConv
      if incr.kind in {nkCharLit..nkUInt32Lit} and incr.intVal > 0:
        let slot = c.getSlot(n[1].sym)
        slot.stride = min(slot.stride, incr)
    analyseSons(c, n)
  elif op.name.s == "[]" and sfSystemModule in op.owner.flags:
    c.addSlice(n, 1, 2, 3)
    analyseSons(c, n)
  elif op.name.s == "[]=" and sfSystemModule in op.owner.flags:
    c.addSlice(n, 1, 2, 3)
    analyseSons(c, n)
  else:
    analyseSons(c, n)
proc analyseCase(c: var AnalysisCtx; n: PNode) =
  ## Analyses a ``case`` statement: the selector first, then every branch
  ## with the facts implied by that branch. The guards are reset before each
  ## branch and after the last one.
  analyse(c, n.sons[0])
  let factCount = c.guards.len
  for i in 1 .. <n.len:
    setLen(c.guards, factCount)
    addCaseBranchFacts(c.guards, n, i)
    let branch = n.sons[i]
    for k in 0 .. <branch.len:
      analyse(c, branch.sons[k])
  setLen(c.guards, factCount)
proc analyseIf(c: var AnalysisCtx; n: PNode) =
  ## Analyses an ``if`` statement/expression. The first branch is analysed
  ## with its condition as a fact; every later branch is analysed with the
  ## negated conditions of the preceding branches plus (if it has one) its
  ## own condition. The guards are restored afterwards.
  let firstBranch = n.sons[0]
  analyse(c, firstBranch.sons[0])
  let factCount = c.guards.len
  addFact(c.guards, firstBranch.sons[0])
  analyse(c, firstBranch.sons[1])
  for i in 1 .. <n.len:
    setLen(c.guards, factCount)
    for j in 0 .. i-1:
      addFactNeg(c.guards, n.sons[j].sons[0])
    let branch = n.sons[i]
    # an 'else' branch has a single son and thus no condition
    if branch.len > 1:
      addFact(c.guards, branch.sons[0])
    for k in 0 .. <branch.len:
      analyse(c, branch.sons[k])
  setLen(c.guards, factCount)
proc analyse(c: var AnalysisCtx; n: PNode) =
  ## Main traversal of the body of a 'parallel' section. It collects the
  ## monotonic counters, the accessed slices and the spawns in ``c`` and
  ## maintains the guard facts that hold at each program point.
  case n.kind
  of nkAsgn, nkFastAsgn:
    # since we already ensure sfAddrTaken is not in s.flags, we only need to
    # prevent direct assignments to the monotonic variable:
    if n[0].kind == nkSym and n[0].sym.isLocal:
      let slot = c.getSlot(n[0].sym)
      slot.blacklisted = true
    invalidateFacts(c.guards, n.sons[0])
    analyseSons(c, n)
    addAsgnFact(c.guards, n.sons[0], n.sons[1])
  of nkCallKinds:
    # direct call:
    if n[0].kind == nkSym: analyseCall(c, n, n[0].sym)
    else: analyseSons(c, n)
  of nkBracketExpr:
    # an index expression 'a[i]' is the slice 'a[i..i]'
    c.addSlice(n, 0, 1, 1)
    analyseSons(c, n)
  of nkReturnStmt, nkRaiseStmt, nkTryStmt:
    localError(n.info, "invalid control flow for 'parallel'")
    # 'break' that leaves the 'parallel' section is not valid either
    # or maybe we should generate a 'try' XXX
  of nkVarSection:
    for it in n:
      # only initialized variables get a lower bound
      if it.sons[it.len-1].kind != nkEmpty:
        for j in 0 .. it.len-3:
          if it[j].kind == nkSym and it[j].sym.isLocal:
            let slot = c.getSlot(it[j].sym)
            if slot.lower.isNil: slot.lower = it.sons[it.len-1]
            else: internalError(it.info, "slot already has a lower bound")
    analyseSons(c, n)
  of nkCaseStmt: analyseCase(c, n)
  of nkIfStmt, nkIfExpr: analyseIf(c, n)
  of nkWhileStmt:
    analyse(c, n.sons[0])
    # 'while true' loop?
    inc c.inLoop
    if isTrue(n.sons[0]):
      analyseSons(c, n.sons[1])
    else:
      # loop may never execute:
      let oldState = c.locals.len
      let oldFacts = c.guards.len
      addFact(c.guards, n.sons[0])
      analyse(c, n.sons[1])
      setLen(c.locals, oldState)
      setLen(c.guards, oldFacts)
      # we know after the loop the negation holds:
      if not containsNode(n.sons[1], {nkBreakStmt}):
        addFactNeg(c.guards, n.sons[0])
    dec c.inLoop
  of nkTypeSection, nkProcDef, nkConverterDef, nkMethodDef, nkIteratorDef,
      nkMacroDef, nkTemplateDef, nkConstSection, nkPragma:
    discard
  else:
    analyseSons(c, n)
proc transformSlices(n: PNode): PNode =
  ## Returns a copy of ``n`` in which every call of the system ``[]``
  ## operator is turned into a call of the ``slice`` magic (``opSlice``).
  ## Leaf nodes are returned unchanged.
  if n.kind in nkCallKinds and n[0].kind == nkSym:
    let op = n[0].sym
    if op.name.s == "[]" and sfSystemModule in op.owner.flags:
      result = copyTree(n)
      # the callee slot holds a node, so the symbol has to be wrapped
      result.sons[0] = opSlice.newSymNode
      return result
  if n.safeLen > 0:
    result = newNodeI(n.kind, n.info, n.len)
    result.typ = n.typ
    for i in 0 .. < n.len:
      result.sons[i] = transformSlices(n.sons[i])
  else:
    result = n
proc transformSpawn(owner: PSym; n, barrier: PNode): PNode =
  ## Returns a copy of ``n`` in which every ``spawn`` call is replaced by the
  ## code produced by ``wrapProcForSpawn`` (after its slices have been
  ## transformed). All other nodes, including ordinary calls, are copied
  ## recursively; leaf nodes are returned unchanged.
  if n.kind in nkCallKinds and n[0].kind == nkSym and
      n[0].sym.magic == mSpawn:
    return wrapProcForSpawn(owner, transformSlices(n), barrier)
  if n.safeLen > 0:
    result = newNodeI(n.kind, n.info, n.len)
    result.typ = n.typ
    for i in 0 .. < n.len:
      result.sons[i] = transformSpawn(owner, n.sons[i], barrier)
  else:
    result = n
proc liftParallel*(owner: PSym; n: PNode): PNode =
  ## Lowers a 'parallel' section. The body (last son of ``n``) is analysed
  ## first; then a statement list is produced that declares a barrier,
  ## opens it, runs the body with all spawns wrapped, and closes the barrier.
  # this needs to be called after the 'for' loop elimination

  # first pass:
  # - detect monotonic local integer variables
  # - detect used slices
  # - detect used arguments
  var a = initAnalysisCtx()
  let body = n.lastSon
  analyse(a, body)
  if a.spawns == 0:
    localError(n.info, "'parallel' section without 'spawn'")
  checkSlicesAreDisjoint(a)
  # NOTE(review): 'checkArgs' and 'generateAliasChecks' are not defined in
  # the visible part of this module -- they still need to be provided.
  checkArgs(a, body)

  # var barrier: Barrier
  var varSection = newNodeI(nkVarSection, n.info)
  var temp = newSym(skTemp, getIdent"barrier", owner, n.info)
  temp.typ = magicsys.getCompilerProc("Barrier").typ
  incl(temp.flags, sfFromGeneric)

  var vpart = newNodeI(nkIdentDefs, n.info, 3)
  vpart.sons[0] = newSymNode(temp)
  vpart.sons[1] = ast.emptyNode
  # no initializer: the barrier is set up by 'openBarrier'
  vpart.sons[2] = ast.emptyNode
  varSection.add vpart

  let barrier = genAddrOf(vpart[0])

  result = newNodeI(nkStmtList, n.info)
  generateAliasChecks(a, result)
  result.add varSection
  result.add callCodeGenProc("openBarrier", barrier)
  result.add transformSpawn(owner, body, barrier)
  result.add callCodeGenProc("closeBarrier", barrier)

View File

@@ -89,7 +89,7 @@ proc initVarViaNew(a: PEffects, n: PNode) =
if n.kind != nkSym: return
let s = n.sym
if {tfNeedsInit, tfNotNil} * s.typ.flags <= {tfNotNil}:
# 'x' is not nil, but that doesn't mean it's not nil children
# 'x' is not nil, but that doesn't mean its "not nil" children
# are initialized:
initVar(a, n)
@@ -478,7 +478,7 @@ proc trackBlock(tracked: PEffects, n: PNode) =
else:
track(tracked, n)
proc isTrue(n: PNode): bool =
proc isTrue*(n: PNode): bool =
n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or
n.kind == nkIntLit and n.intVal != 0

View File

@@ -131,8 +131,9 @@ proc createStrKeepNode(x: var TFullReg) =
nfAllConst in x.node.flags:
# XXX this is hacky; tests/txmlgen triggers it:
x.node = newNode(nkStrLit)
# debug x.node
#assert x.node.kind in {nkStrLit..nkTripleStrLit}
# It is not only hacky, it is also wrong for tgentemplate. The primary
# cause of bugs like these is that the VM does not properly distinguish
# between variable definitions (var foo = e) and variable updates (foo = e).
template createStr(x) =
x.node = newNode(nkStrLit)

View File

@@ -16,6 +16,7 @@ arm.linux.gcc.linkerexe = "arm-linux-gcc"
path="$lib/core"
path="$lib/pure"
path="$lib/pure/collections"
path="$lib/pure/concurrency"
path="$lib/impure"
path="$lib/wrappers"
# path="$lib/wrappers/cairo"

View File

@@ -0,0 +1,58 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements procs to determine the number of CPUs / cores.
include "system/inclrtl"
import strutils, os
when not defined(windows):
import posix
when defined(linux):
import linux
when defined(macosx) or defined(bsd):
const
CTL_HW = 6
HW_AVAILCPU = 25
HW_NCPU = 3
proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer,
a: var csize, b: pointer, c: int): cint {.
importc: "sysctl", header: "<sys/sysctl.h>".}
proc countProcessors*(): int {.rtl, extern: "ncpi$1".} =
  ## Returns the number of processors/cores the machine has.
  ## Returns 1 if the number cannot be detected.
  when defined(windows):
    var x = getEnv("NUMBER_OF_PROCESSORS")
    if x.len > 0: result = parseInt(x.string)
  elif defined(macosx) or defined(bsd):
    var
      mib: array[0..3, cint]
      numCPU: int
      len: csize
    mib[0] = CTL_HW
    mib[1] = HW_AVAILCPU
    len = sizeof(numCPU)
    discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
    if numCPU < 1:
      # HW_AVAILCPU gave no usable answer; fall back to HW_NCPU
      mib[1] = HW_NCPU
      discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
    result = numCPU
  elif defined(hpux):
    result = mpctl(MPC_GETNUMSPUS, nil, nil)
  elif defined(irix):
    var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint
    result = sysconf(SC_NPROC_ONLN)
  else:
    result = sysconf(SC_NPROCESSORS_ONLN)
  # a failed detection is reported as a single processor
  if result <= 0: result = 1

View File

@@ -0,0 +1,96 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements a helper for a thread pool to determine whether
## creating a thread is a good idea.
when defined(windows):
import winlean, os, strutils, math
proc `-`(a, b: TFILETIME): int64 = a.rdFileTime - b.rdFileTime
elif defined(linux):
from cpuinfo import countProcessors
type
ThreadPoolAdvice* = enum
doNothing,
doCreateThread, # create additional thread for throughput
doShutdownThread # too many threads are busy, shutdown one
ThreadPoolState* = object
when defined(windows):
prevSysKernel, prevSysUser, prevProcKernel, prevProcUser: TFILETIME
calls*: int
proc advice*(s: var ThreadPoolState): ThreadPoolAdvice =
when defined(windows):
var
sysIdle, sysKernel, sysUser,
procCreation, procExit, procKernel, procUser: TFILETIME
if getSystemTimes(sysIdle, sysKernel, sysUser) == 0 or
getProcessTimes(THandle(-1), procCreation, procExit,
procKernel, procUser) == 0:
return doNothing
if s.calls > 0:
let
sysKernelDiff = sysKernel - s.prevSysKernel
sysUserDiff = sysUser - s.prevSysUser
procKernelDiff = procKernel - s.prevProcKernel
procUserDiff = procUser - s.prevProcUser
sysTotal = int(sysKernelDiff + sysUserDiff)
procTotal = int(procKernelDiff + procUserDiff)
# total CPU usage < 85% --> create a new worker thread.
# Measurements show that 100% and often even 90% is not reached even
# if all my cores are busy.
if sysTotal == 0 or procTotal / sysTotal < 0.85:
result = doCreateThread
s.prevSysKernel = sysKernel
s.prevSysUser = sysUser
s.prevProcKernel = procKernel
s.prevProcUser = procUser
elif defined(linux):
proc fscanf(c: TFile, frmt: cstring) {.varargs, importc,
header: "<stdio.h>".}
var f = open("/proc/loadavg")
var b: float
var busy, total: int
fscanf(f,"%lf %lf %lf %ld/%ld",
addr b, addr b, addr b, addr busy, addr total)
f.close()
let cpus = countProcessors()
if busy-1 < cpus:
result = doCreateThread
elif busy-1 >= cpus*2:
result = doShutdownThread
else:
result = doNothing
else:
# XXX implement this for other OSes
result = doNothing
inc s.calls
when isMainModule:
proc busyLoop() =
while true:
discard random(80)
os.sleep(100)
spawn busyLoop()
spawn busyLoop()
spawn busyLoop()
spawn busyLoop()
var s: ThreadPoolState
for i in 1 .. 70:
echo advice(s)
os.sleep(1000)

View File

@@ -0,0 +1,210 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Implements Nimrod's 'spawn'.
import cpuinfo, cpuload, locks
{.push stackTrace:off.}
type
CondVar = object
c: TCond
L: TLock
counter: int
proc createCondVar(): CondVar =
initCond(result.c)
initLock(result.L)
proc destroyCondVar(cv: var CondVar) {.inline.} =
deinitCond(cv.c)
deinitLock(cv.L)
proc await(cv: var CondVar) =
acquire(cv.L)
while cv.counter <= 0:
wait(cv.c, cv.L)
dec cv.counter
release(cv.L)
proc signal(cv: var CondVar) =
acquire(cv.L)
inc cv.counter
release(cv.L)
signal(cv.c)
type
Barrier* {.compilerProc.} = object
counter: int
cv: CondVar
proc barrierEnter*(b: ptr Barrier) {.compilerProc.} =
atomicInc b.counter
proc barrierLeave*(b: ptr Barrier) {.compilerProc.} =
  ## Called by a spawned task when it has finished: decrements the barrier's
  ## counter and signals the barrier's condition variable once the counter
  ## has dropped to zero (or below).
  # Use the value produced by the atomic decrement itself; reading
  # 'b.counter' again afterwards is a separate, unsynchronized load that
  # may already observe the updates of other threads.
  if atomicDec(b.counter) <= 0: signal(b.cv)
proc openBarrier*(b: ptr Barrier) {.compilerProc.} =
b.counter = 0
b.cv = createCondVar()
proc closeBarrier*(b: ptr Barrier) {.compilerProc.} =
await(b.cv)
destroyCondVar(b.cv)
{.pop.}
# ----------------------------------------------------------------------------
type
WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.}
Worker = object
taskArrived: CondVar
taskStarted: CondVar #\
# task data:
f: WorkerProc
data: pointer
ready: bool # put it here for correct alignment!
initialized: bool # whether it has even been initialized
proc nimArgsPassingDone(p: pointer) {.compilerProc.} =
let w = cast[ptr Worker](p)
signal(w.taskStarted)
var
gSomeReady = createCondVar()
readyWorker: ptr Worker
proc slave(w: ptr Worker) {.thread.} =
while true:
w.ready = true
readyWorker = w
signal(gSomeReady)
await(w.taskArrived)
assert(not w.ready)
w.f(w, w.data)
const
MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 256 threads
## should be good enough for anybody ;-)
var
currentPoolSize: int
maxPoolSize = MaxThreadPoolSize
minPoolSize = 4
proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) =
## sets the minimal thread pool size. The default value of this is 4.
minPoolSize = size
proc setMaxPoolSize*(size: range[1..MaxThreadPoolSize]) =
  ## sets the maximal thread pool size. The default value of this
  ## is ``MaxThreadPoolSize``.
  maxPoolSize = size
var
workers: array[MaxThreadPoolSize, TThread[ptr Worker]]
workersData: array[MaxThreadPoolSize, Worker]
proc activateThread(i: int) {.noinline.} =
workersData[i].taskArrived = createCondVar()
workersData[i].taskStarted = createCondVar()
workersData[i].initialized = true
createThread(workers[i], slave, addr(workersData[i]))
proc setup() =
currentPoolSize = min(countProcessors(), MaxThreadPoolSize)
readyWorker = addr(workersData[0])
for i in 0.. <currentPoolSize: activateThread(i)
proc preferSpawn*(): bool =
## Use this proc to determine quickly if a 'spawn' or a direct call is
## preferable. If it returns 'true' a 'spawn' may make sense. In general
## it is not necessary to call this directly; use 'spawnX' instead.
result = gSomeReady.counter > 0
proc spawn*(call: stmt) {.magic: "Spawn".}
## always spawns a new task, so that the 'call' is never executed on
## the calling thread. 'call' has to be proc call 'p(...)' where 'p'
## is gcsafe and has 'void' as the return type.
template spawnX*(call: stmt) =
## spawns a new task if a CPU core is ready, otherwise executes the
## call in the calling thread. Usually it is advised to
## use 'spawn' in order to not block the producer for an unknown
## amount of time. 'call' has to be proc call 'p(...)' where 'p'
## is gcsafe and has 'void' as the return type.
if preferSpawn(): spawn call
else: call
proc parallel*(body: stmt) {.magic: "Parallel".}
## a parallel section can be used to execute a block in parallel. ``body``
## has to be in a DSL that is a particular subset of the language. Please
## refer to the manual for further information.
var
state: ThreadPoolState
stateLock: TLock
initLock stateLock
proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool =
if cas(addr w.ready, true, false):
w.data = data
w.f = fn
signal(w.taskArrived)
await(w.taskStarted)
result = true
proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} =
# implementation of 'spawn' that is used by the code generator.
# Loops until some worker accepts the task (fn, data); every successful
# 'selectWorker' call returns from the proc.
while true:
# fast path: try the worker referenced by 'readyWorker' first, then
# every slot of the currently active part of the pool.
if selectWorker(readyWorker, fn, data): return
for i in 0.. <currentPoolSize:
if selectWorker(addr(workersData[i]), fn, data): return
# determine what to do, but keep in mind this is expensive too:
# state.calls < maxPoolSize: warmup phase
# (state.calls and 127) == 0: periodic check (every 128th value)
if state.calls < maxPoolSize or (state.calls and 127) == 0:
# ensure the call to 'advice' is atomic:
if tryAcquire(stateLock):
case advice(state)
of doNothing: discard
of doCreateThread:
# grow the pool by one slot, but never beyond 'maxPoolSize'.
if currentPoolSize < maxPoolSize:
# a slot that was initialized earlier is reused without
# calling 'activateThread' again.
if not workersData[currentPoolSize].initialized:
activateThread(currentPoolSize)
let w = addr(workersData[currentPoolSize])
inc currentPoolSize
if selectWorker(w, fn, data):
# the lock has to be released on this early-return path too.
release(stateLock)
return
# else we didn't succeed but some other thread, so do nothing.
of doShutdownThread:
# shrinking only lowers the active slot count, never below
# 'minPoolSize'.
if currentPoolSize > minPoolSize: dec currentPoolSize
# we don't free anything here. Too dangerous.
release(stateLock)
# else the acquire failed, but this means some
# other thread succeeded, so we don't need to do anything here.
# nobody took the task: wait on 'gSomeReady', then retry from the top.
await(gSomeReady)
proc sync*() =
  ## A simple barrier that waits for all spawn'ed tasks. If you need more
  ## elaborate waiting, you have to use an explicit barrier.
  while true:
    # scan the active workers and stop at the first one that is not ready:
    var foundBusy = false
    for i in 0 .. <currentPoolSize:
      if not workersData[i].ready:
        foundBusy = true
        break
    if not foundBusy: return
    # at least one worker is still busy; wait on 'gSomeReady' and rescan.
    await(gSomeReady)
# 'setup' is defined outside this excerpt; presumably it initializes the
# worker pool -- confirm against its definition.
setup()

View File

@@ -1,7 +1,7 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2013 Andreas Rumpf
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
@@ -13,7 +13,7 @@
include "system/inclrtl"
import
strutils, os, strtabs, streams
strutils, os, strtabs, streams, cpuinfo
when defined(windows):
import winlean
@@ -225,42 +225,10 @@ proc errorHandle*(p: PProcess): TFileHandle {.rtl, extern: "nosp$1",
## it is closed when closing the PProcess ``p``.
result = p.errHandle
when defined(macosx) or defined(bsd):
const
CTL_HW = 6
HW_AVAILCPU = 25
HW_NCPU = 3
proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer,
a: var csize, b: pointer, c: int): cint {.
importc: "sysctl", header: "<sys/sysctl.h>".}
proc countProcessors*(): int {.rtl, extern: "nosp$1".} =
## returns the number of processors/cores the machine has.
## Returns 0 if it cannot be detected.
when defined(windows):
var x = getEnv("NUMBER_OF_PROCESSORS")
if x.len > 0: result = parseInt(x.string)
elif defined(macosx) or defined(bsd):
var
mib: array[0..3, cint]
numCPU: int
len: csize
mib[0] = CTL_HW
mib[1] = HW_AVAILCPU
len = sizeof(numCPU)
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
if numCPU < 1:
mib[1] = HW_NCPU
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
result = numCPU
elif defined(hpux):
result = mpctl(MPC_GETNUMSPUS, nil, nil)
elif defined(irix):
var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint
result = sysconf(SC_NPROC_ONLN)
else:
result = sysconf(SC_NPROCESSORS_ONLN)
if result <= 0: result = 1
result = cpuinfo.countProcessors()
proc execProcesses*(cmds: openArray[string],
options = {poStdErrToStdOut, poParentStreams},

View File

@@ -2934,6 +2934,3 @@ when not defined(booting):
template isStatic*(x): expr = compiles(static(x))
# checks whether `x` is a value known at compile-time
when hasThreadSupport:
when hostOS != "standalone": include "system/sysspawn"

View File

@@ -1,13 +1,14 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2012 Andreas Rumpf
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Atomic operations for Nimrod.
{.push stackTrace:off.}
when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport:
type
@@ -203,3 +204,31 @@ proc atomicDec*(memLoc: var int, x: int = 1): int =
else:
dec(memLoc, x)
result = memLoc
when defined(windows) and not defined(gcc):
  proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32
    {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.}

  proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool =
    ## Atomic compare-and-swap: if ``p[]`` equals `oldValue` it is replaced
    ## by `newValue`. Returns true iff the swap was performed.
    # InterlockedCompareExchange returns the value the destination held
    # *before* the call, so the swap happened exactly when that value equals
    # the comparand. Testing it against 0 would, for example, report failure
    # for a successful ``cas(p, false, true)``.
    # NOTE(review): the Win32 call operates on a 32 bit location while 'T'
    # may be narrower ('bool') or wider ('int' on 64 bit targets) -- confirm
    # that this is acceptable for all instantiations.
    interlockedCompareExchange(p, newValue.int32, oldValue.int32) ==
      oldValue.int32
else:
  # this is valid for GCC and Intel C++
  proc cas*[T: bool|int](p: ptr T; oldValue, newValue: T): bool
    {.importc: "__sync_bool_compare_and_swap", nodecl.}
    ## Atomic compare-and-swap: if ``p[]`` equals `oldValue` it is replaced
    ## by `newValue`. Returns true iff the swap was performed.
  # XXX is this valid for 'int'?
# 'cpuRelax' is mapped to a CPU/compiler specific pause primitive
# (presumably for use inside spin-wait loops -- confirm against callers).
# If none of the enabled branches below matches, 'cpuRelax' is not defined.
when (defined(x86) or defined(amd64)) and defined(gcc):
proc cpuRelax {.inline.} =
# emits the x86 'pause' instruction via GCC inline assembly
{.emit: """asm volatile("pause" ::: "memory");""".}
elif (defined(x86) or defined(amd64)) and defined(vcc):
proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".}
elif defined(intelc):
proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".}
elif false:
# disabled fallback ('elif false' is never compiled): sleep for 1 ms
from os import sleep
proc cpuRelax {.inline.} = os.sleep(1)
{.pop.}

View File

@@ -14,30 +14,6 @@ when not defined(NimString):
{.push stackTrace:off.}
when (defined(x86) or defined(amd64)) and defined(gcc):
proc cpuRelax {.inline.} =
{.emit: """asm volatile("pause" ::: "memory");""".}
elif (defined(x86) or defined(amd64)) and defined(vcc):
proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".}
elif defined(intelc):
proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".}
elif false:
from os import sleep
proc cpuRelax {.inline.} = os.sleep(1)
when defined(windows) and not defined(gcc):
proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32
{.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.}
proc cas(p: ptr bool; oldValue, newValue: bool): bool =
interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0
else:
# this is valid for GCC and Intel C++
proc cas(p: ptr bool; oldValue, newValue: bool): bool
{.importc: "__sync_bool_compare_and_swap", nodecl.}
# We declare our own condition variables here to get rid of the dummy lock
# on Windows:
@@ -54,6 +30,9 @@ proc createCondVar(): CondVar =
initSysLock(result.stupidLock)
#acquireSys(result.stupidLock)
proc destroyCondVar(c: var CondVar) {.inline.} =
deinitSysCond(c.c)
proc await(cv: var CondVar) =
when defined(posix):
acquireSys(cv.stupidLock)
@@ -100,6 +79,26 @@ proc signal(cv: var FastCondVar) =
#if cas(addr cv.slowPath, true, false):
signal(cv.slow)
type
Barrier* {.compilerProc.} = object
counter: int ## incremented by 'barrierEnter', decremented by 'barrierLeave'
cv: CondVar ## signalled by 'barrierLeave', awaited by 'closeBarrier'
proc barrierEnter*(b: ptr Barrier) {.compilerProc.} =
## Registers one more participant by atomically incrementing 'b.counter'.
atomicInc b.counter
proc barrierLeave*(b: ptr Barrier) {.compilerProc.} =
  ## Unregisters one participant by atomically decrementing 'b.counter'
  ## and signals 'b.cv' when the counter has dropped to zero or below.
  # Decide on the value returned by the atomic decrement itself. Re-reading
  # 'b.counter' afterwards is a separate, non-atomic load: another thread
  # may have changed the counter in between, so several leavers (or none
  # at the right moment) could observe "<= 0".
  if atomicDec(b.counter) <= 0: signal(b.cv)
proc openBarrier*(b: ptr Barrier) {.compilerProc.} =
  ## Prepares 'b' for use: creates a fresh condition variable and resets
  ## the participant counter to zero.
  b.cv = createCondVar()
  b.counter = 0
proc closeBarrier*(b: ptr Barrier) {.compilerProc.} =
## Waits on 'b.cv' and then destroys the condition variable.
# NOTE(review): the wait is unconditional ('b.counter' is not checked
# first). Whether a signal sent before this call is still observed depends
# on the CondVar implementation -- confirm.
await(b.cv)
destroyCondVar(b.cv)
{.pop.}
# ----------------------------------------------------------------------------

View File

@@ -4,20 +4,22 @@ discard """
cmd: "nimrod $target --threads:on $options $file"
"""
import threadpool
var
x, y = 0
proc p1 =
for i in 0 .. 1_000_000:
for i in 0 .. 10_000:
discard
inc x
atomicInc x
proc p2 =
for i in 0 .. 1_000_000:
for i in 0 .. 10_000:
discard
inc y, 2
atomicInc y, 2
for i in 0.. 3:
spawn(p1())

View File

@@ -4,4 +4,6 @@ discard """
cmd: "nimrod $target --threads:on $options $file"
"""
import threadpool
spawn(1)

View File

@@ -2,6 +2,23 @@
News
====
..
2014-06-29 Version 0.9.6 released
=================================
Changes affecting backwards compatibility
-----------------------------------------
- ``spawn`` now uses an elaborate self-adapting thread pool and as such
has been moved into its own module. So to use it, you now have to import
``threadpool``.
Library Additions
-----------------
- Added module ``cpuinfo``.
- Added module ``threadpool``.
2014-04-21 Version 0.9.4 released