Merge pull request #1281 from Araq/new_spawn

New spawn
This commit is contained in:
Andreas Rumpf
2014-06-16 23:03:17 +02:00
42 changed files with 2115 additions and 192 deletions

View File

@@ -561,7 +561,7 @@ type
mFloat, mFloat32, mFloat64, mFloat128,
mBool, mChar, mString, mCstring,
mPointer, mEmptySet, mIntSetBaseType, mNil, mExpr, mStmt, mTypeDesc,
mVoidType, mPNimrodNode, mShared, mGuarded, mLock, mSpawn,
mVoidType, mPNimrodNode, mShared, mGuarded, mLock, mSpawn, mDeepCopy,
mIsMainModule, mCompileDate, mCompileTime, mNimrodVersion, mNimrodMajor,
mNimrodMinor, mNimrodPatch, mCpuEndian, mHostOS, mHostCPU, mAppType,
mNaN, mInf, mNegInf,
@@ -606,9 +606,9 @@ const
# thus cannot be overloaded (also documented in the spec!):
SpecialSemMagics* = {
mDefined, mDefinedInScope, mCompiles, mLow, mHigh, mSizeOf, mIs, mOf,
mEcho, mShallowCopy, mExpandToAst}
mEcho, mShallowCopy, mExpandToAst, mParallel, mSpawn}
type
type
PNode* = ref TNode
TNodeSeq* = seq[PNode]
PType* = ref TType
@@ -886,6 +886,8 @@ const
nkCallKinds* = {nkCall, nkInfix, nkPrefix, nkPostfix,
nkCommand, nkCallStrLit, nkHiddenCallConv}
nkIdentKinds* = {nkIdent, nkSym, nkAccQuoted, nkOpenSymChoice,
nkClosedSymChoice}
nkLiterals* = {nkCharLit..nkTripleStrLit}
nkLambdaKinds* = {nkLambda, nkDo}

View File

@@ -86,7 +86,7 @@ proc openArrayLoc(p: BProc, n: PNode): PRope =
initLocExpr(p, q[2], b)
initLocExpr(p, q[3], c)
let fmt =
case skipTypes(a.t, abstractVar).kind
case skipTypes(a.t, abstractVar+{tyPtr}).kind
of tyOpenArray, tyVarargs, tyArray, tyArrayConstr:
"($1)+($2), ($3)-($2)+1"
of tyString, tySequence:

View File

@@ -1652,7 +1652,10 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) =
of mSlurp..mQuoteAst:
localError(e.info, errXMustBeCompileTime, e.sons[0].sym.name.s)
of mSpawn:
let n = lowerings.wrapProcForSpawn(p.module.module, e.sons[1])
let n = lowerings.wrapProcForSpawn(p.module.module, e, e.typ, nil, nil)
expr(p, n, d)
of mParallel:
let n = semparallel.liftParallel(p.module.module, e)
expr(p, n, d)
else: internalError(e.info, "genMagicExpr: " & $op)

View File

@@ -14,7 +14,8 @@ import
options, intsets,
nversion, nimsets, msgs, crc, bitsets, idents, lists, types, ccgutils, os,
times, ropes, math, passes, rodread, wordrecg, treetab, cgmeth,
rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings
rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings,
semparallel
when options.hasTinyCBackend:
import tccgen

View File

@@ -1,7 +1,7 @@
#
#
# The Nimrod Compiler
# (c) Copyright 2013 Andreas Rumpf
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
@@ -9,7 +9,8 @@
## This module implements the 'implies' relation for guards.
import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents
import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents,
saturate
const
someEq = {mEqI, mEqI64, mEqF64, mEqEnum, mEqCh, mEqB, mEqRef, mEqProc,
@@ -25,6 +26,17 @@ const
someIn = {mInRange, mInSet}
someHigh = {mHigh}
# we don't list unsigned here because wrap around semantics suck for
# proving anything:
someAdd = {mAddI, mAddI64, mAddF64, mSucc}
someSub = {mSubI, mSubI64, mSubF64, mPred}
someMul = {mMulI, mMulI64, mMulF64}
someDiv = {mDivI, mDivI64, mDivF64}
someMod = {mModI, mModI64}
someMax = {mMaxI, mMaxI64, mMaxF64}
someMin = {mMinI, mMinI64, mMinF64}
proc isValue(n: PNode): bool = n.kind in {nkCharLit..nkNilLit}
proc isLocation(n: PNode): bool = not n.isValue
@@ -69,19 +81,24 @@ proc isLetLocation(m: PNode, isApprox: bool): bool =
proc interestingCaseExpr*(m: PNode): bool = isLetLocation(m, true)
proc getMagicOp(name: string, m: TMagic): PSym =
proc createMagic*(name: string, m: TMagic): PSym =
result = newSym(skProc, getIdent(name), nil, unknownLineInfo())
result.magic = m
let
opLe = getMagicOp("<=", mLeI)
opLt = getMagicOp("<", mLtI)
opAnd = getMagicOp("and", mAnd)
opOr = getMagicOp("or", mOr)
opNot = getMagicOp("not", mNot)
opIsNil = getMagicOp("isnil", mIsNil)
opContains = getMagicOp("contains", mInSet)
opEq = getMagicOp("==", mEqI)
opLe = createMagic("<=", mLeI)
opLt = createMagic("<", mLtI)
opAnd = createMagic("and", mAnd)
opOr = createMagic("or", mOr)
opNot = createMagic("not", mNot)
opIsNil = createMagic("isnil", mIsNil)
opContains = createMagic("contains", mInSet)
opEq = createMagic("==", mEqI)
opAdd = createMagic("+", mAddI)
opSub = createMagic("-", mSubI)
opMul = createMagic("*", mMulI)
opDiv = createMagic("div", mDivI)
opLen = createMagic("len", mLengthSeq)
proc swapArgs(fact: PNode, newOp: PSym): PNode =
result = newNodeI(nkCall, fact.info, 3)
@@ -137,17 +154,141 @@ proc neg(n: PNode): PNode =
result.sons[0] = newSymNode(opNot)
result.sons[1] = n
proc buildIsNil(arg: PNode): PNode =
result = newNodeI(nkCall, arg.info, 2)
result.sons[0] = newSymNode(opIsNil)
result.sons[1] = arg
proc buildCall(op: PSym; a: PNode): PNode =
result = newNodeI(nkCall, a.info, 2)
result.sons[0] = newSymNode(op)
result.sons[1] = a
proc buildCall(op: PSym; a, b: PNode): PNode =
result = newNodeI(nkInfix, a.info, 3)
result.sons[0] = newSymNode(op)
result.sons[1] = a
result.sons[2] = b
proc `|+|`(a, b: PNode): PNode =
result = copyNode(a)
if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |+| b.intVal
else: result.floatVal = a.floatVal + b.floatVal
proc `|*|`(a, b: PNode): PNode =
result = copyNode(a)
if a.kind in {nkCharLit..nkUInt64Lit}: result.intVal = a.intVal |*| b.intVal
else: result.floatVal = a.floatVal * b.floatVal
proc negate(a, b, res: PNode): PNode =
if b.kind in {nkCharLit..nkUInt64Lit} and b.intVal != low(BiggestInt):
var b = copyNode(b)
b.intVal = -b.intVal
if a.kind in {nkCharLit..nkUInt64Lit}:
b.intVal = b.intVal |+| a.intVal
result = b
else:
result = buildCall(opAdd, a, b)
elif b.kind in {nkFloatLit..nkFloat64Lit}:
var b = copyNode(b)
b.floatVal = -b.floatVal
result = buildCall(opAdd, a, b)
else:
result = res
proc zero(): PNode = nkIntLit.newIntNode(0)
proc one(): PNode = nkIntLit.newIntNode(1)
proc minusOne(): PNode = nkIntLit.newIntNode(-1)
proc lowBound*(x: PNode): PNode =
result = nkIntLit.newIntNode(firstOrd(x.typ))
result.info = x.info
proc highBound*(x: PNode): PNode =
result = if x.typ.skipTypes(abstractInst).kind == tyArray:
nkIntLit.newIntNode(lastOrd(x.typ))
else:
opAdd.buildCall(opLen.buildCall(x), minusOne())
result.info = x.info
proc reassociation(n: PNode): PNode =
result = n
# (foo+5)+5 --> foo+10; same for '*'
case result.getMagic
of someAdd:
if result[2].isValue and
result[1].getMagic in someAdd and result[1][2].isValue:
result = opAdd.buildCall(result[1][1], result[1][2] |+| result[2])
of someMul:
if result[2].isValue and
result[1].getMagic in someMul and result[1][2].isValue:
result = opAdd.buildCall(result[1][1], result[1][2] |*| result[2])
else: discard
proc canon*(n: PNode): PNode =
# XXX for now only the new code in 'semparallel' uses this
if n.safeLen >= 1:
result = shallowCopy(n)
for i in 0 .. < n.len:
result.sons[i] = canon(n.sons[i])
else:
result = n
case result.getMagic
of someEq, someAdd, someMul, someMin, someMax:
# these are symmetric; put value as last:
if result.sons[1].isValue and not result.sons[2].isValue:
result = swapArgs(result, result.sons[0].sym)
# (4 + foo) + 2 --> (foo + 4) + 2
of someHigh:
# high == len+(-1)
result = opAdd.buildCall(opLen.buildCall(result[1]), minusOne())
of mUnaryMinusI, mUnaryMinusI64:
result = buildCall(opAdd, result[1], newIntNode(nkIntLit, -1))
of someSub:
# x - 4 --> x + (-4)
result = negate(result[1], result[2], result)
of someLen:
result.sons[0] = opLen.newSymNode
else: discard
result = skipConv(result)
result = reassociation(result)
# most important rule: (x-4) < a.len --> x < a.len+4
case result.getMagic
of someLe, someLt:
let x = result[1]
let y = result[2]
if x.kind in nkCallKinds and x.len == 3 and x[2].isValue and
isLetLocation(x[1], true):
case x.getMagic
of someSub:
result = buildCall(result[0].sym, x[1],
reassociation(opAdd.buildCall(y, x[2])))
of someAdd:
# Rule A:
let plus = negate(y, x[2], nil).reassociation
if plus != nil: result = buildCall(result[0].sym, x[1], plus)
else: discard
elif y.kind in nkCallKinds and y.len == 3 and y[2].isValue and
isLetLocation(y[1], true):
# a.len < x-3
case y.getMagic
of someSub:
result = buildCall(result[0].sym, y[1],
reassociation(opAdd.buildCall(x, y[2])))
of someAdd:
let plus = negate(x, y[2], nil).reassociation
# ensure that Rule A will not trigger afterwards with the
# additional 'not isLetLocation' constraint:
if plus != nil and not isLetLocation(x, true):
result = buildCall(result[0].sym, plus, y[1])
else: discard
else: discard
proc `+@`*(a: PNode; b: BiggestInt): PNode =
canon(if b != 0: opAdd.buildCall(a, nkIntLit.newIntNode(b)) else: a)
proc usefulFact(n: PNode): PNode =
case n.getMagic
of someEq:
if skipConv(n.sons[2]).kind == nkNilLit and (
isLetLocation(n.sons[1], false) or isVar(n.sons[1])):
result = buildIsNil(n.sons[1])
result = opIsNil.buildCall(n.sons[1])
else:
if isLetLocation(n.sons[1], true) or isLetLocation(n.sons[2], true):
# XXX algebraic simplifications! 'i-1 < a.len' --> 'i < a.len+1'
@@ -217,7 +358,7 @@ proc addFactNeg*(m: var TModel, n: PNode) =
let n = n.neg
if n != nil: addFact(m, n)
proc sameTree(a, b: PNode): bool =
proc sameTree*(a, b: PNode): bool =
result = false
if a == b:
result = true
@@ -519,7 +660,144 @@ proc doesImply*(facts: TModel, prop: PNode): TImplication =
if result != impUnknown: return
proc impliesNotNil*(facts: TModel, arg: PNode): TImplication =
result = doesImply(facts, buildIsNil(arg).neg)
result = doesImply(facts, opIsNil.buildCall(arg).neg)
proc simpleSlice*(a, b: PNode): BiggestInt =
# returns 'c' if a..b matches (i+c)..(i+c), -1 otherwise. (i)..(i) is matched
# as if it is (i+0)..(i+0).
if guards.sameTree(a, b):
if a.getMagic in someAdd and a[2].kind in {nkCharLit..nkUInt64Lit}:
result = a[2].intVal
else:
result = 0
else:
result = -1
proc pleViaModel(model: TModel; aa, bb: PNode): TImplication
proc ple(m: TModel; a, b: PNode): TImplication =
template `<=?`(a,b): expr = ple(m,a,b) == impYes
# 0 <= 3
if a.isValue and b.isValue:
return if leValue(a, b): impYes else: impNo
# use type information too: x <= 4 iff high(x) <= 4
if b.isValue and a.typ != nil and a.typ.isOrdinalType:
if lastOrd(a.typ) <= b.intVal: return impYes
# 3 <= x iff low(x) <= 3
if a.isValue and b.typ != nil and b.typ.isOrdinalType:
if firstOrd(b.typ) <= a.intVal: return impYes
# x <= x
if sameTree(a, b): return impYes
# 0 <= x.len
if b.getMagic in someLen and a.isValue:
if a.intVal <= 0: return impYes
# x <= y+c if 0 <= c and x <= y
if b.getMagic in someAdd and zero() <=? b[2] and a <=? b[1]: return impYes
# x+c <= y if c <= 0 and x <= y
if a.getMagic in someAdd and a[2] <=? zero() and a[1] <=? b: return impYes
# x <= y*c if 1 <= c and x <= y and 0 <= y
if b.getMagic in someMul:
if a <=? b[1] and one() <=? b[2] and zero() <=? b[1]: return impYes
# x div c <= y if 1 <= c and 0 <= y and x <= y:
if a.getMagic in someDiv:
if one() <=? a[2] and zero() <=? b and a[1] <=? b: return impYes
# slightly subtle:
# x <= max(y, z) iff x <= y or x <= z
# note that 'x <= max(x, z)' is a special case of the above rule
if b.getMagic in someMax:
if a <=? b[1] or a <=? b[2]: return impYes
# min(x, y) <= z iff x <= z or y <= z
if a.getMagic in someMin:
if a[1] <=? b or a[2] <=? b: return impYes
# use the knowledge base:
return pleViaModel(m, a, b)
#return doesImply(m, opLe.buildCall(a, b))
type TReplacements = seq[tuple[a,b: PNode]]
proc replaceSubTree(n, x, by: PNode): PNode =
if sameTree(n, x):
result = by
elif hasSubTree(n, x):
result = shallowCopy(n)
for i in 0 .. safeLen(n)-1:
result.sons[i] = replaceSubTree(n.sons[i], x, by)
else:
result = n
proc applyReplacements(n: PNode; rep: TReplacements): PNode =
result = n
for x in rep: result = result.replaceSubTree(x.a, x.b)
proc pleViaModelRec(m: var TModel; a, b: PNode): TImplication =
# now check for inferrable facts: a <= b and b <= c implies a <= c
for i in 0..m.high:
let fact = m[i]
if fact != nil and fact.getMagic in someLe:
# x <= y implies a <= b if a <= x and y <= b
let x = fact[1]
let y = fact[2]
# mark as used:
m[i] = nil
if ple(m, a, x) == impYes:
if ple(m, y, b) == impYes: return impYes
#if pleViaModelRec(m, y, b): return impYes
# fact: 16 <= i
# x y
# question: i <= 15? no!
result = impliesLe(fact, a, b)
if result != impUnknown: return result
if sameTree(y, a):
result = ple(m, x, b)
if result != impUnknown: return result
proc pleViaModel(model: TModel; aa, bb: PNode): TImplication =
# compute replacements:
var replacements: TReplacements = @[]
for fact in model:
if fact != nil and fact.getMagic in someEq:
let a = fact[1]
let b = fact[2]
if a.kind == nkSym: replacements.add((a,b))
else: replacements.add((b,a))
var m: TModel
var a = aa
var b = bb
if replacements.len > 0:
m = @[]
# make the other facts consistent:
for fact in model:
if fact != nil and fact.getMagic notin someEq:
# XXX 'canon' should not be necessary here, but it is
m.add applyReplacements(fact, replacements).canon
a = applyReplacements(aa, replacements)
b = applyReplacements(bb, replacements)
else:
# we have to make a copy here, because the model will be modified:
m = model
result = pleViaModelRec(m, a, b)
proc proveLe*(m: TModel; a, b: PNode): TImplication =
let x = canon(opLe.buildCall(a, b))
#echo "ROOT ", renderTree(x[1]), " <=? ", renderTree(x[2])
result = ple(m, x[1], x[2])
if result == impUnknown:
# try an alternative: a <= b iff not (b < a) iff not (b+1 <= a):
let y = canon(opLe.buildCall(opAdd.buildCall(b, one()), a))
result = ~ple(m, y[1], y[2])
proc addFactLe*(m: var TModel; a, b: PNode) =
m.add canon(opLe.buildCall(a, b))
proc settype(n: PNode): PType =
result = newType(tySet, n.typ.owner)

View File

@@ -13,6 +13,8 @@ const
genPrefix* = ":tmp" # prefix for generated names
import ast, astalgo, types, idents, magicsys, msgs, options
from guards import createMagic
from trees import getMagic
proc newTupleAccess*(tup: PNode, i: int): PNode =
result = newNodeIT(nkBracketExpr, tup.info, tup.typ.skipTypes(
@@ -68,6 +70,7 @@ proc addField*(obj: PType; s: PSym) =
var field = newSym(skField, getIdent(s.name.s & $s.id), s.owner, s.info)
let t = skipIntLit(s.typ)
field.typ = t
assert t.kind != tyStmt
field.position = sonsLen(obj.n)
addSon(obj.n, newSymNode(field))
@@ -79,19 +82,30 @@ proc newDotExpr(obj, b: PSym): PNode =
addSon(result, newSymNode(field))
result.typ = field.typ
proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode =
proc indirectAccess*(a: PNode, b: string, info: TLineInfo): PNode =
# returns a[].b as a node
var deref = newNodeI(nkHiddenDeref, info)
deref.typ = a.typ.sons[0]
assert deref.typ.kind == tyObject
let field = getSymFromList(deref.typ.n, getIdent(b.name.s & $b.id))
assert field != nil, b.name.s
deref.typ = a.typ.skipTypes(abstractInst).sons[0]
var t = deref.typ.skipTypes(abstractInst)
var field: PSym
while true:
assert t.kind == tyObject
field = getSymFromList(t.n, getIdent(b))
if field != nil: break
t = t.sons[0]
if t == nil: break
t = t.skipTypes(abstractInst)
assert field != nil, b
addSon(deref, a)
result = newNodeI(nkDotExpr, info)
addSon(result, deref)
addSon(result, newSymNode(field))
result.typ = field.typ
proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode =
# returns a[].b as a node
result = indirectAccess(a, b.name.s & $b.id, info)
proc indirectAccess*(a, b: PSym, info: TLineInfo): PNode =
result = indirectAccess(newSymNode(a), b, info)
@@ -101,6 +115,11 @@ proc genAddrOf*(n: PNode): PNode =
result.typ = newType(tyPtr, n.typ.owner)
result.typ.rawAddSon(n.typ)
proc genDeref*(n: PNode): PNode =
result = newNodeIT(nkHiddenDeref, n.info,
n.typ.skipTypes(abstractInst).sons[0])
result.add n
proc callCodegenProc*(name: string, arg1: PNode;
arg2, arg3: PNode = nil): PNode =
result = newNodeI(nkCall, arg1.info)
@@ -112,13 +131,120 @@ proc callCodegenProc*(name: string, arg1: PNode;
result.add arg1
if arg2 != nil: result.add arg2
if arg3 != nil: result.add arg3
result.typ = sym.typ.sons[0]
proc callProc(a: PNode): PNode =
result = newNodeI(nkCall, a.info)
result.add a
result.typ = a.typ.sons[0]
# we have 4 cases to consider:
# - a void proc --> nothing to do
# - a proc returning GC'ed memory --> requires a flowVar
# - a proc returning non GC'ed memory --> pass as hidden 'var' parameter
# - not in a parallel environment --> requires a flowVar for memory safety
type
TSpawnResult = enum
srVoid, srFlowVar, srByVar
TFlowVarKind = enum
fvInvalid # invalid type T for 'FlowVar[T]'
fvGC # FlowVar of a GC'ed type
fvBlob # FlowVar of a blob type
proc spawnResult(t: PType; inParallel: bool): TSpawnResult =
if t.isEmptyType: srVoid
elif inParallel and not containsGarbageCollectedRef(t): srByVar
else: srFlowVar
proc flowVarKind(t: PType): TFlowVarKind =
if t.skipTypes(abstractInst).kind in {tyRef, tyString, tySequence}: fvGC
elif containsGarbageCollectedRef(t): fvInvalid
else: fvBlob
proc addLocalVar(varSection: PNode; owner: PSym; typ: PType; v: PNode): PSym =
result = newSym(skTemp, getIdent(genPrefix), owner, varSection.info)
result.typ = typ
incl(result.flags, sfFromGeneric)
var vpart = newNodeI(nkIdentDefs, varSection.info, 3)
vpart.sons[0] = newSymNode(result)
vpart.sons[1] = ast.emptyNode
vpart.sons[2] = v
varSection.add vpart
discard """
We generate roughly this:
proc f_wrapper(thread, args) =
barrierEnter(args.barrier) # for parallel statement
var a = args.a # thread transfer; deepCopy or shallowCopy or no copy
# depending on whether we're in a 'parallel' statement
var b = args.b
var fv = args.fv
fv.owner = thread # optional
nimArgsPassingDone() # signal parent that the work is done
#
args.fv.blob = f(a, b, ...)
nimFlowVarSignal(args.fv)
# - or -
f(a, b, ...)
barrierLeave(args.barrier) # for parallel statement
stmtList:
var scratchObj
scratchObj.a = a
scratchObj.b = b
nimSpawn(f_wrapper, addr scratchObj)
scratchObj.fv # optional
"""
proc createWrapperProc(f: PNode; threadParam, argsParam: PSym;
varSection, call: PNode): PSym =
varSection, call, barrier, fv: PNode;
spawnKind: TSpawnResult): PSym =
var body = newNodeI(nkStmtList, f.info)
var threadLocalBarrier: PSym
if barrier != nil:
var varSection = newNodeI(nkVarSection, barrier.info)
threadLocalBarrier = addLocalVar(varSection, argsParam.owner,
barrier.typ, barrier)
body.add varSection
body.add callCodeGenProc("barrierEnter", threadLocalBarrier.newSymNode)
var threadLocalProm: PSym
if spawnKind == srByVar:
threadLocalProm = addLocalVar(varSection, argsParam.owner, fv.typ, fv)
elif fv != nil:
internalAssert fv.typ.kind == tyGenericInst
threadLocalProm = addLocalVar(varSection, argsParam.owner, fv.typ, fv)
body.add varSection
body.add callCodeGenProc("nimArgsPassingDone", newSymNode(threadParam))
body.add call
if fv != nil and spawnKind != srByVar:
# generate:
# fv.owner = threadParam
body.add newAsgnStmt(indirectAccess(threadLocalProm.newSymNode,
"owner", fv.info), threadParam.newSymNode)
body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode)
if spawnKind == srByVar:
body.add newAsgnStmt(genDeref(threadLocalProm.newSymNode), call)
elif fv != nil:
let fk = fv.typ.sons[1].flowVarKind
if fk == fvInvalid:
localError(f.info, "cannot create a flowVar of type: " &
typeToString(fv.typ.sons[1]))
body.add newAsgnStmt(indirectAccess(threadLocalProm.newSymNode,
if fk == fvGC: "data" else: "blob", fv.info), call)
if barrier == nil:
# by now 'fv' is shared and thus might have beeen overwritten! we need
# to use the thread-local view instead:
body.add callCodeGenProc("nimFlowVarSignal", threadLocalProm.newSymNode)
else:
body.add call
if barrier != nil:
body.add callCodeGenProc("barrierLeave", threadLocalBarrier.newSymNode)
var params = newNodeI(nkFormalParams, f.info)
params.add emptyNode
@@ -146,10 +272,152 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode =
result.typ = newType(tyPtr, objType.owner)
result.typ.rawAddSon(objType)
proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
result = newNodeI(nkStmtList, n.info)
if n.kind notin nkCallKinds or not n.typ.isEmptyType:
localError(n.info, "'spawn' takes a call expression of type void")
proc setupArgsForConcurrency(n: PNode; objType: PType; scratchObj: PSym,
castExpr, call, varSection, result: PNode) =
let formals = n[0].typ.n
let tmpName = getIdent(genPrefix)
for i in 1 .. <n.len:
# we pick n's type here, which hopefully is 'tyArray' and not
# 'tyOpenArray':
var argType = n[i].typ.skipTypes(abstractInst)
if i < formals.len and formals[i].typ.kind == tyVar:
localError(n[i].info, "'spawn'ed function cannot have a 'var' parameter")
elif containsTyRef(argType):
localError(n[i].info, "'spawn'ed function cannot refer to 'ref'/closure")
let fieldname = if i < formals.len: formals[i].sym.name else: tmpName
var field = newSym(skField, fieldname, objType.owner, n.info)
field.typ = argType
objType.addField(field)
result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n[i])
let temp = addLocalVar(varSection, objType.owner, argType,
indirectAccess(castExpr, field, n.info))
call.add(newSymNode(temp))
proc getRoot*(n: PNode): PSym =
## ``getRoot`` takes a *path* ``n``. A path is an lvalue expression
## like ``obj.x[i].y``. The *root* of a path is the symbol that can be
## determined as the owner; ``obj`` in the example.
case n.kind
of nkSym:
if n.sym.kind in {skVar, skResult, skTemp, skLet, skForVar}:
result = n.sym
of nkDotExpr, nkBracketExpr, nkHiddenDeref, nkDerefExpr,
nkObjUpConv, nkObjDownConv, nkCheckedFieldExpr:
result = getRoot(n.sons[0])
of nkHiddenStdConv, nkHiddenSubConv, nkConv:
result = getRoot(n.sons[1])
of nkCallKinds:
if getMagic(n) == mSlice: result = getRoot(n.sons[1])
else: discard
proc newIntLit(value: BiggestInt): PNode =
result = nkIntLit.newIntNode(value)
result.typ = getSysType(tyInt)
proc genHigh(n: PNode): PNode =
if skipTypes(n.typ, abstractVar).kind in {tyArrayConstr, tyArray}:
result = newIntLit(lastOrd(skipTypes(n.typ, abstractVar)))
else:
result = newNodeI(nkCall, n.info, 2)
result.typ = getSysType(tyInt)
result.sons[0] = newSymNode(createMagic("high", mHigh))
result.sons[1] = n
proc setupArgsForParallelism(n: PNode; objType: PType; scratchObj: PSym;
castExpr, call, varSection, result: PNode) =
let formals = n[0].typ.n
let tmpName = getIdent(genPrefix)
# we need to copy the foreign scratch object fields into local variables
# for correctness: These are called 'threadLocal' here.
for i in 1 .. <n.len:
let n = n[i]
let argType = skipTypes(if i < formals.len: formals[i].typ else: n.typ,
abstractInst)
if containsTyRef(argType):
localError(n.info, "'spawn'ed function cannot refer to 'ref'/closure")
let fieldname = if i < formals.len: formals[i].sym.name else: tmpName
var field = newSym(skField, fieldname, objType.owner, n.info)
if argType.kind in {tyVarargs, tyOpenArray}:
# important special case: we always create a zero-copy slice:
let slice = newNodeI(nkCall, n.info, 4)
slice.typ = n.typ
slice.sons[0] = newSymNode(createMagic("slice", mSlice))
var fieldB = newSym(skField, tmpName, objType.owner, n.info)
fieldB.typ = getSysType(tyInt)
objType.addField(fieldB)
if getMagic(n) == mSlice:
let a = genAddrOf(n[1])
field.typ = a.typ
objType.addField(field)
result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a)
var fieldA = newSym(skField, tmpName, objType.owner, n.info)
fieldA.typ = getSysType(tyInt)
objType.addField(fieldA)
result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldA), n[2])
result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldB), n[3])
let threadLocal = addLocalVar(varSection, objType.owner, fieldA.typ,
indirectAccess(castExpr, fieldA, n.info))
slice.sons[2] = threadLocal.newSymNode
else:
let a = genAddrOf(n)
field.typ = a.typ
objType.addField(field)
result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a)
result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldB), genHigh(n))
slice.sons[2] = newIntLit(0)
# the array itself does not need to go through a thread local variable:
slice.sons[1] = genDeref(indirectAccess(castExpr, field, n.info))
let threadLocal = addLocalVar(varSection, objType.owner, fieldB.typ,
indirectAccess(castExpr, fieldB, n.info))
slice.sons[3] = threadLocal.newSymNode
call.add slice
elif (let size = computeSize(argType); size < 0 or size > 16) and
n.getRoot != nil:
# it is more efficient to pass a pointer instead:
let a = genAddrOf(n)
field.typ = a.typ
objType.addField(field)
result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a)
let threadLocal = addLocalVar(varSection, objType.owner, field.typ,
indirectAccess(castExpr, field, n.info))
call.add(genDeref(threadLocal.newSymNode))
else:
# boring case
field.typ = argType
objType.addField(field)
result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n)
let threadLocal = addLocalVar(varSection, objType.owner, field.typ,
indirectAccess(castExpr, field, n.info))
call.add(threadLocal.newSymNode)
proc wrapProcForSpawn*(owner: PSym; spawnExpr: PNode; retType: PType;
barrier, dest: PNode = nil): PNode =
# if 'barrier' != nil, then it is in a 'parallel' section and we
# generate quite different code
let n = spawnExpr[1]
let spawnKind = spawnResult(retType, barrier!=nil)
case spawnKind
of srVoid:
internalAssert dest == nil
result = newNodeI(nkStmtList, n.info)
of srFlowVar:
internalAssert dest == nil
result = newNodeIT(nkStmtListExpr, n.info, retType)
of srByVar:
if dest == nil: localError(n.info, "'spawn' must not be discarded")
result = newNodeI(nkStmtList, n.info)
if n.kind notin nkCallKinds:
localError(n.info, "'spawn' takes a call expression")
return
if optThreadAnalysis in gGlobalOptions:
if {tfThread, tfNoSideEffect} * n[0].typ.flags == {}:
@@ -162,6 +430,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
threadParam.typ = ptrType
argsParam.typ = ptrType
argsParam.position = 1
var objType = createObj(owner, n.info)
incl(objType.flags, tfFinal)
let castExpr = createCastExpr(argsParam, objType)
@@ -174,7 +443,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
varSectionB.addVar(scratchObj.newSymNode)
result.add varSectionB
var call = newNodeI(nkCall, n.info)
var call = newNodeIT(nkCall, n.info, n.typ)
var fn = n.sons[0]
# templates and macros are in fact valid here due to the nature of
# the transformation:
@@ -194,35 +463,44 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
call.add(fn)
var varSection = newNodeI(nkVarSection, n.info)
let formals = n[0].typ.n
let tmpName = getIdent(genPrefix)
for i in 1 .. <n.len:
# we pick n's type here, which hopefully is 'tyArray' and not
# 'tyOpenArray':
var argType = n[i].typ.skipTypes(abstractInst)
if i < formals.len and formals[i].typ.kind == tyVar:
localError(n[i].info, "'spawn'ed function cannot have a 'var' parameter")
elif containsTyRef(argType):
localError(n[i].info, "'spawn'ed function cannot refer to 'ref'/closure")
if barrier.isNil:
setupArgsForConcurrency(n, objType, scratchObj, castExpr, call, varSection, result)
else:
setupArgsForParallelism(n, objType, scratchObj, castExpr, call, varSection, result)
let fieldname = if i < formals.len: formals[i].sym.name else: tmpName
var field = newSym(skField, fieldname, owner, n.info)
field.typ = argType
var barrierAsExpr: PNode = nil
if barrier != nil:
let typ = newType(tyPtr, owner)
typ.rawAddSon(magicsys.getCompilerProc("Barrier").typ)
var field = newSym(skField, getIdent"barrier", owner, n.info)
field.typ = typ
objType.addField(field)
result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n[i])
result.add newFastAsgnStmt(newDotExpr(scratchObj, field), barrier)
barrierAsExpr = indirectAccess(castExpr, field, n.info)
var temp = newSym(skTemp, tmpName, owner, n.info)
temp.typ = argType
incl(temp.flags, sfFromGeneric)
var fvField, fvAsExpr: PNode = nil
if spawnKind == srFlowVar:
var field = newSym(skField, getIdent"fv", owner, n.info)
field.typ = retType
objType.addField(field)
fvField = newDotExpr(scratchObj, field)
fvAsExpr = indirectAccess(castExpr, field, n.info)
# create flowVar:
result.add newFastAsgnStmt(fvField, callProc(spawnExpr[2]))
if barrier == nil:
result.add callCodeGenProc("nimFlowVarCreateCondVar", fvField)
var vpart = newNodeI(nkIdentDefs, n.info, 3)
vpart.sons[0] = newSymNode(temp)
vpart.sons[1] = ast.emptyNode
vpart.sons[2] = indirectAccess(castExpr, field, n.info)
varSection.add vpart
elif spawnKind == srByVar:
var field = newSym(skField, getIdent"fv", owner, n.info)
field.typ = newType(tyPtr, objType.owner)
field.typ.rawAddSon(retType)
objType.addField(field)
fvAsExpr = indirectAccess(castExpr, field, n.info)
result.add newFastAsgnStmt(newDotExpr(scratchObj, field), genAddrOf(dest))
call.add(newSymNode(temp))
let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call)
let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call,
barrierAsExpr, fvAsExpr, spawnKind)
result.add callCodeGenProc("nimSpawn", wrapper.newSymNode,
genAddrOf(scratchObj.newSymNode))
if spawnKind == srFlowVar: result.add fvField

View File

@@ -644,12 +644,13 @@ proc singlePragma(c: PContext, sym: PSym, n: PNode, i: int,
incl(sym.flags, sfNoReturn)
of wDynlib:
processDynLib(c, it, sym)
of wCompilerproc:
of wCompilerproc:
noVal(it) # compilerproc may not get a string!
makeExternExport(sym, "$1", it.info)
incl(sym.flags, sfCompilerProc)
incl(sym.flags, sfUsed) # suppress all those stupid warnings
registerCompilerProc(sym)
if sfFromGeneric notin sym.flags:
makeExternExport(sym, "$1", it.info)
incl(sym.flags, sfCompilerProc)
incl(sym.flags, sfUsed) # suppress all those stupid warnings
registerCompilerProc(sym)
of wProcVar:
noVal(it)
incl(sym.flags, sfProcvar)

View File

@@ -15,7 +15,8 @@ import
magicsys, parser, nversion, nimsets, semfold, importer,
procfind, lookups, rodread, pragmas, passes, semdata, semtypinst, sigmatch,
intsets, transf, vmdef, vm, idgen, aliases, cgmeth, lambdalifting,
evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity
evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity,
semparallel
# implementation

View File

@@ -91,6 +91,7 @@ type
generics*: seq[TInstantiationPair] # pending list of instantiated generics to compile
lastGenericIdx*: int # used for the generics stack
hloLoopDetector*: int # used to prevent endless loops in the HLO
inParallelStmt*: int
proc makeInstPair*(s: PSym, inst: PInstantiation): TInstantiationPair =
result.genericSym = s

View File

@@ -1394,11 +1394,6 @@ proc semDefined(c: PContext, n: PNode, onlyCurrentScope: bool): PNode =
result.info = n.info
result.typ = getSysType(tyBool)
proc setMs(n: PNode, s: PSym): PNode =
result = n
n.sons[0] = newSymNode(s)
n.sons[0].info = n.info
proc expectMacroOrTemplateCall(c: PContext, n: PNode): PSym =
## The argument to the proc should be nkCall(...) or similar
## Returns the macro/template symbol
@@ -1590,6 +1585,27 @@ proc semShallowCopy(c: PContext, n: PNode, flags: TExprFlags): PNode =
else:
result = semDirectOp(c, n, flags)
proc createFlowVar(c: PContext; t: PType; info: TLineInfo): PType =
result = newType(tyGenericInvokation, c.module)
addSonSkipIntLit(result, magicsys.getCompilerProc("FlowVar").typ)
addSonSkipIntLit(result, t)
result = instGenericContainer(c, info, result, allowMetaTypes = false)
proc instantiateCreateFlowVarCall(c: PContext; t: PType;
info: TLineInfo): PSym =
let sym = magicsys.getCompilerProc("nimCreateFlowVar")
if sym == nil:
localError(info, errSystemNeeds, "nimCreateFlowVar")
var bindings: TIdTable
initIdTable(bindings)
bindings.idTablePut(sym.ast[genericParamsPos].sons[0].typ, t)
result = c.semGenerateInstance(c, sym, bindings, info)
proc setMs(n: PNode, s: PSym): PNode =
result = n
n.sons[0] = newSymNode(s)
n.sons[0].info = n.info
proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode =
# this is a hotspot in the compiler!
# DON'T forget to update ast.SpecialSemMagics if you add a magic here!
@@ -1611,6 +1627,22 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode =
checkSonsLen(n, 2)
result = newStrNodeT(renderTree(n[1], {renderNoComments}), n)
result.typ = getSysType(tyString)
of mParallel:
result = setMs(n, s)
var x = n.lastSon
if x.kind == nkDo: x = x.sons[bodyPos]
inc c.inParallelStmt
result.sons[1] = semStmt(c, x)
dec c.inParallelStmt
of mSpawn:
result = setMs(n, s)
result.sons[1] = semExpr(c, n.sons[1])
if not result[1].typ.isEmptyType:
if c.inParallelStmt > 0:
result.typ = result[1].typ
else:
result.typ = createFlowVar(c, result[1].typ, n.info)
result.add instantiateCreateFlowVarCall(c, result[1].typ, n.info).newSymNode
else: result = semDirectOp(c, n, flags)
proc semWhen(c: PContext, n: PNode, semCheck = true): PNode =

View File

@@ -1,7 +1,7 @@
#
#
# The Nimrod Compiler
# (c) Copyright 2013 Andreas Rumpf
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
@@ -131,4 +131,3 @@ proc magicsAfterOverloadResolution(c: PContext, n: PNode,
of mNBindSym: result = semBindSym(c, n)
of mLocals: result = semLocals(c, n)
else: result = n

465
compiler/semparallel.nim Normal file
View File

@@ -0,0 +1,465 @@
#
#
# The Nimrod Compiler
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Semantic checking for 'parallel'.
# - codegen needs to support mSlice (+)
# - lowerings must not perform unnecessary copies (+)
# - slices should become "nocopy" to openArray (+)
# - need to perform bound checks (+)
#
# - parallel needs to insert a barrier (+)
# - passed arguments need to be ensured to be "const"
# - what about 'f(a)'? --> f shouldn't have side effects anyway
# - passed arrays need to be ensured not to alias
# - passed slices need to be ensured to be disjoint (+)
# - output slices need special logic (+)
import
ast, astalgo, idents, lowerings, magicsys, guards, sempass2, msgs,
renderer
from trees import getMagic
from strutils import `%`
discard """
one major problem:
spawn f(a[i])
inc i
spawn f(a[i])
is valid, but
spawn f(a[i])
spawn f(a[i])
inc i
is not! However,
spawn f(a[i])
if guard: inc i
spawn f(a[i])
is not valid either! --> We need a flow dependent analysis here.
However:
while foo:
spawn f(a[i])
inc i
spawn f(a[i])
Is not valid either! --> We should really restrict 'inc' to loop endings?
The heuristic that we implement here (that has no false positives) is: Usage
of 'i' in a slice *after* we determined the stride is invalid!
"""
type
TDirection = enum
ascending, descending
MonotonicVar = object
v, alias: PSym # to support the ordinary 'countup' iterator
# we need to detect aliases
lower, upper, stride: PNode
dir: TDirection
blacklisted: bool # blacklisted variables that are not monotonic
AnalysisCtx = object
locals: seq[MonotonicVar]
slices: seq[tuple[x,a,b: PNode, spawnId: int, inLoop: bool]]
guards: TModel # nested guards
args: seq[PSym] # args must be deeply immutable
spawns: int # we can check that at last 1 spawn is used in
# the 'parallel' section
currentSpawnId: int
inLoop: int
let opSlice = createMagic("slice", mSlice)
proc initAnalysisCtx(): AnalysisCtx =
result.locals = @[]
result.slices = @[]
result.args = @[]
result.guards = @[]
proc lookupSlot(c: AnalysisCtx; s: PSym): int =
for i in 0.. <c.locals.len:
if c.locals[i].v == s or c.locals[i].alias == s: return i
return -1
proc getSlot(c: var AnalysisCtx; v: PSym): ptr MonotonicVar =
let s = lookupSlot(c, v)
if s >= 0: return addr(c.locals[s])
let L = c.locals.len
c.locals.setLen(L+1)
c.locals[L].v = v
return addr(c.locals[L])
proc gatherArgs(c: var AnalysisCtx; n: PNode) =
for i in 0.. <n.safeLen:
let root = getRoot n[i]
if root != nil:
block addRoot:
for r in items(c.args):
if r == root: break addRoot
c.args.add root
gatherArgs(c, n[i])
proc isSingleAssignable(n: PNode): bool =
n.kind == nkSym and (let s = n.sym;
s.kind in {skTemp, skForVar, skLet} and
{sfAddrTaken, sfGlobal} * s.flags == {})
proc isLocal(n: PNode): bool =
n.kind == nkSym and (let s = n.sym;
s.kind in {skResult, skTemp, skForVar, skVar, skLet} and
{sfAddrTaken, sfGlobal} * s.flags == {})
proc checkLocal(c: AnalysisCtx; n: PNode) =
if isLocal(n):
let s = c.lookupSlot(n.sym)
if s >= 0 and c.locals[s].stride != nil:
localError(n.info, "invalid usage of counter after increment")
else:
for i in 0 .. <n.safeLen: checkLocal(c, n.sons[i])
template `?`(x): expr = x.renderTree
proc checkLe(c: AnalysisCtx; a, b: PNode) =
case proveLe(c.guards, a, b)
of impUnknown:
localError(a.info, "cannot prove: " & ?a & " <= " & ?b)
of impYes: discard
of impNo:
localError(a.info, "can prove: " & ?a & " > " & ?b)
proc checkBounds(c: AnalysisCtx; arr, idx: PNode) =
checkLe(c, arr.lowBound, idx)
checkLe(c, idx, arr.highBound)
proc addLowerBoundAsFacts(c: var AnalysisCtx) =
for v in c.locals:
if not v.blacklisted:
c.guards.addFactLe(v.lower, newSymNode(v.v))
proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: PNode) =
checkLocal(c, n)
let le = le.canon
let ri = ri.canon
# perform static bounds checking here; and not later!
let oldState = c.guards.len
addLowerBoundAsFacts(c)
c.checkBounds(x, le)
c.checkBounds(x, ri)
c.guards.setLen(oldState)
c.slices.add((x, le, ri, c.currentSpawnId, c.inLoop > 0))
proc overlap(m: TModel; x,y,c,d: PNode) =
# X..Y and C..D overlap iff (X <= D and C <= Y)
case proveLe(m, x, d)
of impUnknown:
localError(x.info,
"cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" %
[?x, ?d, ?x, ?y, ?c, ?d])
of impYes:
case proveLe(m, c, y)
of impUnknown:
localError(x.info,
"cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" %
[?c, ?y, ?x, ?y, ?c, ?d])
of impYes:
localError(x.info, "($#)..($#) not disjoint from ($#)..($#)" % [?x, ?y, ?c, ?d])
of impNo: discard
of impNo: discard
proc stride(c: AnalysisCtx; n: PNode): BiggestInt =
if isLocal(n):
let s = c.lookupSlot(n.sym)
if s >= 0 and c.locals[s].stride != nil:
result = c.locals[s].stride.intVal
else:
for i in 0 .. <n.safeLen: result += stride(c, n.sons[i])
proc subStride(c: AnalysisCtx; n: PNode): PNode =
# substitute with stride:
if isLocal(n):
let s = c.lookupSlot(n.sym)
if s >= 0 and c.locals[s].stride != nil:
result = n +@ c.locals[s].stride.intVal
else:
result = n
elif n.safeLen > 0:
result = shallowCopy(n)
for i in 0 .. <n.len: result.sons[i] = subStride(c, n.sons[i])
else:
result = n
proc checkSlicesAreDisjoint(c: var AnalysisCtx) =
# this is the only thing that we need to perform after we have traversed
# the whole tree so that the strides are available.
# First we need to add all the computed lower bounds:
addLowerBoundAsFacts(c)
# Every slice used in a loop needs to be disjoint with itself:
for x,a,b,id,inLoop in items(c.slices):
if inLoop: overlap(c.guards, a,b, c.subStride(a), c.subStride(b))
# Another tricky example is:
# while true:
# spawn f(a[i])
# spawn f(a[i+1])
# inc i # inc i, 2 would be correct here
#
# Or even worse:
# while true:
# spawn f(a[i+1 .. i+3])
# spawn f(a[i+4 .. i+5])
# inc i, 4
# Prove that i*k*stride + 3 != i*k'*stride + 5
# For the correct example this amounts to
# i*k*2 != i*k'*2 + 1
# which is true.
# For now, we don't try to prove things like that at all, even though it'd
# be feasible for many useful examples. Instead we attach the slice to
# a spawn and if the attached spawns differ, we bail out:
for i in 0 .. high(c.slices):
for j in i+1 .. high(c.slices):
let x = c.slices[i]
let y = c.slices[j]
if x.spawnId != y.spawnId and guards.sameTree(x.x, y.x):
if not x.inLoop or not y.inLoop:
# XXX strictly speaking, 'or' is not correct here and it needs to
# be 'and'. However this prevents too many obviously correct programs
# like f(a[0..x]); for i in x+1 .. a.high: f(a[i])
overlap(c.guards, x.a, x.b, y.a, y.b)
elif (let k = simpleSlice(x.a, x.b); let m = simpleSlice(y.a, y.b);
k >= 0 and m >= 0):
# ah I cannot resist the temptation and add another sweet heuristic:
# if both slices have the form (i+k)..(i+k) and (i+m)..(i+m) we
# check they are disjoint and k < stride and m < stride:
overlap(c.guards, x.a, x.b, y.a, y.b)
let stride = min(c.stride(x.a), c.stride(y.a))
if k < stride and m < stride:
discard
else:
localError(x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" %
[?x.a, ?x.b, ?y.a, ?y.b])
else:
localError(x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" %
[?x.a, ?x.b, ?y.a, ?y.b])
proc analyse(c: var AnalysisCtx; n: PNode)
proc analyseSons(c: var AnalysisCtx; n: PNode) =
for i in 0 .. <safeLen(n): analyse(c, n[i])
proc min(a, b: PNode): PNode =
if a.isNil: result = b
elif a.intVal < b.intVal: result = a
else: result = b
proc fromSystem(op: PSym): bool = sfSystemModule in getModule(op).flags
proc analyseCall(c: var AnalysisCtx; n: PNode; op: PSym) =
if op.magic == mSpawn:
inc c.spawns
let oldSpawnId = c.currentSpawnId
c.currentSpawnId = c.spawns
gatherArgs(c, n[1])
analyseSons(c, n)
c.currentSpawnId = oldSpawnId
elif op.magic == mInc or (op.name.s == "+=" and op.fromSystem):
if n[1].isLocal:
let incr = n[2].skipConv
if incr.kind in {nkCharLit..nkUInt32Lit} and incr.intVal > 0:
let slot = c.getSlot(n[1].sym)
slot.stride = min(slot.stride, incr)
analyseSons(c, n)
elif op.name.s == "[]" and op.fromSystem:
c.addSlice(n, n[1], n[2][1], n[2][2])
analyseSons(c, n)
elif op.name.s == "[]=" and op.fromSystem:
c.addSlice(n, n[1], n[2][1], n[2][2])
analyseSons(c, n)
else:
analyseSons(c, n)
proc analyseCase(c: var AnalysisCtx; n: PNode) =
analyse(c, n.sons[0])
let oldFacts = c.guards.len
for i in 1.. <n.len:
let branch = n.sons[i]
setLen(c.guards, oldFacts)
addCaseBranchFacts(c.guards, n, i)
for i in 0 .. <branch.len:
analyse(c, branch.sons[i])
setLen(c.guards, oldFacts)
proc analyseIf(c: var AnalysisCtx; n: PNode) =
analyse(c, n.sons[0].sons[0])
let oldFacts = c.guards.len
addFact(c.guards, canon(n.sons[0].sons[0]))
analyse(c, n.sons[0].sons[1])
for i in 1.. <n.len:
let branch = n.sons[i]
setLen(c.guards, oldFacts)
for j in 0..i-1:
addFactNeg(c.guards, canon(n.sons[j].sons[0]))
if branch.len > 1:
addFact(c.guards, canon(branch.sons[0]))
for i in 0 .. <branch.len:
analyse(c, branch.sons[i])
setLen(c.guards, oldFacts)
proc analyse(c: var AnalysisCtx; n: PNode) =
case n.kind
of nkAsgn, nkFastAsgn:
if n[0].isSingleAssignable and n[1].isLocal:
let slot = c.getSlot(n[1].sym)
slot.alias = n[0].sym
elif n[0].isLocal:
# since we already ensure sfAddrTaken is not in s.flags, we only need to
# prevent direct assignments to the monotonic variable:
let slot = c.getSlot(n[0].sym)
slot.blackListed = true
invalidateFacts(c.guards, n[0])
analyseSons(c, n)
addAsgnFact(c.guards, n[0], n[1])
of nkCallKinds:
# direct call:
if n[0].kind == nkSym: analyseCall(c, n, n[0].sym)
else: analyseSons(c, n)
of nkBracketExpr:
c.addSlice(n, n[0], n[1], n[1])
analyseSons(c, n)
of nkReturnStmt, nkRaiseStmt, nkTryStmt:
localError(n.info, "invalid control flow for 'parallel'")
# 'break' that leaves the 'parallel' section is not valid either
# or maybe we should generate a 'try' XXX
of nkVarSection:
for it in n:
let value = it.lastSon
if value.kind != nkEmpty:
for j in 0 .. it.len-3:
if it[j].isLocal:
let slot = c.getSlot(it[j].sym)
if slot.lower.isNil: slot.lower = value
else: internalError(it.info, "slot already has a lower bound")
analyse(c, value)
of nkCaseStmt: analyseCase(c, n)
of nkIfStmt, nkIfExpr: analyseIf(c, n)
of nkWhileStmt:
analyse(c, n.sons[0])
# 'while true' loop?
inc c.inLoop
if isTrue(n.sons[0]):
analyseSons(c, n.sons[1])
else:
# loop may never execute:
let oldState = c.locals.len
let oldFacts = c.guards.len
addFact(c.guards, canon(n.sons[0]))
analyse(c, n.sons[1])
setLen(c.locals, oldState)
setLen(c.guards, oldFacts)
# we know after the loop the negation holds:
if not hasSubnodeWith(n.sons[1], nkBreakStmt):
addFactNeg(c.guards, canon(n.sons[0]))
dec c.inLoop
of nkTypeSection, nkProcDef, nkConverterDef, nkMethodDef, nkIteratorDef,
nkMacroDef, nkTemplateDef, nkConstSection, nkPragma:
discard
else:
analyseSons(c, n)
proc transformSlices(n: PNode): PNode =
if n.kind in nkCallKinds and n[0].kind == nkSym:
let op = n[0].sym
if op.name.s == "[]" and op.fromSystem:
result = copyNode(n)
result.add opSlice.newSymNode
result.add n[1]
result.add n[2][1]
result.add n[2][2]
return result
if n.safeLen > 0:
result = shallowCopy(n)
for i in 0 .. < n.len:
result.sons[i] = transformSlices(n.sons[i])
else:
result = n
proc transformSpawn(owner: PSym; n, barrier: PNode): PNode
proc transformSpawnSons(owner: PSym; n, barrier: PNode): PNode =
result = shallowCopy(n)
for i in 0 .. < n.len:
result.sons[i] = transformSpawn(owner, n.sons[i], barrier)
proc transformSpawn(owner: PSym; n, barrier: PNode): PNode =
case n.kind
of nkVarSection:
result = nil
for it in n:
let b = it.lastSon
if getMagic(b) == mSpawn:
if it.len != 3: localError(it.info, "invalid context for 'spawn'")
let m = transformSlices(b)
if result.isNil:
result = newNodeI(nkStmtList, n.info)
result.add n
result.add wrapProcForSpawn(owner, m, b.typ, barrier, it[0])
it.sons[it.len-1] = emptyNode
if result.isNil: result = n
of nkAsgn, nkFastAsgn:
let b = n[1]
if getMagic(b) == mSpawn:
let m = transformSlices(b)
return wrapProcForSpawn(owner, m, b.typ, barrier, n[0])
result = transformSpawnSons(owner, n, barrier)
of nkCallKinds:
if getMagic(n) == mSpawn:
result = transformSlices(n)
return wrapProcForSpawn(owner, result, n.typ, barrier, nil)
result = transformSpawnSons(owner, n, barrier)
elif n.safeLen > 0:
result = transformSpawnSons(owner, n, barrier)
else:
result = n
proc checkArgs(a: var AnalysisCtx; n: PNode) =
discard "too implement"
proc generateAliasChecks(a: AnalysisCtx; result: PNode) =
discard "too implement"
proc liftParallel*(owner: PSym; n: PNode): PNode =
# this needs to be called after the 'for' loop elimination
# first pass:
# - detect monotonic local integer variables
# - detect used slices
# - detect used arguments
#echo "PAR ", renderTree(n)
var a = initAnalysisCtx()
let body = n.lastSon
analyse(a, body)
if a.spawns == 0:
localError(n.info, "'parallel' section without 'spawn'")
checkSlicesAreDisjoint(a)
checkArgs(a, body)
var varSection = newNodeI(nkVarSection, n.info)
var temp = newSym(skTemp, getIdent"barrier", owner, n.info)
temp.typ = magicsys.getCompilerProc("Barrier").typ
incl(temp.flags, sfFromGeneric)
let tempNode = newSymNode(temp)
varSection.addVar tempNode
let barrier = genAddrOf(tempNode)
result = newNodeI(nkStmtList, n.info)
generateAliasChecks(a, result)
result.add varSection
result.add callCodeGenProc("openBarrier", barrier)
result.add transformSpawn(owner, body, barrier)
result.add callCodeGenProc("closeBarrier", barrier)

View File

@@ -89,7 +89,7 @@ proc initVarViaNew(a: PEffects, n: PNode) =
if n.kind != nkSym: return
let s = n.sym
if {tfNeedsInit, tfNotNil} * s.typ.flags <= {tfNotNil}:
# 'x' is not nil, but that doesn't mean it's not nil children
# 'x' is not nil, but that doesn't mean its "not nil" children
# are initialized:
initVar(a, n)
@@ -478,7 +478,7 @@ proc trackBlock(tracked: PEffects, n: PNode) =
else:
track(tracked, n)
proc isTrue(n: PNode): bool =
proc isTrue*(n: PNode): bool =
n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or
n.kind == nkIntLit and n.intVal != 0

View File

@@ -1085,8 +1085,10 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType =
of nkCallKinds:
if isRange(n):
result = semRangeAux(c, n, prev)
elif n[0].kind == nkIdent:
let op = n.sons[0].ident
elif n[0].kind notin nkIdentKinds:
result = semTypeExpr(c, n)
else:
let op = considerQuotedIdent(n.sons[0])
if op.id in {ord(wAnd), ord(wOr)} or op.s == "|":
checkSonsLen(n, 3)
var
@@ -1121,8 +1123,6 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType =
result = semAnyRef(c, n, tyRef, prev)
else:
result = semTypeExpr(c, n)
else:
result = semTypeExpr(c, n)
of nkWhenStmt:
var whenResult = semWhen(c, n, false)
if whenResult.kind == nkStmtList: whenResult.kind = nkStmtListType

View File

@@ -131,8 +131,9 @@ proc createStrKeepNode(x: var TFullReg) =
nfAllConst in x.node.flags:
# XXX this is hacky; tests/txmlgen triggers it:
x.node = newNode(nkStrLit)
# debug x.node
#assert x.node.kind in {nkStrLit..nkTripleStrLit}
# It not only hackey, it is also wrong for tgentemplate. The primary
# cause of bugs like these is that the VM does not properly distinguish
# between variable defintions (var foo = e) and variable updates (foo = e).
template createStr(x) =
x.node = newNode(nkStrLit)

View File

@@ -16,6 +16,7 @@ arm.linux.gcc.linkerexe = "arm-linux-gcc"
path="$lib/core"
path="$lib/pure"
path="$lib/pure/collections"
path="$lib/pure/concurrency"
path="$lib/impure"
path="$lib/wrappers"
# path="$lib/wrappers/cairo"

View File

@@ -2823,7 +2823,7 @@ The following builtin procs cannot be overloaded for reasons of implementation
simplicity (they require specialized semantic checking)::
defined, definedInScope, compiles, low, high, sizeOf,
is, of, echo, shallowCopy, getAst
is, of, echo, shallowCopy, getAst, spawn
Thus they act more like keywords than like ordinary identifiers; unlike a
keyword however, a redefinition may `shadow`:idx: the definition in

98
doc/spawn.txt Normal file
View File

@@ -0,0 +1,98 @@
==========================================================
Parallel & Spawn
==========================================================
Nimrod has two flavors of parallelism:
1) `Structured`:idx parallelism via the ``parallel`` statement.
2) `Unstructured`:idx: parallelism via the standalone ``spawn`` statement.
Both need the `threadpool <threadpool.html>`_ module to work.
Somewhat confusingly, ``spawn`` is also used in the ``parallel`` statement
with slightly different semantics. ``spawn`` always takes a call expression of
the form ``f(a, ...)``. Let ``T`` be ``f``'s return type. If ``T`` is ``void``
then ``spawn``'s return type is also ``void``. Within a ``parallel`` section
``spawn``'s return type is ``T``, otherwise it is ``FlowVar[T]``.
The compiler can ensure the location in ``location = spawn f(...)`` is not
read prematurely within a ``parallel`` section and so there is no need for
the overhead of an indirection via ``FlowVar[T]`` to ensure correctness.
Parallel statement
==================
Example:
.. code-block:: nimrod
# Compute PI in an inefficient way
import strutils, math, threadpool
proc term(k: float): float = 4 * math.pow(-1, k) / (2*k + 1)
proc pi(n: int): float =
var ch = newSeq[float](n+1)
parallel:
for k in 0..ch.high:
ch[k] = spawn term(float(k))
for k in 0..ch.high:
result += ch[k]
echo formatFloat(pi(5000))
The parallel statement is the preferred mechanism to introduce parallelism
in a Nimrod program. A subset of the Nimrod language is valid within a
``parallel`` section. This subset is checked to be free of data races at
compile time. A sophisticated `disjoint checker`:idx: ensures that no data
races are possible even though shared memory is extensively supported!
The subset is in fact the full language with the following
restrictions / changes:
* ``spawn`` within a ``parallel`` section has special semantics.
* Every location of the form ``a[i]`` and ``a[i..j]`` and ``dest`` where
``dest`` is part of the pattern ``dest = spawn f(...)`` has to be
provable disjoint. This is called the *disjoint check*.
* Every other complex location ``loc`` that is used in a spawned
proc (``spawn f(loc)``) has to be immutable for the duration of
the ``parallel`` section. This is called the *immutability check*. Currently
it is not specified what exactly "complex location" means. We need to make
this an optimization!
* Every array access has to be provable within bounds. This is called
the *bounds check*.
* Slices are optimized so that no copy is performed. This optimization is not
yet performed for ordinary slices outside of a ``parallel`` section. Slices
are also special in that they currently do not support negative indexes!
Spawn statement
===============
A standalone ``spawn`` statement is a simple construct. It executes
the passed expression on the thread pool and returns a `data flow variable`:idx:
``FlowVar[T]`` that can be read from. The reading with the ``^`` operator is
**blocking**. However, one can use ``awaitAny`` to wait on multiple flow
variables at the same time:
.. code-block:: nimrod
import threadpool, ...
# wait until 2 out of 3 servers received the update:
proc main =
var responses = newSeq[RawFlowVar](3)
for i in 0..2:
responses[i] = spawn tellServer(Update, "key", "value")
var index = awaitAny(responses)
assert index >= 0
responses.del(index)
discard awaitAny(responses)
Like the ``parallel`` statement data flow variables ensure that no data races
are possible. Due to technical limitations not every type ``T`` is possible in
a data flow variable: ``T`` has to be of the type ``ref``, ``string``, ``seq``
or of a type that doesn't contain a type that is garbage collected. This
restriction will be removed in the future.

View File

@@ -0,0 +1,58 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements procs to determine the number of CPUs / cores.
include "system/inclrtl"
import strutils, os
when not defined(windows):
import posix
when defined(linux):
import linux
when defined(macosx) or defined(bsd):
const
CTL_HW = 6
HW_AVAILCPU = 25
HW_NCPU = 3
proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer,
a: var csize, b: pointer, c: int): cint {.
importc: "sysctl", header: "<sys/sysctl.h>".}
proc countProcessors*(): int {.rtl, extern: "ncpi$1".} =
## returns the numer of the processors/cores the machine has.
## Returns 0 if it cannot be detected.
when defined(windows):
var x = getEnv("NUMBER_OF_PROCESSORS")
if x.len > 0: result = parseInt(x.string)
elif defined(macosx) or defined(bsd):
var
mib: array[0..3, cint]
numCPU: int
len: csize
mib[0] = CTL_HW
mib[1] = HW_AVAILCPU
len = sizeof(numCPU)
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
if numCPU < 1:
mib[1] = HW_NCPU
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
result = numCPU
elif defined(hpux):
result = mpctl(MPC_GETNUMSPUS, nil, nil)
elif defined(irix):
var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint
result = sysconf(SC_NPROC_ONLN)
else:
result = sysconf(SC_NPROCESSORS_ONLN)
if result <= 0: result = 1

View File

@@ -0,0 +1,96 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements a helper for a thread pool to determine whether
## creating a thread is a good idea.
when defined(windows):
import winlean, os, strutils, math
proc `-`(a, b: TFILETIME): int64 = a.rdFileTime - b.rdFileTime
elif defined(linux):
from cpuinfo import countProcessors
type
ThreadPoolAdvice* = enum
doNothing,
doCreateThread, # create additional thread for throughput
doShutdownThread # too many threads are busy, shutdown one
ThreadPoolState* = object
when defined(windows):
prevSysKernel, prevSysUser, prevProcKernel, prevProcUser: TFILETIME
calls*: int
proc advice*(s: var ThreadPoolState): ThreadPoolAdvice =
when defined(windows):
var
sysIdle, sysKernel, sysUser,
procCreation, procExit, procKernel, procUser: TFILETIME
if getSystemTimes(sysIdle, sysKernel, sysUser) == 0 or
getProcessTimes(THandle(-1), procCreation, procExit,
procKernel, procUser) == 0:
return doNothing
if s.calls > 0:
let
sysKernelDiff = sysKernel - s.prevSysKernel
sysUserDiff = sysUser - s.prevSysUser
procKernelDiff = procKernel - s.prevProcKernel
procUserDiff = procUser - s.prevProcUser
sysTotal = int(sysKernelDiff + sysUserDiff)
procTotal = int(procKernelDiff + procUserDiff)
# total CPU usage < 85% --> create a new worker thread.
# Measurements show that 100% and often even 90% is not reached even
# if all my cores are busy.
if sysTotal == 0 or procTotal / sysTotal < 0.85:
result = doCreateThread
s.prevSysKernel = sysKernel
s.prevSysUser = sysUser
s.prevProcKernel = procKernel
s.prevProcUser = procUser
elif defined(linux):
proc fscanf(c: TFile, frmt: cstring) {.varargs, importc,
header: "<stdio.h>".}
var f = open("/proc/loadavg")
var b: float
var busy, total: int
fscanf(f,"%lf %lf %lf %ld/%ld",
addr b, addr b, addr b, addr busy, addr total)
f.close()
let cpus = countProcessors()
if busy-1 < cpus:
result = doCreateThread
elif busy-1 >= cpus*2:
result = doShutdownThread
else:
result = doNothing
else:
# XXX implement this for other OSes
result = doNothing
inc s.calls
when isMainModule:
proc busyLoop() =
while true:
discard random(80)
os.sleep(100)
spawn busyLoop()
spawn busyLoop()
spawn busyLoop()
spawn busyLoop()
var s: ThreadPoolState
for i in 1 .. 70:
echo advice(s)
os.sleep(1000)

View File

@@ -0,0 +1,378 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Implements Nimrod's 'spawn'.
import cpuinfo, cpuload, locks
{.push stackTrace:off.}
type
CondVar = object
c: TCond
L: TLock
counter: int
proc createCondVar(): CondVar =
initCond(result.c)
initLock(result.L)
proc destroyCondVar(cv: var CondVar) {.inline.} =
deinitCond(cv.c)
deinitLock(cv.L)
proc await(cv: var CondVar) =
acquire(cv.L)
while cv.counter <= 0:
wait(cv.c, cv.L)
dec cv.counter
release(cv.L)
proc signal(cv: var CondVar) =
acquire(cv.L)
inc cv.counter
release(cv.L)
signal(cv.c)
const CacheLineSize = 32 # true for most archs
type
Barrier {.compilerProc.} = object
entered: int
cv: CondVar # condvar takes 3 words at least
when sizeof(int) < 8:
cacheAlign: array[CacheLineSize-4*sizeof(int), byte]
left: int
cacheAlign2: array[CacheLineSize-sizeof(int), byte]
interest: bool ## wether the master is interested in the "all done" event
proc barrierEnter(b: ptr Barrier) {.compilerProc, inline.} =
# due to the signaling between threads, it is ensured we are the only
# one with access to 'entered' so we don't need 'atomicInc' here:
inc b.entered
# also we need no 'fence' instructions here as soon 'nimArgsPassingDone'
# will be called which already will perform a fence for us.
proc barrierLeave(b: ptr Barrier) {.compilerProc, inline.} =
atomicInc b.left
when not defined(x86): fence()
if b.interest and b.left == b.entered: signal(b.cv)
proc openBarrier(b: ptr Barrier) {.compilerProc, inline.} =
b.entered = 0
b.left = 0
b.interest = false
proc closeBarrier(b: ptr Barrier) {.compilerProc.} =
fence()
if b.left != b.entered:
b.cv = createCondVar()
fence()
b.interest = true
fence()
while b.left != b.entered: await(b.cv)
destroyCondVar(b.cv)
{.pop.}
# ----------------------------------------------------------------------------
type
foreign* = object ## a region that indicates the pointer comes from a
## foreign thread heap.
AwaitInfo = object
cv: CondVar
idx: int
FlowVarBase* = ref FlowVarBaseObj ## untyped base class for 'FlowVar[T]'
FlowVarBaseObj = object of TObject
ready, usesCondVar: bool
cv: CondVar #\
# for 'awaitAny' support
ai: ptr AwaitInfo
idx: int
data: pointer # we incRef and unref it to keep it alive
owner: pointer # ptr Worker
FlowVarObj[T] = object of FlowVarBaseObj
blob: T
FlowVar*{.compilerProc.}[T] = ref FlowVarObj[T] ## a data flow variable
ToFreeQueue = object
len: int
lock: TLock
empty: TCond
data: array[512, pointer]
WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.}
Worker = object
taskArrived: CondVar
taskStarted: CondVar #\
# task data:
f: WorkerProc
data: pointer
ready: bool # put it here for correct alignment!
initialized: bool # whether it has even been initialized
shutdown: bool # the pool requests to shut down this worker thread
q: ToFreeQueue
proc await*(fv: FlowVarBase) =
## waits until the value for the flowVar arrives. Usually it is not necessary
## to call this explicitly.
if fv.usesCondVar:
fv.usesCondVar = false
await(fv.cv)
destroyCondVar(fv.cv)
proc finished(fv: FlowVarBase) =
doAssert fv.ai.isNil, "flowVar is still attached to an 'awaitAny'"
# we have to protect against the rare cases where the owner of the flowVar
# simply disregards the flowVar and yet the "flowVarr" has not yet written
# anything to it:
await(fv)
if fv.data.isNil: return
let owner = cast[ptr Worker](fv.owner)
let q = addr(owner.q)
var waited = false
while true:
acquire(q.lock)
if q.len < q.data.len:
q.data[q.len] = fv.data
inc q.len
release(q.lock)
break
else:
# the queue is exhausted! We block until it has been cleaned:
release(q.lock)
wait(q.empty, q.lock)
waited = true
fv.data = nil
# wakeup other potentially waiting threads:
if waited: signal(q.empty)
proc cleanFlowVars(w: ptr Worker) =
let q = addr(w.q)
acquire(q.lock)
for i in 0 .. <q.len:
GC_unref(cast[PObject](q.data[i]))
q.len = 0
release(q.lock)
signal(q.empty)
proc fvFinalizer[T](fv: FlowVar[T]) = finished(fv)
proc nimCreateFlowVar[T](): FlowVar[T] {.compilerProc.} =
new(result, fvFinalizer)
proc nimFlowVarCreateCondVar(fv: FlowVarBase) {.compilerProc.} =
fv.cv = createCondVar()
fv.usesCondVar = true
proc nimFlowVarSignal(fv: FlowVarBase) {.compilerProc.} =
if fv.ai != nil:
acquire(fv.ai.cv.L)
fv.ai.idx = fv.idx
inc fv.ai.cv.counter
release(fv.ai.cv.L)
signal(fv.ai.cv.c)
if fv.usesCondVar: signal(fv.cv)
proc awaitAndThen*[T](fv: FlowVar[T]; action: proc (x: T) {.closure.}) =
## blocks until the ``fv`` is available and then passes its value
## to ``action``. Note that due to Nimrod's parameter passing semantics this
## means that ``T`` doesn't need to be copied and so ``awaitAndThen`` can
## sometimes be more efficient than ``^``.
await(fv)
when T is string or T is seq:
action(cast[T](fv.data))
elif T is ref:
{.error: "'awaitAndThen' not available for FlowVar[ref]".}
else:
action(fv.blob)
finished(fv)
proc `^`*[T](fv: FlowVar[ref T]): foreign ptr T =
## blocks until the value is available and then returns this value.
await(fv)
result = cast[foreign ptr T](fv.data)
proc `^`*[T](fv: FlowVar[T]): T =
## blocks until the value is available and then returns this value.
await(fv)
when T is string or T is seq:
result = cast[T](fv.data)
else:
result = fv.blob
proc awaitAny*(flowVars: openArray[FlowVarBase]): int =
## awaits any of the given flowVars. Returns the index of one flowVar for
## which a value arrived. A flowVar only supports one call to 'awaitAny' at
## the same time. That means if you await([a,b]) and await([b,c]) the second
## call will only await 'c'. If there is no flowVar left to be able to wait
## on, -1 is returned.
## **Note**: This results in non-deterministic behaviour and so should be
## avoided.
var ai: AwaitInfo
ai.cv = createCondVar()
var conflicts = 0
for i in 0 .. flowVars.high:
if cas(addr flowVars[i].ai, nil, addr ai):
flowVars[i].idx = i
else:
inc conflicts
if conflicts < flowVars.len:
await(ai.cv)
result = ai.idx
for i in 0 .. flowVars.high:
discard cas(addr flowVars[i].ai, addr ai, nil)
else:
result = -1
destroyCondVar(ai.cv)
proc nimArgsPassingDone(p: pointer) {.compilerProc.} =
let w = cast[ptr Worker](p)
signal(w.taskStarted)
const
MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 256 threads
## should be good enough for anybody ;-)
var
currentPoolSize: int
maxPoolSize = MaxThreadPoolSize
minPoolSize = 4
gSomeReady = createCondVar()
readyWorker: ptr Worker
proc slave(w: ptr Worker) {.thread.} =
while true:
w.ready = true
readyWorker = w
signal(gSomeReady)
await(w.taskArrived)
assert(not w.ready)
w.f(w, w.data)
if w.q.len != 0: w.cleanFlowVars
if w.shutdown:
w.shutdown = false
atomicDec currentPoolSize
proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) =
## sets the minimal thread pool size. The default value of this is 4.
minPoolSize = size
proc setMaxPoolSize*(size: range[1..MaxThreadPoolSize]) =
## sets the minimal thread pool size. The default value of this
## is ``MaxThreadPoolSize``.
maxPoolSize = size
var
workers: array[MaxThreadPoolSize, TThread[ptr Worker]]
workersData: array[MaxThreadPoolSize, Worker]
proc activateThread(i: int) {.noinline.} =
workersData[i].taskArrived = createCondVar()
workersData[i].taskStarted = createCondVar()
workersData[i].initialized = true
initCond(workersData[i].q.empty)
initLock(workersData[i].q.lock)
createThread(workers[i], slave, addr(workersData[i]))
proc setup() =
currentPoolSize = min(countProcessors(), MaxThreadPoolSize)
readyWorker = addr(workersData[0])
for i in 0.. <currentPoolSize: activateThread(i)
proc preferSpawn*(): bool =
## Use this proc to determine quickly if a 'spawn' or a direct call is
## preferable. If it returns 'true' a 'spawn' may make sense. In general
## it is not necessary to call this directly; use 'spawnX' instead.
result = gSomeReady.counter > 0
proc spawn*(call: expr): expr {.magic: "Spawn".}
## always spawns a new task, so that the 'call' is never executed on
## the calling thread. 'call' has to be proc call 'p(...)' where 'p'
## is gcsafe and has a return type that is either 'void' or compatible
## with ``FlowVar[T]``.
template spawnX*(call: expr): expr =
## spawns a new task if a CPU core is ready, otherwise executes the
## call in the calling thread. Usually it is advised to
## use 'spawn' in order to not block the producer for an unknown
## amount of time. 'call' has to be proc call 'p(...)' where 'p'
## is gcsafe and has a return type that is either 'void' or compatible
## with ``FlowVar[T]``.
(if preferSpawn(): spawn call else: call)
proc parallel*(body: stmt) {.magic: "Parallel".}
## a parallel section can be used to execute a block in parallel. ``body``
## has to be in a DSL that is a particular subset of the language. Please
## refer to the manual for further information.
var
state: ThreadPoolState
stateLock: TLock
initLock stateLock
proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool =
if cas(addr w.ready, true, false):
w.data = data
w.f = fn
signal(w.taskArrived)
await(w.taskStarted)
result = true
proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} =
# implementation of 'spawn' that is used by the code generator.
while true:
if selectWorker(readyWorker, fn, data): return
for i in 0.. <currentPoolSize:
if selectWorker(addr(workersData[i]), fn, data): return
# determine what to do, but keep in mind this is expensive too:
# state.calls < maxPoolSize: warmup phase
# (state.calls and 127) == 0: periodic check
if state.calls < maxPoolSize or (state.calls and 127) == 0:
# ensure the call to 'advice' is atomic:
if tryAcquire(stateLock):
case advice(state)
of doNothing: discard
of doCreateThread:
if currentPoolSize < maxPoolSize:
if not workersData[currentPoolSize].initialized:
activateThread(currentPoolSize)
let w = addr(workersData[currentPoolSize])
atomicInc currentPoolSize
if selectWorker(w, fn, data):
release(stateLock)
return
# else we didn't succeed but some other thread, so do nothing.
of doShutdownThread:
if currentPoolSize > minPoolSize:
let w = addr(workersData[currentPoolSize-1])
w.shutdown = true
# we don't free anything here. Too dangerous.
release(stateLock)
# else the acquire failed, but this means some
# other thread succeeded, so we don't need to do anything here.
await(gSomeReady)
proc sync*() =
## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate
## waiting, you have to use an explicit barrier.
while true:
var allReady = true
for i in 0 .. <currentPoolSize:
if not allReady: break
allReady = allReady and workersData[i].ready
if allReady: break
await(gSomeReady)
setup()

View File

@@ -1,7 +1,7 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2013 Andreas Rumpf
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
@@ -13,7 +13,7 @@
include "system/inclrtl"
import
strutils, os, strtabs, streams
strutils, os, strtabs, streams, cpuinfo
when defined(windows):
import winlean
@@ -225,42 +225,10 @@ proc errorHandle*(p: PProcess): TFileHandle {.rtl, extern: "nosp$1",
## it is closed when closing the PProcess ``p``.
result = p.errHandle
when defined(macosx) or defined(bsd):
const
CTL_HW = 6
HW_AVAILCPU = 25
HW_NCPU = 3
proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer,
a: var csize, b: pointer, c: int): cint {.
importc: "sysctl", header: "<sys/sysctl.h>".}
proc countProcessors*(): int {.rtl, extern: "nosp$1".} =
## returns the numer of the processors/cores the machine has.
## Returns 0 if it cannot be detected.
when defined(windows):
var x = getEnv("NUMBER_OF_PROCESSORS")
if x.len > 0: result = parseInt(x.string)
elif defined(macosx) or defined(bsd):
var
mib: array[0..3, cint]
numCPU: int
len: csize
mib[0] = CTL_HW
mib[1] = HW_AVAILCPU
len = sizeof(numCPU)
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
if numCPU < 1:
mib[1] = HW_NCPU
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
result = numCPU
elif defined(hpux):
result = mpctl(MPC_GETNUMSPUS, nil, nil)
elif defined(irix):
var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint
result = sysconf(SC_NPROC_ONLN)
else:
result = sysconf(SC_NPROCESSORS_ONLN)
if result <= 0: result = 1
result = cpuinfo.countProcessors()
proc execProcesses*(cmds: openArray[string],
options = {poStdErrToStdOut, poParentStreams},

View File

@@ -42,7 +42,6 @@ type
cstring* {.magic: Cstring.} ## built-in cstring (*compatible string*) type
pointer* {.magic: Pointer.} ## built-in pointer type, use the ``addr``
## operator to get a pointer to a variable
const
on* = true ## alias for ``true``
off* = false ## alias for ``false``
@@ -51,6 +50,9 @@ const
type
Ordinal* {.magic: Ordinal.}[T]
`ptr`* {.magic: Pointer.}[T] ## built-in generic untraced pointer type
`ref`* {.magic: Pointer.}[T] ## built-in generic traced pointer type
`nil` {.magic: "Nil".}
expr* {.magic: Expr.} ## meta type to denote an expression (for templates)
stmt* {.magic: Stmt.} ## meta type to denote a statement (for templates)
@@ -2983,6 +2985,10 @@ proc locals*(): TObject {.magic: "Locals", noSideEffect.} =
## # -> B is 1
discard
proc deepCopy*[T](x: T): T {.magic: "DeepCopy", noSideEffect.}
## performs a deep copy of `x`. This is also used by the code generator
## for the implementation of ``spawn``.
when not defined(booting):
type
semistatic*[T] = static[T] | T
@@ -2991,6 +2997,3 @@ when not defined(booting):
template isStatic*(x): expr = compiles(static(x))
# checks whether `x` is a value known at compile-time
when hasThreadSupport:
when hostOS != "standalone": include "system/sysspawn"

View File

@@ -179,7 +179,8 @@ when not defined(nimmixin):
# internal proc used for destroying sequences and arrays
for i in countup(0, r.len - 1): destroy(r[i])
else:
# XXX Why is this exported and no compilerproc?
# XXX Why is this exported and no compilerproc? -> compilerprocs cannot be
# generic for now
proc nimDestroyRange*[T](r: T) =
# internal proc used for destroying sequences and arrays
mixin destroy

View File

@@ -1,15 +1,18 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2012 Andreas Rumpf
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Atomic operations for Nimrod.
{.push stackTrace:off.}
when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport:
const someGcc = defined(gcc) or defined(llvm_gcc) or defined(clang)
when someGcc and hasThreadSupport:
type
AtomMemModel* = enum
ATOMIC_RELAXED, ## No barriers or synchronization.
@@ -152,41 +155,16 @@ when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport:
## A value of 0 indicates typical alignment should be used. The compiler may also
## ignore this parameter.
template fence*() = atomicThreadFence(ATOMIC_SEQ_CST)
elif defined(vcc) and hasThreadSupport:
proc addAndFetch*(p: ptr int, val: int): int {.
importc: "NimXadd", nodecl.}
else:
proc addAndFetch*(p: ptr int, val: int): int {.inline.} =
inc(p[], val)
result = p[]
# atomic compare and swap (CAS) funcitons to implement lock-free algorithms
#if defined(windows) and not defined(gcc) and hasThreadSupport:
# proc InterlockedCompareExchangePointer(mem: ptr pointer,
# newValue: pointer, comparand: pointer) : pointer {.nodecl,
# importc: "InterlockedCompareExchangePointer", header:"windows.h".}
# proc compareAndSwap*[T](mem: ptr T,
# expected: T, newValue: T): bool {.inline.}=
# ## Returns true if successfully set value at mem to newValue when value
# ## at mem == expected
# return InterlockedCompareExchangePointer(addr(mem),
# addr(newValue), addr(expected))[] == expected
#elif not hasThreadSupport:
# proc compareAndSwap*[T](mem: ptr T,
# expected: T, newValue: T): bool {.inline.} =
# ## Returns true if successfully set value at mem to newValue when value
# ## at mem == expected
# var oldval = mem[]
# if oldval == expected:
# mem[] = newValue
# return true
# return false
# Some convenient functions
proc atomicInc*(memLoc: var int, x: int = 1): int =
when defined(gcc) and hasThreadSupport:
result = atomic_add_fetch(memLoc.addr, x, ATOMIC_RELAXED)
@@ -203,3 +181,37 @@ proc atomicDec*(memLoc: var int, x: int = 1): int =
else:
dec(memLoc, x)
result = memLoc
when defined(windows) and not someGcc:
proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32
{.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.}
proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0
# XXX fix for 64 bit build
else:
# this is valid for GCC and Intel C++
proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool
{.importc: "__sync_bool_compare_and_swap", nodecl.}
# XXX is this valid for 'int'?
when (defined(x86) or defined(amd64)) and (defined(gcc) or defined(llvm_gcc)):
proc cpuRelax {.inline.} =
{.emit: """asm volatile("pause" ::: "memory");""".}
elif (defined(x86) or defined(amd64)) and defined(vcc):
proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".}
elif defined(intelc):
proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".}
elif false:
from os import sleep
proc cpuRelax {.inline.} = os.sleep(1)
when not defined(fence) and hasThreadSupport:
# XXX fixme
proc fence*() {.inline.} =
var dummy: bool
discard cas(addr dummy, false, true)
{.pop.}

View File

@@ -14,30 +14,6 @@ when not defined(NimString):
{.push stackTrace:off.}
when (defined(x86) or defined(amd64)) and defined(gcc):
proc cpuRelax {.inline.} =
{.emit: """asm volatile("pause" ::: "memory");""".}
elif (defined(x86) or defined(amd64)) and defined(vcc):
proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".}
elif defined(intelc):
proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".}
elif false:
from os import sleep
proc cpuRelax {.inline.} = os.sleep(1)
when defined(windows) and not defined(gcc):
proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32
{.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.}
proc cas(p: ptr bool; oldValue, newValue: bool): bool =
interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0
else:
# this is valid for GCC and Intel C++
proc cas(p: ptr bool; oldValue, newValue: bool): bool
{.importc: "__sync_bool_compare_and_swap", nodecl.}
# We declare our own condition variables here to get rid of the dummy lock
# on Windows:
@@ -54,6 +30,9 @@ proc createCondVar(): CondVar =
initSysLock(result.stupidLock)
#acquireSys(result.stupidLock)
proc destroyCondVar(c: var CondVar) {.inline.} =
deinitSysCond(c.c)
proc await(cv: var CondVar) =
when defined(posix):
acquireSys(cv.stupidLock)
@@ -100,6 +79,26 @@ proc signal(cv: var FastCondVar) =
#if cas(addr cv.slowPath, true, false):
signal(cv.slow)
type
Barrier* {.compilerProc.} = object
counter: int
cv: CondVar
proc barrierEnter*(b: ptr Barrier) {.compilerProc.} =
atomicInc b.counter
proc barrierLeave*(b: ptr Barrier) {.compilerProc.} =
atomicDec b.counter
if b.counter <= 0: signal(b.cv)
proc openBarrier*(b: ptr Barrier) {.compilerProc.} =
b.counter = 0
b.cv = createCondVar()
proc closeBarrier*(b: ptr Barrier) {.compilerProc.} =
await(b.cv)
destroyCondVar(b.cv)
{.pop.}
# ----------------------------------------------------------------------------

View File

@@ -0,0 +1 @@
threads:on

View File

@@ -0,0 +1,21 @@
discard """
outputsub: "EVEN 28"
"""
import threadpool
proc odd(a: int) = echo "ODD ", a
proc even(a: int) = echo "EVEN ", a
proc main() =
var a: array[0..30, int]
for i in low(a)..high(a): a[i] = i
parallel:
var i = 0
while i <= 29:
spawn even(a[i])
spawn odd(a[i+1])
inc i, 2
# is correct here
main()

View File

@@ -0,0 +1,33 @@
discard """
output: '''0
1
2
3
4
5
6
7
8'''
sortoutput: true
"""
import threadpool
proc f(a: openArray[int]) =
for x in a: echo x
proc f(a: int) = echo a
proc main() =
var a: array[0..9, int] = [0,1,2,3,4,5,6,7,8,9]
parallel:
spawn f(a[0..2])
#spawn f(a[16..30])
var i = 3
while i <= 8:
spawn f(a[i])
spawn f(a[i+1])
inc i, 2
# is correct here
main()

View File

@@ -0,0 +1,17 @@
discard """
output: '''foobarfoobarbazbearbazbear'''
cmd: "nimrod $target --threads:on $options $file"
"""
import threadpool
proc computeSomething(a, b: string): string = a & b & a & b
proc main =
let fvA = spawn computeSomething("foo", "bar")
let fvB = spawn computeSomething("baz", "bear")
echo(^fvA, ^fvB)
main()
sync()

View File

@@ -0,0 +1,25 @@
discard """
output: '''3
4
5
6
7'''
sortoutput: true
"""
import threadpool, os
proc p(x: int) =
os.sleep(100 - x*10)
echo x
proc testFor(a, b: int; foo: var openArray[int]) =
parallel:
for i in max(a, 0) .. min(b, foo.high):
spawn p(foo[i])
var arr = [0, 1, 2, 3, 4, 5, 6, 7]
testFor(3, 10, arr)

View File

@@ -0,0 +1,25 @@
discard """
errormsg: "can prove: i + 1 > 30"
line: 21
"""
import threadpool
proc f(a: openArray[int]) =
for x in a: echo x
proc f(a: int) = echo a
proc main() =
var a: array[0..30, int]
parallel:
spawn f(a[0..15])
spawn f(a[16..30])
var i = 0
while i <= 30:
spawn f(a[i])
spawn f(a[i+1])
inc i
#inc i # inc i, 2 would be correct here
main()

View File

@@ -0,0 +1,26 @@
discard """
errormsg: "invalid usage of counter after increment"
line: 21
"""
import threadpool
proc f(a: openArray[int]) =
for x in a: echo x
proc f(a: int) = echo a
proc main() =
var a: array[0..30, int]
parallel:
spawn f(a[0..15])
spawn f(a[16..30])
var i = 0
while i <= 30:
inc i
spawn f(a[i])
inc i
#spawn f(a[i+1])
#inc i # inc i, 2 would be correct here
main()

View File

@@ -0,0 +1,25 @@
discard """
errormsg: "cannot prove (i)..(i) disjoint from (i + 1)..(i + 1)"
line: 20
"""
import threadpool
proc f(a: openArray[int]) =
for x in a: echo x
proc f(a: int) = echo a
proc main() =
var a: array[0..30, int]
parallel:
#spawn f(a[0..15])
#spawn f(a[16..30])
var i = 0
while i <= 29:
spawn f(a[i])
spawn f(a[i+1])
inc i
#inc i # inc i, 2 would be correct here
main()

26
tests/parallel/tpi.nim Normal file
View File

@@ -0,0 +1,26 @@
discard """
output: '''3.141792613595791
3.141792613595791'''
"""
import strutils, math, threadpool
proc term(k: float): float = 4 * math.pow(-1, k) / (2*k + 1)
proc piU(n: int): float =
var ch = newSeq[FlowVar[float]](n+1)
for k in 0..n:
ch[k] = spawn term(float(k))
for k in 0..n:
result += ^ch[k]
proc piS(n: int): float =
var ch = newSeq[float](n+1)
parallel:
for k in 0..ch.high:
ch[k] = spawn term(float(k))
for k in 0..ch.high:
result += ch[k]
echo formatFloat(piU(5000))
echo formatFloat(piS(5000))

View File

@@ -4,20 +4,22 @@ discard """
cmd: "nimrod $target --threads:on $options $file"
"""
import threadpool
var
x, y = 0
proc p1 =
for i in 0 .. 1_000_000:
for i in 0 .. 10_000:
discard
inc x
atomicInc x
proc p2 =
for i in 0 .. 1_000_000:
for i in 0 .. 10_000:
discard
inc y, 2
atomicInc y, 2
for i in 0.. 3:
spawn(p1())

View File

@@ -0,0 +1,9 @@
discard """
line: 9
errormsg: "'spawn' takes a call expression"
cmd: "nimrod $target --threads:on $options $file"
"""
import threadpool
let foo = spawn(1)

View File

@@ -1,7 +0,0 @@
discard """
line: 7
errormsg: "'spawn' takes a call expression of type void"
cmd: "nimrod $target --threads:on $options $file"
"""
spawn(1)

View File

@@ -46,7 +46,7 @@ type
msg*: string
ccodeCheck*: string
err*: TResultEnum
substr*: bool
substr*, sortoutput*: bool
targets*: set[TTarget]
const
@@ -113,6 +113,8 @@ proc parseSpec*(filename: string): TSpec =
result.action = actionRun
result.outp = e.value
result.substr = true
of "sortoutput":
result.sortoutput = parseCfgBool(e.value)
of "exitcode":
discard parseInt(e.value, result.exitCode)
of "msg":

View File

@@ -11,7 +11,8 @@
import
parseutils, strutils, pegs, os, osproc, streams, parsecfg, json,
marshal, backend, parseopt, specs, htmlgen, browsers, terminal
marshal, backend, parseopt, specs, htmlgen, browsers, terminal,
algorithm
const
resultsFile = "testresults.html"
@@ -150,6 +151,11 @@ proc codegenCheck(test: TTest, check: string, given: var TSpec) =
except EIO:
given.err = reCodeNotFound
proc makeDeterministic(s: string): string =
var x = splitLines(s)
sort(x, system.cmp)
result = join(x, "\n")
proc testSpec(r: var TResults, test: TTest) =
# major entry point for a single test
let tname = test.name.addFileExt(".nim")
@@ -191,8 +197,10 @@ proc testSpec(r: var TResults, test: TTest) =
r.addResult(test, "exitcode: " & $expected.exitCode,
"exitcode: " & $exitCode, reExitCodesDiffer)
else:
if strip(buf.string) != strip(expected.outp):
if not (expected.substr and expected.outp in buf.string):
var bufB = strip(buf.string)
if expected.sortoutput: bufB = makeDeterministic(bufB)
if bufB != strip(expected.outp):
if not (expected.substr and expected.outp in bufB):
given.err = reOutputsDiffer
if given.err == reSuccess:
codeGenCheck(test, expected.ccodeCheck, given)

View File

@@ -1,6 +1,23 @@
version 0.9.6
=============
Concurrency
-----------
- the disjoint checker needs to deal with 'a = spawn f(); g = spawn f()'
- implement 'deepCopy' builtin
- implement 'foo[1..4] = spawn(f[4..7])'
- support for exception propagation
- Minor: The copying of the 'ref Promise' into the thead local storage only
happens to work due to the write barrier's implementation
- 'gcsafe' inferrence needs to be fixed
- implement lock levels --> first without the more complex race avoidance
- document the new 'spawn' and 'parallel' statements
Misc
----
- fix the bug that keeps 'defer' template from working
- make '--implicitStatic:on' the default
- fix the tuple unpacking in lambda bug

View File

@@ -2,6 +2,23 @@
News
====
..
2014-06-29 Version 0.9.6 released
=================================
Changes affecting backwards compatibility
-----------------------------------------
- ``spawn`` now uses an elaborate self-adapting thread pool and as such
has been moved into its own module. So to use it, you now have to import
``threadpool``.
Library Additions
-----------------
- Added module ``cpuinfo``.
- Added module ``threadpool``.
2014-04-21 Version 0.9.4 released