Merge branch 'new_spawn' of https://github.com/Araq/Nimrod into new_spawn

This commit is contained in:
Araq
2014-06-01 22:00:06 +02:00
38 changed files with 1923 additions and 159 deletions

View File

@@ -605,9 +605,9 @@ const
# thus cannot be overloaded (also documented in the spec!):
SpecialSemMagics* = {
mDefined, mDefinedInScope, mCompiles, mLow, mHigh, mSizeOf, mIs, mOf,
mEcho, mShallowCopy, mExpandToAst}
mEcho, mShallowCopy, mExpandToAst, mParallel, mSpawn}
type
type
PNode* = ref TNode
TNodeSeq* = seq[PNode]
PType* = ref TType
@@ -885,6 +885,8 @@ const
nkCallKinds* = {nkCall, nkInfix, nkPrefix, nkPostfix,
nkCommand, nkCallStrLit, nkHiddenCallConv}
nkIdentKinds* = {nkIdent, nkSym, nkAccQuoted, nkOpenSymChoice,
nkClosedSymChoice}
nkLiterals* = {nkCharLit..nkTripleStrLit}
nkLambdaKinds* = {nkLambda, nkDo}

View File

@@ -86,7 +86,7 @@ proc openArrayLoc(p: BProc, n: PNode): PRope =
initLocExpr(p, q[2], b)
initLocExpr(p, q[3], c)
let fmt =
case skipTypes(a.t, abstractVar).kind
case skipTypes(a.t, abstractVar+{tyPtr}).kind
of tyOpenArray, tyVarargs, tyArray, tyArrayConstr:
"($1)+($2), ($3)-($2)+1"
of tyString, tySequence:

View File

@@ -1636,7 +1636,10 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) =
of mSlurp..mQuoteAst:
localError(e.info, errXMustBeCompileTime, e.sons[0].sym.name.s)
of mSpawn:
let n = lowerings.wrapProcForSpawn(p.module.module, e.sons[1])
let n = lowerings.wrapProcForSpawn(p.module.module, e[1], e.typ, nil, nil)
expr(p, n, d)
of mParallel:
let n = semparallel.liftParallel(p.module.module, e)
expr(p, n, d)
else: internalError(e.info, "genMagicExpr: " & $op)

View File

@@ -14,7 +14,8 @@ import
options, intsets,
nversion, nimsets, msgs, crc, bitsets, idents, lists, types, ccgutils, os,
times, ropes, math, passes, rodread, wordrecg, treetab, cgmeth,
rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings
rodutils, renderer, idgen, cgendata, ccgmerge, semfold, aliases, lowerings,
semparallel
when options.hasTinyCBackend:
import tccgen

View File

@@ -1,7 +1,7 @@
#
#
# The Nimrod Compiler
# (c) Copyright 2013 Andreas Rumpf
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
@@ -9,7 +9,8 @@
## This module implements the 'implies' relation for guards.
import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents
import ast, astalgo, msgs, magicsys, nimsets, trees, types, renderer, idents,
saturate
const
someEq = {mEqI, mEqI64, mEqF64, mEqEnum, mEqCh, mEqB, mEqRef, mEqProc,
@@ -25,6 +26,17 @@ const
someIn = {mInRange, mInSet}
someHigh = {mHigh}
# we don't list unsigned here because wrap around semantics suck for
# proving anything:
someAdd = {mAddI, mAddI64, mAddF64, mSucc}
someSub = {mSubI, mSubI64, mSubF64, mPred}
someMul = {mMulI, mMulI64, mMulF64}
someDiv = {mDivI, mDivI64, mDivF64}
someMod = {mModI, mModI64}
someMax = {mMaxI, mMaxI64, mMaxF64}
someMin = {mMinI, mMinI64, mMinF64}
proc isValue(n: PNode): bool =
  ## True when `n` is a literal node, i.e. its kind lies in the
  ## `nkCharLit..nkNilLit` range.
  result = n.kind in {nkCharLit..nkNilLit}

proc isLocation(n: PNode): bool =
  ## True for every node that is not a literal value (see `isValue`).
  result = not isValue(n)
@@ -69,19 +81,24 @@ proc isLetLocation(m: PNode, isApprox: bool): bool =
proc interestingCaseExpr*(m: PNode): bool =
  ## Reports whether `m` passes `isLetLocation` with the approximate
  ## check enabled (`isApprox = true`).
  result = isLetLocation(m, true)
proc getMagicOp(name: string, m: TMagic): PSym =
proc createMagic*(name: string, m: TMagic): PSym =
result = newSym(skProc, getIdent(name), nil, unknownLineInfo())
result.magic = m
let
opLe = getMagicOp("<=", mLeI)
opLt = getMagicOp("<", mLtI)
opAnd = getMagicOp("and", mAnd)
opOr = getMagicOp("or", mOr)
opNot = getMagicOp("not", mNot)
opIsNil = getMagicOp("isnil", mIsNil)
opContains = getMagicOp("contains", mInSet)
opEq = getMagicOp("==", mEqI)
opLe = createMagic("<=", mLeI)
opLt = createMagic("<", mLtI)
opAnd = createMagic("and", mAnd)
opOr = createMagic("or", mOr)
opNot = createMagic("not", mNot)
opIsNil = createMagic("isnil", mIsNil)
opContains = createMagic("contains", mInSet)
opEq = createMagic("==", mEqI)
opAdd = createMagic("+", mAddI)
opSub = createMagic("-", mSubI)
opMul = createMagic("*", mMulI)
opDiv = createMagic("div", mDivI)
opLen = createMagic("len", mLengthSeq)
proc swapArgs(fact: PNode, newOp: PSym): PNode =
result = newNodeI(nkCall, fact.info, 3)
@@ -137,17 +154,141 @@ proc neg(n: PNode): PNode =
result.sons[0] = newSymNode(opNot)
result.sons[1] = n
proc buildIsNil(arg: PNode): PNode =
result = newNodeI(nkCall, arg.info, 2)
result.sons[0] = newSymNode(opIsNil)
result.sons[1] = arg
proc buildCall(op: PSym; a: PNode): PNode =
  ## Creates the unary call `op(a)` as an `nkCall` node that carries the
  ## line information of `a`.
  result = newNodeI(nkCall, a.info, 2)
  result.sons[0] = op.newSymNode
  result.sons[1] = a

proc buildCall(op: PSym; a, b: PNode): PNode =
  ## Creates the binary call `a op b` as an `nkInfix` node that carries the
  ## line information of `a`.
  result = newNodeI(nkInfix, a.info, 3)
  result.sons[0] = op.newSymNode
  result.sons[1] = a
  result.sons[2] = b
proc `|+|`(a, b: PNode): PNode =
  ## Folds the sum of two literal nodes into a copy of `a`.
  ## Integer-like literals (`nkCharLit..nkUInt64Lit`) are combined with the
  ## `|+|` operator on their `intVal`s (presumably the overflow-safe addition
  ## from the imported `saturate` module); every other kind is treated as a
  ## float literal and added with plain `+`.
  result = copyNode(a)
  case a.kind
  of nkCharLit..nkUInt64Lit:
    result.intVal = a.intVal |+| b.intVal
  else:
    result.floatVal = a.floatVal + b.floatVal

proc `|*|`(a, b: PNode): PNode =
  ## Folds the product of two literal nodes into a copy of `a`; the integer
  ## and float cases are split exactly as in `|+|`.
  result = copyNode(a)
  case a.kind
  of nkCharLit..nkUInt64Lit:
    result.intVal = a.intVal |*| b.intVal
  else:
    result.floatVal = a.floatVal * b.floatVal
proc negate(a, b, res: PNode): PNode =
  ## Rewrites the subtraction `a - b` as an addition with a negated literal.
  ##
  ## * `b` is an integer literal other than `low(BiggestInt)`: the literal is
  ##   negated; when `a` is an integer literal too, both are folded into a
  ##   single literal, otherwise `a + (-b)` is built.
  ## * `b` is a float literal: `a + (-b)` is built.
  ## * anything else: `res` is returned unchanged (callers may pass nil).
  const intLiterals = {nkCharLit..nkUInt64Lit}
  if b.kind in intLiterals and b.intVal != low(BiggestInt):
    # low(BiggestInt) is excluded because it cannot be negated.
    let negated = copyNode(b)
    negated.intVal = -b.intVal
    if a.kind in intLiterals:
      negated.intVal = negated.intVal |+| a.intVal
      result = negated
    else:
      result = opAdd.buildCall(a, negated)
  elif b.kind in {nkFloatLit..nkFloat64Lit}:
    let negated = copyNode(b)
    negated.floatVal = -b.floatVal
    result = opAdd.buildCall(a, negated)
  else:
    result = res
proc zero(): PNode =
  ## Fresh `nkIntLit` node holding 0.
  result = newIntNode(nkIntLit, 0)

proc one(): PNode =
  ## Fresh `nkIntLit` node holding 1.
  result = newIntNode(nkIntLit, 1)

proc minusOne(): PNode =
  ## Fresh `nkIntLit` node holding -1.
  result = newIntNode(nkIntLit, -1)
proc lowBound*(x: PNode): PNode =
  ## Integer literal node holding `firstOrd(x.typ)`, positioned at `x`.
  let first = firstOrd(x.typ)
  result = newIntNode(nkIntLit, first)
  result.info = x.info
proc highBound*(x: PNode): PNode =
  ## Upper index bound of `x`, positioned at `x`: the literal
  ## `lastOrd(x.typ)` when `x` is an array (after skipping `abstractInst`),
  ## otherwise the symbolic expression `len(x) + (-1)`.
  if x.typ.skipTypes(abstractInst).kind == tyArray:
    result = newIntNode(nkIntLit, lastOrd(x.typ))
  else:
    let length = opLen.buildCall(x)
    result = opAdd.buildCall(length, minusOne())
  result.info = x.info
proc reassociation(n: PNode): PNode =
  ## Folds the two literal operands of a left-nested `+` or `*` chain into
  ## a single literal:
  ##
  ## * `(foo + 5) + 5 --> foo + 10`
  ## * `(foo * 5) * 5 --> foo * 25`
  ##
  ## Any other node is returned unchanged. Only the outermost level of `n`
  ## is inspected.
  result = n
  case result.getMagic
  of someAdd:
    if result[2].isValue and
        result[1].getMagic in someAdd and result[1][2].isValue:
      result = opAdd.buildCall(result[1][1], result[1][2] |+| result[2])
  of someMul:
    if result[2].isValue and
        result[1].getMagic in someMul and result[1][2].isValue:
      # The folded node must remain a product; building it with 'opAdd'
      # would silently turn '(x*a)*b' into 'x + (a*b)'.
      result = opMul.buildCall(result[1][1], result[1][2] |*| result[2])
  else: discard
proc canon*(n: PNode): PNode =
  ## Brings `n` into the canonical form used by the `<=` prover.
  ##
  ## The children are canonicalised first (on a shallow copy, `n` itself is
  ## not mutated). Then the following rewrites are applied to the root:
  ##
  ## * symmetric operators (`==`, `+`, `*`, `min`, `max`) get their literal
  ##   operand moved to the right-hand side;
  ## * `high(x)` becomes `len(x) + (-1)`;
  ## * unary minus `-x` becomes `x * (-1)`;
  ## * `x - c` becomes `x + (-c)` (see `negate`);
  ## * every `len`-like magic is normalised to the single `opLen` symbol;
  ## * conversions are skipped and nested literals are folded
  ##   (see `reassociation`);
  ## * for `<=` and `<` a literal offset is moved to the other side of the
  ##   comparison, e.g. `(x-4) < a.len --> x < a.len+4`.
  # XXX for now only the new code in 'semparallel' uses this
  if n.safeLen >= 1:
    result = shallowCopy(n)
    for i in 0 .. < n.len:
      result.sons[i] = canon(n.sons[i])
  else:
    result = n
  case result.getMagic
  of someEq, someAdd, someMul, someMin, someMax:
    # these are symmetric; put value as last:
    if result.sons[1].isValue and not result.sons[2].isValue:
      result = swapArgs(result, result.sons[0].sym)
      # (4 + foo) + 2 --> (foo + 4) + 2
  of someHigh:
    # high == len+(-1)
    result = opAdd.buildCall(opLen.buildCall(result[1]), minusOne())
  of mUnaryMinusI, mUnaryMinusI64:
    # -x == x * (-1). Rewriting it as 'x + (-1)' would be unsound: the
    # prover could then derive '-x <= x' for every x.
    result = buildCall(opMul, result[1], newIntNode(nkIntLit, -1))
  of someSub:
    # x - 4 --> x + (-4)
    result = negate(result[1], result[2], result)
  of someLen:
    result.sons[0] = opLen.newSymNode
  else: discard

  result = skipConv(result)
  result = reassociation(result)
  # most important rule: (x-4) < a.len --> x < a.len+4
  case result.getMagic
  of someLe, someLt:
    let x = result[1]
    let y = result[2]
    if x.kind in nkCallKinds and x.len == 3 and x[2].isValue and
        isLetLocation(x[1], true):
      case x.getMagic
      of someSub:
        result = buildCall(result[0].sym, x[1],
                           reassociation(opAdd.buildCall(y, x[2])))
      of someAdd:
        # Rule A:
        # 'negate' hands back its 'res' argument (nil here) when 'x[2]' is
        # not a negatable literal, so test for nil *before* reassociating.
        let negated = negate(y, x[2], nil)
        if negated != nil:
          result = buildCall(result[0].sym, x[1], negated.reassociation)
      else: discard
    elif y.kind in nkCallKinds and y.len == 3 and y[2].isValue and
        isLetLocation(y[1], true):
      # a.len < x-3
      case y.getMagic
      of someSub:
        result = buildCall(result[0].sym, y[1],
                           reassociation(opAdd.buildCall(x, y[2])))
      of someAdd:
        let negated = negate(x, y[2], nil)
        # ensure that Rule A will not trigger afterwards with the
        # additional 'not isLetLocation' constraint:
        if negated != nil and not isLetLocation(x, true):
          result = buildCall(result[0].sym, negated.reassociation, y[1])
      else: discard
  else: discard
proc `+@`*(a: PNode; b: BiggestInt): PNode =
  ## Canonical form of `a + b`. For `b == 0` no addition node is built and
  ## `a` alone is canonicalised.
  if b != 0:
    result = canon(opAdd.buildCall(a, nkIntLit.newIntNode(b)))
  else:
    result = canon(a)
proc usefulFact(n: PNode): PNode =
case n.getMagic
of someEq:
if skipConv(n.sons[2]).kind == nkNilLit and (
isLetLocation(n.sons[1], false) or isVar(n.sons[1])):
result = buildIsNil(n.sons[1])
result = opIsNil.buildCall(n.sons[1])
else:
if isLetLocation(n.sons[1], true) or isLetLocation(n.sons[2], true):
# XXX algebraic simplifications! 'i-1 < a.len' --> 'i < a.len+1'
@@ -217,7 +358,7 @@ proc addFactNeg*(m: var TModel, n: PNode) =
let n = n.neg
if n != nil: addFact(m, n)
proc sameTree(a, b: PNode): bool =
proc sameTree*(a, b: PNode): bool =
result = false
if a == b:
result = true
@@ -519,7 +660,144 @@ proc doesImply*(facts: TModel, prop: PNode): TImplication =
if result != impUnknown: return
proc impliesNotNil*(facts: TModel, arg: PNode): TImplication =
result = doesImply(facts, buildIsNil(arg).neg)
result = doesImply(facts, opIsNil.buildCall(arg).neg)
proc simpleSlice*(a, b: PNode): BiggestInt =
  ## Matches a slice `a..b` against the shape `(i+c)..(i+c)`.
  ##
  ## Returns `c` when both bounds are the same tree and that tree is an
  ## addition with an integer literal as its second operand, `0` when both
  ## bounds are the same tree of any other shape (`(i)..(i)` is read as
  ## `(i+0)..(i+0)`), and `-1` when the bounds differ.
  if not guards.sameTree(a, b):
    result = -1
  elif a.getMagic in someAdd and a[2].kind in {nkCharLit..nkUInt64Lit}:
    result = a[2].intVal
  else:
    result = 0
proc pleViaModel(model: TModel; aa, bb: PNode): TImplication
proc ple(m: TModel; a, b: PNode): TImplication =
  ## Tries to prove `a <= b` ("provable less-or-equal").
  ##
  ## A fixed list of syntactic rules is tried in order; each one either
  ## returns `impYes` or falls through to the next. Only the literal versus
  ## literal comparison can answer `impNo` directly. If no rule applies the
  ## question is handed to `pleViaModel`, which consults the facts in `m`.
  template `<=?`(a,b): expr = ple(m,a,b) == impYes

  # 0 <= 3
  if a.isValue and b.isValue:
    return if leValue(a, b): impYes else: impNo
  # use type information too: x <= 4 if high(x) <= 4
  if b.isValue and a.typ != nil and a.typ.isOrdinalType:
    if lastOrd(a.typ) <= b.intVal: return impYes
  # 3 <= x if 3 <= low(x): the literal must not exceed the smallest value
  # that 'x' can take. (The reversed test 'low(x) <= 3' would "prove"
  # 3 <= x for every natural number x.)
  if a.isValue and b.typ != nil and b.typ.isOrdinalType:
    if a.intVal <= firstOrd(b.typ): return impYes
  # x <= x
  if sameTree(a, b): return impYes
  # 0 <= x.len
  if b.getMagic in someLen and a.isValue:
    if a.intVal <= 0: return impYes
  # x <= y+c if 0 <= c and x <= y
  if b.getMagic in someAdd and zero() <=? b[2] and a <=? b[1]: return impYes
  # x+c <= y if c <= 0 and x <= y
  if a.getMagic in someAdd and a[2] <=? zero() and a[1] <=? b: return impYes
  # x <= y*c if 1 <= c and x <= y and 0 <= y
  if b.getMagic in someMul:
    if a <=? b[1] and one() <=? b[2] and zero() <=? b[1]: return impYes
  # x div c <= y if 1 <= c and 0 <= y and x <= y:
  if a.getMagic in someDiv:
    if one() <=? a[2] and zero() <=? b and a[1] <=? b: return impYes
  # slightly subtle:
  # x <= max(y, z) iff x <= y or x <= z
  # note that 'x <= max(x, z)' is a special case of the above rule
  if b.getMagic in someMax:
    if a <=? b[1] or a <=? b[2]: return impYes
  # min(x, y) <= z iff x <= z or y <= z
  if a.getMagic in someMin:
    if a[1] <=? b or a[2] <=? b: return impYes
  # use the knowledge base:
  return pleViaModel(m, a, b)
  #return doesImply(m, opLe.buildCall(a, b))
# List of (a, b) pairs; `applyReplacements` substitutes every occurrence of
# subtree `a` by `b`.
type TReplacements = seq[tuple[a,b: PNode]]
proc replaceSubTree(n, x, by: PNode): PNode =
  ## Returns `n` with every occurrence of the subtree `x` replaced by `by`.
  ## Nodes on the path to a replacement are shallow-copied; a tree that does
  ## not contain `x` is returned as is.
  if sameTree(n, x): return by
  if not hasSubTree(n, x): return n
  result = shallowCopy(n)
  for i in 0 .. < n.safeLen:
    result.sons[i] = replaceSubTree(n.sons[i], x, by)
proc applyReplacements(n: PNode; rep: TReplacements): PNode =
  ## Applies the substitutions in `rep` to `n` one after another, in list
  ## order; each step operates on the outcome of the previous one.
  result = n
  for pair in rep:
    result = replaceSubTree(result, pair.a, pair.b)
proc pleViaModelRec(m: var TModel; a, b: PNode): TImplication =
  ## Tries to derive `a <= b` from the `<=` facts stored in `m`.
  ##
  ## `m` is consumed while searching: every `<=` fact that gets examined is
  ## overwritten with nil before the recursive `ple` calls are made, so the
  ## same fact is not picked up again further down the recursion.
  ## Returns the first answer other than `impUnknown`; if the loop finishes
  ## without one, `result` keeps its default value.
  # now check for inferrable facts: a <= b and b <= c implies a <= c
  for i in 0..m.high:
    let fact = m[i]
    if fact != nil and fact.getMagic in someLe:
      # x <= y implies a <= b if a <= x and y <= b
      let x = fact[1]
      let y = fact[2]
      # mark as used:
      m[i] = nil
      if ple(m, a, x) == impYes:
        if ple(m, y, b) == impYes: return impYes
        #if pleViaModelRec(m, y, b): return impYes
      # fact: 16 <= i
      # x y
      # question: i <= 15? no!
      result = impliesLe(fact, a, b)
      if result != impUnknown: return result
      # NOTE(review): when the fact is 'x <= a' this asks for 'x <= b';
      # 'x <= a' and 'x <= b' together do not imply 'a <= b' -- confirm that
      # this step is intended.
      if sameTree(y, a):
        result = ple(m, x, b)
        if result != impUnknown: return result
proc pleViaModel(model: TModel; aa, bb: PNode): TImplication =
  ## Tries to prove `aa <= bb` with the help of the facts in `model`.
  ##
  ## Every equality fact is turned into a substitution (a symbol on the
  ## left-hand side is replaced by the right-hand side, otherwise the other
  ## way round). If there are substitutions, they are applied to the
  ## remaining facts and to both operands; the rewritten facts are
  ## re-canonicalised. The search itself is done by `pleViaModelRec` on a
  ## private copy of the model, since that proc modifies it.
  var substitutions: TReplacements = @[]
  for fact in model:
    if fact == nil or fact.getMagic notin someEq: continue
    let lhs = fact[1]
    let rhs = fact[2]
    if lhs.kind == nkSym: substitutions.add((lhs, rhs))
    else: substitutions.add((rhs, lhs))

  var
    working: TModel
    lower = aa
    upper = bb
  if substitutions.len == 0:
    # a copy is required because the model is modified during the search:
    working = model
  else:
    working = @[]
    # keep the non-equality facts consistent with the substitutions:
    for fact in model:
      if fact != nil and fact.getMagic notin someEq:
        # XXX 'canon' should not be necessary here, but it is
        working.add canon(applyReplacements(fact, substitutions))
    lower = applyReplacements(aa, substitutions)
    upper = applyReplacements(bb, substitutions)
  result = pleViaModelRec(working, lower, upper)
proc proveLe*(m: TModel; a, b: PNode): TImplication =
  ## Tries to prove `a <= b` under the facts in `m`.
  ##
  ## The canonicalised relation is given to `ple` first. If that is
  ## inconclusive, the equivalent formulation
  ## `a <= b iff not (b < a) iff not (b+1 <= a)` is tried and its answer is
  ## inverted with `~`.
  let direct = canon(buildCall(opLe, a, b))
  result = ple(m, direct[1], direct[2])
  if result != impUnknown: return
  let bPlusOne = buildCall(opAdd, b, one())
  let reversed = canon(buildCall(opLe, bPlusOne, a))
  result = ~ple(m, reversed[1], reversed[2])
proc addFactLe*(m: var TModel; a, b: PNode) =
  ## Appends the fact `a <= b`, in canonical form, to the model `m`.
  let fact = canon(buildCall(opLe, a, b))
  m.add fact
proc settype(n: PNode): PType =
result = newType(tySet, n.typ.owner)

View File

@@ -13,6 +13,8 @@ const
genPrefix* = ":tmp" # prefix for generated names
import ast, astalgo, types, idents, magicsys, msgs, options
from guards import createMagic
from trees import getMagic
proc newTupleAccess*(tup: PNode, i: int): PNode =
result = newNodeIT(nkBracketExpr, tup.info, tup.typ.skipTypes(
@@ -68,6 +70,7 @@ proc addField*(obj: PType; s: PSym) =
var field = newSym(skField, getIdent(s.name.s & $s.id), s.owner, s.info)
let t = skipIntLit(s.typ)
field.typ = t
assert t.kind != tyStmt
field.position = sonsLen(obj.n)
addSon(obj.n, newSymNode(field))
@@ -79,19 +82,29 @@ proc newDotExpr(obj, b: PSym): PNode =
addSon(result, newSymNode(field))
result.typ = field.typ
proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode =
proc indirectAccess*(a: PNode, b: string, info: TLineInfo): PNode =
# returns a[].b as a node
var deref = newNodeI(nkHiddenDeref, info)
deref.typ = a.typ.sons[0]
assert deref.typ.kind == tyObject
let field = getSymFromList(deref.typ.n, getIdent(b.name.s & $b.id))
assert field != nil, b.name.s
deref.typ = a.typ.skipTypes(abstractInst).sons[0]
var t = deref.typ
var field: PSym
while true:
assert t.kind == tyObject
field = getSymFromList(t.n, getIdent(b))
if field != nil: break
t = t.sons[0]
if t == nil: break
assert field != nil, b
addSon(deref, a)
result = newNodeI(nkDotExpr, info)
addSon(result, deref)
addSon(result, newSymNode(field))
result.typ = field.typ
proc indirectAccess*(a: PNode, b: PSym, info: TLineInfo): PNode =
  ## Returns `a[].b` as a node. The field is looked up under the name
  ## `b.name.s & $b.id`; the work is delegated to the overload that takes
  ## the field name as a string.
  let fieldName = b.name.s & $b.id
  result = indirectAccess(a, fieldName, info)
proc indirectAccess*(a, b: PSym, info: TLineInfo): PNode =
result = indirectAccess(newSymNode(a), b, info)
@@ -101,6 +114,11 @@ proc genAddrOf*(n: PNode): PNode =
result.typ = newType(tyPtr, n.typ.owner)
result.typ.rawAddSon(n.typ)
proc genDeref*(n: PNode): PNode =
  ## Wraps `n` in an `nkHiddenDeref` node. The node's type is the first son
  ## of `n`'s type after skipping `abstractInst`.
  let pointee = n.typ.skipTypes(abstractInst).sons[0]
  result = newNodeIT(nkHiddenDeref, n.info, pointee)
  result.add(n)
proc callCodegenProc*(name: string, arg1: PNode;
arg2, arg3: PNode = nil): PNode =
result = newNodeI(nkCall, arg1.info)
@@ -112,13 +130,114 @@ proc callCodegenProc*(name: string, arg1: PNode;
result.add arg1
if arg2 != nil: result.add arg2
if arg3 != nil: result.add arg3
result.typ = sym.typ.sons[0]
# we have 4 cases to consider:
# - a void proc --> nothing to do
# - a proc returning GC'ed memory --> requires a promise
# - a proc returning non GC'ed memory --> pass as hidden 'var' parameter
# - not in a parallel environment --> requires a promise for memory safety
type
  # How the result of a spawned call is delivered (chosen by `spawnResult`):
  #   srVoid    - the return type is empty, nothing is delivered
  #   srByVar   - inside a 'parallel' section and the return type contains
  #               no GC'ed reference
  #   srPromise - every other case
  TSpawnResult = enum
    srVoid, srPromise, srByVar
  # Classification of the payload type T of a 'Promise[T]'
  # (computed by `promiseKind`).
  TPromiseKind = enum
    promInvalid # invalid type T for 'Promise[T]'
    promGC # Promise of a GC'ed type
    promBlob # Promise of a blob type
proc spawnResult(t: PType; inParallel: bool): TSpawnResult =
  ## Decides how the result of a spawned call with return type `t` is
  ## delivered: nothing for an empty type, a hidden by-var destination
  ## inside a 'parallel' section when `t` contains no GC'ed reference, and a
  ## promise in every other case.
  if t.isEmptyType:
    result = srVoid
  elif not inParallel or containsGarbageCollectedRef(t):
    result = srPromise
  else:
    result = srByVar
proc promiseKind(t: PType): TPromiseKind =
  ## Classifies the payload type `t` of a promise: `promGC` for ref, string
  ## and seq types (after skipping `abstractInst`), `promInvalid` for any
  ## other type that contains a GC'ed reference, `promBlob` otherwise.
  let skipped = t.skipTypes(abstractInst)
  if skipped.kind in {tyRef, tyString, tySequence}:
    result = promGC
  elif containsGarbageCollectedRef(t):
    result = promInvalid
  else:
    result = promBlob
proc addLocalVar(varSection: PNode; owner: PSym; typ: PType; v: PNode): PSym =
  ## Creates a temporary symbol of type `typ` owned by `owner`, appends the
  ## declaration `var tmp = v` to `varSection` and returns the new symbol.
  result = newSym(skTemp, getIdent(genPrefix), owner, varSection.info)
  result.typ = typ
  result.flags.incl(sfFromGeneric)

  let identDefs = newNodeI(nkIdentDefs, varSection.info, 3)
  identDefs.sons[0] = newSymNode(result)
  identDefs.sons[1] = ast.emptyNode   # no explicit type node
  identDefs.sons[2] = v               # initialiser
  varSection.add(identDefs)
discard """
We generate roughly this:
proc f_wrapper(args) =
barrierEnter(args.barrier) # for parallel statement
var a = args.a # thread transfer; deepCopy or shallowCopy or no copy
# depending on whether we're in a 'parallel' statement
var b = args.b
args.prom = nimCreatePromise(thread, sizeof(T)) # optional
nimPromiseCreateCondVar(args.prom) # optional
nimArgsPassingDone() # signal parent that the work is done
#
args.prom.blob = f(a, b, ...)
nimPromiseSignal(args.prom)
# - or -
f(a, b, ...)
barrierLeave(args.barrier) # for parallel statement
stmtList:
var scratchObj
scratchObj.a = a
scratchObj.b = b
nimSpawn(f_wrapper, addr scratchObj)
scratchObj.prom # optional
"""
proc createNimCreatePromiseCall(prom, threadParam: PNode): PNode =
  ## Builds the statement
  ## `prom = cast[type(prom)](nimCreatePromise(threadParam, sizeof(T)))`
  ## where `T` is `prom.typ.sons[1]`. `prom` must have a `tyGenericInst`
  ## type.
  let sizeCall = newNodeIT(nkCall, prom.info, getSysType(tyInt))
  sizeCall.add(newSymNode(createMagic("sizeof", mSizeOf)))
  assert prom.typ.kind == tyGenericInst
  sizeCall.add(newNodeIT(nkType, prom.info, prom.typ.sons[1]))

  let castNode = newNodeIT(nkCast, prom.info, prom.typ)
  castNode.add(emptyNode)
  castNode.add(callCodeGenProc("nimCreatePromise", threadParam, sizeCall))
  result = newFastAsgnStmt(prom, castNode)
proc createWrapperProc(f: PNode; threadParam, argsParam: PSym;
varSection, call: PNode): PSym =
varSection, call, barrier, prom: PNode;
spawnKind: TSpawnResult): PSym =
var body = newNodeI(nkStmtList, f.info)
if barrier != nil:
body.add callCodeGenProc("barrierEnter", barrier)
var threadLocalProm: PSym
if spawnKind == srByVar:
threadLocalProm = addLocalVar(varSection, argsParam.owner, prom.typ, prom)
body.add varSection
body.add callCodeGenProc("nimArgsPassingDone", newSymNode(threadParam))
body.add call
if prom != nil and spawnKind != srByVar:
body.add createNimCreatePromiseCall(prom, threadParam.newSymNode)
if barrier == nil:
body.add callCodeGenProc("nimPromiseCreateCondVar", prom)
body.add callCodeGenProc("nimArgsPassingDone", threadParam.newSymNode)
if spawnKind == srByVar:
body.add newAsgnStmt(genDeref(threadLocalProm.newSymNode), call)
elif prom != nil:
let fk = prom.typ.sons[1].promiseKind
if fk == promInvalid:
localError(f.info, "cannot create a promise of type: " &
typeToString(prom.typ.sons[1]))
body.add newAsgnStmt(indirectAccess(prom,
if fk == promGC: "data" else: "blob", prom.info), call)
if barrier == nil:
body.add callCodeGenProc("nimPromiseSignal", prom)
else:
body.add call
if barrier != nil:
body.add callCodeGenProc("barrierLeave", barrier)
var params = newNodeI(nkFormalParams, f.info)
params.add emptyNode
@@ -146,10 +265,151 @@ proc createCastExpr(argsParam: PSym; objType: PType): PNode =
result.typ = newType(tyPtr, objType.owner)
result.typ.rawAddSon(objType)
proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
result = newNodeI(nkStmtList, n.info)
if n.kind notin nkCallKinds or not n.typ.isEmptyType:
localError(n.info, "'spawn' takes a call expression of type void")
proc setupArgsForConcurrency(n: PNode; objType: PType; scratchObj: PSym,
                             castExpr, call, varSection, result: PNode) =
  ## Argument transfer for a `spawn` outside of a 'parallel' section.
  ##
  ## For every argument of the call `n` a field is added to `objType`, the
  ## assignment `scratchObj.field = arg` is appended to `result`, a local
  ## variable initialised from `castExpr[].field` is appended to
  ## `varSection`, and that local is added to `call` as the argument.
  ## 'var' parameters and argument types containing a ref are reported as
  ## errors.
  let formals = n[0].typ.n
  let tmpName = getIdent(genPrefix)
  for i in 1 .. <n.len:
    let arg = n[i]
    let hasFormal = i < formals.len
    # we pick the argument's type here, which hopefully is 'tyArray' and not
    # 'tyOpenArray':
    let argType = arg.typ.skipTypes(abstractInst)
    if hasFormal and formals[i].typ.kind == tyVar:
      localError(arg.info, "'spawn'ed function cannot have a 'var' parameter")
    elif containsTyRef(argType):
      localError(arg.info, "'spawn'ed function cannot refer to 'ref'/closure")

    let fieldname = if hasFormal: formals[i].sym.name else: tmpName
    let field = newSym(skField, fieldname, objType.owner, n.info)
    field.typ = argType
    objType.addField(field)
    result.add(newFastAsgnStmt(newDotExpr(scratchObj, field), arg))

    let fieldAccess = indirectAccess(castExpr, field, n.info)
    let temp = addLocalVar(varSection, objType.owner, argType, fieldAccess)
    call.add(newSymNode(temp))
proc getRoot*(n: PNode): PSym =
  ## ``getRoot`` takes a *path* ``n``. A path is an lvalue expression
  ## like ``obj.x[i].y``. The *root* of a path is the symbol that can be
  ## determined as the owner; ``obj`` in the example.
  ##
  ## Returns nil when no root can be determined, which includes a path
  ## ending in a symbol that is not a var, result, temp, let or for-loop
  ## variable.
  var it = n
  while true:
    case it.kind
    of nkSym:
      if it.sym.kind in {skVar, skResult, skTemp, skLet, skForVar}:
        result = it.sym
      break
    of nkDotExpr, nkBracketExpr, nkHiddenDeref, nkDerefExpr,
       nkObjUpConv, nkObjDownConv, nkCheckedFieldExpr:
      it = it.sons[0]
    of nkHiddenStdConv, nkHiddenSubConv, nkConv:
      it = it.sons[1]
    of nkCallKinds:
      # only a slice call is looked through; its first argument is the
      # sliced container
      if getMagic(it) != mSlice: break
      it = it.sons[1]
    else:
      break
proc newIntLit(value: BiggestInt): PNode =
  ## `nkIntLit` node holding `value`, typed as the system `int`.
  result = newIntNode(nkIntLit, value)
  result.typ = getSysType(tyInt)
proc genHigh(n: PNode): PNode =
  ## Expression for the highest index of `n`: an integer literal for arrays
  ## and array constructors (after skipping `abstractVar`), otherwise a
  ## call of the `high` magic on `n`, typed as `int`.
  let t = skipTypes(n.typ, abstractVar)
  if t.kind in {tyArrayConstr, tyArray}:
    result = newIntLit(lastOrd(t))
  else:
    result = newNodeI(nkCall, n.info, 2)
    result.typ = getSysType(tyInt)
    result.sons[0] = newSymNode(createMagic("high", mHigh))
    result.sons[1] = n
proc setupArgsForParallelism(n: PNode; objType: PType; scratchObj: PSym;
                             castExpr, call, varSection, result: PNode) =
  ## Argument transfer for a `spawn` inside a 'parallel' section.
  ##
  ## For every argument of the call `n`, fields are added to `objType`,
  ## assignments that fill these fields of `scratchObj` are appended to
  ## `result`, local variables reading them back through `castExpr` are
  ## appended to `varSection`, and the expression to pass is added to
  ## `call`. Three cases are distinguished per argument:
  ##
  ## * openArray/varargs parameter: the address of the container plus the
  ##   index bounds are stored and a `slice` call is passed;
  ## * a type whose computed size is unknown (< 0) or larger than 16 and
  ##   whose argument has a root symbol: the address is stored and a
  ##   dereference is passed;
  ## * everything else: the value itself is stored and passed.
  let formals = n[0].typ.n
  let tmpName = getIdent(genPrefix)
  # we need to copy the foreign scratch object fields into local variables
  # for correctness: These are called 'threadLocal' here.
  for i in 1 .. <n.len:
    # from here on 'n' is the i-th argument, not the call
    let n = n[i]
    let argType = skipTypes(if i < formals.len: formals[i].typ else: n.typ,
                            abstractInst)
    if containsTyRef(argType):
      localError(n.info, "'spawn'ed function cannot refer to 'ref'/closure")

    let fieldname = if i < formals.len: formals[i].sym.name else: tmpName
    var field = newSym(skField, fieldname, objType.owner, n.info)

    if argType.kind in {tyVarargs, tyOpenArray}:
      # important special case: we always create a zero-copy slice:
      let slice = newNodeI(nkCall, n.info, 4)
      slice.typ = n.typ
      slice.sons[0] = newSymNode(createMagic("slice", mSlice))
      # 'fieldB' holds the upper bound of the slice
      var fieldB = newSym(skField, tmpName, objType.owner, n.info)
      fieldB.typ = getSysType(tyInt)
      objType.addField(fieldB)

      if getMagic(n) == mSlice:
        # the argument already is a slice x[a..b]: store the address of x
        # and both bounds
        let a = genAddrOf(n[1])
        field.typ = a.typ
        objType.addField(field)
        result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a)

        # 'fieldA' holds the lower bound of the slice
        var fieldA = newSym(skField, tmpName, objType.owner, n.info)
        fieldA.typ = getSysType(tyInt)
        objType.addField(fieldA)
        result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldA), n[2])
        result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldB), n[3])

        let threadLocal = addLocalVar(varSection, objType.owner, fieldA.typ,
                                      indirectAccess(castExpr, fieldA, n.info))
        slice.sons[2] = threadLocal.newSymNode
      else:
        # the whole container is passed: the slice is 0 .. high(container)
        let a = genAddrOf(n)
        field.typ = a.typ
        objType.addField(field)
        result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a)
        result.add newFastAsgnStmt(newDotExpr(scratchObj, fieldB), genHigh(n))

        slice.sons[2] = newIntLit(0)
      # the array itself does not need to go through a thread local variable:
      slice.sons[1] = genDeref(indirectAccess(castExpr, field, n.info))

      let threadLocal = addLocalVar(varSection, objType.owner, fieldB.typ,
                                    indirectAccess(castExpr, fieldB, n.info))
      slice.sons[3] = threadLocal.newSymNode
      call.add slice
    elif (let size = computeSize(argType); size < 0 or size > 16) and
        n.getRoot != nil:
      # it is more efficient to pass a pointer instead:
      let a = genAddrOf(n)
      field.typ = a.typ
      objType.addField(field)
      result.add newFastAsgnStmt(newDotExpr(scratchObj, field), a)
      let threadLocal = addLocalVar(varSection, objType.owner, field.typ,
                                    indirectAccess(castExpr, field, n.info))
      call.add(genDeref(threadLocal.newSymNode))
    else:
      # boring case
      field.typ = argType
      objType.addField(field)
      result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n)
      let threadLocal = addLocalVar(varSection, objType.owner, field.typ,
                                    indirectAccess(castExpr, field, n.info))
      call.add(threadLocal.newSymNode)
proc wrapProcForSpawn*(owner: PSym; n: PNode; retType: PType;
barrier, dest: PNode = nil): PNode =
# if 'barrier' != nil, then it is in a 'parallel' section and we
# generate quite different code
let spawnKind = spawnResult(retType, barrier!=nil)
case spawnKind
of srVoid:
internalAssert dest == nil
result = newNodeI(nkStmtList, n.info)
of srPromise:
internalAssert dest == nil
result = newNodeIT(nkStmtListExpr, n.info, retType)
of srByVar:
if dest == nil: localError(n.info, "'spawn' must not be discarded")
result = newNodeI(nkStmtList, n.info)
if n.kind notin nkCallKinds:
localError(n.info, "'spawn' takes a call expression")
return
if optThreadAnalysis in gGlobalOptions:
if {tfThread, tfNoSideEffect} * n[0].typ.flags == {}:
@@ -162,6 +422,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
threadParam.typ = ptrType
argsParam.typ = ptrType
argsParam.position = 1
var objType = createObj(owner, n.info)
incl(objType.flags, tfFinal)
let castExpr = createCastExpr(argsParam, objType)
@@ -174,7 +435,7 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
varSectionB.addVar(scratchObj.newSymNode)
result.add varSectionB
var call = newNodeI(nkCall, n.info)
var call = newNodeIT(nkCall, n.info, n.typ)
var fn = n.sons[0]
# templates and macros are in fact valid here due to the nature of
# the transformation:
@@ -194,35 +455,39 @@ proc wrapProcForSpawn*(owner: PSym; n: PNode): PNode =
call.add(fn)
var varSection = newNodeI(nkVarSection, n.info)
let formals = n[0].typ.n
let tmpName = getIdent(genPrefix)
for i in 1 .. <n.len:
# we pick n's type here, which hopefully is 'tyArray' and not
# 'tyOpenArray':
var argType = n[i].typ.skipTypes(abstractInst)
if i < formals.len and formals[i].typ.kind == tyVar:
localError(n[i].info, "'spawn'ed function cannot have a 'var' parameter")
elif containsTyRef(argType):
localError(n[i].info, "'spawn'ed function cannot refer to 'ref'/closure")
if barrier.isNil:
setupArgsForConcurrency(n, objType, scratchObj, castExpr, call, varSection, result)
else:
setupArgsForParallelism(n, objType, scratchObj, castExpr, call, varSection, result)
let fieldname = if i < formals.len: formals[i].sym.name else: tmpName
var field = newSym(skField, fieldname, owner, n.info)
field.typ = argType
var barrierAsExpr: PNode = nil
if barrier != nil:
let typ = newType(tyPtr, owner)
typ.rawAddSon(magicsys.getCompilerProc("Barrier").typ)
var field = newSym(skField, getIdent"barrier", owner, n.info)
field.typ = typ
objType.addField(field)
result.add newFastAsgnStmt(newDotExpr(scratchObj, field), n[i])
result.add newFastAsgnStmt(newDotExpr(scratchObj, field), barrier)
barrierAsExpr = indirectAccess(castExpr, field, n.info)
var temp = newSym(skTemp, tmpName, owner, n.info)
temp.typ = argType
incl(temp.flags, sfFromGeneric)
var promField, promAsExpr: PNode = nil
if spawnKind == srPromise:
var field = newSym(skField, getIdent"prom", owner, n.info)
field.typ = retType
objType.addField(field)
promField = newDotExpr(scratchObj, field)
promAsExpr = indirectAccess(castExpr, field, n.info)
elif spawnKind == srByVar:
var field = newSym(skField, getIdent"prom", owner, n.info)
field.typ = newType(tyPtr, objType.owner)
field.typ.rawAddSon(retType)
objType.addField(field)
promAsExpr = indirectAccess(castExpr, field, n.info)
result.add newFastAsgnStmt(newDotExpr(scratchObj, field), genAddrOf(dest))
var vpart = newNodeI(nkIdentDefs, n.info, 3)
vpart.sons[0] = newSymNode(temp)
vpart.sons[1] = ast.emptyNode
vpart.sons[2] = indirectAccess(castExpr, field, n.info)
varSection.add vpart
call.add(newSymNode(temp))
let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call)
let wrapper = createWrapperProc(fn, threadParam, argsParam, varSection, call,
barrierAsExpr, promAsExpr, spawnKind)
result.add callCodeGenProc("nimSpawn", wrapper.newSymNode,
genAddrOf(scratchObj.newSymNode))
if spawnKind == srPromise: result.add promField

View File

@@ -15,7 +15,8 @@ import
magicsys, parser, nversion, nimsets, semfold, importer,
procfind, lookups, rodread, pragmas, passes, semdata, semtypinst, sigmatch,
intsets, transf, vmdef, vm, idgen, aliases, cgmeth, lambdalifting,
evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity
evaltempl, patterns, parampatterns, sempass2, pretty, semmacrosanity,
semparallel
# implementation

View File

@@ -91,6 +91,7 @@ type
generics*: seq[TInstantiationPair] # pending list of instantiated generics to compile
lastGenericIdx*: int # used for the generics stack
hloLoopDetector*: int # used to prevent endless loops in the HLO
inParallelStmt*: int
proc makeInstPair*(s: PSym, inst: PInstantiation): TInstantiationPair =
result.genericSym = s

View File

@@ -1394,11 +1394,6 @@ proc semDefined(c: PContext, n: PNode, onlyCurrentScope: bool): PNode =
result.info = n.info
result.typ = getSysType(tyBool)
proc setMs(n: PNode, s: PSym): PNode =
result = n
n.sons[0] = newSymNode(s)
n.sons[0].info = n.info
proc expectMacroOrTemplateCall(c: PContext, n: PNode): PSym =
## The argument to the proc should be nkCall(...) or similar
## Returns the macro/template symbol
@@ -1590,6 +1585,17 @@ proc semShallowCopy(c: PContext, n: PNode, flags: TExprFlags): PNode =
else:
result = semDirectOp(c, n, flags)
proc createPromise(c: PContext; t: PType; info: TLineInfo): PType =
  ## Instantiates the generic compiler type `Promise` with `t`, i.e. returns
  ## the type `Promise[t]`. Meta types are not allowed in the instantiation.
  let invocation = newType(tyGenericInvokation, c.module)
  let promiseType = magicsys.getCompilerProc("Promise").typ
  addSonSkipIntLit(invocation, promiseType)
  addSonSkipIntLit(invocation, t)
  result = instGenericContainer(c, info, invocation, allowMetaTypes = false)
proc setMs(n: PNode, s: PSym): PNode =
  ## Replaces the callee (`n.sons[0]`) with a symbol node for `s` that
  ## carries `n`'s line information. `n` is modified in place and returned.
  let callee = newSymNode(s)
  callee.info = n.info
  n.sons[0] = callee
  result = n
proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode =
# this is a hotspot in the compiler!
# DON'T forget to update ast.SpecialSemMagics if you add a magic here!
@@ -1611,6 +1617,21 @@ proc semMagic(c: PContext, n: PNode, s: PSym, flags: TExprFlags): PNode =
checkSonsLen(n, 2)
result = newStrNodeT(renderTree(n[1], {renderNoComments}), n)
result.typ = getSysType(tyString)
of mParallel:
result = setMs(n, s)
var x = n.lastSon
if x.kind == nkDo: x = x.sons[bodyPos]
inc c.inParallelStmt
result.sons[1] = semStmt(c, x)
dec c.inParallelStmt
of mSpawn:
result = setMs(n, s)
result.sons[1] = semExpr(c, n.sons[1])
if not result[1].typ.isEmptyType:
if c.inParallelStmt > 0:
result.typ = result[1].typ
else:
result.typ = createPromise(c, result[1].typ, n.info)
else: result = semDirectOp(c, n, flags)
proc semWhen(c: PContext, n: PNode, semCheck = true): PNode =

View File

@@ -1,7 +1,7 @@
#
#
# The Nimrod Compiler
# (c) Copyright 2013 Andreas Rumpf
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
@@ -131,4 +131,3 @@ proc magicsAfterOverloadResolution(c: PContext, n: PNode,
of mNBindSym: result = semBindSym(c, n)
of mLocals: result = semLocals(c, n)
else: result = n

465
compiler/semparallel.nim Normal file
View File

@@ -0,0 +1,465 @@
#
#
# The Nimrod Compiler
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Semantic checking for 'parallel'.
# - codegen needs to support mSlice (+)
# - lowerings must not perform unnecessary copies (+)
# - slices should become "nocopy" to openArray (+)
# - need to perform bound checks (+)
#
# - parallel needs to insert a barrier (+)
# - passed arguments need to be ensured to be "const"
# - what about 'f(a)'? --> f shouldn't have side effects anyway
# - passed arrays need to be ensured not to alias
# - passed slices need to be ensured to be disjoint (+)
# - output slices need special logic (+)
import
ast, astalgo, idents, lowerings, magicsys, guards, sempass2, msgs,
renderer
from trees import getMagic
from strutils import `%`
discard """
one major problem:
spawn f(a[i])
inc i
spawn f(a[i])
is valid, but
spawn f(a[i])
spawn f(a[i])
inc i
is not! However,
spawn f(a[i])
if guard: inc i
spawn f(a[i])
is not valid either! --> We need a flow dependent analysis here.
However:
while foo:
spawn f(a[i])
inc i
spawn f(a[i])
Is not valid either! --> We should really restrict 'inc' to loop endings?
The heuristic that we implement here (that has no false positives) is: Usage
of 'i' in a slice *after* we determined the stride is invalid!
"""
type
# Direction of a monotonic variable.
TDirection = enum
ascending, descending
# Bookkeeping for one local variable tracked by the analysis.
MonotonicVar = object
v, alias: PSym # to support the ordinary 'countup' iterator
# we need to detect aliases
lower, upper, stride: PNode
dir: TDirection
blacklisted: bool # blacklisted variables that are not monotonic
# State accumulated while analysing the body of one 'parallel' section.
AnalysisCtx = object
locals: seq[MonotonicVar]
# one entry per analysed access: x = indexed expression, a..b = index range
slices: seq[tuple[x,a,b: PNode, spawnId: int, inLoop: bool]]
guards: TModel # nested guards
args: seq[PSym] # args must be deeply immutable
spawns: int # we can check that at least 1 spawn is used in
# the 'parallel' section
currentSpawnId: int
inLoop: int
let opSlice = createMagic("slice", mSlice)
proc initAnalysisCtx(): AnalysisCtx =
  # Creates a context whose collections are all empty; the remaining
  # fields are not assigned here.
  result.guards = @[]
  result.args = @[]
  result.slices = @[]
  result.locals = @[]
proc lookupSlot(c: AnalysisCtx; s: PSym): int =
  # Index of the first entry of `c.locals` whose variable or alias is `s`;
  # -1 when there is no such entry.
  result = -1
  var idx = 0
  while idx < c.locals.len:
    if c.locals[idx].v == s or c.locals[idx].alias == s:
      return idx
    inc idx
proc getSlot(c: var AnalysisCtx; v: PSym): ptr MonotonicVar =
  # Returns a pointer to the slot that tracks `v`. A new slot is appended
  # to `c.locals` when `lookupSlot` does not find one.
  var idx = lookupSlot(c, v)
  if idx < 0:
    idx = c.locals.len
    c.locals.setLen(idx + 1)
    c.locals[idx].v = v
  result = addr(c.locals[idx])
proc gatherArgs(c: var AnalysisCtx; n: PNode) =
  # Walks `n` recursively and records the root symbol (as computed by
  # `getRoot`) of every subnode in `c.args`, without duplicates.
  for i in 0 .. <n.safeLen:
    let child = n[i]
    let root = getRoot(child)
    if root != nil and root notin c.args:
      c.args.add root
    gatherArgs(c, child)
proc isSingleAssignable(n: PNode): bool =
  # True for a symbol node that names a temporary, a 'for' loop variable or
  # a 'let' which is neither global nor has its address taken.
  if n.kind != nkSym: return false
  let s = n.sym
  result = s.kind in {skTemp, skForVar, skLet} and
           s.flags * {sfAddrTaken, sfGlobal} == {}
proc isLocal(n: PNode): bool =
  # True for a symbol node that names a result/temp/'for'/var/let symbol
  # which is neither global nor has its address taken.
  if n.kind != nkSym: return false
  let s = n.sym
  result = s.kind in {skResult, skTemp, skForVar, skVar, skLet} and
           s.flags * {sfAddrTaken, sfGlobal} == {}
proc checkLocal(c: AnalysisCtx; n: PNode) =
  # Reports an error for every local inside `n` that already has a stride
  # recorded, i.e. a counter that is used after it has been incremented.
  if not isLocal(n):
    for i in 0 .. <n.safeLen:
      checkLocal(c, n.sons[i])
    return
  let slot = c.lookupSlot(n.sym)
  if slot >= 0 and not c.locals[slot].stride.isNil:
    localError(n.info, "invalid usage of counter after increment")
# Shorthand for the diagnostics below: renders `x` with `renderTree`.
template `?`(x): expr = x.renderTree
proc checkLe(c: AnalysisCtx; a, b: PNode) =
  # Reports an error unless `a <= b` can be proven from the current guards.
  let verdict = proveLe(c.guards, a, b)
  if verdict == impNo:
    localError(a.info, "can prove: " & ?a & " > " & ?b)
  elif verdict == impUnknown:
    localError(a.info, "cannot prove: " & ?a & " <= " & ?b)
proc checkBounds(c: AnalysisCtx; arr, idx: PNode) =
  # Requires both `lowBound(arr) <= idx` and `idx <= highBound(arr)`
  # to be provable.
  let lo = arr.lowBound
  c.checkLe(lo, idx)
  let hi = arr.highBound
  c.checkLe(idx, hi)
proc addLowerBoundAsFacts(c: var AnalysisCtx) =
  # Adds the fact `lower <= v` for every tracked variable that has not
  # been blacklisted.
  for i in 0 .. <c.locals.len:
    if c.locals[i].blacklisted: continue
    addFactLe(c.guards, c.locals[i].lower, newSymNode(c.locals[i].v))
proc addSlice(c: var AnalysisCtx; n: PNode; x, le, ri: PNode) =
  # Records the access `x[le..ri]` for the later disjointness check, after
  # bounds-checking both ends statically.
  checkLocal(c, n)
  let lo = canon(le)
  let hi = canon(ri)
  # perform static bounds checking here; and not later! The lower-bound
  # facts are only needed for this check and are removed again afterwards.
  let mark = c.guards.len
  addLowerBoundAsFacts(c)
  checkBounds(c, x, lo)
  checkBounds(c, x, hi)
  setLen(c.guards, mark)
  c.slices.add((x, lo, hi, c.currentSpawnId, c.inLoop > 0))
proc overlap(m: TModel; x,y,c,d: PNode) =
  # Reports an error unless X..Y and C..D can be proven disjoint.
  # X..Y and C..D overlap iff (X <= D and C <= Y)
  let xLeD = proveLe(m, x, d)
  if xLeD == impNo: return
  if xLeD == impUnknown:
    localError(x.info,
      "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" %
        [?x, ?d, ?x, ?y, ?c, ?d])
    return
  # X <= D is known to hold here, so everything hinges on C <= Y:
  let cLeY = proveLe(m, c, y)
  if cLeY == impYes:
    localError(x.info, "($#)..($#) not disjoint from ($#)..($#)" % [?x, ?y, ?c, ?d])
  elif cLeY == impUnknown:
    localError(x.info,
      "cannot prove: $# > $#; required for ($#)..($#) disjoint from ($#)..($#)" %
        [?c, ?y, ?x, ?y, ?c, ?d])
proc stride(c: AnalysisCtx; n: PNode): BiggestInt =
  # Sum of the recorded strides of all locals that occur in `n`; locals
  # without a recorded stride contribute nothing.
  if not isLocal(n):
    for i in 0 .. <n.safeLen:
      result = result + stride(c, n.sons[i])
    return
  let slot = c.lookupSlot(n.sym)
  if slot >= 0 and not c.locals[slot].stride.isNil:
    result = c.locals[slot].stride.intVal
proc subStride(c: AnalysisCtx; n: PNode): PNode =
  # substitute with stride: every local that has a stride recorded is
  # replaced by `local +@ stride`; all other nodes are kept (inner nodes
  # are shallow-copied so that `n` itself is not modified).
  result = n
  if isLocal(n):
    let slot = c.lookupSlot(n.sym)
    if slot >= 0 and not c.locals[slot].stride.isNil:
      result = n +@ c.locals[slot].stride.intVal
  elif n.safeLen > 0:
    result = shallowCopy(n)
    for i in 0 .. <n.len:
      result.sons[i] = subStride(c, n.sons[i])
# Final check over all slices recorded by `addSlice`. It reports an error
# for every loop slice that may overlap with itself after one stride, and
# for every pair of slices of the same array that belong to different
# spawns and cannot be shown to be disjoint.
proc checkSlicesAreDisjoint(c: var AnalysisCtx) =
# this is the only thing that we need to perform after we have traversed
# the whole tree so that the strides are available.
# First we need to add all the computed lower bounds:
addLowerBoundAsFacts(c)
# Every slice used in a loop needs to be disjoint with itself:
for x,a,b,id,inLoop in items(c.slices):
if inLoop: overlap(c.guards, a,b, c.subStride(a), c.subStride(b))
# Another tricky example is:
# while true:
# spawn f(a[i])
# spawn f(a[i+1])
# inc i # inc i, 2 would be correct here
#
# Or even worse:
# while true:
# spawn f(a[i+1 .. i+3])
# spawn f(a[i+4 .. i+5])
# inc i, 4
# Prove that i*k*stride + 3 != i*k'*stride + 5
# For the correct example this amounts to
# i*k*2 != i*k'*2 + 1
# which is true.
# For now, we don't try to prove things like that at all, even though it'd
# be feasible for many useful examples. Instead we attach the slice to
# a spawn and if the attached spawns differ, we bail out:
for i in 0 .. high(c.slices):
for j in i+1 .. high(c.slices):
let x = c.slices[i]
let y = c.slices[j]
# only slices of the same array that belong to different spawns are compared
if x.spawnId != y.spawnId and guards.sameTree(x.x, y.x):
if not x.inLoop or not y.inLoop:
# XXX strictly speaking, 'or' is not correct here and it needs to
# be 'and'. However this prevents too many obviously correct programs
# like f(a[0..x]); for i in x+1 .. a.high: f(a[i])
overlap(c.guards, x.a, x.b, y.a, y.b)
elif (let k = simpleSlice(x.a, x.b); let m = simpleSlice(y.a, y.b);
k >= 0 and m >= 0):
# ah I cannot resist the temptation and add another sweet heuristic:
# if both slices have the form (i+k)..(i+k) and (i+m)..(i+m) we
# check they are disjoint and k < stride and m < stride:
overlap(c.guards, x.a, x.b, y.a, y.b)
let stride = min(c.stride(x.a), c.stride(y.a))
if k < stride and m < stride:
discard
else:
localError(x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" %
[?x.a, ?x.b, ?y.a, ?y.b])
else:
localError(x.x.info, "cannot prove ($#)..($#) disjoint from ($#)..($#)" %
[?x.a, ?x.b, ?y.a, ?y.b])
proc analyse(c: var AnalysisCtx; n: PNode)
proc analyseSons(c: var AnalysisCtx; n: PNode) =
  # Runs `analyse` on every child of `n`, in order.
  let count = safeLen(n)
  var i = 0
  while i < count:
    analyse(c, n[i])
    inc i
proc min(a, b: PNode): PNode =
  # The node with the smaller `intVal`; `b` when `a` is nil or on a tie.
  result = b
  if not a.isNil and a.intVal < b.intVal:
    result = a
proc fromSystem(op: PSym): bool =
  # Whether the module that `getModule` returns for `op` carries the
  # `sfSystemModule` flag.
  let owningModule = getModule(op)
  result = sfSystemModule in owningModule.flags
# Analyses a direct call `n` to the symbol `op`. Spawns, increments and the
# system `[]` / `[]=` operators get special treatment; every other call
# just has its children analysed.
proc analyseCall(c: var AnalysisCtx; n: PNode; op: PSym) =
if op.magic == mSpawn:
# every spawn gets its own id so that slices can be attributed to it
inc c.spawns
let oldSpawnId = c.currentSpawnId
c.currentSpawnId = c.spawns
gatherArgs(c, n[1])
analyseSons(c, n)
c.currentSpawnId = oldSpawnId
elif op.magic == mInc or (op.name.s == "+=" and op.fromSystem):
if n[1].isLocal:
let incr = n[2].skipConv
# only positive literal increments define a stride; the smallest one wins
if incr.kind in {nkCharLit..nkUInt32Lit} and incr.intVal > 0:
let slot = c.getSlot(n[1].sym)
slot.stride = min(slot.stride, incr)
analyseSons(c, n)
elif op.name.s == "[]" and op.fromSystem:
# NOTE(review): n[2] is indexed as if it were a two-operand call
# (presumably `a .. b`) -- confirm that this always holds here.
c.addSlice(n, n[1], n[2][1], n[2][2])
analyseSons(c, n)
elif op.name.s == "[]=" and op.fromSystem:
c.addSlice(n, n[1], n[2][1], n[2][2])
analyseSons(c, n)
else:
analyseSons(c, n)
# Analyses a 'case' statement. Each branch is analysed with only the facts
# of that branch added; the guards are reset to their previous length
# before every branch and once more at the end.
proc analyseCase(c: var AnalysisCtx; n: PNode) =
analyse(c, n.sons[0])
let oldFacts = c.guards.len
for i in 1.. <n.len:
let branch = n.sons[i]
setLen(c.guards, oldFacts)
addCaseBranchFacts(c.guards, n, i)
for i in 0 .. <branch.len:
analyse(c, branch.sons[i])
setLen(c.guards, oldFacts)
# Analyses an 'if' statement/expression. The first branch is analysed under
# its own condition. Every later branch is analysed under the negation of
# all earlier conditions plus its own condition, if it has one.
proc analyseIf(c: var AnalysisCtx; n: PNode) =
analyse(c, n.sons[0].sons[0])
let oldFacts = c.guards.len
addFact(c.guards, canon(n.sons[0].sons[0]))
analyse(c, n.sons[0].sons[1])
for i in 1.. <n.len:
let branch = n.sons[i]
setLen(c.guards, oldFacts)
for j in 0..i-1:
addFactNeg(c.guards, canon(n.sons[j].sons[0]))
# a branch with more than one son has a condition (an 'else' has none)
if branch.len > 1:
addFact(c.guards, canon(branch.sons[0]))
for i in 0 .. <branch.len:
analyse(c, branch.sons[i])
setLen(c.guards, oldFacts)
# Main tree walker of the first pass. Dispatches on the node kind and
# updates `c` (tracked locals, slices, guards, loop depth) accordingly.
proc analyse(c: var AnalysisCtx; n: PNode) =
case n.kind
of nkAsgn, nkFastAsgn:
# `x = y` with a single-assignable `x` and a local `y` records `x` as an
# alias of `y`
if n[0].isSingleAssignable and n[1].isLocal:
let slot = c.getSlot(n[1].sym)
slot.alias = n[0].sym
elif n[0].isLocal:
# since we already ensure sfAddrTaken is not in s.flags, we only need to
# prevent direct assignments to the monotonic variable:
let slot = c.getSlot(n[0].sym)
slot.blackListed = true
invalidateFacts(c.guards, n[0])
analyseSons(c, n)
addAsgnFact(c.guards, n[0], n[1])
of nkCallKinds:
# direct call:
if n[0].kind == nkSym: analyseCall(c, n, n[0].sym)
else: analyseSons(c, n)
of nkBracketExpr:
# a plain index `a[i]` is recorded as the one-element slice i..i
c.addSlice(n, n[0], n[1], n[1])
analyseSons(c, n)
of nkReturnStmt, nkRaiseStmt, nkTryStmt:
localError(n.info, "invalid control flow for 'parallel'")
# 'break' that leaves the 'parallel' section is not valid either
# or maybe we should generate a 'try' XXX
of nkVarSection:
# the initial value of a local becomes its lower bound
for it in n:
let value = it.lastSon
if value.kind != nkEmpty:
for j in 0 .. it.len-3:
if it[j].isLocal:
let slot = c.getSlot(it[j].sym)
if slot.lower.isNil: slot.lower = value
else: internalError(it.info, "slot already has a lower bound")
analyse(c, value)
of nkCaseStmt: analyseCase(c, n)
of nkIfStmt, nkIfExpr: analyseIf(c, n)
of nkWhileStmt:
analyse(c, n.sons[0])
# 'while true' loop?
inc c.inLoop
if isTrue(n.sons[0]):
analyseSons(c, n.sons[1])
else:
# loop may never execute:
let oldState = c.locals.len
let oldFacts = c.guards.len
addFact(c.guards, canon(n.sons[0]))
analyse(c, n.sons[1])
setLen(c.locals, oldState)
setLen(c.guards, oldFacts)
# we know after the loop the negation holds:
if not hasSubnodeWith(n.sons[1], nkBreakStmt):
addFactNeg(c.guards, canon(n.sons[0]))
dec c.inLoop
of nkTypeSection, nkProcDef, nkConverterDef, nkMethodDef, nkIteratorDef,
nkMacroDef, nkTemplateDef, nkConstSection, nkPragma:
# declarations are not analysed
discard
else:
analyseSons(c, n)
# Rewrites every call to the system `[]` operator inside `n` into a call of
# the `opSlice` magic with the operands (array, first, last). Other inner
# nodes are shallow-copied and leaves are returned as they are.
proc transformSlices(n: PNode): PNode =
if n.kind in nkCallKinds and n[0].kind == nkSym:
let op = n[0].sym
if op.name.s == "[]" and op.fromSystem:
result = copyNode(n)
result.add opSlice.newSymNode
result.add n[1]
result.add n[2][1]
result.add n[2][2]
return result
if n.safeLen > 0:
result = shallowCopy(n)
for i in 0 .. < n.len:
result.sons[i] = transformSlices(n.sons[i])
else:
result = n
proc transformSpawn(owner: PSym; n, barrier: PNode): PNode
proc transformSpawnSons(owner: PSym; n, barrier: PNode): PNode =
  # Shallow-copies `n` and replaces every child by its `transformSpawn`
  # result.
  result = shallowCopy(n)
  let count = n.len
  var i = 0
  while i < count:
    result.sons[i] = transformSpawn(owner, n.sons[i], barrier)
    inc i
# Second pass: replaces every 'spawn' inside `n` by the code that
# `wrapProcForSpawn` generates for it, passing `barrier` along. Three
# contexts are handled: `var x = spawn f()`, `x = spawn f()` and a bare
# `spawn f()`.
proc transformSpawn(owner: PSym; n, barrier: PNode): PNode =
case n.kind
of nkVarSection:
result = nil
for it in n:
let b = it.lastSon
if getMagic(b) == mSpawn:
# an entry of length 3 declares exactly one variable
if it.len != 3: localError(it.info, "invalid context for 'spawn'")
let m = transformSlices(b)
if result.isNil:
result = newNodeI(nkStmtList, n.info)
result.add n
result.add wrapProcForSpawn(owner, m[1], b.typ, barrier, it[0])
# the var section stays, but without the spawn as its initial value
it.sons[it.len-1] = emptyNode
if result.isNil: result = n
of nkAsgn, nkFastAsgn:
let b = n[1]
if getMagic(b) == mSpawn:
let m = transformSlices(b)
return wrapProcForSpawn(owner, m[1], b.typ, barrier, n[0])
result = transformSpawnSons(owner, n, barrier)
of nkCallKinds:
if getMagic(n) == mSpawn:
result = transformSlices(n)
return wrapProcForSpawn(owner, result[1], n.typ, barrier, nil)
result = transformSpawnSons(owner, n, barrier)
elif n.safeLen > 0:
result = transformSpawnSons(owner, n, barrier)
else:
result = n
# Placeholder: not implemented yet, the body does nothing.
proc checkArgs(a: var AnalysisCtx; n: PNode) =
discard "too implement"
# Placeholder: not implemented yet, the body does nothing.
proc generateAliasChecks(a: AnalysisCtx; result: PNode) =
discard "too implement"
# Entry point: checks the body of the 'parallel' call `n` and returns the
# statement list that replaces it: a barrier variable, `openBarrier`, the
# body with its spawns transformed, and `closeBarrier`.
proc liftParallel*(owner: PSym; n: PNode): PNode =
# this needs to be called after the 'for' loop elimination
# first pass:
# - detect monotonic local integer variables
# - detect used slices
# - detect used arguments
#echo "PAR ", renderTree(n)
var a = initAnalysisCtx()
let body = n.lastSon
analyse(a, body)
if a.spawns == 0:
localError(n.info, "'parallel' section without 'spawn'")
checkSlicesAreDisjoint(a)
checkArgs(a, body)
# declare the temporary of type `Barrier` that all spawns of this section share
var varSection = newNodeI(nkVarSection, n.info)
var temp = newSym(skTemp, getIdent"barrier", owner, n.info)
temp.typ = magicsys.getCompilerProc("Barrier").typ
incl(temp.flags, sfFromGeneric)
let tempNode = newSymNode(temp)
varSection.addVar tempNode
let barrier = genAddrOf(tempNode)
result = newNodeI(nkStmtList, n.info)
generateAliasChecks(a, result)
result.add varSection
result.add callCodeGenProc("openBarrier", barrier)
result.add transformSpawn(owner, body, barrier)
result.add callCodeGenProc("closeBarrier", barrier)

View File

@@ -89,7 +89,7 @@ proc initVarViaNew(a: PEffects, n: PNode) =
if n.kind != nkSym: return
let s = n.sym
if {tfNeedsInit, tfNotNil} * s.typ.flags <= {tfNotNil}:
# 'x' is not nil, but that doesn't mean it's not nil children
# 'x' is not nil, but that doesn't mean its "not nil" children
# are initialized:
initVar(a, n)
@@ -478,7 +478,7 @@ proc trackBlock(tracked: PEffects, n: PNode) =
else:
track(tracked, n)
proc isTrue(n: PNode): bool =
proc isTrue*(n: PNode): bool =
n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or
n.kind == nkIntLit and n.intVal != 0

View File

@@ -1084,8 +1084,10 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType =
of nkCallKinds:
if isRange(n):
result = semRangeAux(c, n, prev)
elif n[0].kind == nkIdent:
let op = n.sons[0].ident
elif n[0].kind notin nkIdentKinds:
result = semTypeExpr(c, n)
else:
let op = considerAcc(n.sons[0])
if op.id in {ord(wAnd), ord(wOr)} or op.s == "|":
checkSonsLen(n, 3)
var
@@ -1120,8 +1122,6 @@ proc semTypeNode(c: PContext, n: PNode, prev: PType): PType =
result = semAnyRef(c, n, tyRef, prev)
else:
result = semTypeExpr(c, n)
else:
result = semTypeExpr(c, n)
of nkWhenStmt:
var whenResult = semWhen(c, n, false)
if whenResult.kind == nkStmtList: whenResult.kind = nkStmtListType

View File

@@ -131,8 +131,9 @@ proc createStrKeepNode(x: var TFullReg) =
nfAllConst in x.node.flags:
# XXX this is hacky; tests/txmlgen triggers it:
x.node = newNode(nkStrLit)
# debug x.node
#assert x.node.kind in {nkStrLit..nkTripleStrLit}
# It is not only hacky, it is also wrong for tgentemplate. The primary
# cause of bugs like these is that the VM does not properly distinguish
# between variable definitions (var foo = e) and variable updates (foo = e).
template createStr(x) =
x.node = newNode(nkStrLit)

View File

@@ -16,6 +16,7 @@ arm.linux.gcc.linkerexe = "arm-linux-gcc"
path="$lib/core"
path="$lib/pure"
path="$lib/pure/collections"
path="$lib/pure/concurrency"
path="$lib/impure"
path="$lib/wrappers"
# path="$lib/wrappers/cairo"

View File

@@ -2823,7 +2823,7 @@ The following builtin procs cannot be overloaded for reasons of implementation
simplicity (they require specialized semantic checking)::
defined, definedInScope, compiles, low, high, sizeOf,
is, of, echo, shallowCopy, getAst
is, of, echo, shallowCopy, getAst, spawn
Thus they act more like keywords than like ordinary identifiers; unlike a
keyword however, a redefinition may `shadow`:idx: the definition in

View File

@@ -0,0 +1,58 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements procs to determine the number of CPUs / cores.
include "system/inclrtl"
import strutils, os
when not defined(windows):
import posix
when defined(linux):
import linux
when defined(macosx) or defined(bsd):
# constants and binding for sysctl(), which `countProcessors` uses on
# these platforms
const
CTL_HW = 6
HW_AVAILCPU = 25
HW_NCPU = 3
proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer,
a: var csize, b: pointer, c: int): cint {.
importc: "sysctl", header: "<sys/sysctl.h>".}
proc countProcessors*(): int {.rtl, extern: "ncpi$1".} =
## returns the number of the processors/cores the machine has.
## Returns 0 if it cannot be detected.
## NOTE(review): the body ends with ``if result <= 0: result = 1``, so the
## "returns 0" sentence looks stale -- confirm which branches that
## fallback covers.
when defined(windows):
var x = getEnv("NUMBER_OF_PROCESSORS")
if x.len > 0: result = parseInt(x.string)
elif defined(macosx) or defined(bsd):
var
mib: array[0..3, cint]
numCPU: int
len: csize
mib[0] = CTL_HW
mib[1] = HW_AVAILCPU
len = sizeof(numCPU)
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
# retry with HW_NCPU when HW_AVAILCPU gave no usable value
if numCPU < 1:
mib[1] = HW_NCPU
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
result = numCPU
elif defined(hpux):
result = mpctl(MPC_GETNUMSPUS, nil, nil)
elif defined(irix):
var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint
result = sysconf(SC_NPROC_ONLN)
else:
result = sysconf(SC_NPROCESSORS_ONLN)
if result <= 0: result = 1

View File

@@ -0,0 +1,96 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements a helper for a thread pool to determine whether
## creating a thread is a good idea.
when defined(windows):
import winlean, os, strutils, math
proc `-`(a, b: TFILETIME): int64 = a.rdFileTime - b.rdFileTime
elif defined(linux):
from cpuinfo import countProcessors
type
# What `advice` recommends to the thread pool.
ThreadPoolAdvice* = enum
doNothing,
doCreateThread, # create additional thread for throughput
doShutdownThread # too many threads are busy, shutdown one
# State that `advice` keeps between calls.
ThreadPoolState* = object
when defined(windows):
# CPU times seen by the previous call
prevSysKernel, prevSysUser, prevProcKernel, prevProcUser: TFILETIME
calls*: int
proc advice*(s: var ThreadPoolState): ThreadPoolAdvice =
## Recommends whether the thread pool should create a thread, shut one
## down or do nothing. Windows compares the CPU time of this process with
## the system's CPU time since the previous call; Linux compares the first
## number of the ``busy/total`` field of ``/proc/loadavg`` with the number
## of processors. Other systems always get ``doNothing``. Every call
## increments ``s.calls``.
when defined(windows):
var
sysIdle, sysKernel, sysUser,
procCreation, procExit, procKernel, procUser: TFILETIME
if getSystemTimes(sysIdle, sysKernel, sysUser) == 0 or
getProcessTimes(THandle(-1), procCreation, procExit,
procKernel, procUser) == 0:
return doNothing
# the first call has no previous sample to compare against
if s.calls > 0:
let
sysKernelDiff = sysKernel - s.prevSysKernel
sysUserDiff = sysUser - s.prevSysUser
procKernelDiff = procKernel - s.prevProcKernel
procUserDiff = procUser - s.prevProcUser
sysTotal = int(sysKernelDiff + sysUserDiff)
procTotal = int(procKernelDiff + procUserDiff)
# total CPU usage < 85% --> create a new worker thread.
# Measurements show that 100% and often even 90% is not reached even
# if all my cores are busy.
if sysTotal == 0 or procTotal / sysTotal < 0.85:
result = doCreateThread
s.prevSysKernel = sysKernel
s.prevSysUser = sysUser
s.prevProcKernel = procKernel
s.prevProcUser = procUser
elif defined(linux):
proc fscanf(c: TFile, frmt: cstring) {.varargs, importc,
header: "<stdio.h>".}
var f = open("/proc/loadavg")
var b: float
var busy, total: int
# the three leading floats are all stored into `b` and never read;
# only `busy` is used below
fscanf(f,"%lf %lf %lf %ld/%ld",
addr b, addr b, addr b, addr busy, addr total)
f.close()
let cpus = countProcessors()
if busy-1 < cpus:
result = doCreateThread
elif busy-1 >= cpus*2:
result = doShutdownThread
else:
result = doNothing
else:
# XXX implement this for other OSes
result = doNothing
inc s.calls
when isMainModule:
# Manual demo: keeps four spawned tasks busy and prints the advice once
# per second, 70 times.
# NOTE(review): `spawn`, `random` and `os.sleep` are used here, but `os`
# and `math` are only imported in the Windows branch at the top of this
# module and nothing visible imports `spawn` -- confirm that this demo
# still compiles.
proc busyLoop() =
while true:
discard random(80)
os.sleep(100)
spawn busyLoop()
spawn busyLoop()
spawn busyLoop()
spawn busyLoop()
var s: ThreadPoolState
for i in 1 .. 70:
echo advice(s)
os.sleep(1000)

View File

@@ -0,0 +1,347 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Implements Nimrod's 'spawn'.
import cpuinfo, cpuload, locks
{.push stackTrace:off.}
type
# A condition variable bundled with its lock and a counter: `signal`
# increments the counter, `await` waits while it is <= 0 and then
# decrements it.
CondVar = object
c: TCond
L: TLock
counter: int
proc createCondVar(): CondVar =
  # Initialises the lock and the condition of a new `CondVar`; `counter`
  # is not assigned here.
  result.L.initLock()
  result.c.initCond()
proc destroyCondVar(cv: var CondVar) {.inline.} =
  # Releases the OS resources of `cv`: first the condition, then the lock.
  cv.c.deinitCond()
  cv.L.deinitLock()
# Blocks until `cv.counter` is positive and then decrements it, all while
# holding `cv.L`.
proc await(cv: var CondVar) =
acquire(cv.L)
# re-check the counter in a loop after every wake-up
while cv.counter <= 0:
wait(cv.c, cv.L)
dec cv.counter
release(cv.L)
# Increments `cv.counter` under the lock and then signals the condition.
proc signal(cv: var CondVar) =
acquire(cv.L)
inc cv.counter
release(cv.L)
signal(cv.c)
type
# `counter` is incremented by `barrierEnter` and decremented by
# `barrierLeave`; `closeBarrier` waits on `cv` until it is no longer positive.
Barrier* {.compilerProc.} = object
counter: int
cv: CondVar
proc barrierEnter*(b: ptr Barrier) {.compilerProc.} =
## Atomically increments the barrier's counter.
atomicInc b.counter
proc barrierLeave*(b: ptr Barrier) {.compilerProc.} =
  ## Atomically decrements the barrier's counter and signals the barrier's
  ## condition variable when this very decrement brought the counter down
  ## to zero (or below).
  # Use the value returned by `atomicDec` instead of re-reading `b.counter`:
  # with a separate read several leaving threads can all observe 0 and all
  # signal, possibly after `closeBarrier` has already destroyed `b.cv`.
  if atomicDec(b.counter) <= 0: signal(b.cv)
proc openBarrier*(b: ptr Barrier) {.compilerProc.} =
  ## Prepares `b` for use: fresh condition variable, counter set to zero.
  b.cv = createCondVar()
  b.counter = 0
proc closeBarrier*(b: ptr Barrier) {.compilerProc.} =
## Waits until the barrier's counter is no longer positive and then
## destroys the barrier's condition variable.
while b.counter > 0: await(b.cv)
destroyCondVar(b.cv)
{.pop.}
# ----------------------------------------------------------------------------
type
foreign* = object ## a region that indicates the pointer comes from a
## foreign thread heap.
# shared by all promises passed to one 'awaitAny' call
AwaitInfo = object
cv: CondVar
idx: int
RawPromise* = ptr RawPromiseObj ## untyped base class for 'Promise[T]'
RawPromiseObj {.inheritable.} = object # \
# we allocate this with the thread local allocator; this
# is possible since we already need to do the GC_unref
# on the owning thread
ready, usesCondVar: bool
cv: CondVar #\
# for 'awaitAny' support
ai: ptr AwaitInfo
idx: int
data: PObject # we incRef and unref it to keep it alive
owner: ptr Worker
# link in the owner's list of finished promises (see 'finished')
next: RawPromise
align: float64 # a float for proper alignment
Promise* {.compilerProc.} [T] = ptr object of RawPromiseObj
blob: T ## the underlying value, if available. Note that usually
## you should not access this field directly! However it can
## sometimes be more efficient than getting the value via ``^``.
WorkerProc = proc (thread, args: pointer) {.nimcall, gcsafe.}
Worker = object
taskArrived: CondVar
taskStarted: CondVar #\
# task data:
f: WorkerProc
data: pointer
ready: bool # put it here for correct alignment!
initialized: bool # whether it has even been initialized
shutdown: bool # the pool requests to shut down this worker thread
# protects 'head', the list of promises waiting to be freed
promiseLock: TLock
head: RawPromise
proc finished*(prom: RawPromise) =
## This MUST be called for every created promise to free its associated
## resources. Note that the default reading operation ``^`` is destructive
## and calls ``finished``.
doAssert prom.ai.isNil, "promise is still attached to an 'awaitAny'"
assert prom.next == nil
# nothing is freed here: the promise is only pushed onto the owning
# worker's list, which 'cleanPromises' walks and deallocates
let w = prom.owner
acquire(w.promiseLock)
prom.next = w.head
w.head = prom
release(w.promiseLock)
proc cleanPromises(w: ptr Worker) =
  # Frees every promise on the worker's list of finished promises: destroys
  # its condition variable if it has one, unrefs its data and deallocates it.
  #
  # The list head has to be read while holding `promiseLock`. `finished`
  # pushes onto `w.head` from other threads, so a head that was read before
  # the lock is taken can be stale, and the promise pushed in between would
  # be dropped by `w.head = nil` without ever being freed.
  acquire(w.promiseLock)
  var it = w.head
  w.head = nil
  while it != nil:
    let nxt = it.next
    if it.usesCondVar: destroyCondVar(it.cv)
    if it.data != nil: GC_unref(it.data)
    dealloc(it)
    it = nxt
  release(w.promiseLock)
proc nimCreatePromise(owner: pointer; blobSize: int): RawPromise {.
                      compilerProc.} =
  # Allocates a zeroed promise with `blobSize` extra bytes behind the
  # `RawPromiseObj` header and stores `owner` as its owning worker.
  let totalSize = RawPromiseObj.sizeof + blobSize
  result = cast[RawPromise](alloc0(totalSize))
  result.owner = cast[ptr Worker](owner)
# Gives `prom` its own condition variable and marks it as using one, so
# that `await` blocks on it and `cleanPromises` destroys it.
proc nimPromiseCreateCondVar(prom: RawPromise) {.compilerProc.} =
prom.cv = createCondVar()
prom.usesCondVar = true
# Signals everybody who may be waiting for `prom`: the 'awaitAny' call it
# is attached to (if any) and its own condition variable (if it has one).
proc nimPromiseSignal(prom: RawPromise) {.compilerProc.} =
if prom.ai != nil:
# same steps as `signal(cv)`, plus storing this promise's index under
# the lock
acquire(prom.ai.cv.L)
prom.ai.idx = prom.idx
inc prom.ai.cv.counter
release(prom.ai.cv.L)
signal(prom.ai.cv.c)
if prom.usesCondVar: signal(prom.cv)
proc await*[T](prom: Promise[T]) =
## waits until the value for the promise arrives. Returns immediately
## for a promise that has no condition variable.
if prom.usesCondVar: await(prom.cv)
proc awaitAndThen*[T](prom: Promise[T]; action: proc (x: T) {.closure.}) =
## blocks until the value is available and then passes this value
## to ``action``. Note that due to Nimrod's parameter passing semantics this
## means that ``T`` doesn't need to be copied and so ``awaitAndThen`` can
## sometimes be more efficient than ``^``. ``finished(prom)`` is called
## afterwards.
if prom.usesCondVar: await(prom)
# strings and seqs are read from 'data', other values from 'blob'
when T is string or T is seq:
action(cast[T](prom.data))
elif T is ref:
{.error: "'awaitAndThen' not available for Promise[ref]".}
else:
action(prom.blob)
finished(prom)
proc `^`*[T](prom: Promise[ref T]): foreign ptr T =
## blocks until the value is available and then returns this value. Note
## this reading is destructive for reasons of efficiency and convenience.
## This calls ``finished(prom)``.
if prom.usesCondVar: await(prom)
# the result is the promise's 'data' pointer itself, typed as 'foreign ptr'
result = cast[foreign ptr T](prom.data)
finished(prom)
proc `^`*[T](prom: Promise[T]): T =
## blocks until the value is available and then returns this value. Note
## this reading is destructive for reasons of efficiency and convenience.
## This calls ``finished(prom)``.
if prom.usesCondVar: await(prom)
# strings and seqs are read from 'data', other values from 'blob'
when T is string or T is seq:
result = cast[T](prom.data)
else:
result = prom.blob
finished(prom)
proc awaitAny*(promises: openArray[RawPromise]): int =
## awaits any of the given promises. Returns the index of one promise for which
## a value arrived. A promise only supports one call to 'awaitAny' at the
## same time. That means if you await([a,b]) and await([b,c]) the second
## call will only await 'c'. If there is no promise left to be able to wait
## on, -1 is returned.
## **Note**: This results in non-deterministic behaviour and so should be
## avoided.
var ai: AwaitInfo
ai.cv = createCondVar()
var conflicts = 0
# attach 'ai' to every promise that is not attached to another 'awaitAny'
for i in 0 .. promises.high:
if cas(addr promises[i].ai, nil, addr ai):
promises[i].idx = i
else:
inc conflicts
if conflicts < promises.len:
await(ai.cv)
result = ai.idx
# detach again; only the promises that still point to 'ai' are reset
for i in 0 .. promises.high:
discard cas(addr promises[i].ai, addr ai, nil)
else:
result = -1
destroyCondVar(ai.cv)
proc nimArgsPassingDone(p: pointer) {.compilerProc.} =
  # `p` is the worker; signalling `taskStarted` wakes up the `selectWorker`
  # call that waits for it.
  signal(cast[ptr Worker](p).taskStarted)
const
MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 256 threads
## should be good enough for anybody ;-)
var
currentPoolSize: int
maxPoolSize = MaxThreadPoolSize
minPoolSize = 4
gSomeReady = createCondVar()
readyWorker: ptr Worker
# Thread body of a worker: announces itself as ready, waits for a task,
# runs it and frees the promises that were handed back in the meantime.
proc slave(w: ptr Worker) {.thread.} =
while true:
w.ready = true
readyWorker = w
signal(gSomeReady)
await(w.taskArrived)
# 'selectWorker' resets 'ready' before it signals 'taskArrived'
assert(not w.ready)
w.f(w, w.data)
if w.head != nil: w.cleanPromises
if w.shutdown:
w.shutdown = false
atomicDec currentPoolSize
proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) =
## sets the minimal thread pool size. The default value of this is 4.
## ``nimSpawn`` only asks a worker to shut down while the pool is larger
## than this.
minPoolSize = size
proc setMaxPoolSize*(size: range[1..MaxThreadPoolSize]) =
## sets the maximal thread pool size. The default value of this
## is ``MaxThreadPoolSize``.
maxPoolSize = size
var
workers: array[MaxThreadPoolSize, TThread[ptr Worker]]
workersData: array[MaxThreadPoolSize, Worker]
proc activateThread(i: int) {.noinline.} =
  # Initialises the data of worker `i` and starts its thread.
  let w = addr(workersData[i])
  w.taskArrived = createCondVar()
  w.taskStarted = createCondVar()
  initLock(w.promiseLock)
  w.initialized = true
  createThread(workers[i], slave, w)
# Starts the initial pool: one worker per processor reported by
# `countProcessors`, capped at `MaxThreadPoolSize`.
proc setup() =
currentPoolSize = min(countProcessors(), MaxThreadPoolSize)
readyWorker = addr(workersData[0])
for i in 0.. <currentPoolSize: activateThread(i)
proc preferSpawn*(): bool =
## Use this proc to determine quickly if a 'spawn' or a direct call is
## preferable. If it returns 'true' a 'spawn' may make sense. In general
## it is not necessary to call this directly; use 'spawnX' instead.
# the counter is read without taking the lock
result = gSomeReady.counter > 0
proc spawn*(call: expr): expr {.magic: "Spawn".}
## always spawns a new task, so that the 'call' is never executed on
## the calling thread. 'call' has to be a proc call 'p(...)' where 'p'
## is gcsafe and has 'void' as the return type.
template spawnX*(call: expr): expr =
## spawns a new task if a CPU core is ready, otherwise executes the
## call in the calling thread. Usually it is advised to
## use 'spawn' in order to not block the producer for an unknown
## amount of time. 'call' has to be a proc call 'p(...)' where 'p'
## is gcsafe and has 'void' as the return type.
(if preferSpawn(): spawn call else: call)
proc parallel*(body: stmt) {.magic: "Parallel".}
## a parallel section can be used to execute a block in parallel. ``body``
## has to be in a DSL that is a particular subset of the language. Please
## refer to the manual for further information. This is a compiler magic
## and therefore has no body here.
var
state: ThreadPoolState
stateLock: TLock
initLock stateLock
# Tries to hand the task (`fn`, `data`) to worker `w`. Returns true if the
# worker was ready and has taken the task, false otherwise.
proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool =
# atomically claim the worker by flipping 'ready' from true to false
if cas(addr w.ready, true, false):
w.data = data
w.f = fn
signal(w.taskArrived)
# 'taskStarted' is signalled by 'nimArgsPassingDone'
await(w.taskStarted)
result = true
proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} =
# implementation of 'spawn' that is used by the code generator.
# Loops until some worker accepts the task: tries the most recently
# announced ready worker, then every worker of the pool, then asks
# 'advice' whether the pool should be resized, and finally waits until
# some worker announces that it is ready.
while true:
if selectWorker(readyWorker, fn, data): return
for i in 0.. <currentPoolSize:
if selectWorker(addr(workersData[i]), fn, data): return
# determine what to do, but keep in mind this is expensive too:
# state.calls < maxPoolSize: warmup phase
# (state.calls and 127) == 0: periodic check
if state.calls < maxPoolSize or (state.calls and 127) == 0:
# ensure the call to 'advice' is atomic:
if tryAcquire(stateLock):
case advice(state)
of doNothing: discard
of doCreateThread:
if currentPoolSize < maxPoolSize:
if not workersData[currentPoolSize].initialized:
activateThread(currentPoolSize)
let w = addr(workersData[currentPoolSize])
atomicInc currentPoolSize
if selectWorker(w, fn, data):
release(stateLock)
return
# else we didn't succeed but some other thread, so do nothing.
of doShutdownThread:
if currentPoolSize > minPoolSize:
let w = addr(workersData[currentPoolSize-1])
w.shutdown = true
# we don't free anything here. Too dangerous.
release(stateLock)
# else the acquire failed, but this means some
# other thread succeeded, so we don't need to do anything here.
await(gSomeReady)
proc sync*() =
## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate
## waiting, you have to use an explicit barrier.
while true:
var allReady = true
# stop scanning at the first worker that is not ready
for i in 0 .. <currentPoolSize:
if not allReady: break
allReady = allReady and workersData[i].ready
if allReady: break
await(gSomeReady)
setup()

View File

@@ -1,7 +1,7 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2013 Andreas Rumpf
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
@@ -13,7 +13,7 @@
include "system/inclrtl"
import
strutils, os, strtabs, streams
strutils, os, strtabs, streams, cpuinfo
when defined(windows):
import winlean
@@ -225,42 +225,10 @@ proc errorHandle*(p: PProcess): TFileHandle {.rtl, extern: "nosp$1",
## it is closed when closing the PProcess ``p``.
result = p.errHandle
when defined(macosx) or defined(bsd):
const
CTL_HW = 6
HW_AVAILCPU = 25
HW_NCPU = 3
proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer,
a: var csize, b: pointer, c: int): cint {.
importc: "sysctl", header: "<sys/sysctl.h>".}
proc countProcessors*(): int {.rtl, extern: "nosp$1".} =
## returns the numer of the processors/cores the machine has.
## Returns 0 if it cannot be detected.
when defined(windows):
var x = getEnv("NUMBER_OF_PROCESSORS")
if x.len > 0: result = parseInt(x.string)
elif defined(macosx) or defined(bsd):
var
mib: array[0..3, cint]
numCPU: int
len: csize
mib[0] = CTL_HW
mib[1] = HW_AVAILCPU
len = sizeof(numCPU)
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
if numCPU < 1:
mib[1] = HW_NCPU
discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0)
result = numCPU
elif defined(hpux):
result = mpctl(MPC_GETNUMSPUS, nil, nil)
elif defined(irix):
var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint
result = sysconf(SC_NPROC_ONLN)
else:
result = sysconf(SC_NPROCESSORS_ONLN)
if result <= 0: result = 1
result = cpuinfo.countProcessors()
proc execProcesses*(cmds: openArray[string],
options = {poStdErrToStdOut, poParentStreams},

View File

@@ -42,7 +42,6 @@ type
cstring* {.magic: Cstring.} ## built-in cstring (*compatible string*) type
pointer* {.magic: Pointer.} ## built-in pointer type, use the ``addr``
## operator to get a pointer to a variable
const
on* = true ## alias for ``true``
off* = false ## alias for ``false``
@@ -51,6 +50,9 @@ const
type
Ordinal* {.magic: Ordinal.}[T]
`ptr`* {.magic: Pointer.}[T] ## built-in generic untraced pointer type
`ref`* {.magic: Pointer.}[T] ## built-in generic traced pointer type
`nil` {.magic: "Nil".}
expr* {.magic: Expr.} ## meta type to denote an expression (for templates)
stmt* {.magic: Stmt.} ## meta type to denote a statement (for templates)
@@ -2948,6 +2950,3 @@ when not defined(booting):
template isStatic*(x): expr = compiles(static(x))
# checks whether `x` is a value known at compile-time
when hasThreadSupport:
when hostOS != "standalone": include "system/sysspawn"

View File

@@ -179,7 +179,8 @@ when not defined(nimmixin):
# internal proc used for destroying sequences and arrays
for i in countup(0, r.len - 1): destroy(r[i])
else:
# XXX Why is this exported and no compilerproc?
# XXX Why is this exported and no compilerproc? -> compilerprocs cannot be
# generic for now
proc nimDestroyRange*[T](r: T) =
# internal proc used for destroying sequences and arrays
mixin destroy

View File

@@ -1,13 +1,14 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2012 Andreas Rumpf
# (c) Copyright 2014 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Atomic operations for Nimrod.
{.push stackTrace:off.}
when (defined(gcc) or defined(llvm_gcc)) and hasThreadSupport:
type
@@ -203,3 +204,31 @@ proc atomicDec*(memLoc: var int, x: int = 1): int =
else:
dec(memLoc, x)
result = memLoc
when defined(windows) and not defined(gcc):
  proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32
    {.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.}

  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool =
    ## Atomic compare-and-swap: stores `newValue` into `p[]` if and only if
    ## `p[]` currently equals `oldValue`. Returns true when the swap happened.
    # InterlockedCompareExchange returns the value the destination held
    # *before* the call, not a success flag. The swap succeeded exactly when
    # that previous value equals the comparand. Testing it against 0 (as was
    # done before) inverts the result for e.g. `cas(p, false, true)`.
    let expected = oldValue.int32
    result = interlockedCompareExchange(p, newValue.int32, expected) == expected
  # XXX fix for 64 bit build: operands are still narrowed to 32 bits here.
else:
  # this is valid for GCC and Intel C++
  # `__sync_bool_compare_and_swap` already returns whether the swap happened.
  proc cas*[T: bool|int|ptr](p: ptr T; oldValue, newValue: T): bool
    {.importc: "__sync_bool_compare_and_swap", nodecl.}
  # XXX is this valid for 'int'?
# `cpuRelax`: a CPU-level pause hint, selected per target CPU and C compiler.
# If none of the branches below applies, `cpuRelax` is not declared at all.
when (defined(x86) or defined(amd64)) and defined(gcc):
proc cpuRelax {.inline.} =
# Emits the x86 `pause` instruction through GCC inline assembly; the
# "memory" clobber is part of the emitted statement.
{.emit: """asm volatile("pause" ::: "memory");""".}
elif (defined(x86) or defined(amd64)) and defined(vcc):
# Visual C++: bind to `YieldProcessor` from <windows.h>.
proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".}
elif defined(intelc):
# Intel C++: bind to the `_mm_pause` intrinsic from xmmintrin.h.
proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".}
elif false:
# Disabled fallback (`elif false` is never taken): would sleep via os.sleep(1).
from os import sleep
proc cpuRelax {.inline.} = os.sleep(1)
{.pop.}

View File

@@ -14,30 +14,6 @@ when not defined(NimString):
{.push stackTrace:off.}
when (defined(x86) or defined(amd64)) and defined(gcc):
proc cpuRelax {.inline.} =
{.emit: """asm volatile("pause" ::: "memory");""".}
elif (defined(x86) or defined(amd64)) and defined(vcc):
proc cpuRelax {.importc: "YieldProcessor", header: "<windows.h>".}
elif defined(intelc):
proc cpuRelax {.importc: "_mm_pause", header: "xmmintrin.h".}
elif false:
from os import sleep
proc cpuRelax {.inline.} = os.sleep(1)
when defined(windows) and not defined(gcc):
proc interlockedCompareExchange(p: pointer; exchange, comparand: int32): int32
{.importc: "InterlockedCompareExchange", header: "<windows.h>", cdecl.}
proc cas(p: ptr bool; oldValue, newValue: bool): bool =
interlockedCompareExchange(p, newValue.int32, oldValue.int32) != 0
else:
# this is valid for GCC and Intel C++
proc cas(p: ptr bool; oldValue, newValue: bool): bool
{.importc: "__sync_bool_compare_and_swap", nodecl.}
# We declare our own condition variables here to get rid of the dummy lock
# on Windows:
@@ -54,6 +30,9 @@ proc createCondVar(): CondVar =
initSysLock(result.stupidLock)
#acquireSys(result.stupidLock)
# Releases the system condition variable wrapped by `c` via `deinitSysCond`.
# NOTE(review): only `c.c` is released here; `createCondVar` also calls
# `initSysLock(result.stupidLock)` -- confirm whether that lock needs a
# matching deinit.
proc destroyCondVar(c: var CondVar) {.inline.} =
deinitSysCond(c.c)
proc await(cv: var CondVar) =
when defined(posix):
acquireSys(cv.stupidLock)
@@ -100,6 +79,26 @@ proc signal(cv: var FastCondVar) =
#if cas(addr cv.slowPath, true, false):
signal(cv.slow)
# Counting barrier: `barrierEnter` increments the counter, `barrierLeave`
# decrements it, and `closeBarrier` waits on the condition variable.
type
  Barrier* {.compilerProc.} = object
    counter: int  # number of `barrierEnter` calls not yet matched by a leave
    cv: CondVar   # signalled by `barrierLeave` once `counter` is <= 0

proc barrierEnter*(b: ptr Barrier) {.compilerProc.} =
  ## Registers one more participant by atomically incrementing the counter.
  atomicInc b.counter

proc barrierLeave*(b: ptr Barrier) {.compilerProc.} =
  ## Unregisters one participant and signals the barrier's condition variable
  ## when the counter has dropped to zero or below.
  # Decide on the value returned by the atomic decrement itself. Re-reading
  # `b.counter` afterwards is a separate, non-atomic step, so two leavers
  # racing each other could both observe zero and both signal.
  if atomicDec(b.counter) <= 0: signal(b.cv)

proc openBarrier*(b: ptr Barrier) {.compilerProc.} =
  ## Resets the counter to zero and creates a fresh condition variable.
  b.counter = 0
  b.cv = createCondVar()

proc closeBarrier*(b: ptr Barrier) {.compilerProc.} =
  ## Waits on the barrier's condition variable, then destroys it.
  # NOTE(review): `await` is called unconditionally. If every participant
  # has already left before this point, returning relies on CondVar
  # remembering the earlier signal -- confirm against the CondVar code.
  await(b.cv)
  destroyCondVar(b.cv)
{.pop.}
# ----------------------------------------------------------------------------

View File

@@ -0,0 +1,21 @@
discard """
outputsub: "EVEN 28"
"""
import threadpool
# Prints `a` with an "ODD " prefix.
proc odd(a: int) = echo "ODD ", a
# Prints `a` with an "EVEN " prefix.
proc even(a: int) = echo "EVEN ", a
# Fills a[0..30] with its own indices, then spawns `even` on a[i] and `odd`
# on a[i+1] for i = 0, 2, ..., 28 inside a `parallel` section.
proc main() =
var a: array[0..30, int]
for i in low(a)..high(a): a[i] = i
parallel:
var i = 0
# `i <= 29` keeps the largest index touched, i+1, within the array bounds.
while i <= 29:
spawn even(a[i])
spawn odd(a[i+1])
inc i, 2
# Stepping by 2 means the pairs {i, i+1} never overlap between iterations.
main()

View File

@@ -0,0 +1,33 @@
discard """
output: '''0
1
2
3
4
5
6
7
8'''
sortoutput: true
"""
import threadpool
# Prints every element of `a`, one per line.
proc f(a: openArray[int]) =
for x in a: echo x
# Overload for a single value: prints `a`.
proc f(a: int) = echo a
# Spawns `f` on the slice a[0..2] and then on the single elements a[3]..a[8]
# inside a `parallel` section; the spec sorts the output before comparing.
proc main() =
var a: array[0..9, int] = [0,1,2,3,4,5,6,7,8,9]
parallel:
spawn f(a[0..2])
#spawn f(a[16..30])
var i = 3
# i takes the values 3, 5, 7, so indices 3..8 are each used exactly once.
while i <= 8:
spawn f(a[i])
spawn f(a[i+1])
inc i, 2
# Stepping by 2 means the pairs {i, i+1} never overlap between iterations.
main()

View File

@@ -0,0 +1,17 @@
discard """
output: '''foobarfoobarbazbearbazbear'''
cmd: "nimrod $target --threads:on $options $file"
"""
import threadpool
proc computeSomething(a, b: string): string =
  ## Returns `a & b` written twice in a row,
  ## e.g. ("foo", "bar") gives "foobarfoobar".
  let pair = a & b
  result = pair & pair
# Spawns two `computeSomething` calls and echoes both results concatenated.
proc main =
let fvA = spawn computeSomething("foo", "bar")
let fvB = spawn computeSomething("baz", "bear")
# `^` reads the value produced by a spawned call (presumably blocking until
# it is available -- confirm in threadpool).
echo(^fvA, ^fvB)
main()
sync()

View File

@@ -0,0 +1,25 @@
discard """
output: '''3
4
5
6
7'''
sortoutput: true
"""
import threadpool, os
# Sleeps for (100 - x*10) milliseconds, so larger `x` sleeps less, then
# prints `x`.
proc p(x: int) =
os.sleep(100 - x*10)
echo x
# Spawns `p` on foo[i] for every i in the requested range a..b, clamped to
# the valid indices 0..foo.high, inside a `parallel` section.
proc testFor(a, b: int; foo: var openArray[int]) =
parallel:
for i in max(a, 0) .. min(b, foo.high):
spawn p(foo[i])
var arr = [0, 1, 2, 3, 4, 5, 6, 7]
# The requested upper bound 10 exceeds arr.high (7), so indices 3..7 are used.
testFor(3, 10, arr)

View File

@@ -0,0 +1,25 @@
discard """
errormsg: "can prove: i + 1 > 30"
line: 21
"""
import threadpool
proc f(a: openArray[int]) =
for x in a: echo x
proc f(a: int) = echo a
proc main() =
var a: array[0..30, int]
parallel:
spawn f(a[0..15])
spawn f(a[16..30])
var i = 0
while i <= 30:
spawn f(a[i])
spawn f(a[i+1])
inc i
#inc i # inc i, 2 would be correct here
main()

View File

@@ -0,0 +1,26 @@
discard """
errormsg: "invalid usage of counter after increment"
line: 21
"""
import threadpool
proc f(a: openArray[int]) =
for x in a: echo x
proc f(a: int) = echo a
proc main() =
var a: array[0..30, int]
parallel:
spawn f(a[0..15])
spawn f(a[16..30])
var i = 0
while i <= 30:
inc i
spawn f(a[i])
inc i
#spawn f(a[i+1])
#inc i # inc i, 2 would be correct here
main()

View File

@@ -0,0 +1,25 @@
discard """
errormsg: "cannot prove (i)..(i) disjoint from (i + 1)..(i + 1)"
line: 20
"""
import threadpool
proc f(a: openArray[int]) =
for x in a: echo x
proc f(a: int) = echo a
proc main() =
var a: array[0..30, int]
parallel:
#spawn f(a[0..15])
#spawn f(a[16..30])
var i = 0
while i <= 29:
spawn f(a[i])
spawn f(a[i+1])
inc i
#inc i # inc i, 2 would be correct here
main()

26
tests/parallel/tpi.nim Normal file
View File

@@ -0,0 +1,26 @@
discard """
output: '''3.141792613595791
3.141792613595791'''
"""
import strutils, math, threadpool
proc term(k: float): float =
  ## k-th summand of the alternating series used below to approximate pi:
  ## 4 * (-1)^k / (2k + 1).
  let sign = math.pow(-1, k)
  result = 4 * sign / (2*k + 1)
# Sums term(0) .. term(n), computing every term in its own spawned call.
# The results are held as `Promise[float]` values and read back with `^`.
proc piU(n: int): float =
var ch = newSeq[Promise[float]](n+1)
for k in 0..n:
ch[k] = spawn term(float(k))
for k in 0..n:
result += ^ch[k]
# Same sum as `piU`, but the spawned results are assigned straight into a
# seq[float] from within a `parallel` section and then added up.
# NOTE(review): indentation is lost in this view; the summing loop
# presumably runs after the `parallel` section has finished -- confirm.
proc piS(n: int): float =
var ch = newSeq[float](n+1)
parallel:
for k in 0..ch.high:
ch[k] = spawn term(float(k))
for k in 0..ch.high:
result += ch[k]
echo formatFloat(piU(5000))
echo formatFloat(piS(5000))

View File

@@ -4,20 +4,22 @@ discard """
cmd: "nimrod $target --threads:on $options $file"
"""
import threadpool
var
x, y = 0
proc p1 =
for i in 0 .. 1_000_000:
for i in 0 .. 10_000:
discard
inc x
atomicInc x
proc p2 =
for i in 0 .. 1_000_000:
for i in 0 .. 10_000:
discard
inc y, 2
atomicInc y, 2
for i in 0.. 3:
spawn(p1())

View File

@@ -0,0 +1,9 @@
discard """
line: 9
errormsg: "'spawn' takes a call expression"
cmd: "nimrod $target --threads:on $options $file"
"""
import threadpool
let foo = spawn(1)

View File

@@ -1,7 +0,0 @@
discard """
line: 7
errormsg: "'spawn' takes a call expression of type void"
cmd: "nimrod $target --threads:on $options $file"
"""
spawn(1)

View File

@@ -46,7 +46,7 @@ type
msg*: string
ccodeCheck*: string
err*: TResultEnum
substr*: bool
substr*, sortoutput*: bool
targets*: set[TTarget]
const
@@ -113,6 +113,8 @@ proc parseSpec*(filename: string): TSpec =
result.action = actionRun
result.outp = e.value
result.substr = true
of "sortoutput":
result.sortoutput = parseCfgBool(e.value)
of "exitcode":
discard parseInt(e.value, result.exitCode)
of "msg":

View File

@@ -11,7 +11,8 @@
import
parseutils, strutils, pegs, os, osproc, streams, parsecfg, json,
marshal, backend, parseopt, specs, htmlgen, browsers, terminal
marshal, backend, parseopt, specs, htmlgen, browsers, terminal,
algorithm
const
resultsFile = "testresults.html"
@@ -150,6 +151,11 @@ proc codegenCheck(test: TTest, check: string, given: var TSpec) =
except EIO:
given.err = reCodeNotFound
proc makeDeterministic(s: string): string =
  ## Returns `s` with its lines sorted (using `system.cmp`) and re-joined
  ## with "\n", so output whose line order varies between runs can be
  ## compared against a fixed expectation.
  let ordered = sorted(splitLines(s), system.cmp)
  result = join(ordered, "\n")
proc testSpec(r: var TResults, test: TTest) =
# major entry point for a single test
let tname = test.name.addFileExt(".nim")
@@ -191,8 +197,10 @@ proc testSpec(r: var TResults, test: TTest) =
r.addResult(test, "exitcode: " & $expected.exitCode,
"exitcode: " & $exitCode, reExitCodesDiffer)
else:
if strip(buf.string) != strip(expected.outp):
if not (expected.substr and expected.outp in buf.string):
var bufB = strip(buf.string)
if expected.sortoutput: bufB = makeDeterministic(bufB)
if bufB != strip(expected.outp):
if not (expected.substr and expected.outp in bufB):
given.err = reOutputsDiffer
if given.err == reSuccess:
codeGenCheck(test, expected.ccodeCheck, given)

View File

@@ -2,6 +2,23 @@
News
====
..
2014-06-29 Version 0.9.6 released
=================================
Changes affecting backwards compatibility
-----------------------------------------
- ``spawn`` now uses an elaborate self-adapting thread pool and as such
has been moved into its own module. So to use it, you now have to import
``threadpool``.
Library Additions
-----------------
- Added module ``cpuinfo``.
- Added module ``threadpool``.
2014-04-21 Version 0.9.4 released