isolation spec update; WIP (#21843)

* isolation spec update; WIP

* wip

* docs update, WIP

* progress

* Update doc/manual.md
This commit is contained in:
Andreas Rumpf
2023-05-14 16:58:28 +02:00
committed by GitHub
parent 0ece98620f
commit f4a9b258c3
9 changed files with 284 additions and 15 deletions

View File

@@ -515,7 +515,7 @@ type
nfSkipFieldChecking # node skips field visable checking
TNodeFlags* = set[TNodeFlag]
TTypeFlag* = enum # keep below 32 for efficiency reasons (now: 46)
TTypeFlag* = enum # keep below 32 for efficiency reasons (now: 47)
tfVarargs, # procedure has C styled varargs
# tyArray type represeting a varargs list
tfNoSideEffect, # procedure type does not allow side effects
@@ -585,6 +585,7 @@ type
tfIsConstructor
tfEffectSystemWorkaround
tfIsOutParam
tfSendable
TTypeFlags* = set[TTypeFlag]
@@ -634,7 +635,7 @@ const
skError* = skUnknown
var
eqTypeFlags* = {tfIterator, tfNotNil, tfVarIsPtr, tfGcSafe, tfNoSideEffect, tfIsOutParam}
eqTypeFlags* = {tfIterator, tfNotNil, tfVarIsPtr, tfGcSafe, tfNoSideEffect, tfIsOutParam, tfSendable}
## type flags that are essential for type equality.
## This is now a variable because for emulation of version:1.0 we
## might exclude {tfGcSafe, tfNoSideEffect}.

View File

@@ -154,4 +154,4 @@ proc initDefines*(symbols: StringTableRef) =
defineSymbol("nimHasWarnBareExcept")
defineSymbol("nimHasDup")
defineSymbol("nimHasChecksums")
defineSymbol("nimHasSendable")

View File

@@ -72,6 +72,69 @@ proc isValueOnlyType(t: PType): bool =
proc wrap(t: PType): bool {.nimcall.} = t.kind in {tyRef, tyPtr, tyVar, tyLent}
result = not types.searchTypeFor(t, wrap)
type
SearchResult = enum
NotFound, Abort, Found
proc containsDangerousRefAux(t: PType; marker: var IntSet): SearchResult
proc containsDangerousRefAux(n: PNode; marker: var IntSet): SearchResult =
result = NotFound
case n.kind
of nkRecList:
for i in 0..<n.len:
result = containsDangerousRefAux(n[i], marker)
if result == Found: return result
of nkRecCase:
assert(n[0].kind == nkSym)
result = containsDangerousRefAux(n[0], marker)
if result == Found: return result
for i in 1..<n.len:
case n[i].kind
of nkOfBranch, nkElse:
result = containsDangerousRefAux(lastSon(n[i]), marker)
if result == Found: return result
else: discard
of nkSym:
result = containsDangerousRefAux(n.sym.typ, marker)
else: discard
proc containsDangerousRefAux(t: PType; marker: var IntSet): SearchResult =
result = NotFound
if t == nil: return result
if containsOrIncl(marker, t.id): return result
if t.kind == tyRef or (t.kind == tyProc and t.callConv == ccClosure):
result = Found
elif tfSendable in t.flags:
result = Abort
else:
# continue the type traversal:
result = NotFound
if result != NotFound: return result
case t.kind
of tyObject:
if t[0] != nil:
result = containsDangerousRefAux(t[0].skipTypes(skipPtrs), marker)
if result == NotFound: result = containsDangerousRefAux(t.n, marker)
of tyGenericInst, tyDistinct, tyAlias, tySink:
result = containsDangerousRefAux(lastSon(t), marker)
of tyArray, tySet, tyTuple, tySequence:
for i in 0..<t.len:
result = containsDangerousRefAux(t[i], marker)
if result == Found: return result
else:
discard
proc containsDangerousRef(t: PType): bool =
# a `ref` type is "dangerous" if it occurs not within a type that is like `Isolated[T]`.
# For example:
# `ref int` # dangerous
# `Isolated[ref int]` # not dangerous
var marker = initIntSet()
result = containsDangerousRefAux(t, marker) == Found
proc canAlias*(arg, ret: PType): bool =
if isValueOnlyType(arg):
# can alias only with addr(arg.x) and we don't care if it is not safe
@@ -80,12 +143,15 @@ proc canAlias*(arg, ret: PType): bool =
var marker = initIntSet()
result = canAlias(arg, ret, marker)
const
SomeVar = {skForVar, skParam, skVar, skLet, skConst, skResult, skTemp}
proc containsVariable(n: PNode): bool =
case n.kind
of nodesToIgnoreSet:
result = false
of nkSym:
result = n.sym.kind in {skForVar, skParam, skVar, skLet, skConst, skResult, skTemp}
result = n.sym.kind in SomeVar
else:
for ch in n:
if containsVariable(ch): return true
@@ -109,7 +175,7 @@ proc checkIsolate*(n: PNode): bool =
discard "fine, it is isolated already"
else:
let argType = n[i].typ
if argType != nil and not isCompileTimeOnly(argType) and containsTyRef(argType):
if argType != nil and not isCompileTimeOnly(argType) and containsDangerousRef(argType):
if argType.canAlias(n.typ) or containsVariable(n[i]):
# bug #19013: Alias information is not enough, we need to check for potential
# "overlaps". I claim the problem can only happen by reading again from a location
@@ -143,6 +209,12 @@ proc checkIsolate*(n: PNode): bool =
result = checkIsolate(n[^1])
else:
result = false
of nkSym:
result = true
if n.sym.kind in SomeVar:
let argType = n.typ
if argType != nil and not isCompileTimeOnly(argType) and containsDangerousRef(argType):
result = false
else:
# unanalysable expression:
result = false

View File

@@ -71,7 +71,8 @@ const
wPure, wHeader, wCompilerProc, wCore, wFinal, wSize, wShallow,
wIncompleteStruct, wCompleteStruct, wByCopy, wByRef,
wInheritable, wGensym, wInject, wRequiresInit, wUnchecked, wUnion, wPacked,
wCppNonPod, wBorrow, wGcSafe, wPartial, wExplain, wPackage, wCodegenDecl}
wCppNonPod, wBorrow, wGcSafe, wPartial, wExplain, wPackage, wCodegenDecl,
wSendable}
fieldPragmas* = declPragmas + {wGuard, wBitsize, wCursor,
wRequiresInit, wNoalias, wAlign} - {wExportNims, wNodecl} # why exclude these?
varPragmas* = declPragmas + {wVolatile, wRegister, wThreadVar,
@@ -1057,6 +1058,12 @@ proc singlePragma(c: PContext, sym: PSym, n: PNode, i: var int,
if sym.typ != nil:
incl(sym.typ.flags, tfThread)
if sym.typ.callConv == ccClosure: sym.typ.callConv = ccNimCall
of wSendable:
noVal(c, it)
if sym != nil and sym.typ != nil:
incl(sym.typ.flags, tfSendable)
else:
invalidPragma(c, it)
of wGcSafe:
noVal(c, it)
if sym != nil:

View File

@@ -113,6 +113,7 @@ type
wInOut = "inout", wByCopy = "bycopy", wByRef = "byref", wOneWay = "oneway",
wBitsize = "bitsize", wImportHidden = "all",
wSendable = "sendable"
TSpecialWords* = set[TSpecialWord]

View File

@@ -6889,7 +6889,7 @@ iterator in which case the overloading resolution takes place:
var x = 4
write(stdout, x) # not ambiguous: uses the module C's x
```
Modules can share their name, however, when trying to qualify a identifier with the module name the compiler will fail with ambiguous identifier error. One can qualify the identifier by aliasing the module.
Modules can share their name, however, when trying to qualify an identifier with the module name the compiler will fail with ambiguous identifier error. One can qualify the identifier by aliasing the module.
```nim
@@ -8581,10 +8581,8 @@ Threads
The `--threads:on`:option: command-line switch is enabled by default. The [typedthreads module](typedthreads.html) module then contains several threading primitives. See [spawn](manual_experimental.html#parallel-amp-spawn) for
further details.
The only way to create a thread is via `spawn` or
`createThread`. The invoked proc must not use `var` parameters nor must
any of its parameters contain a `ref` or `closure` type. This enforces
the *no heap sharing restriction*.
The only ways to create a thread is via `spawn` or `createThread`.
Thread pragma
-------------
@@ -8703,7 +8701,7 @@ model low level lockfree mechanisms:
The `locks` pragma takes a list of lock expressions `locks: [a, b, ...]`
in order to support *multi lock* statements. Why these are essential is
explained in the [lock levels](manual_experimental.md#lock-levels) section
of experimental manual.
of the experimental manual.
### Protecting general locations

View File

@@ -2007,3 +2007,126 @@ The field is within a `case` section of an `object`.
**Note**: The implementation of "strict case objects" is experimental but the concept
is solid and it is expected that eventually this mode becomes the default in later versions.
Threading under ARC/ORC
=======================
ARC/ORC supports a shared heap out of the box. This means that messages can be sent between
threads without copies. However, without copying the data there is an inherent danger of
data races. Data races are prevented at compile-time if it is enforced that
only **isolated** subgraphs can be sent around.
Isolation
---------
The standard library module `isolation.nim` provides a generic type `Isolated[T]` that
captures the important notion that nothing else can reference the graph that is wrapped
inside `Isolated[T]`. It is what a channel implementation should use in order to enforce
the freedom of data races:
```nim
proc send*[T](c: var Channel[T]; msg: sink Isolated[T])
proc recv*[T](c: var Channel[T]): T
## Note: Returns T, not Isolated[T] for convenience.
proc recvIso*[T](c: var Channel[T]): Isolated[T]
## remembers the data is Isolated[T].
```
In order to create an `Isolated` graph one has to use either `isolate` or `unsafeIsolate`.
`unsafeIsolate` is as its name says unsafe and no checking is performed. It should be considered
to be as dangerous as a `cast` operation.
Construction must ensure that the invariant holds, namely that the wrapped `T`
is free of external aliases into it. `isolate` ensures this invariant. It is
inspired by Pony's `recover` construct:
```nim
func isolate(x: sink T): Isolated[T] {.magic: "Isolate".}
```
As you can see, this is a new builtin because the check it performs on `x` is non-trivial:
If `T` does not contain a `ref` or `closure` type, it is isolated. Else the syntactic
structure of `x` is analyzed:
- Literals like `nil`, `4`, `"abc"` are isolated.
- A local variable or a routine parameter is isolated if either of these conditions is true:
1. Its type is annotated with the `.sendable` pragma. Note `Isolated[T]` is annotated as
`.sendable`.
2. Its type contains the potentially dangerous `ref` and `proc {.closure}` types
only in places that are protected via a `.sendable` container.
- An array constructor `[x...]` is isolated if every element `x` is isolated.
- An object constructor `Obj(x...)` is isolated if every element `x` is isolated.
- An `if` or `case` expression is isolated if all possible values the expression
may return are isolated.
- A type conversion `C(x)` is isolated if `x` is isolated. Analogous for `cast`
expressions.
- A function call `f(x...)` is isolated if `f` is `.noSideEffect` and for every argument `x`:
- `x` is isolated **or**
- `f`'s return type cannot *alias* `x`'s type. This is checked via a form of alias analysis as explained in the next paragraph.
Alias analysis
--------------
We start with an important, simple case that must be valid: Sending the result
of `parseJson` to a channel. Since the signature
is `func parseJson(input: string): JsonNode` it is easy to see that JsonNode
can never simply be a view into `input` which is a `string`.
A different case is the identity function `id`, `send id(myJsonGraph)` must be
invalid because we do not know how many aliases into `myJsonGraph` exist
elsewhere.
In general type `A` can alias type `T` if:
- `A` and `T` are the same types.
- `A` is a distinct type derived from `T`.
- `A` is a field inside `T` if `T` is a final object type.
- `T` is an inheritable object type. (An inherited type could always contain
a `field: A`).
- `T` is a closure type. Reason: `T`'s environment can contain a field of
type `A`.
- `A` is the element type of `T` if `T` is an array, sequence or pointer type.
Sendable pragma
---------------
A container type can be marked as `.sendable`. `.sendable` declares that the type
encapsulates a `ref` type effectively so that a variable of this container type
can be used in an `isolate` context:
```nim
type
Isolated*[T] {.sendable.} = object ## Isolated data can only be moved, not copied.
value: T
proc `=copy`*[T](dest: var Isolated[T]; src: Isolated[T]) {.error.}
proc `=sink`*[T](dest: var Isolated[T]; src: Isolated[T]) {.inline.} =
# delegate to value's sink operation
`=sink`(dest.value, src.value)
proc `=destroy`*[T](dest: var Isolated[T]) {.inline.} =
# delegate to value's destroy operation
`=destroy`(dest.value)
```
The `.sendable` pragma itself is an experimenal, unchecked, unsafe annotation. It is
currently only used by `Isolated[T]`.

View File

@@ -15,7 +15,7 @@
##
type
Isolated*[T] = object ## Isolated data can only be moved, not copied.
Isolated*[T] {.sendable.} = object ## Isolated data can only be moved, not copied.
value: T
proc `=copy`*[T](dest: var Isolated[T]; src: Isolated[T]) {.error.}
@@ -38,9 +38,9 @@ func isolate*[T](value: sink T): Isolated[T] {.magic: "Isolate".} =
func unsafeIsolate*[T](value: sink T): Isolated[T] =
## Creates an isolated subgraph from the expression `value`.
##
##
## .. warning:: The proc doesn't check whether `value` is isolated.
##
##
Isolated[T](value: value)
func extract*[T](src: var Isolated[T]): T =

View File

@@ -0,0 +1,67 @@
discard """
cmd: "nim $target --threads:on $options $file"
action: "compile"
"""
import std / [os, locks, atomics, isolation]
type
MyList {.acyclic.} = ref object
data: string
next: Isolated[MyList]
template withMyLock*(a: Lock, body: untyped) =
acquire(a)
{.gcsafe.}:
try:
body
finally:
release(a)
var head: Isolated[MyList]
var headL: Lock
var shouldStop: Atomic[bool]
initLock headL
proc send(x: sink string) =
withMyLock headL:
head = isolate MyList(data: x, next: move head)
proc worker() {.thread.} =
var workItem = MyList(nil)
var echoed = 0
while true:
withMyLock headL:
var h = extract head
if h != nil:
workItem = h
# workitem is now isolated:
head = move h.next
else:
workItem = nil
# workItem is isolated, so we can access it outside
# the lock:
if workItem.isNil:
if shouldStop.load:
break
else:
# give producer time to breath:
os.sleep 30
else:
if echoed < 100:
echo workItem.data
inc echoed
var thr: Thread[void]
createThread(thr, worker)
send "abc"
send "def"
for i in 0 ..< 10_000:
send "xzy"
send "zzz"
shouldStop.store true
joinThread(thr)