From 4058801607b37fa707eb1b41d129ce74557eccac Mon Sep 17 00:00:00 2001 From: Andreas Rumpf Date: Tue, 29 Sep 2020 23:42:38 +0200 Subject: [PATCH] spec for view types (#15424) * spec for view types * spec additions * refactoring; there are two different kinds of views * refactorings and spec additions * enforce that view types are initialized * enforce borrowing from the first formal parameter * enforce lifetimes for borrowing of locals * typo in the manual * clarify in the implementation what a borrow operation really is --- compiler/ast.nim | 3 - compiler/semexprs.nim | 6 +- compiler/sempass2.nim | 4 +- compiler/typeallowed.nim | 75 +++++-- compiler/varpartitions.nim | 196 ++++++++++++++++-- doc/manual.rst | 8 +- doc/manual_experimental.rst | 190 ++++++++++++++++- tests/views/tcannot_borrow.nim | 13 +- .../tmust_borrow_from_first_parameter.nim | 9 + tests/views/tsplit_into_seq.nim | 5 +- tests/views/tviews1.nim | 3 +- 11 files changed, 446 insertions(+), 66 deletions(-) create mode 100644 tests/views/tmust_borrow_from_first_parameter.nim diff --git a/compiler/ast.nim b/compiler/ast.nim index 6bcc6845d2..8adbfa15cb 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -1403,9 +1403,6 @@ proc copyType*(t: PType, owner: PSym, keepId: bool): PType = proc exactReplica*(t: PType): PType = copyType(t, t.owner, true) -template requiresInit*(t: PType): bool = - t.flags * {tfRequiresInit, tfNotNil} != {} - proc copySym*(s: PSym): PSym = result = newSym(s.kind, s.name, s.owner, s.info, s.options) #result.ast = nil # BUGFIX; was: s.ast which made problems diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim index e26ea42dd4..cb852c68ae 100644 --- a/compiler/semexprs.nim +++ b/compiler/semexprs.nim @@ -1601,7 +1601,7 @@ proc takeImplicitAddr(c: PContext, n: PNode; isLent: bool): PNode = proc asgnToResultVar(c: PContext, n, le, ri: PNode) {.inline.} = if le.kind == nkHiddenDeref: var x = le[0] - if (x.typ.kind in {tyVar, tyLent} or isViewType(x.typ)) and x.kind == nkSym and x.sym.kind == skResult: + if (x.typ.kind in {tyVar, tyLent} or classifyViewType(x.typ) != noView) and x.kind == nkSym and x.sym.kind == skResult: n[0] = x # 'result[]' --> 'result' n[1] = takeImplicitAddr(c, ri, x.typ.kind == tyLent) x.typ.flags.incl tfVarIsPtr @@ -1712,8 +1712,8 @@ proc semAsgn(c: PContext, n: PNode; mode=asgnNormal): PNode = if le == nil: localError(c.config, a.info, "expression has no type") elif (skipTypes(le, {tyGenericInst, tyAlias, tySink}).kind notin {tyVar} and - isAssignable(c, a) in {arNone, arLentValue}) or - skipTypes(le, abstractVar).kind in {tyOpenArray, tyVarargs}: + isAssignable(c, a) in {arNone, arLentValue}) or ( + skipTypes(le, abstractVar).kind in {tyOpenArray, tyVarargs} and views notin c.features): # Direct assignment to a discriminant is allowed! localError(c.config, a.info, errXCannotBeAssignedTo % renderTree(a, {renderNoComments})) diff --git a/compiler/sempass2.nim b/compiler/sempass2.nim index 797aff7f71..d591da8550 100644 --- a/compiler/sempass2.nim +++ b/compiler/sempass2.nim @@ -10,7 +10,7 @@ import intsets, ast, astalgo, msgs, renderer, magicsys, types, idents, trees, wordrecg, strutils, options, guards, lineinfos, semfold, semdata, - modulegraphs, varpartitions + modulegraphs, varpartitions, typeallowed when defined(useDfa): import dfa @@ -1244,7 +1244,7 @@ proc trackProc*(c: PContext; s: PSym, body: PNode) = var mutationInfo = MutationInfo() if {strictFuncs, views} * c.features != {}: - var partitions = computeGraphPartitions(s, body) + var partitions = computeGraphPartitions(s, body, g.config) if not t.hasSideEffect and t.hasDangerousAssign: t.hasSideEffect = varpartitions.hasSideEffect(partitions, mutationInfo) if views in c.features: diff --git a/compiler/typeallowed.nim b/compiler/typeallowed.nim index 651a21ade6..aabaa46dd8 100644 --- a/compiler/typeallowed.nim +++ b/compiler/typeallowed.nim @@ -187,42 +187,77 @@ proc typeAllowed*(t: PType, kind: TSymKind; c: PContext; flags: TTypeAllowedFlag var marker = initIntSet() result = typeAllowedAux(marker, t, kind, c, flags) -proc isViewTypeAux(marker: var IntSet, t: PType): bool +type + ViewTypeKind* = enum + noView, immutableView, mutableView -proc isViewTypeNode(marker: var IntSet, n: PNode): bool = +proc combine(dest: var ViewTypeKind, b: ViewTypeKind) {.inline.} = + case dest + of noView, mutableView: + dest = b + of immutableView: + if b == mutableView: dest = b + +proc classifyViewTypeAux(marker: var IntSet, t: PType): ViewTypeKind + +proc classifyViewTypeNode(marker: var IntSet, n: PNode): ViewTypeKind = case n.kind of nkSym: - result = isViewTypeAux(marker, n.typ) + result = classifyViewTypeAux(marker, n.typ) of nkOfBranch: - result = isViewTypeNode(marker, n.lastSon) + result = classifyViewTypeNode(marker, n.lastSon) else: + result = noView for child in n: - result = isViewTypeNode(marker, child) - if result: break + result.combine classifyViewTypeNode(marker, child) + if result == mutableView: break -proc isViewTypeAux(marker: var IntSet, t: PType): bool = - if containsOrIncl(marker, t.id): return false +proc classifyViewTypeAux(marker: var IntSet, t: PType): ViewTypeKind = + if containsOrIncl(marker, t.id): return noView case t.kind - of tyVar, tyLent, tyVarargs, tyOpenArray: - result = true + of tyVar: + result = mutableView + of tyLent, tyVarargs, tyOpenArray: + result = immutableView of tyGenericInst, tyDistinct, tyAlias, tyInferred, tySink, tyOwned, - tyUncheckedArray, tySequence, tyArray, tyRef, tyStatic, tyFromExpr: - result = isViewTypeAux(marker, lastSon(t)) + tyUncheckedArray, tySequence, tyArray, tyRef, tyStatic: + result = classifyViewTypeAux(marker, lastSon(t)) + of tyFromExpr: + if t.len > 0: + result = classifyViewTypeAux(marker, lastSon(t)) + else: + result = noView of tyTuple: + result = noView for i in 0.. 1: + if (n.typ != nil and classifyViewType(n.typ) != noView) or getMagic(n) == mSlice: + n = n[1] + else: + break + else: + break + else: + break + # borrowFromConstExpr(n) is correct here because we need 'node' + # stripped off the path suffixes: + if result == nil and borrowFromConstExpr(n): + result = n + proc allRoots(n: PNode; result: var seq[PSym]; followDotExpr = true) = case n.kind of nkSym: @@ -385,13 +478,57 @@ proc pretendOwnsData(c: var Partitions, s: PSym) = const explainCursors = false +proc borrowFrom(c: var Partitions; dest: PSym; src: PNode) = + const + url = "; see https://nim-lang.github.io/Nim/manual_experimental.html#view-types-algorithm-path-expressions for details" + + let s = pathExpr(src, c.owner) + if s == nil: + localError(c.config, src.info, "cannot borrow from " & $src & ", it is not a path expression; " & url) + elif s.kind == nkSym: + if dest.kind == skResult: + if s.sym.kind != skParam or s.sym.position != 0: + localError(c.config, src.info, "'result' must borrow from the first parameter") + + let vid = variableId(c, dest) + if vid >= 0: + var sourceIdx = variableId(c, s.sym) + if sourceIdx < 0: + sourceIdx = c.s.len + c.s.add VarIndex(con: Connection(kind: isEmptyRoot), sym: s.sym, reassignedTo: 0, + aliveStart: MinTime, aliveEnd: MaxTime) + + c.s[vid].borrowsFrom.add sourceIdx + else: + discard "a valid borrow location that is a deeply constant expression so we have nothing to track" + + +proc borrowingCall(c: var Partitions; destType: PType; n: PNode; i: int) = + let v = pathExpr(n[i], c.owner) + if v.kind == nkSym: + for j in i+1..= 0: + for b in par.s[rid].borrowsFrom: + let sid = root(par, b) + if sid >= 0: + if par.s[sid].con.kind == isRootOf and dangerousMutation(par.graphs[par.s[sid].con.graphIndex], par.s[i]): + cannotBorrow(config, v, par.graphs[par.s[sid].con.graphIndex]) + if par.s[sid].sym.kind != skParam and par.s[sid].aliveEnd < par.s[rid].aliveEnd: + localError(config, v.info, "'" & v.name.s & "' borrows from location '" & par.s[sid].sym.name.s & + "' which does not live long enough") + + #if par.s[rid].con.kind == isRootOf and dangerousMutation(par.graphs[par.s[rid].con.graphIndex], par.s[i]): + # cannotBorrow(config, s, par.graphs[par.s[rid].con.graphIndex]) proc computeCursors*(s: PSym; n: PNode; config: ConfigRef) = - var par = computeGraphPartitions(s, n, true) + var par = computeGraphPartitions(s, n, config, true) for i in 0 ..< par.s.len: let v = addr(par.s[i]) if v.flags * {ownsData, preventCursor} == {} and v.sym.kind notin {skParam, skResult} and diff --git a/doc/manual.rst b/doc/manual.rst index 8c167bf625..11ea6b99cc 100644 --- a/doc/manual.rst +++ b/doc/manual.rst @@ -655,7 +655,7 @@ Precedence level Operators First ================ ======================================================= ================== =============== -Whether an operator is used a prefix operator is also affected by preceding +Whether an operator is used as a prefix operator is also affected by preceding whitespace (this parsing change was introduced with version 0.13.0): .. code-block:: nim @@ -3275,16 +3275,16 @@ programming and are inherently unsafe. .. code-block:: nim cast[int](x) - + The target type of a cast must be a concrete type, for instance, a target type that is a type class (which is non-concrete) would be invalid: .. code-block:: nim type Foo = int or float var x = cast[Foo](1) # Error: cannot cast to a non concrete type: 'Foo' - + Type casts should not be confused with *type conversions,* as mentioned in the -prior section. Unlike type conversions, a type cast cannot change the underlying +prior section. Unlike type conversions, a type cast cannot change the underlying bit pattern of the data being casted (aside from that the size of the target type may differ from the source type). Casting resembles *type punning* in other languages or C++'s ``reinterpret_cast`` and ``bit_cast`` features. diff --git a/doc/manual_experimental.rst b/doc/manual_experimental.rst index 459139f948..abb699d1f8 100644 --- a/doc/manual_experimental.rst +++ b/doc/manual_experimental.rst @@ -1814,18 +1814,49 @@ For example: # an object reachable from 'n' is potentially mutated -The algorithm behind this analysis is currently not documented. +The algorithm behind this analysis is described in +the `view types section <#view-types-algorithm>`_. View types ========== -A view type is a type that contains one of the following types: +**Note**: ``--experimental:views`` is more effective +with ``--experimental:strictFuncs``. + +A view type is a type that is or contains one of the following types: - ``var T`` (mutable view into ``T``) - ``lent T`` (immutable view into ``T``) - ``openArray[T]`` (pair of (pointer to array of ``T``, size)) +For example: + +.. code-block:: nim + + type + View1 = var int + View2 = openArray[byte] + View3 = lent string + View4 = Table[openArray[char], int] + + +Exceptions to this rule are types constructed via ``ptr`` or ``proc``. +For example, the following types are **not** view types: + +.. code-block:: nim + + type + NotView1 = proc (x: openArray[int]) + NotView2 = ptr openArray[char] + NotView3 = ptr array[4, var int] + + +A *mutable* view type is a type that is or contains a ``var T`` type. +An *immutable* view type is a view type that is not a mutable view type. + +A *view* is a symbol (a let, var, const, etc.) that has a view type. + Since version 1.4 Nim allows view types to be used as local variables. This feature needs to be enabled via ``{.experimental: "views".}``. @@ -1860,10 +1891,159 @@ For example: main(@[11, 22, 33]) +A local variable of a view type can borrow from a location +derived from a parameter, another local variable, a global ``const`` or ``let`` +symbol or a thread-local ``var`` or ``let``. -If a view type is used as a return type, the location must borrow from the -first parameter that is passed to the proc. +Let ``p`` the proc that is analysed for the correctness of the borrow operation. + +Let ``source`` be one of: + +- A formal parameter of ``p``. Note that this does not cover parameters of + inner procs. +- The ``result`` symbol of ``p``. +- A local ``var`` or ``let`` or ``const`` of ``p``. Note that this does + not cover locals of inner procs. +- A thread-local ``var`` or ``let``. +- A global ``let`` or ``const``. +- A constant arraq/seq/object/tuple constructor. + + +Path expressions +---------------- + +A location derived from ``source`` is then defined as a path expression that +has ``source`` as the owner. A path expression ``e`` is defined recursively: + +- ``source`` itself is a path expression. +- Container access like ``e[i]`` is a path expression. +- Tuple access ``e[0]`` is a path expression. +- Object field access ``e.field`` is a path expression. +- ``system.toOpenArray(e, ...)`` is a path expression. +- Pointer dereference ``e[]`` is a path expression. +- An address ``addr e``, ``unsafeAddr e`` is a path expression. +- A type conversion ``T(e)`` is a path expression. +- A cast expression ``cast[T](e)`` is a path expression. +- ``f(e, ...)`` is a path expression if ``f``'s return type is a view type. + Because the view can only have been borrowed from ``e``, we then know + that owner of ``f(e, ...)`` is ``e``. + + +If a view type is used as a return type, the location must borrow from a location +that is derived from the first parameter that is passed to the proc. See https://nim-lang.org/docs/manual.html#procedures-var-return-type for details about how this is done for ``var T``. -The algorithm behind this analysis is currently not documented. +A mutable view can borrow from a mutable location, an immutable view can borrow +from both a mutable or an immutable location. + +The *duration* of a borrow is the span of commands beginning from the assignment +to the view and ending with the last usage of the view. + +For the duration of the borrow operation, no mutations to the borrowed locations +may be performed except via the potentially mutable view that borrowed from the +location. The borrowed location is said to be *sealed* during the borrow. + +.. code-block:: nim + + {.experimental: "views".} + + type + Obj = object + field: string + + proc dangerous(s: var seq[Obj]) = + let v: lent Obj = s[0] # seal 's' + s.setLen 0 # prevented at compile-time because 's' is sealed. + echo v.field + + +The scope of the view does not matter: + +.. code-block:: nim + + proc valid(s: var seq[Obj]) = + let v: lent Obj = s[0] # begin of borrow + echo v.field # end of borrow + s.setLen 0 # valid because 'v' isn't used afterwards + + +The analysis requires as much precision about mutations as is reasonably obtainable, +so it is more effective with the experimental `strict funcs <#strict-funcs>`_ +feature. In other words ``--experimental:views`` works better +with ``--experimental:strictFuncs``. + +The analysis is currently control flow insensitive: + +.. code-block:: nim + + proc invalid(s: var seq[Obj]) = + let v: lent Obj = s[0] + if false: + s.setLen 0 + echo v.field + +In this example, the compiler assumes that ``s.setLen 0`` invalidates the +borrow operation of ``v`` even though a human being can easily see that it +will never do that at runtime. + + +Reborrows +--------- + +A view ``v`` can borrow from multiple different locations. However, the borrow +is always the full span of ``v``'s lifetime and every location that is borrowed +from is sealed during ``v``'s lifetime. + + +Algorithm +--------- + +The following section is an outline of the algorithm that the current implementation +uses. The algorithm performs two traversals over the AST of the procedure or global +section of code that uses a view variable. No fixpoint iterations are performed, the +complexity of the analysis is O(N) where N is the number of nodes of the AST. + +The first pass over the AST computes the lifetime of each local variable based on +a notion of an "abstract time", in the implementation it's a simple integer that is +incremented for every visited node. + +In the second pass information about the underlying object "graphs" is computed. +Let ``v`` be a parameter or a local variable. Let ``G(v)`` be the graph +that ``v`` belongs to. A graph is defined by the set of variables that belong +to the graph. Initially for all ``v``: ``G(v) = {v}``. Every variable can only +be part of a single graph. + +Assignments like ``a = b`` "connect" two variables, both variables end up in the +same graph ``{a, b} = G(a) = G(b)``. Unfortunately, the pattern to look for is +much more complex than that and can involve multiple assignment targets +and sources:: + + f(x, y) = g(a, b) + +connects ``x`` and ``y`` to ``a`` and ``b``: ``G(x) = G(y) = G(a) = G(b) = {x, y, a, b}``. +A type based alias analysis rules out some of these combinations, for example +a ``string`` value cannot possibly be connected to a ``seq[int]``. + +A pattern like ``v[] = value`` or ``v.field = value`` marks ``G(v)`` as mutated. +After the second pass a set of disjoint graphs was computed. + +For strict functions it is then enforced that there is no graph that is both mutated +and has an element that is an immutable parameter (that is a parameter that is not +of type ``var T``). + +For borrow checking a different set of checks is performed. Let ``v`` be the view +and ``b`` the location that is borrowed from. + +- The lifetime of ``v`` must not exceed ``b``'s lifetime. Note: The lifetime of + a parameter is the complete proc body. +- If ``v`` is a mutable view and ``v`` is used to actually mutate the + borrowed location, then ``b`` has to be a mutable location. + Note: If it is not actually used for mutation, borrowing a mutable view from an + immutable location is allowed! This allows for many important idioms and will be + justified in an upcoming RFC. +- During ``v``'s lifetime, ``G(b)`` can only be modified by ``v`` (and only if + ``v`` is a mutable view). +- If ``v`` is ``result`` then ``b`` has to be a location derived from the first + formal parameter or from a constant location. +- A view cannot be used for a read or a write access before it was assigned to. diff --git a/tests/views/tcannot_borrow.nim b/tests/views/tcannot_borrow.nim index 699176b046..d1c194b254 100644 --- a/tests/views/tcannot_borrow.nim +++ b/tests/views/tcannot_borrow.nim @@ -1,17 +1,22 @@ discard """ errormsg: "cannot borrow" - nimout: '''tcannot_borrow.nim(16, 7) Error: cannot borrow meh; what it borrows from is potentially mutated -tcannot_borrow.nim(17, 3) the mutation is here -tcannot_borrow.nim(16, 7) is the statement that connected the mutation to the parameter''' - line: 16 + nimout: '''tcannot_borrow.nim(21, 7) Error: cannot borrow meh; what it borrows from is potentially mutated +tcannot_borrow.nim(22, 3) the mutation is here''' + line: 21 """ + {.experimental: "views".} type Foo = object field: string +proc valid(s: var seq[Foo]) = + let v: lent Foo = s[0] # begin of borrow + echo v.field # end of borrow + s.setLen 0 # valid because 'v' isn't used afterwards + proc dangerous(s: var seq[Foo]) = let meh: lent Foo = s[0] s.setLen 0 diff --git a/tests/views/tmust_borrow_from_first_parameter.nim b/tests/views/tmust_borrow_from_first_parameter.nim new file mode 100644 index 0000000000..b2decfb38d --- /dev/null +++ b/tests/views/tmust_borrow_from_first_parameter.nim @@ -0,0 +1,9 @@ +discard """ + errormsg: "'result' must borrow from the first parameter" + line: 9 +""" + +{.experimental: "views".} + +proc p(a, b: openArray[char]): openArray[char] = + result = b diff --git a/tests/views/tsplit_into_seq.nim b/tests/views/tsplit_into_seq.nim index 409451601d..49968f4c20 100644 --- a/tests/views/tsplit_into_seq.nim +++ b/tests/views/tsplit_into_seq.nim @@ -23,9 +23,8 @@ proc split*(s: string, seps: set[char] = Whitespace, while last < len(s) and s[last] notin seps: inc(last) if splits == 0: last = len(s) - {.noSideEffect.}: - result.add toOpenArray(s, first, last-1) - result.add toOpenArray(s, first, last-1) + result.add toOpenArray(s, first, last-1) + result.add toOpenArray(s, first, last-1) if splits == 0: break dec(splits) inc(last) diff --git a/tests/views/tviews1.nim b/tests/views/tviews1.nim index 3ce0bb6d8c..51f17b9d66 100644 --- a/tests/views/tviews1.nim +++ b/tests/views/tviews1.nim @@ -4,6 +4,7 @@ discard """ 33 3 2 +3 3''' targets: "c cpp" """ @@ -22,6 +23,6 @@ proc main(s: seq[int]) = take(x.toOpenArray(0, 1)) let y = x take y - + take x main(@[11, 22, 33])