From be29bcd402287f24ae4dc6d13c48011ce6218359 Mon Sep 17 00:00:00 2001 From: Zoom Date: Wed, 1 Apr 2026 23:01:55 +0400 Subject: [PATCH] Fix `iterable` resolution, prefer iterator overloads (#25679) This fixes type resolution for `iterable[T]`. I want to proceed with RFC [#562](https://github.com/nim-lang/RFCs/issues/562) and this is the main blocker for composability. Fixes #22098 and, arguably, #19206 ```nim import std/strutils template collect[T](it: iterable[T]): seq[T] = block: var res: seq[T] = @[] for x in it: res.add x res const text = "a b c d" let words = text.split.collect() doAssert words == @[ "a", "b", "c", "d" ] ``` In cases like `strutils.split`, where both proc and iterator overload exists, the compiler resolves to the `func` overload causing a type mismatch. The old mode resolved `text.split` to `seq[string]` before the surrounding `iterable[T]` requirement was applied, so the argument no longer matched this template. It should be noted that, compared to older sequtils templates, composable chains based on `iterable[T]` require an iterator-producing expression, e.g. `"foo".items.iterableTmpl()` rather than just `"foo".iterableTmpl()`. This is actually desirable: it keeps the iteration boundary explicit and makes iterable-driven templates intentionally not directly interchangeable with older untyped/loosely-typed templates like those in `sequtils`, whose internal iterator setup we have zero control over (e.g. hard-coding adapters like `items`). Also, I noticed in `semstmts` that anonymous iterators are always `closure`, which is not that surprising if you think about it, but still I added a paragraph to the manual. Regarding implementation: From what I gathered, the root cause is that `semOpAux` eagerly pre-types all arguments with plain flags before overload resolution begins, so by the time `prepareOperand` processes `split` against the `iterable[T]`, the wrong overload has already won. The fix touches a few places: - `prepareOperand` in `sigmatch.nim`: When `formal.kind == tyIterable` and the argument was already typed as something else, it's re-semchecked with the `efPreferIteratorForIterable` flag. The recheck is limited to direct calls (`a[0].kind in {nkIdent, nkAccQuoted, nkSym, nkOpenSym}`) to avoid recursing through `semIndirectOp`/`semOpAux` again. - `iteratorPreference` field `TCandidate`, checked before `genericMatches` in `cmpCandidates`, gives the iterator overload a win without touching the existing iterator heuristic used by `for` loops. **Limitations:** The implementation is still flag-driven rather than purely formal-driven, so the behaviour is a bit too broad `efWantIterable` can cause iterator results to be wrapped as `tyIterable` in iterable-admitting contexts, not only when `iterable[T]` match is being processed. `iterable[T]` still does not accept closure iterator values such as`iterator(): T {.closure.}`. It only matches the compiler's internal `tyIterable`, not arbitrary iterator-typed values. The existing iterator-preference heuristic is still in place, because when I tried to remove it, some loosely-related regressions happened. In particular, ordinary iterator-admitting contexts and iterator chains still rely on early iterator preference during semchecking, before the compiler has enough surrounding context to distinguish between value/iterator producing overloads. Full heuristic removal would require a broader refactor of dot-chain/intermediate-expression semchecking, which is just too much for me ATM. This PR narrows only the tyIterable-specific cases. **Future work:** Rework overload resolution to preserve additional information of matching iterator overloads for calls up to the point where the iterator-requiring context is established, to avoid re-sem in `prepareOperand`. Currently there's no good channel to store that information. Nodes can get rewritten, TCandidate doesn't live long enough, storing in Context or some side-table raises the question how to properly key that info. --- compiler/semcall.nim | 10 +++++-- compiler/semdata.nim | 13 ++++++++- compiler/semexprs.nim | 7 +++-- compiler/semstmts.nim | 27 +++++++++++------- compiler/sigmatch.nim | 26 +++++++++++++++-- doc/manual.md | 44 ++++++++++++++++------------- tests/iter/tinlineitervalue.nim | 6 ++++ tests/iter/titerablereso.nim | 50 +++++++++++++++++++++++++++++++++ 8 files changed, 144 insertions(+), 39 deletions(-) create mode 100644 tests/iter/tinlineitervalue.nim create mode 100644 tests/iter/titerablereso.nim diff --git a/compiler/semcall.nim b/compiler/semcall.nim index 29d19875d4..986b847fd2 100644 --- a/compiler/semcall.nim +++ b/compiler/semcall.nim @@ -160,9 +160,13 @@ proc pickBestCandidate(c: PContext, headSymbol: PNode, addTypeBoundSymbols(c.graph, arg.typ, name, filter, symMarker, syms) if z.state == csMatch: - # little hack so that iterators are preferred over everything else: + # Iterator preference is heuristic in iterator-admitting contexts. + # The dedicated iterable path uses `iteratorPreference`, other + # context use exact-match bump if sym.kind == skIterator: - if not (efWantIterator notin flags and efWantIterable in flags): + if efPreferIteratorForIterable in flags: + inc(z.iteratorPreference) + elif not (efWantIterator notin flags and efWantIterable in flags): inc(z.exactMatches, 200) else: dec(z.exactMatches, 200) @@ -671,7 +675,7 @@ proc bracketNotFoundError(c: PContext; n: PNode; flags: TExprFlags) = # copied from semOverloadedCallAnalyzeEffects, might be overkill: const baseFilter = {skProc, skFunc, skMethod, skConverter, skMacro, skTemplate} let filter = - if flags*{efInTypeof, efWantIterator, efWantIterable} != {}: + if flags*{efInTypeof, efWantIterator, efWantIterable, efPreferIteratorForIterable} != {}: baseFilter + {skIterator} else: baseFilter # this will add the errors: diff --git a/compiler/semdata.nim b/compiler/semdata.nim index a3aae559fd..0fc000051c 100644 --- a/compiler/semdata.nim +++ b/compiler/semdata.nim @@ -54,7 +54,18 @@ type inst*: PInstantiation TExprFlag* = enum - efLValue, efWantIterator, efWantIterable, efInTypeof, + efLValue, + # The expression is used as an assignable location. + efWantIterator, + # Admit iterator candidates and prefer them during overload resolution. + efWantIterable, + # Admit iterator candidates for expressions that may feed iterable-style + # chaining. + efPreferIteratorForIterable, + # Prefer iterator candidates for `iterable[T]` matching and wrap a + # successful iterator call as `tyIterable`. + efInTypeof, + # The expression is being semchecked under `typeof`. efNeedStatic, # Use this in contexts where a static value is mandatory efPreferStatic, diff --git a/compiler/semexprs.nim b/compiler/semexprs.nim index 2763adc074..96a8415807 100644 --- a/compiler/semexprs.nim +++ b/compiler/semexprs.nim @@ -979,7 +979,7 @@ proc semStaticExpr(c: PContext, n: PNode; expectedType: PType = nil): PNode = proc semOverloadedCallAnalyseEffects(c: PContext, n: PNode, nOrig: PNode, flags: TExprFlags; expectedType: PType = nil): PNode = - if flags*{efInTypeof, efWantIterator, efWantIterable} != {}: + if flags*{efInTypeof, efWantIterator, efWantIterable, efPreferIteratorForIterable} != {}: # consider: 'for x in pReturningArray()' --> we don't want the restriction # to 'skIterator' anymore; skIterator is preferred in sigmatch already # for typeof support. @@ -1006,7 +1006,8 @@ proc semOverloadedCallAnalyseEffects(c: PContext, n: PNode, nOrig: PNode, # See bug #2051: result[0] = newSymNode(errorSym(c, n)) elif callee.kind == skIterator: - if efWantIterable in flags: + if result.typ.kind != tyIterable and + flags * {efWantIterable, efPreferIteratorForIterable} != {}: let typ = newTypeS(tyIterable, c) rawAddSon(typ, result.typ) result.typ = typ @@ -1525,7 +1526,7 @@ proc builtinFieldAccess(c: PContext; n: PNode; flags: var TExprFlags): PNode = return # extra flags since LHS may become a call operand: - n[0] = semExprWithType(c, n[0], flags+{efDetermineType, efWantIterable, efAllowSymChoice}) + n[0] = semExprWithType(c, n[0], flags + {efDetermineType, efWantIterable, efAllowSymChoice}) #restoreOldStyleType(n[0]) var i = considerQuotedIdent(c, n[1], n) var ty = n[0].typ diff --git a/compiler/semstmts.nim b/compiler/semstmts.nim index 285b84cfc7..398707bd12 100644 --- a/compiler/semstmts.nim +++ b/compiler/semstmts.nim @@ -1096,7 +1096,12 @@ proc symForVar(c: PContext, n: PNode): PSym = proc semForVars(c: PContext, n: PNode; flags: TExprFlags): PNode = result = n let iterBase = n[^2].typ - var iter = skipTypes(iterBase, {tyGenericInst, tyAlias, tySink, tyOwned}) + let iterType = + if iterBase.kind == tyIterable: + iterBase.skipModifier + else: + skipTypes(iterBase, {tyAlias, tySink, tyOwned}) + var iter = iterType var iterAfterVarLent = iter.skipTypes({tyGenericInst, tyAlias, tyLent, tyVar}) # n.len == 3 means that there is one for loop variable # and thus no tuple unpacking: @@ -1129,10 +1134,9 @@ proc semForVars(c: PContext, n: PNode; flags: TExprFlags): PNode = else: var v = symForVar(c, n[0]) if getCurrOwner(c).kind == skModule: incl(v, sfGlobal) - # BUGFIX: don't use `iter` here as that would strip away - # the ``tyGenericInst``! See ``tests/compile/tgeneric.nim`` - # for an example: - v.typ = iterBase + # Use `iterType` here: it removes outer `tyIterable` / alias-like wrappers + # from the loop source, but still preserves `tyGenericInst` for the loop var. + v.typ = iterType n[0] = newSymNode(v) if sfGenSym notin v.flags and not isDiscardUnderscore(v): addDecl(c, v) elif v.owner == nil: setOwner(v, getCurrOwner(c)) @@ -1196,14 +1200,14 @@ proc semForVars(c: PContext, n: PNode; flags: TExprFlags): PNode = c.p.breakInLoop = oldBreakInLoop dec(c.p.nestedLoopCounter) -proc implicitIterator(c: PContext, it: string, arg: PNode): PNode = +proc implicitIterator(c: PContext, it: string, arg: PNode, flags: TExprFlags): PNode = result = newNodeI(nkCall, arg.info) result.add(newIdentNode(getIdent(c.cache, it), arg.info)) if arg.typ != nil and arg.typ.kind in {tyVar, tyLent}: result.add newDeref(arg) else: result.add arg - result = semExprNoDeref(c, result, {efWantIterator}) + result = semExprNoDeref(c, result, flags + {efWantIterator}) proc isTrivalStmtExpr(n: PNode): bool = for i in 0..