Fix iterable resolution, prefer iterator overloads (#25679)

This fixes type resolution for `iterable[T]`. I want to proceed with RFC [#562](https://github.com/nim-lang/RFCs/issues/562) and this is the main blocker for composability.

Fixes #22098 and, arguably, #19206.

```nim
import std/strutils

template collect[T](it: iterable[T]): seq[T] =
  block:
    var res: seq[T] = @[]
    for x in it:
      res.add x
    res

const text = "a b c d"
let words = text.split.collect()
doAssert words == @[ "a", "b", "c", "d" ]
```

In cases like `strutils.split`, where both a proc and an iterator overload exist, the compiler resolves to the `func` overload, causing a type mismatch. The old mode resolved `text.split` to `seq[string]` before the surrounding `iterable[T]` requirement was applied, so the argument no longer matched this template.

It should be noted that, compared to older sequtils templates, composable chains based on `iterable[T]` require an iterator-producing expression, e.g. `"foo".items.iterableTmpl()` rather than just `"foo".iterableTmpl()`. This is actually desirable: it keeps the iteration boundary explicit and makes iterable-driven templates intentionally not directly interchangeable with older untyped/loosely-typed templates like those in `sequtils`, whose internal iterator setup we have zero control over (e.g. hard-coding adapters like `items`).

Also, I noticed in `semstmts` that anonymous iterators are always `closure`, which is not that surprising if you think about it, but still I added a paragraph to the manual.

Regarding implementation: From what I gathered, the root cause is that `semOpAux` eagerly pre-types all arguments with plain flags before overload resolution begins, so by the time `prepareOperand` processes `split` against the `iterable[T]`, the wrong overload has already won.

The fix touches a few places:

- `prepareOperand` in `sigmatch.nim`: When `formal.kind == tyIterable` and the argument was already typed as something else, it's re-semchecked with the `efPreferIteratorForIterable` flag.
  The recheck is limited to direct calls (`a[0].kind in {nkIdent, nkAccQuoted, nkSym, nkOpenSym}`) to avoid recursing through `semIndirectOp`/`semOpAux` again.
- `iteratorPreference` field in `TCandidate`, checked before `genericMatches` in `cmpCandidates`, gives the iterator overload a win without touching the existing iterator heuristic used by `for` loops.

**Limitations:**

The implementation is still flag-driven rather than purely formal-driven, so the behaviour is a bit too broad: `efWantIterable` can cause iterator results to be wrapped as `tyIterable` in iterable-admitting contexts, not only when an `iterable[T]` match is being processed.

`iterable[T]` still does not accept closure iterator values such as `iterator(): T {.closure.}`. It only matches the compiler's internal `tyIterable`, not arbitrary iterator-typed values.

The existing iterator-preference heuristic is still in place, because when I tried to remove it, some loosely-related regressions happened. In particular, ordinary iterator-admitting contexts and iterator chains still rely on early iterator preference during semchecking, before the compiler has enough surrounding context to distinguish between value-producing and iterator-producing overloads. Full heuristic removal would require a broader refactor of dot-chain/intermediate-expression semchecking, which is just too much for me at the moment. This PR narrows only the tyIterable-specific cases.

**Future work:**

Rework overload resolution to preserve additional information about matching iterator overloads for calls up to the point where the iterator-requiring context is established, to avoid re-sem in `prepareOperand`. Currently there's no good channel to store that information: nodes can get rewritten, `TCandidate` doesn't live long enough, and storing it in the context or some side-table raises the question of how to properly key that info.
2026-07-11 03:39:31 +00:00 · 2026-04-01 23:01:55 +04:00
parent 9c07bb94c1
commit be29bcd402
8 changed files with 144 additions and 39 deletions
--- a/compiler/semcall.nim
+++ b/compiler/semcall.nim
@@ -160,9 +160,13 @@ proc pickBestCandidate(c: PContext, headSymbol: PNode,
          addTypeBoundSymbols(c.graph, arg.typ, name, filter, symMarker, syms)

      if z.state == csMatch:
-        # little hack so that iterators are preferred over everything else:
+        # Iterator preference is heuristic in iterator-admitting contexts.
+        # The dedicated iterable path uses `iteratorPreference`; other
+        # contexts use the exact-match bump.
        if sym.kind == skIterator:
-          if not (efWantIterator notin flags and efWantIterable in flags):
+          if efPreferIteratorForIterable in flags:
+            inc(z.iteratorPreference)
+          elif not (efWantIterator notin flags and efWantIterable in flags):
            inc(z.exactMatches, 200)
          else:
            dec(z.exactMatches, 200)
@@ -671,7 +675,7 @@ proc bracketNotFoundError(c: PContext; n: PNode; flags: TExprFlags) =
  # copied from semOverloadedCallAnalyzeEffects, might be overkill:
  const baseFilter = {skProc, skFunc, skMethod, skConverter, skMacro, skTemplate}
  let filter =
-    if flags*{efInTypeof, efWantIterator, efWantIterable} != {}:
+    if flags*{efInTypeof, efWantIterator, efWantIterable, efPreferIteratorForIterable} != {}:
      baseFilter + {skIterator}
    else: baseFilter
  # this will add the errors:
--- a/compiler/semdata.nim
+++ b/compiler/semdata.nim
@@ -54,7 +54,18 @@ type
    inst*: PInstantiation

  TExprFlag* = enum
-    efLValue, efWantIterator, efWantIterable, efInTypeof,
+    efLValue,
+      # The expression is used as an assignable location.
+    efWantIterator,
+      # Admit iterator candidates and prefer them during overload resolution.
+    efWantIterable,
+      # Admit iterator candidates for expressions that may feed iterable-style
+      # chaining.
+    efPreferIteratorForIterable,
+      # Prefer iterator candidates for `iterable[T]` matching and wrap a
+      # successful iterator call as `tyIterable`.
+    efInTypeof,
+      # The expression is being semchecked under `typeof`.
    efNeedStatic,
      # Use this in contexts where a static value is mandatory
    efPreferStatic,
--- a/compiler/semexprs.nim
+++ b/compiler/semexprs.nim
@@ -979,7 +979,7 @@ proc semStaticExpr(c: PContext, n: PNode; expectedType: PType = nil): PNode =

 proc semOverloadedCallAnalyseEffects(c: PContext, n: PNode, nOrig: PNode,
                                     flags: TExprFlags; expectedType: PType = nil): PNode =
-  if flags*{efInTypeof, efWantIterator, efWantIterable} != {}:
+  if flags*{efInTypeof, efWantIterator, efWantIterable, efPreferIteratorForIterable} != {}:
    # consider: 'for x in pReturningArray()' --> we don't want the restriction
    # to 'skIterator' anymore; skIterator is preferred in sigmatch already
    # for typeof support.
@@ -1006,7 +1006,8 @@ proc semOverloadedCallAnalyseEffects(c: PContext, n: PNode, nOrig: PNode,
        # See bug #2051:
        result[0] = newSymNode(errorSym(c, n))
      elif callee.kind == skIterator:
-        if efWantIterable in flags:
+        if result.typ.kind != tyIterable and
+            flags * {efWantIterable, efPreferIteratorForIterable} != {}:
          let typ = newTypeS(tyIterable, c)
          rawAddSon(typ, result.typ)
          result.typ = typ
@@ -1525,7 +1526,7 @@ proc builtinFieldAccess(c: PContext; n: PNode; flags: var TExprFlags): PNode =
    return

  # extra flags since LHS may become a call operand:
-  n[0] = semExprWithType(c, n[0], flags+{efDetermineType, efWantIterable, efAllowSymChoice})
+  n[0] = semExprWithType(c, n[0], flags + {efDetermineType, efWantIterable, efAllowSymChoice})
  #restoreOldStyleType(n[0])
  var i = considerQuotedIdent(c, n[1], n)
  var ty = n[0].typ
--- a/compiler/semstmts.nim
+++ b/compiler/semstmts.nim
@@ -1096,7 +1096,12 @@ proc symForVar(c: PContext, n: PNode): PSym =
 proc semForVars(c: PContext, n: PNode; flags: TExprFlags): PNode =
  result = n
  let iterBase = n[^2].typ
-  var iter = skipTypes(iterBase, {tyGenericInst, tyAlias, tySink, tyOwned})
+  let iterType =
+    if iterBase.kind == tyIterable:
+      iterBase.skipModifier
+    else:
+      skipTypes(iterBase, {tyAlias, tySink, tyOwned})
+  var iter = iterType
  var iterAfterVarLent = iter.skipTypes({tyGenericInst, tyAlias, tyLent, tyVar})
  # n.len == 3 means that there is one for loop variable
  # and thus no tuple unpacking:
@@ -1129,10 +1134,9 @@ proc semForVars(c: PContext, n: PNode; flags: TExprFlags): PNode =
      else:
        var v = symForVar(c, n[0])
        if getCurrOwner(c).kind == skModule: incl(v, sfGlobal)
-        # BUGFIX: don't use `iter` here as that would strip away
-        # the ``tyGenericInst``! See ``tests/compile/tgeneric.nim``
-        # for an example:
-        v.typ = iterBase
+        # Use `iterType` here: it removes outer `tyIterable` / alias-like wrappers
+        # from the loop source, but still preserves `tyGenericInst` for the loop var.
+        v.typ = iterType
        n[0] = newSymNode(v)
        if sfGenSym notin v.flags and not isDiscardUnderscore(v): addDecl(c, v)
        elif v.owner == nil: setOwner(v, getCurrOwner(c))
@@ -1196,14 +1200,14 @@ proc semForVars(c: PContext, n: PNode; flags: TExprFlags): PNode =
  c.p.breakInLoop = oldBreakInLoop
  dec(c.p.nestedLoopCounter)

-proc implicitIterator(c: PContext, it: string, arg: PNode): PNode =
+proc implicitIterator(c: PContext, it: string, arg: PNode, flags: TExprFlags): PNode =
  result = newNodeI(nkCall, arg.info)
  result.add(newIdentNode(getIdent(c.cache, it), arg.info))
  if arg.typ != nil and arg.typ.kind in {tyVar, tyLent}:
    result.add newDeref(arg)
  else:
    result.add arg
-  result = semExprNoDeref(c, result, {efWantIterator})
+  result = semExprNoDeref(c, result, flags + {efWantIterator})

 proc isTrivalStmtExpr(n: PNode): bool =
  for i in 0..<n.len-1:
@@ -1289,7 +1293,8 @@ proc semFor(c: PContext, n: PNode; flags: TExprFlags): PNode =
  if result != nil: return result
  openScope(c)
  result = n
-  n[^2] = semExprNoDeref(c, n[^2], {efWantIterator})
+  let iteratorFlags = flags * {efPreferIteratorForIterable}
+  n[^2] = semExprNoDeref(c, n[^2], iteratorFlags + {efWantIterator})
  var call = n[^2]

  if call.kind == nkStmtListExpr and (isTrivalStmtExpr(call) or (call.lastSon.kind in nkCallKinds and call.lastSon[0].sym.kind == skIterator)):
@@ -1309,14 +1314,16 @@ proc semFor(c: PContext, n: PNode; flags: TExprFlags): PNode =
  elif not isCallExpr or call[0].kind != nkSym or
      call[0].sym.kind != skIterator:
    if n.len == 3:
-      n[^2] = implicitIterator(c, "items", n[^2])
+      n[^2] = implicitIterator(c, "items", n[^2], iteratorFlags)
    elif n.len == 4:
-      n[^2] = implicitIterator(c, "pairs", n[^2])
+      n[^2] = implicitIterator(c, "pairs", n[^2], iteratorFlags)
    else:
      localError(c.config, n[^2].info, "iterator within for loop context expected")
    result = semForVars(c, n, flags)
  else:
    result = semForVars(c, n, flags)
+  if n[^2].typ != nil and n[^2].typ.kind == tyIterable:
+    n[^2].typ = n[^2].typ.skipModifier
  # propagate any enforced VoidContext:
  if n[^1].typ == c.enforceVoidContext:
    result.typ = c.enforceVoidContext
--- a/compiler/sigmatch.nim
+++ b/compiler/sigmatch.nim
@@ -46,7 +46,8 @@ type

  TCandidate* = object
    c*: PContext
-    exactMatches*: int       # also misused to prefer iters over procs
+    exactMatches*: int
+    iteratorPreference*: int # prefer iterators in iterator-oriented contexts
    genericMatches: int      # also misused to prefer constraints
    subtypeMatches: int
    intConvMatches: int      # conversions to int are not as expensive
@@ -110,7 +111,8 @@ proc markOwnerModuleAsUsed*(c: PContext; s: PSym)
 proc initCandidateAux(ctx: PContext,
                      callee: PType): TCandidate {.inline.} =
  result = TCandidate(c: ctx, exactMatches: 0, subtypeMatches: 0,
-                      convMatches: 0, intConvMatches: 0, genericMatches: 0,
+                      iteratorPreference: 0, convMatches: 0, intConvMatches: 0,
+                      genericMatches: 0,
                      state: csEmpty, firstMismatch: MismatchInfo(),
                      callee: callee, call: nil, baseTypeMatch: false,
                      genericConverter: false, inheritancePenalty: -1
@@ -393,6 +395,7 @@ proc complexDisambiguation(a, b: PType): int =
 proc writeMatches*(c: TCandidate) =
  echo "Candidate '", c.calleeSym.name.s, "' at ", c.c.config $ c.calleeSym.info
  echo "  exact matches: ", c.exactMatches
+  echo "  iterator preference: ", c.iteratorPreference
  echo "  generic matches: ", c.genericMatches
  echo "  subtype matches: ", c.subtypeMatches
  echo "  intconv matches: ", c.intConvMatches
@@ -411,6 +414,8 @@ proc cmpInheritancePenalty(a, b: int): int =
 proc cmpCandidates*(a, b: TCandidate, isFormal=true): int =
  result = a.exactMatches - b.exactMatches
  if result != 0: return
+  result = a.iteratorPreference - b.iteratorPreference
+  if result != 0: return
  result = a.genericMatches - b.genericMatches
  if result != 0: return
  result = a.subtypeMatches - b.subtypeMatches
@@ -2748,7 +2753,8 @@ proc prepareOperand(c: PContext; formal: PType; a: PNode, newlyTyped: var bool):
    result = a
  elif a.typ.isNil:
    if formal.kind == tyIterable:
-      let flags = {efDetermineType, efAllowStmt, efWantIterator, efWantIterable}
+      let flags = {efDetermineType, efAllowStmt, efWantIterator, efWantIterable,
+                   efPreferIteratorForIterable}
      result = c.semOperand(c, a, flags)
    else:
      # XXX This is unsound! 'formal' can differ from overloaded routine to
@@ -2765,6 +2771,20 @@ proc prepareOperand(c: PContext; formal: PType; a: PNode, newlyTyped: var bool):
    considerGenSyms(c, result)
    if result.kind != nkHiddenDeref and result.typ.kind in {tyVar, tyLent} and c.matchedConcept == nil:
      result = newDeref(result)
+    # Recovery for calls resolved too early as non-iterators.
+    # TODO: retry only skIterator overloads instead of re-semming,
+    # or preserve iterator-candidates info from the earlier semcheck.
+    if formal.kind == tyIterable and result.typ.kind != tyIterable and
+        a.kind in nkCallKinds and a[0].kind in {nkIdent, nkAccQuoted, nkSym, nkOpenSym}:
+      let recheck = copyTree(a)
+      recheck.typ = nil
+      if recheck[0].kind == nkSym and recheck[0].sym != nil:
+        recheck[0] = newIdentNode(recheck[0].sym.name, recheck[0].info)
+      let flags = {efDetermineType, efAllowStmt, efNoUndeclared,
+                   efWantIterator, efWantIterable, efPreferIteratorForIterable}
+      let fresh = c.semOperand(c, recheck, flags)
+      if fresh.typ != nil and fresh.typ.kind == tyIterable:
+        return fresh

 proc prepareOperand(c: PContext; a: PNode, newlyTyped: var bool): PNode =
  if a.typ.isNil:
--- a/doc/manual.md
+++ b/doc/manual.md
@@ -2628,10 +2628,10 @@ Overload resolution
 In a call `p(args)` where `p` may refer to more than one
 candidate, it is said to be a symbol choice. Overload resolution will attempt to
 find the best candidate, thus transforming the symbol choice into a resolved symbol.
-The routine `p` that matches best is selected following a series of trials explained below. 
+The routine `p` that matches best is selected following a series of trials explained below.
 In order: Category matching, Hierarchical Order Comparison, and finally, Complexity Analysis.

-If multiple candidates match equally well after all trials have been tested, the ambiguity 
+If multiple candidates match equally well after all trials have been tested, the ambiguity
 is reported during semantic analysis.

 First Trial: Category matching
@@ -2664,7 +2664,7 @@ resolved symbol.
 For example, if a candidate with one exact match is compared to a candidate with multiple
 generic matches and zero exact matches, the candidate with an exact match will win.

-Below is a pseudocode interpretation of category matching, `count(p, m)` counts the number 
+Below is a pseudocode interpretation of category matching, `count(p, m)` counts the number
 of matches of the matching category `m` for the routine `p`.

 A routine `p` matches better than a routine `q` if the following
@@ -2692,11 +2692,11 @@ type A[T] = object
 ```

 Matching formals for this type include `T`, `object`, `A`, `A[...]` and `A[C]` where `C` is a concrete type, `A[...]`
-is a generic typeclass composition and `T` is an unconstrained generic type variable. This list is in order of 
+is a generic typeclass composition and `T` is an unconstrained generic type variable. This list is in order of
 specificity with respect to `A` as each subsequent category narrows the set of types that are members of their match set.

 In this trial, the formal parameters of candidates are compared in order (1st parameter, 2nd parameter, etc.) to search for
-a candidate that has an unrivaled specificity. If such a formal parameter is found, the candidate it belongs to is chosen 
+a candidate that has an unrivaled specificity. If such a formal parameter is found, the candidate it belongs to is chosen
 as the resolved symbol.

 Third Trial: Complexity Analysis
@@ -2951,13 +2951,13 @@ proc sort*[I: Index; T: Comparable](x: var Indexable[I, T])

 In the above example, `Comparable` and `Indexable` are types that will match any type that
 can can bind each definition declared in the concept body. The special `Self` type defined
-in the concept body refers to the type being matched, also called the "implementation" of 
-the concept. Implementations that match the concept are generic matches, and the concept 
+in the concept body refers to the type being matched, also called the "implementation" of
+the concept. Implementations that match the concept are generic matches, and the concept
 typeclasses themselves work in a similar way to generic type variables in that they are never
 concrete types themselves (even if they have concrete type parameters such as `Indexable[int, int]`)
-and expressions like `typeof(x)` in the body of `proc sort` from the above example will return the 
+and expressions like `typeof(x)` in the body of `proc sort` from the above example will return the
 type of the implementation, not the concept typeclass. Concepts are useful for providing information
-to the compiler in generic contexts, most notably for generic type checking, and as a tool for 
+to the compiler in generic contexts, most notably for generic type checking, and as a tool for
 [Overload resolution]. Generic type checking is forthcoming, so this will only explain overload
 resolution for now.

@@ -2984,7 +2984,7 @@ Concept overload resolution

 When an operand's type is being matched to a concept, the operand's type  is set as the "potential
 implementation". For each definition in the concept body, overload resolution is performed by substituting `Self`
-for the potential implementation to try and find a match for each definition. If this succeeds, the concept 
+for the potential implementation to try and find a match for each definition. If this succeeds, the concept
 matches. Implementations do not need to exactly match the definitions in the concept. For example:

 ```nim
@@ -3008,7 +3008,7 @@ This leads to confusing and impractical behavior in most situations, so the rule
 1. if a concept is being compared with `T` or any type that accepts all other types (`auto`) the concept
 is more specific
 2. if the concept is being compared with another concept the result is deferred to [Concept subset matching]
-3. in any other case the concept is less specific then it's competitor 
+3. in any other case the concept is less specific than its competitor

 Currently, the concept evaluation mechanism evaluates to a successful match on the first acceptable candidate
 for each defined binding. This has a couple of notable effects:
@@ -4610,10 +4610,10 @@ for any type (with some exceptions) by defining a routine with the name `[]`.
  ```nim
  type Foo = object
    data: seq[int]
-  
+
  proc `[]`(foo: Foo, i: int): int =
    result = foo.data[i]
-  
+
  let foo = Foo(data: @[1, 2, 3])
  echo foo[1] # 2
  ```
@@ -4624,12 +4624,12 @@ which has precedence over assigning to the result of `[]`.
  ```nim
  type Foo = object
    data: seq[int]
-  
+
  proc `[]`(foo: Foo, i: int): int =
    result = foo.data[i]
  proc `[]=`(foo: var Foo, i: int, val: int) =
    foo.data[i] = val
-  
+
  var foo = Foo(data: @[1, 2, 3])
  echo foo[1] # 2
  foo[1] = 5
@@ -4861,7 +4861,14 @@ default to being inline, but this may change in future versions of the
 implementation.

 The `iterator` type is always of the calling convention `closure`
-implicitly; the following example shows how to use iterators to implement
+implicitly.
+
+Unlike named iterators, anonymous iterator expressions evaluate
+to the `iterator` type. In practice, this means a named iterator declaration
+without `{.closure.}` defaults to inline, but an expression like `let it =
+iterator(): int = yield 1` produces a callable closure iterator value.
+
+The following example shows how to use iterators to implement
 a `collaborative tasking`:idx: system:

  ```nim
@@ -6401,7 +6408,7 @@ The default for symbols of entity `type`, `var`, `let` and `const`
 is `gensym`. For `proc`, `iterator`, `converter`, `template`,
 `macro`, the default is `inject`, but if a `gensym` symbol with the same name
 is defined in the same syntax-level scope, it will be `gensym` by default.
-This can be overridden by marking the routine as `inject`. 
+This can be overridden by marking the routine as `inject`.

 If the name of the entity is passed as a template parameter, it is an `inject`'ed symbol:

@@ -7242,7 +7249,7 @@ identifier is considered ambiguous, which can be resolved in the following ways:

  write(stdout, x) # error: x is ambiguous
  write(stdout, A.x) # no error: qualifier used
-  
+
  proc bar(a: int): int = a + 1
  assert bar(x) == x + 1 # no error: only A.x of type int matches

@@ -9324,4 +9331,3 @@ It is not valid to pass an lvalue of a supertype to an `out T` parameter:

 However, in the future this could be allowed and provide a better way to write object
 constructors that take inheritance into account.
-
--- a/tests/iter/tinlineitervalue.nim
+++ b/tests/iter/tinlineitervalue.nim
@@ -0,0 +1,6 @@
+discard """
+  action: reject
+  errormsg: "attempting to call routine: 'items'"
+"""
+
+let chars = "abc".items()
--- a/tests/iter/titerablereso.nim
+++ b/tests/iter/titerablereso.nim
@@ -0,0 +1,50 @@
+discard """
+  action: "run"
+"""
+
+import std/[assertions, options, strutils]
+from std/sequtils import toSeq
+
+# block: # TODO: make iterable accept closure iterators?
+#   template mymap[T, U](s: iterable[T], f: proc(x: T): U): untyped =
+#     let res = iterator (): U =
+#       for val in s:
+#         yield f(val)
+#     res
+
+#   proc foo(x: string): string = x & "0"
+
+#   let a = "1\n2\n3\n4".splitLines().mymap(foo).toSeq()
+#   echo a
+#   echo typeof(a)
+
+block splitIterable: # #22098
+  template collect[T](it: iterable[T]): seq[T] =
+    var res: seq[T] = @[]
+    for x in it:
+      res.add x
+    res
+
+  const text = "a b c d"
+  let words = text.split.collect()
+  doAssert words == @["a", "b", "c", "d"]
+
+block optionElements:
+  iterator its(_: int; default: Option[string] = none(string)): Option[string] =
+    yield some("x")
+
+  var fromCall = none(string)
+  for x in its(0):
+    fromCall = x
+  doAssert fromCall == some("x")
+
+  var fromDot = none(string)
+  for x in 0.its:
+    fromDot = x
+  doAssert fromDot == some("x")
+
+block closureIteratorCallsStayCallable:
+  let next = iterator (): string =
+    yield "x"
+
+  doAssert next() == "x"