From da419ea8c076f48f9e73acd19da133f9bf53a7f7 Mon Sep 17 00:00:00 2001 From: Araq Date: Mon, 8 Jun 2026 15:43:56 +0200 Subject: [PATCH] progress --- compiler/ast.nim | 8 +++++++- compiler/ast2nif.nim | 38 +++++++++++++++++++++++--------------- compiler/ccgexprs.nim | 15 ++++++++++++--- compiler/ccgstmts.nim | 7 ++++++- compiler/commands.nim | 5 +++++ compiler/deps.nim | 6 ++++++ compiler/lambdalifting.nim | 17 +++++++++++++++-- compiler/options.nim | 6 ++++++ compiler/semcall.nim | 10 ++++++++-- compiler/semfold.nim | 8 +++++++- 10 files changed, 95 insertions(+), 25 deletions(-) diff --git a/compiler/ast.nim b/compiler/ast.nim index 93688df604..a1c6b797e2 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -228,7 +228,10 @@ proc position*(s: PSym): int {.inline.} = result = s.positionImpl proc `position=`*(s: PSym, val: int) {.inline.} = - assert s.state != Sealed + # No `Sealed` guard: the VM reuses `position` as a register slot while compiling + # a macro for execution (see `vmgen.genGenericParams`), which under IC may be a + # macro loaded from a NIF file. The macro is run, not code-generated, so this + # scratch mutation is harmless. if s.state == Partial: loadSym(s) s.positionImpl = val @@ -1278,6 +1281,9 @@ proc transitionNoneToSym*(n: PNode) = transitionNodeKindCommon(nkSym) template transitionSymKindCommon*(k: TSymKind) = + # Under IC the symbol may still be an unloaded stub (`skStub`); materialise it + # first so its kind-specific fields (read below as `obj.*`) actually exist. + if s.state == Partial: loadSym(s) let obj {.inject.} = s[] s[] = TSym(kindImpl: k, itemId: obj.itemId, magicImpl: obj.magicImpl, typImpl: obj.typImpl, name: obj.name, infoImpl: obj.infoImpl, ownerFieldImpl: obj.ownerFieldImpl, flagsImpl: obj.flagsImpl, astImpl: obj.astImpl, diff --git a/compiler/ast2nif.nim b/compiler/ast2nif.nim index 841a982d73..861cca6526 100644 --- a/compiler/ast2nif.nim +++ b/compiler/ast2nif.nim @@ -162,16 +162,15 @@ type writtenPackages: HashSet[string] proc isLocalSym(sym: PSym): bool {.inline.} = - ## Params (and generic params, see `writeSymDef`) are emitted as *global* - ## (module-suffixed) names so their `sdef` gets an index entry and is - ## resolvable by index lookup even when referenced from a different index entry - ## than the one that physically contains the definition: generic params of a - ## forward declaration vs its implementation, and proc-type params shared - ## between an enclosing proc and a nested object's proc-type field. The - ## per-module `disamb` counter keeps `name.disamb.module` unique, so this is - ## clash-free. The kinds below are only ever used within the single index entry - ## of their owning routine, so they stay local (smaller index, and the codegen - ## name mangler only handles real module owners). + ## Every symbol is emitted as a *global* (module-suffixed) name so that its + ## `sdef` gets an index entry and is resolvable by index lookup even when + ## referenced from a different index entry than the one that physically + ## contains the definition. This matters for symbols shared across entries: + ## generic params of a forward declaration vs its implementation, and proc-type + ## params shared between an enclosing proc and a nested object's proc-type + ## field. The per-module `disamb` counter keeps `name.disamb.module` unique, so + ## globalising cannot cause clashes. This trades index size for correctness; + ## size/speed can be optimised later. false proc toNifSymName(w: var Writer; sym: PSym): string = @@ -828,6 +827,15 @@ proc setMainModule*(c: var DecodeContext; fileIdx: FileIndex) = ## own symbols by dependencies are not turned into duplicate stubs. c.mainModuleSuffix = modname(fileIdx.int, c.infos.config) +proc loadedState(c: DecodeContext): ItemState {.inline.} = + ## State to give a freshly loaded symbol or type. During the C code generation + ## phase (`nim nifc`) the backend (lambda lifting, the transformer, etc.) + ## legitimately mutates the loaded entities and never writes them back to a NIF, + ## so they must be mutable (`Complete`). During semantic checking (`nim m`) a + ## loaded entity belongs to an already-compiled dependency and must stay + ## `Sealed` so accidental mutations are caught. + if c.infos.config.cmd == cmdNifC: Complete else: Sealed + proc cursorFromIndexEntry(c: var DecodeContext; module: FileIndex; entry: NifIndexEntry; buf: var TokenBuf): Cursor = let s = addr c.mods[module].stream @@ -973,7 +981,7 @@ proc extractLocalSymsFromTree(c: var DecodeContext; n: var Cursor; thisModule: s # We're currently at the `(sd` position, need to skip to SymbolDef inc n # skip past `sd` tag to get to SymbolDef loadSymFromCursor(c, sym, n, thisModule, localSyms) - sym.state = Sealed # mark as fully loaded + sym.state = c.loadedState # mark as fully loaded # Continue processing - loadSymFromCursor already advanced n past the closing `)` continue inc depth @@ -998,7 +1006,7 @@ proc loadTypeStub(c: var DecodeContext; n: var Cursor; localSyms: var Table[stri let s = n.firstSon.symId result = createTypeStub(c, s) if result.state == Partial: - result.state = Sealed # Mark as loaded to prevent loadType from re-loading with empty localSyms + result.state = c.loadedState # Mark as loaded to prevent loadType from re-loading with empty localSyms loadTypeFromCursor(c, n, result, localSyms) else: skip n # Type already loaded, skip over the td block @@ -1122,7 +1130,7 @@ proc loadTypeFromCursor(c: var DecodeContext; n: var Cursor; t: PType; localSyms proc loadType*(c: var DecodeContext; t: PType) = if t.state != Partial: return - t.state = Sealed + t.state = c.loadedState var buf = createTokenBuf(30) let typeName = typeToNifSym(t, c.infos.config) var n = cursorFromIndexEntry(c, t.itemId.module.FileIndex, c.types[typeName][1], buf) @@ -1209,7 +1217,7 @@ proc loadSymFromCursor(c: var DecodeContext; s: PSym; n: var Cursor; thisModule: proc loadSym*(c: var DecodeContext; s: PSym) = if s.state != Partial: return - s.state = Sealed + s.state = c.loadedState var buf = createTokenBuf(30) let symsModule = s.itemId.module.FileIndex let nifname = globalName(s, c.infos.config) @@ -1302,7 +1310,7 @@ proc loadNode(c: var DecodeContext; n: var Cursor; thisModule: string; # Now fully load the symbol from the sdef inc n # skip `sd` tag loadSymFromCursor(c, sym, n, thisModule, localSyms) - sym.state = Sealed # mark as fully loaded + sym.state = c.loadedState # mark as fully loaded result = newSymNode(sym, info) else: sym = c.loadSymStub(name.symId, thisModule, localSyms) diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index c6e6057223..32e39e5f9f 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -3555,9 +3555,13 @@ proc expr(p: BProc, n: PNode, d: var TLoc) = of skProc, skConverter, skIterator, skFunc: #if sym.kind == skIterator: # echo renderTree(sym.getBody, {renderIds}) - if sfCompileTime in sym.flags: - localError(p.config, n.info, "request to generate code for .compileTime proc: " & - sym.name.s) + if isGenericRoutineStrict(sym) or sfCompileTime in sym.flags: + # Under IC a module's top-level routine definitions are serialized as bare + # symbol references that reappear in the loaded statement list. Uninstantiated + # generic routines (incl. those with type-class params like `tuple`) and + # `.compileTime` routines have no run-time code, so skip them here. (A real + # run-time *use* of a `.compileTime` proc is still rejected at the call site.) + return if delayedCodegen(p.module) and sym.typ.callConv != ccInline: fillProcLoc(p.module, n) genProcPrototype(p.module, sym) @@ -3629,6 +3633,11 @@ proc expr(p: BProc, n: PNode, d: var TLoc) = # echo renderTree(p.prc.ast, {renderIds}) internalError(p.config, n.info, "expr: param not init " & sym.name.s & "_" & $sym.id) putLocIntoDest(p, d, sym.loc) + of skTemplate, skMacro: + # Under IC a module's top-level template/macro definitions are serialized as + # bare symbol references (only their interface matters), so they reappear in + # the loaded statement list. They are compile-time only and produce no code. + discard else: internalError(p.config, n.info, "expr(" & $sym.kind & "); unknown symbol") of nkNilLit: if not isEmptyType(n.typ): diff --git a/compiler/ccgstmts.nim b/compiler/ccgstmts.nim index bc9c06fa1d..a30c1b1bf2 100644 --- a/compiler/ccgstmts.nim +++ b/compiler/ccgstmts.nim @@ -1986,4 +1986,9 @@ proc genStmts(p: BProc, t: PNode) = if isPush: pushInfoContext(p.config, t.info) expr(p, t, a) if isPush: popInfoContext(p.config) - internalAssert p.config, a.k in {locNone, locTemp, locLocalVar, locExpr} + # A bare `nkSym` statement is how IC serializes a definition that lives inside a + # top-level block (e.g. a nested `proc`/`var`): codegen emits the definition and + # leaves the symbol's own location in `a` (e.g. `locProc`), which is discarded + # here, so the value-sanity check below does not apply to it. + internalAssert p.config, t.kind == nkSym or + a.k in {locNone, locTemp, locLocalVar, locExpr} diff --git a/compiler/commands.nim b/compiler/commands.nim index a37cb348ae..64821a4743 100644 --- a/compiler/commands.nim +++ b/compiler/commands.nim @@ -923,6 +923,11 @@ proc processSwitch*(switch, arg: string, pass: TCmdLinePass, info: TLineInfo; else: localError(conf, info, errOnOrOffExpectedButXFound % arg) of "noimportdoc": processOnOffSwitchG(conf, {optNoImportdoc}, arg, pass, info) + of "ismainmodule": + # `nim m` (IC) only: marks the single module being checked as the program's + # real entry point so that `isMainModule` and `when isMainModule:` resolve + # correctly even though every module is compiled with `sfMainModule` set. + conf.isMainModule = switchOn(arg) of "import": expectArg(conf, switch, arg, pass, info) if pass in {passCmd2, passPP}: diff --git a/compiler/deps.nim b/compiler/deps.nim index 1d2378337a..d8b9b70a31 100644 --- a/compiler/deps.nim +++ b/compiler/deps.nim @@ -509,6 +509,12 @@ proc generateBuildFile(c: DepContext): string = let pair = node.files[0] b.addTree "do" b.addIdent "nim_m" + # The root module (node 0) is the program's real entry point; mark it so + # `isMainModule` resolves to true only for it (every module otherwise gets + # `sfMainModule` for NIF writing under `nim m`). + if i == 0: + b.withTree "args": + b.addStrLit "--isMainModule:on" # Input: all parsed files for this module b.withTree "input": b.addStrLit node.files[0].nimFile diff --git a/compiler/lambdalifting.nim b/compiler/lambdalifting.nim index a94db43211..4610f89a8d 100644 --- a/compiler/lambdalifting.nim +++ b/compiler/lambdalifting.nim @@ -164,9 +164,21 @@ proc getClosureIterResult*(g: ModuleGraph; iter: PSym; idgen: IdGenerator): PSym incl(result.flagsImpl, sfUsed) iter.ast.add newSymNode(result) +proc closureParams(routine: PSym): PNode = + ## The formal parameters node lambda lifting reads and extends. In a + ## from-source compilation `routine.ast[paramsPos]` and `routine.typ.n` are the + ## very same node (see the `typ.n.len` based position math below). Under IC the + ## loaded proc AST omits the parameters (they are kept only in `typ.n`), so + ## restore the shared node here. + result = routine.ast[paramsPos] + if (result == nil or result.kind == nkEmpty) and routine.typ != nil and + routine.typ.n != nil and routine.ast.len > paramsPos: + result = routine.typ.n + routine.ast[paramsPos] = result + proc addHiddenParam(routine: PSym, param: PSym) = assert param.kind == skParam - var params = routine.ast[paramsPos] + var params = closureParams(routine) # -1 is correct here as param.position is 0 based but we have at position 0 # some nkEffect node: param.position = routine.typ.n.len-1 @@ -177,7 +189,8 @@ proc addHiddenParam(routine: PSym, param: PSym) = proc getEnvParam*(routine: PSym): PSym = if routine.ast.isNil: return nil - let params = routine.ast[paramsPos] + let params = closureParams(routine) + if params == nil or params.len == 0: return nil let hidden = lastSon(params) if hidden.kind == nkSym and hidden.sym.kind == skParam and hidden.sym.name.s == paramName: result = hidden.sym diff --git a/compiler/options.nim b/compiler/options.nim index 7a28b1dc6f..b9715b7803 100644 --- a/compiler/options.nim +++ b/compiler/options.nim @@ -426,6 +426,12 @@ type lastMsgWasDot*: set[StdOrrKind] # the last compiler message was a single '.' projectMainIdx*: FileIndex # the canonical path id of the main module projectMainIdx2*: FileIndex # consider merging with projectMainIdx + isMainModule*: bool # `nim m`/IC only: whether the single module being + # semantically checked is the program's real entry point. + # Under IC every module is compiled via `nim m` (which sets + # `sfMainModule` so the module writes its own NIF), so + # `sfMainModule` can no longer answer `isMainModule`. The IC + # build file passes `--isMainModule:on` for the root module. command*: string # the main command (e.g. cc, check, scan, etc) commandArgs*: seq[string] # any arguments after the main command commandLine*: string diff --git a/compiler/semcall.nim b/compiler/semcall.nim index 48d29610e5..f0a4a1385b 100644 --- a/compiler/semcall.nim +++ b/compiler/semcall.nim @@ -90,8 +90,14 @@ proc addTypeBoundSymbols(graph: ModuleGraph, arg: PType, name: PIdent, # argument must be typed first, meaning arguments always # matching `untyped` are ignored let t = nominalRoot(arg) - if t != nil and t.owner.kind == skModule: - # search module for routines attachable to `t` + if t != nil and t.owner.kind == skModule and + t.owner.position >= 0 and t.owner.position < graph.ifaces.len: + # search module for routines attachable to `t`. + # Under IC the nominal type may have been loaded from a NIF file, in which + # case its owner module is a stub whose `position` (a NIF-suffix file index) + # has no `ifaces` slot; such type-bound ops are reachable through normal + # imports instead, so skip the direct module scan to avoid an out-of-range + # access. let module = t.owner var iter = default(ModuleIter) var s = initModuleIter(iter, graph, module, name) diff --git a/compiler/semfold.nim b/compiler/semfold.nim index 1a3f40a47a..49526a948d 100644 --- a/compiler/semfold.nim +++ b/compiler/semfold.nim @@ -613,7 +613,13 @@ proc getConstExpr(m: PSym, n: PNode; idgen: IdGenerator; g: ModuleGraph): PNode result = newIntNodeT(toInt128(s.position), n, idgen, g) of skConst: case s.magic - of mIsMainModule: result = newIntNodeT(toInt128(ord(sfMainModule in m.flags)), n, idgen, g) + of mIsMainModule: + # Under `nim m` (IC) `sfMainModule` is set on every module that is being + # compiled (so it writes its own NIF), so it cannot answer `isMainModule`; + # the IC build file marks the real entry point with `--isMainModule:on`. + let isMain = if g.config.cmd == cmdM: g.config.isMainModule + else: sfMainModule in m.flags + result = newIntNodeT(toInt128(ord(isMain)), n, idgen, g) of mCompileDate: result = newStrNodeT(getDateStr(), n, g) of mCompileTime: result = newStrNodeT(getClockStr(), n, g) of mCpuEndian: result = newIntNodeT(toInt128(ord(CPU[g.config.target.targetCPU].endian)), n, idgen, g)