From 89e8a91db45640b2d1f23f9ad0f8856bdaf3784a Mon Sep 17 00:00:00 2001 From: Araq Date: Wed, 10 Jun 2026 12:04:42 +0200 Subject: [PATCH] IC: progress --- compiler/ast.nim | 24 ++++++ compiler/ast2nif.nim | 120 ++++++++++++++++++++++------ compiler/ccgcalls.nim | 10 +++ compiler/cgen.nim | 6 +- compiler/deps.nim | 72 +++++++++++++++-- compiler/ic/replayer.nim | 34 ++++++++ compiler/liftdestructors.nim | 5 ++ compiler/mangleutils.nim | 30 ++++--- compiler/modulegraphs.nim | 147 ++++++++++++++++++++++++++--------- compiler/msgs.nim | 3 + compiler/nifbackend.nim | 7 +- compiler/nim.nim | 9 ++- compiler/semcall.nim | 9 +++ compiler/semfold.nim | 5 ++ compiler/seminst.nim | 10 ++- compiler/semtypes.nim | 18 +++-- compiler/sighashes.nim | 61 +++++++++++++++ compiler/typekeys.nim | 38 +++++++-- compiler/vm.nim | 2 +- compiler/vmgen.nim | 2 +- 20 files changed, 517 insertions(+), 95 deletions(-) diff --git a/compiler/ast.nim b/compiler/ast.nim index d9cfe11300..a4d9aa1d9c 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -517,6 +517,25 @@ proc idGeneratorFromModule*(m: PSym): IdGenerator = result = IdGenerator(module: m.itemId.module, symId: m.itemId.item, typeId: 0, disambTable: initCountTable[PIdent]()) result.disambTable.inc m.name +const BackendIdOffset* = 10_000_000'i32 + ## Base for symbol/type ids minted by `idGeneratorForBackend`; ids at or + ## above this offset identify backend-minted (IC codegen) entities. + +proc idGeneratorForBackend*(m: PSym): IdGenerator = + ## Like `idGeneratorFromModule`, but for IC codegen (`nim nifc`): symbols and + ## types minted fresh during codegen (transf labels/temps, lifted hooks, type + ## copies) must not collide with the itemIds the NIF loader synthesizes for + ## lazily-loaded symbols/types of the same module — those come from a + ## per-module load-order counter that keeps running while codegen mints its + ## own ids. A collision corrupts itemId-keyed tables, e.g. `transf`'s inline + ## iterator mapping then substitutes a random loaded sym (a call's callee) + ## with a `:tmp` block label. Start far above any realistic loaded count so + ## the two id spaces stay disjoint. + assert m.kind == skModule + result = IdGenerator(module: m.itemId.module, symId: m.itemId.item + BackendIdOffset, + typeId: BackendIdOffset, disambTable: initCountTable[PIdent]()) + result.disambTable.inc m.name + proc idGeneratorForPackage*(nextIdWillBe: int32): IdGenerator = result = IdGenerator(module: PackageModuleId, symId: nextIdWillBe - 1'i32, typeId: 0, disambTable: initCountTable[PIdent]()) @@ -1057,6 +1076,11 @@ proc newType*(kind: TTypeKind; idgen: IdGenerator; owner: PSym; son: sink PType if result.itemId.module == 55 and result.itemId.item == 2: echo "KNID ", kind writeStackTrace() + when defined(icDbg): + if kind == tyOpenArray: + echo "NEWTYPE openArray id=", id.module, ".", id.item, + " owner=", (if owner != nil: owner.name.s else: "nil") + echo getStackTrace() proc setSons*(dest: PType; sons: sink seq[PType]) {.inline.} = assert dest.kind != tyProc or sons.len <= 1 diff --git a/compiler/ast2nif.nim b/compiler/ast2nif.nim index 2ac6963a02..4af394cc57 100644 --- a/compiler/ast2nif.nim +++ b/compiler/ast2nif.nim @@ -10,7 +10,7 @@ ## AST to NIF bridge. import std / [assertions, tables, sets] -from std / strutils import startsWith +from std / strutils import startsWith, endsWith, contains from std / os import fileExists import astdef, idents, msgs, options import lineinfos as astli @@ -144,7 +144,7 @@ const symDefTagName = "sd" typeDefTagName = "td" -let +var sdefTag = registerTag(symDefTagName) tdefTag = registerTag(typeDefTagName) hiddenTypeTag = registerTag(hiddenTypeTagName) @@ -173,10 +173,24 @@ proc isLocalSym(sym: PSym): bool {.inline.} = ## size/speed can be optimised later. false +const + PkgMarker = "`pkg" + ## Appended to the ident of `skPackage` symbols in NIF names. A package sym + ## has no module of its own: it is written once into every module NIF that + ## references it, named with that module's suffix and its own (independent) + ## disamb counter. Without the marker it can collide with a module-level + ## symbol of the same name and disamb — e.g. extccomp's `compiler` template + ## vs the `compiler` package — and the module sym's owner then resolves to + ## the wrong symbol on load, producing a cyclic owner chain that hangs every + ## owner-walk (sighashes.hashSym etc.). Backtick cannot appear in a Nim + ## identifier, mirroring the "`t" namespace used by `typeToNifSym`. + proc toNifSymName(w: var Writer; sym: PSym): string = ## Generate NIF name for a symbol: local names are `ident.disamb`, ## global names are `ident.disamb.moduleSuffix` result = sym.name.s + if sym.kindImpl == skPackage: + result.add PkgMarker result.add '.' result.addInt sym.disamb if not isLocalSym(sym) and sym.itemId notin w.locals: @@ -424,11 +438,24 @@ proc writeSym(w: var Writer; dest: var TokenBuf; sym: PSym) = proc writeSymNode(w: var Writer; dest: var TokenBuf; n: PNode; sym: PSym) = if sym == nil: dest.addDotToken() - elif shouldWriteSymDef(w, sym): + return + # Compare lazy-aware, not the raw field: a sym node loaded from a NIF carries + # `typField == nil` plus `nfLazyType`, meaning "my type is the symbol's + # type". Comparing `typField` directly would re-serialize such a node as + # `(ht . sym)` — an explicitly nil node type — and the next loader gets a + # nil-typed node *without* the lazy fallback (semfold & friends crash on + # `n.typ == nil`). Only a genuinely nil node type keeps the explicit form. + # (ast.nim's `typ` accessor is not importable here; replicate its fallback. + # For a still-Partial sym `typImpl` is nil, which also compares equal below + # and yields the plain SymUse form — exactly the lazy round-trip we want.) + var nodeTyp = n.typField + if nodeTyp == nil and nfLazyType in n.flags: + nodeTyp = sym.typImpl + if shouldWriteSymDef(w, sym): sym.state = Sealed - if n.typField != n.sym.typImpl: + if nodeTyp != n.sym.typImpl: dest.buildTree hiddenTypeTag, trLineInfo(w, n.info): - writeType(w, dest, n.typField) + writeType(w, dest, nodeTyp) writeSymDef(w, dest, sym) else: writeSymDef(w, dest, sym) @@ -436,9 +463,9 @@ proc writeSymNode(w: var Writer; dest: var TokenBuf; n: PNode; sym: PSym) = # NIF has direct support for symbol references so we don't need to use a tag here, # unlike what we do for types! let info = trLineInfo(w, n.info) - if n.typField != n.sym.typImpl: + if nodeTyp != n.sym.typImpl: dest.buildTree hiddenTypeTag, info: - writeType(w, dest, n.typField) + writeType(w, dest, nodeTyp) dest.addSymUse pool.syms.getOrIncl(w.toNifSymName(sym)), info else: dest.addSymUse pool.syms.getOrIncl(w.toNifSymName(sym)), info @@ -517,21 +544,49 @@ proc trExport(w: var Writer; n: PNode) = w.deps.addSymUse pool.syms.getOrIncl(w.toNifSymName(s)), NoLineInfo w.deps.addParRi -let replayTag = registerTag("replay") -let repConverterTag = registerTag("repconverter") -let repDestroyTag = registerTag("repdestroy") -let repWasMovedTag = registerTag("repwasmoved") -let repCopyTag = registerTag("repcopy") -let repSinkTag = registerTag("repsink") -let repDupTag = registerTag("repdup") -let repTraceTag = registerTag("reptrace") -let repDeepCopyTag = registerTag("repdeepcopy") -let repEnumToStrTag = registerTag("repenumtostr") -let repMethodTag = registerTag("repmethod") -#let repClassTag = registerTag("repclass") -let includeTag = registerTag("include") -let importTag = registerTag("import") -let implTag = registerTag("implementation") +var replayTag = registerTag("replay") +var repConverterTag = registerTag("repconverter") +var repDestroyTag = registerTag("repdestroy") +var repWasMovedTag = registerTag("repwasmoved") +var repCopyTag = registerTag("repcopy") +var repSinkTag = registerTag("repsink") +var repDupTag = registerTag("repdup") +var repTraceTag = registerTag("reptrace") +var repDeepCopyTag = registerTag("repdeepcopy") +var repEnumToStrTag = registerTag("repenumtostr") +var repMethodTag = registerTag("repmethod") +#var repClassTag = registerTag("repclass") +var includeTag = registerTag("include") +var importTag = registerTag("import") +var implTag = registerTag("implementation") + +proc registerNifAstTags*() = + ## (Re)registers ast2nif's NIF tags explicitly. The top-level `registerTag` + ## initializers above depend on `nifstreams.pool` having been initialized + ## FIRST (`pool = createLiterals(TagData)` in nifstreams' module init) — an + ## inter-module init-order requirement. The IC-built compiler currently emits + ## module init calls in a different order, so the initializers registered + ## into a pool that was subsequently replaced: the tag ids then denoted + ## builtin tags (`replay` came out as `deref`, `repdestroy` as `pat`, ...) + ## and every written NIF was silently corrupted. Called from `nim.nim` + ## before any command runs; idempotent (`getOrIncl` by name). + sdefTag = registerTag(symDefTagName) + tdefTag = registerTag(typeDefTagName) + hiddenTypeTag = registerTag(hiddenTypeTagName) + replayTag = registerTag("replay") + repConverterTag = registerTag("repconverter") + repDestroyTag = registerTag("repdestroy") + repWasMovedTag = registerTag("repwasmoved") + repCopyTag = registerTag("repcopy") + repSinkTag = registerTag("repsink") + repDupTag = registerTag("repdup") + repTraceTag = registerTag("reptrace") + repDeepCopyTag = registerTag("repdeepcopy") + repEnumToStrTag = registerTag("repenumtostr") + repMethodTag = registerTag("repmethod") + includeTag = registerTag("include") + importTag = registerTag("import") + implTag = registerTag("implementation") proc writeNode(w: var Writer; dest: var TokenBuf; n: PNode; forAst = false) = if n == nil: @@ -872,6 +927,9 @@ proc setMainModule*(c: var DecodeContext; fileIdx: FileIndex) = ## own symbols by dependencies are not turned into duplicate stubs. c.mainModuleSuffix = modname(fileIdx.int, c.infos.config) +proc getMainModuleSuffix*(c: DecodeContext): string {.inline.} = + c.mainModuleSuffix + proc loadedState(c: DecodeContext): ItemState {.inline.} = ## State to give a freshly loaded symbol or type. During the C code generation ## phase (`nim nifc`) the backend (lambda lifting, the transformer, etc.) @@ -1222,6 +1280,11 @@ proc loadSymFromCursor(c: var DecodeContext; s: PSym; n: var Cursor; thisModule: s.kindImpl = parse(TSymKind, pool.tags[n.tagId]) inc n + if s.kindImpl == skPackage and s.name.s.endsWith(PkgMarker): + # Stubs keep the marked NIF ident (see PkgMarker) so that `globalName` + # reconstructs the index key; the in-memory sym gets the real name back. + s.name = c.cache.getIdent(s.name.s[0 ..< s.name.s.len - PkgMarker.len]) + case s.kindImpl of skLet, skVar, skField, skForVar: s.guardImpl = loadSymStub(c, n, thisModule, localSyms) @@ -1675,6 +1738,7 @@ proc processTopLevel(c: var DecodeContext; s: var Stream; flags: set[LoadFlag]; #elif t.tagId == repClassTag: # t = loadLogOp(c, logOps, s, ClassEntry, attachedTrace, module) elif t.tagId == exportTag: + var lastGood = "" t = next(s) # skip (export if t.kind == DotToken: t = next(s) # skip dot @@ -1683,20 +1747,30 @@ proc processTopLevel(c: var DecodeContext; s: var Stream; flags: set[LoadFlag]; while true: if t.kind == Symbol: let symAsStr = pool.syms[t.symId] + lastGood = symAsStr # Skip symbols that are re-exported by this dependency but actually # belong to the module we are compiling fresh: loading them as stubs # would shadow/collide with the freshly compiled originals. if c.mainModuleSuffix.len == 0 or parseSymName(symAsStr).module != c.mainModuleSuffix: + # Resolving an exported symbol of this very module (`export` of a + # symbol that lives in a `when` branch of the same file) lazily + # loads it from the stream we are currently iterating, moving the + # cursor into the symbol's `(sd ...)` definition. Save/restore the + # position so the export-list parse continues where it left off. + let saved = offset(s.r) let sym = resolveSym(c, symAsStr, false) if sym != nil: strTableAdd(interf, sym) addReexportedEnumFields(c, sym, interf) + s.r.jumpTo(saved) t = next(s) elif t.kind == ParRi: break else: - raiseAssert "expected Symbol or ParRi but got " & $t.kind + raiseAssert "expected Symbol or ParRi but got " & $t.kind & + " (" & (if t.kind == ParLe: pool.tags[t.tagId] else: "") & + ") in export list of module " & suffix & ", last symbol: " & lastGood t = next(s) elif t.tagId == includeTag: t = skipTree(s) diff --git a/compiler/ccgcalls.nim b/compiler/ccgcalls.nim index b2521069d4..35d417fe94 100644 --- a/compiler/ccgcalls.nim +++ b/compiler/ccgcalls.nim @@ -909,6 +909,16 @@ proc isInactiveDestructorCall(p: BProc, e: PNode): bool = proc genAsgnCall(p: BProc, le, ri: PNode, d: var TLoc) = if p.withinBlockLeaveActions > 0 and isInactiveDestructorCall(p, ri): return + when defined(icDbgHash): + if ri[0].typ == nil: + echo "NILCALLEE kind=", ri[0].kind, + " sym=", (if ri[0].kind == nkSym: ri[0].sym.name.s else: "-"), + " symKind=", (if ri[0].kind == nkSym: $ri[0].sym.kind else: "-"), + " flags=", (if ri[0].kind == nkSym: $ri[0].sym.flags else: "-"), + " lazy=", nfLazyType in ri[0].flags, + " inProc=", (if p.prc != nil: p.prc.name.s else: "NIL"), + " module=", p.module.module.name.s + raiseAssert "nil callee type, see NILCALLEE above" if ri[0].typ.skipTypes({tyGenericInst, tyAlias, tySink, tyOwned}).callConv == ccClosure: genClosureCall(p, le, ri, d) elif ri[0].kind == nkSym and sfInfixCall in ri[0].sym.flags: diff --git a/compiler/cgen.nim b/compiler/cgen.nim index 8d3b486ca3..1368bae833 100644 --- a/compiler/cgen.nim +++ b/compiler/cgen.nim @@ -51,7 +51,7 @@ when not declared(dynlib.libCandidates): else: dest.add(s) -when options.hasTinyCBackend: +when defined(tinyc): # == hasTinyCBackend; spelled out for the IC dep scanner import tccgen proc hcrOn(m: BModule): bool = m.config.hcrOn @@ -71,11 +71,11 @@ proc findPendingModule(m: BModule, s: PSym): BModule = var ms = getModule(s) registerModule m.g.graph, ms if ms.position >= m.g.mods.len: - result = newModule(m.g, ms, m.config, idGeneratorFromModule(ms)) + result = newModule(m.g, ms, m.config, idGeneratorForBackend(ms)) else: result = m.g.mods[ms.position] if result == nil: - result = newModule(m.g, ms, m.config, idGeneratorFromModule(ms)) + result = newModule(m.g, ms, m.config, idGeneratorForBackend(ms)) else: var ms = getModule(s) result = m.g.mods[ms.position] diff --git a/compiler/deps.nim b/compiler/deps.nim index 0c7c2bb517..d8e6632d10 100644 --- a/compiler/deps.nim +++ b/compiler/deps.nim @@ -10,8 +10,8 @@ ## Generate a .build.nif file for nifmake from a Nim project. ## This enables incremental and parallel compilation using the `m` switch. -import std / [os, tables, sets, times, osproc, algorithm] -import options, msgs, lineinfos, pathutils +import std / [os, tables, sets, times, osproc, algorithm, strtabs] +import options, msgs, lineinfos, pathutils, condsyms import "../dist/nimony/src/lib" / [nifstreams, bitabs, nifreader, nifbuilder] import "../dist/nimony/src/gear2" / modnames @@ -81,6 +81,14 @@ proc runNifler(c: DepContext; nimFile: string): bool = proc resolveImport(c: DepContext; origin, toResolve: string): string = ## Resolve an import path using the compiler's normal module lookup rules. + var toResolve = toResolve + if '$' in toResolve: + # string-literal import paths support `$nim`-style substitutions + # (see modulepaths.getModuleName) + try: + toResolve = pathSubs(c.config, toResolve, origin.splitFile().dir) + except ValueError: + discard result = findModule(c.config, toResolve, origin).string proc resolveInclude(c: DepContext; origin, toResolve: string): string = @@ -342,6 +350,19 @@ proc parseImportPath(s: var Stream; t: var PackedToken): seq[string] = if prefix.len > 0: result.add prefix & "/" & r else: result.add r if t.kind == ParRi: t = next(s) # skip closing ')' + elif tag == "prefix": + # Relative import paths: `import ../dist/checksums/...` parses as + # `(prefix ../ dist)` — a path-prefix operator (`../`, `./`) applied to + # the first path component. Concatenate operator and operand verbatim; + # `findModule` resolves the relative path against the importing module. + t = next(s) # skip 'prefix' tag + var op = "" + if t.kind == Ident: + op = pool.strings[t.litId] + t = next(s) + for r in parseImportPath(s, t): + result.add op & r + if t.kind == ParRi: t = next(s) # skip closing ')' elif tag == "bracket": t = next(s) # skip 'bracket' tag while t.kind != ParRi and t.kind != EofToken: @@ -511,6 +532,24 @@ proc generateBuildFile(c: DepContext): string = b.addHeader("nim ic", "nifmake") b.addTree "stmts" + # Forward the project's configuration to the per-module child processes. + # Non-incremental compilation semchecks every module in one process with one + # define set (the project's config files apply to the stdlib too); the IC + # children compile with the *module* as their project file and would miss + # e.g. compiler/nim.cfg's `define:nimPreviewSlimSystem`, so their `when` + # bodies — and thus their import sets and NIF contents — would silently + # diverge from the dependency graph computed here. Forward every define that + # is not part of the compiler's built-in baseline, plus the threads switch. + var forwardedArgs: seq[string] = @[] + block: + let baseline = newStringTable(modeStyleInsensitive) + initDefines(baseline) + for k, v in pairs(c.config.symbols): + if not baseline.hasKey(k) or baseline[k] != v: + forwardedArgs.add "--define:" & k & (if v == "true": "" else: "=" & v) + sort forwardedArgs + forwardedArgs.add "--threads:" & (if optThreads in c.config.globalOptions: "on" else: "off") + # Define nifler command b.addTree "cmd" b.addSymbolDef "nifler" @@ -532,6 +571,8 @@ proc generateBuildFile(c: DepContext): string = # Add search paths for p in c.config.searchPaths: b.addStrLit "--path:" & p.string + for a in forwardedArgs: + b.addStrLit a b.addTree "args" b.endTree() b.withTree "input": @@ -547,6 +588,8 @@ proc generateBuildFile(c: DepContext): string = # Add search paths for p in c.config.searchPaths: b.addStrLit "--path:" & p.string + for a in forwardedArgs: + b.addStrLit a b.addTree "input" b.addIntLit 0 b.endTree() @@ -691,10 +734,27 @@ proc commandIc*(conf: ConfigRef) = c.processedModules[rootPair.modname] = 0 # model the system.nim dependency: - let sysNode = Node(files: @[toPair(c, (conf.libpath / RelativeFile"system.nim").string)], id: 1) - c.nodes.add sysNode - c.systemNodeId = sysNode.id - rootNode.deps.add sysNode.id + let sysPair = toPair(c, (conf.libpath / RelativeFile"system.nim").string) + if sysPair.modname != rootPair.modname: + let sysNode = Node(files: @[sysPair], id: 1) + c.nodes.add sysNode + c.systemNodeId = sysNode.id + rootNode.deps.add sysNode.id + c.processedModules[sysPair.modname] = sysNode.id + # Traverse system.nim's own dependency tree. `nim m system.nim` compiles + # system's entire import closure from source in one process (none of it + # can be precompiled: every module implicitly imports system) and writes + # a NIF for each closure member. Every member also gets the implicit + # dependency edge on system, so Tarjan folds the whole closure into + # system's strongly-connected component and the build file contains a + # single rule producing all of those NIFs. Without this traversal each + # closure member that is also imported by an ordinary module got its own + # `nim m` rule whose output silently OVERWROTE the system-written NIF + # with freshly numbered type ids, leaving dangling type references (the + # ids are baked into sysma2dyk.nif and into every module semchecked + # against the first version) — "symbol has no offset" failures that + # depended on nifmake's scheduling. + traverseDeps(c, sysPair, sysNode) # Process dependencies traverseDeps(c, rootPair, rootNode) diff --git a/compiler/ic/replayer.nim b/compiler/ic/replayer.nim index 6152ffb48d..9eea4ee499 100644 --- a/compiler/ic/replayer.nim +++ b/compiler/ic/replayer.nim @@ -86,3 +86,37 @@ proc replayStateChanges*(module: PSym; g: ModuleGraph) = g.cacheSeqs[destKey].add val else: internalAssert g.config, false + +proc replayBackendActions*(g: ModuleGraph; module: PSym; list: PNode) = + ## Applies the backend-relevant replay actions (C compile/link directives) + ## found in a NIF-loaded module's top-level statement list. The `nifc` + ## backend loads modules without going through sem's `replayStateChanges`, + ## so e.g. math's `{.passL: "-lm".}` was lost and the final link failed + ## with undefined references. VM cache actions are deliberately NOT + ## replayed here — codegen does not run macros. + if list == nil: return + for n in list: + if n.kind == nkReplayAction and n.len >= 2 and + n[0].kind == nkStrLit and n[1].kind == nkStrLit: + case n[0].strVal + of "compile": + if n.len == 4 and n[2].kind == nkStrLit: + let cname = AbsoluteFile n[1].strVal + var cf = Cfile(nimname: splitFile(cname).name, cname: cname, + obj: AbsoluteFile n[2].strVal, + flags: {CfileFlag.External}, + customArgs: n[3].strVal) + extccomp.addExternalFileToCompile(g.config, cf) + of "link": + extccomp.addExternalFileToLink(g.config, AbsoluteFile n[1].strVal) + of "passl": + extccomp.addLinkOption(g.config, n[1].strVal) + of "passc": + extccomp.addCompileOption(g.config, n[1].strVal) + of "localpassc": + extccomp.addLocalCompileOption(g.config, n[1].strVal, + toFullPathConsiderDirty(g.config, module.info.fileIndex)) + of "cppdefine": + options.cppDefine(g.config, n[1].strVal) + else: + discard diff --git a/compiler/liftdestructors.nim b/compiler/liftdestructors.nim index 9c37038fb5..bba0735a88 100644 --- a/compiler/liftdestructors.nim +++ b/compiler/liftdestructors.nim @@ -711,6 +711,11 @@ proc useSeqOrStrOp(c: var TLiftCtx; t: PType; body, x, y: PNode) = doAssert t.asink != nil body.add newHookCall(c, t.asink, x, y) of attachedDestructor: + when defined(icDbg): + if t.destructor == nil: + echo "MISSING destructor: ", typeToString(t), " kind=", t.kind, + " itemId=", t.itemId, " uniqueId=", t.uniqueId, " state=", t.state, + " owner=", (if t.owner != nil: t.owner.name.s else: "nil") doAssert t.destructor != nil body.add destructorCall(c, t.destructor, x) of attachedTrace: diff --git a/compiler/mangleutils.nim b/compiler/mangleutils.nim index edb11c1390..52778fd2f8 100644 --- a/compiler/mangleutils.nim +++ b/compiler/mangleutils.nim @@ -61,12 +61,24 @@ proc mangleProcNameExt*(graph: ModuleGraph, s: PSym): string = # collision-free: the mangled base name plus `disamb` already disambiguate. if s.itemId.module >= 0 and s.itemId.module < graph.ifaces.len: result.add graph.ifaces[s.itemId.module].uniqueName - result.add "_u" - # Use `disamb` rather than `itemId.item`: under incremental compilation a - # symbol loaded from a NIF file gets a fresh, load-order-dependent `itemId.item` - # (from the per-module symbol counter), which is neither stable across the - # processes that compile vs. use a module nor guaranteed distinct from another - # loaded symbol's. `disamb` is assigned deterministically per (module, name) - # and, together with the already-prepended mangled name, yields a unique and - # stable C identifier. - result.addInt s.disamb + if s.itemId.item >= BackendIdOffset: + # A symbol minted during IC codegen (`idGeneratorForBackend`): its idgen + # starts with an EMPTY per-name disamb table, so its `disamb` restarts at 0 + # and collides with same-named sem-time symbols loaded from NIFs (two + # `=destroy` hooks both mangling to `_u2` → "conflicting types for ..." in + # the generated C). These symbols never cross a process boundary (nifc + # lifts, emits and compiles them in one run), so the per-module-unique + # item id is a safe and deterministic discriminator; the `_c` marker keeps + # the namespace disjoint from `_u`. + result.add "_c" + result.addInt s.itemId.item - BackendIdOffset + else: + result.add "_u" + # Use `disamb` rather than `itemId.item`: under incremental compilation a + # symbol loaded from a NIF file gets a fresh, load-order-dependent `itemId.item` + # (from the per-module symbol counter), which is neither stable across the + # processes that compile vs. use a module nor guaranteed distinct from another + # loaded symbol's. `disamb` is assigned deterministically per (module, name) + # and, together with the already-prepended mangled name, yields a unique and + # stable C identifier. + result.addInt s.disamb diff --git a/compiler/modulegraphs.nim b/compiler/modulegraphs.nim index fe01fa6b2d..e869bad1b1 100644 --- a/compiler/modulegraphs.nim +++ b/compiler/modulegraphs.nim @@ -126,6 +126,7 @@ type procGlobals*: seq[PNode] nifReplayActions*: Table[int32, seq[PNode]] # module position -> replay actions for NIF cachedMods: IntSet + hookClosure: IntSet # modules whose serialized hooks were already registered TPassContext* = object of RootObj # the pass's context idgen*: IdGenerator @@ -280,34 +281,58 @@ proc getAttachedOp*(g: ModuleGraph; t: PType; op: TTypeAttachedOp): PSym = let key = typeKey(t, g.config, loadTypeCallback, loadSymCallback) result = g.loadedOps[op].getOrDefault(key) #echo "fallback ", key, " ", op, " ", result + when defined(icDbgHash): + if result == nil and op == attachedDestructor: + echo "HOOK MISS key=", key, " table.len=", g.loadedOps[op].len, + " kind=", t.kind, " sym=", (if t.sym != nil: t.sym.name.s else: "NIL") + if key.len > 10: + let probe = key[3 ..< min(key.len, 18)] + for k in g.loadedOps[op].keys: + if probe in k: echo " candidate: ", k else: result = nil proc setAttachedOp*(g: ModuleGraph; module: int; t: PType; op: TTypeAttachedOp; value: PSym) = ## we also need to record this to the packed module. - if not g.attachedOps[op].contains(t.itemId): - let key = typeKey(t, g.config, loadTypeCallback, loadSymCallback) - # Use key-based deduplication for opsLog because different type objects - # (e.g. canon vs orig) can have different itemIds but same structural key - if key notin g.loadedOps[op]: - # For a *nominal*, non-instantiated type the hook belongs to the module - # that defines the type (so it is emitted once there). But for a generic - # instance or a structural type (ref/ptr/seq/... over an instance) there is - # no honest definition site: the generic's home module is upstream of the - # type arguments and structurally blind to the instance, so it can never - # realize the hook. Such hooks can only be produced by the *instantiating* - # module — which is exactly the one running now (`module`). Stamping them - # with the type's def module produced a `LogEntry` that no module ever - # writes (the def module never instantiates it; the instantiating module's - # writer skips it because `op.module != thisModule`), so the hook was lost - # and codegen failed with "'=destroy' operator not found". Duplicate - # registrations across instantiating modules are reconciled deterministically - # at load time (see the HookEntry replay in `replayStateChanges`). - let nominal = t.sym != nil and t.kind in {tyObject, tyEnum, tyDistinct} and - tfFromGeneric notin t.flags - let ownerModule = if nominal: t.sym.itemId.module.int else: module - g.opsLog.add LogEntry(kind: HookEntry, op: op, module: ownerModule, key: key, sym: value) - g.loadedOps[op][key] = value + # Key-based deduplication for opsLog: different type objects (e.g. canon vs + # orig) can have different itemIds but the same structural key. + let key = typeKey(t, g.config, loadTypeCallback, loadSymCallback) + let existing = g.loadedOps[op].getOrDefault(key) + if existing == nil: + # Stamp the entry with the module whose compilation produced the hook + # (`module`), NOT the type's def module: each `nim m` is a separate + # process, so a hook lifted while compiling a *downstream* module simply + # does not exist in the def module's process — stamping it with the def + # module produced a `LogEntry` that no module ever writes (the def + # module's writer ran in another process that never lifted it; this + # module's writer skips it because `op.module != thisModule`) and codegen + # failed with "'=destroy' operator not found" (e.g. astdef's `TStrTable`, + # whose destroy is first needed by modulegraphs). This holds for nominal + # types as much as for generic/structural instances. Duplicate + # registrations across lifting modules are reconciled deterministically + # at load time (see the HookEntry replay in `replayStateChanges`). + g.opsLog.add LogEntry(kind: HookEntry, op: op, module: module, key: key, sym: value) + g.loadedOps[op][key] = value + elif existing != value: + # Re-registration replacing an earlier sym for the same key. This happens + # legitimately: `createTypeBoundOps` first registers empty `symPrototype` + # placeholders, then `produceSym` replaces them — in particular + # `produceSymDistinctType` replaces a distinct type's placeholder with the + # BASE type's hook (a `distinct string` uses string's `=sink`). The log + # must follow the replacement, otherwise the NIF ships the dead, + # empty-bodied prototype and codegen in another process calls a no-op + # `=sink`/`=copy`, silently losing the value (e.g. `conf.projectPath` + # ended up empty: "cannot open '/'"). + g.loadedOps[op][key] = value + var updated = false + for e in mitems(g.opsLog): + if e.kind == HookEntry and e.op == op and e.key == key: + e.sym = value + e.module = module + updated = true + break + if not updated: + g.opsLog.add LogEntry(kind: HookEntry, op: op, module: module, key: key, sym: value) g.attachedOps[op][t.itemId] = value proc setAttachedOp*(g: ModuleGraph; module: int; typeId: ItemId; op: TTypeAttachedOp; value: PSym) = @@ -356,8 +381,10 @@ proc getToStringProc*(g: ModuleGraph; t: PType): PSym = proc setToStringProc*(g: ModuleGraph; t: PType; value: PSym) = g.enumToStringProcs[t.itemId] = value let key = typeKey(t, g.config, loadTypeCallback, loadSymCallback) - let ownerModule = if t.sym != nil: t.sym.itemId.module.int else: value.itemId.module.int - g.opsLog.add LogEntry(kind: EnumToStrEntry, module: ownerModule, key: key, sym: value) + # Stamp with the module that owns the generated proc, not the enum's def + # module: the def module's process may never have generated it (same + # "written by nobody" failure as hook entries, see setAttachedOp). + g.opsLog.add LogEntry(kind: EnumToStrEntry, module: value.itemId.module.int, key: key, sym: value) iterator methodsForGeneric*(g: ModuleGraph; t: PType): (int, PSym) = if g.methodsPerGenericType.contains(t.itemId): @@ -556,6 +583,7 @@ proc initModuleGraphFields(result: ModuleGraph) = result.emittedTypeInfo = initTable[string, FileIndex]() result.cachedFiles = newStringTable() result.cachedMods = initIntSet() + result.hookClosure = initIntSet() proc newModuleGraph*(cache: IdentCache; config: ConfigRef): ModuleGraph = result = ModuleGraph() @@ -671,6 +699,50 @@ proc getBody*(g: ModuleGraph; s: PSym): PNode {.inline.} = assert result != nil when not defined(nimKochBootstrap): + proc registerLoadedHooks(g: ModuleGraph; logOps: seq[LogEntry]) = + let mainSuffix = getMainModuleSuffix(ast.program) + for x in logOps: + # A dependency's NIF may carry hooks whose syms belong to the module we + # are compiling fresh (e.g. a stale NIF of that very module written by an + # earlier in-process compilation). Loading those would collide with the + # freshly semchecked hook declarations. + if mainSuffix.len > 0 and + cachedModuleSuffix(g.config, x.sym.itemId.module.FileIndex) == mainSuffix: + continue + case x.kind + of HookEntry: + # The same structural hook may be serialized by several instantiating + # modules (a generic/structural instance has no single def site, so each + # using module owns its copy). Pick one deterministic program-wide winner + # by the smaller owning-module name, so every lookup resolves to the same + # sym regardless of module load order. + let existing = g.loadedOps[x.op].getOrDefault(x.key) + if existing == nil or + cachedModuleSuffix(g.config, x.sym.itemId.module.FileIndex) < + cachedModuleSuffix(g.config, existing.itemId.module.FileIndex): + g.loadedOps[x.op][x.key] = x.sym + of EnumToStrEntry: + g.loadedEnumToStringProcs[x.key] = x.sym + else: + discard + + proc loadTransitiveHooks(g: ModuleGraph; deps: seq[ModuleSuffix]) = + ## Registers the serialized hooks (and enum-to-string procs) of every module + ## in the import closure of `deps`. Deliberately does NOT use + ## `moduleFromNifFile`: that would register the dep as a fully loaded module + ## and a later direct import of it would then skip `replayStateChanges`. + var stack = deps + var interf = initStrTable() + var interfHidden = initStrTable() + while stack.len > 0: + let suffix = stack.pop() + var isKnownFile = false + let fileIdx = g.config.registerNifSuffix(string suffix, isKnownFile) + if not g.hookClosure.containsOrIncl(fileIdx.int): + let precomp = loadNifModule(ast.program, suffix, interf, interfHidden, {}) + registerLoadedHooks(g, precomp.logOps) + for d in precomp.deps: stack.add d + proc moduleFromNifFile*(g: ModuleGraph; fileIdx: FileIndex; flags: set[LoadFlag] = {}): PrecompiledModule = ## Returns 'nil' if the module needs to be recompiled. @@ -699,31 +771,30 @@ when not defined(nimKochBootstrap): # Mark module as cached g.cachedMods.incl fileIdx.int + g.hookClosure.incl fileIdx.int # Register hooks from NIF index with the module graph + registerLoadedHooks(g, result.logOps) for x in result.logOps: case x.kind - of HookEntry: - # The same structural hook may be serialized by several instantiating - # modules (a generic/structural instance has no single def site, so each - # using module owns its copy). Pick one deterministic program-wide winner - # by the smaller owning-module name, so every lookup resolves to the same - # sym regardless of module load order. - let existing = g.loadedOps[x.op].getOrDefault(x.key) - if existing == nil or - cachedModuleSuffix(g.config, x.sym.itemId.module.FileIndex) < - cachedModuleSuffix(g.config, existing.itemId.module.FileIndex): - g.loadedOps[x.op][x.key] = x.sym of ConverterEntry: g.ifaces[fileIdx.int].converters.add x.sym of MethodEntry: discard "todo" - of EnumToStrEntry: - g.loadedEnumToStringProcs[x.key] = x.sym of GenericInstEntry: raiseAssert "GenericInstEntry should not be in the NIF index" + of HookEntry, EnumToStrEntry: + discard "already done by registerLoadedHooks" # Register methods per type from NIF index discard "todo" + # `nim m` loads only its *direct* imports through this proc, but a hook for + # a structural type (e.g. `=destroy` for `seq[PNode]`) lives in the NIF of + # whichever module first lifted it — possibly a dependency of a dependency + # that the current module never imports directly. Walk the whole import + # closure so every serialized hook is visible. (Codegen, `nim nifc`, already + # walks the closure in nifbackend.loadModuleDependencies.) + if g.config.cmd == cmdM: + loadTransitiveHooks(g, result.deps) proc configComplete*(g: ModuleGraph) = #rememberStartupConfig(g.startupPackedConfig, g.config) diff --git a/compiler/msgs.nim b/compiler/msgs.nim index 6c20310205..c092ac8841 100644 --- a/compiler/msgs.nim +++ b/compiler/msgs.nim @@ -511,6 +511,9 @@ proc sourceLine*(conf: ConfigRef; i: TLineInfo): string = ## 1-based index (matches editor line numbers); 1st line is for i.line = 1 ## last valid line is `numLines` inclusive if i.fileIndex.int32 < 0: return "" + # line 0 means "unknown": nodes synthesized from an IC-loaded template or + # macro body carry no source position. + if i.line.int < 1: return "" let num = numLines(conf, i.fileIndex) # can happen if the error points to EOF: if i.line.int > num: return "" diff --git a/compiler/nifbackend.nim b/compiler/nifbackend.nim index 181a3dc040..ecead9edc3 100644 --- a/compiler/nifbackend.nim +++ b/compiler/nifbackend.nim @@ -24,6 +24,7 @@ when defined(nimPreviewSlimSystem): import ast, options, lineinfos, modulegraphs, cgendata, cgen, pathutils, extccomp, msgs, modulepaths, idents, types, ast2nif +import ic / replayer proc loadModuleDependencies(g: ModuleGraph; mainFileIdx: FileIndex): seq[PrecompiledModule] = ## Traverse the module dependency graph using a stack. @@ -62,7 +63,7 @@ proc setupNifBackendModule(g: ModuleGraph; module: PSym): BModule = ## Set up a BModule for code generation from a NIF module. if g.backend == nil: g.backend = cgendata.newModuleList(g) - result = cgen.newModule(BModuleList(g.backend), module, g.config, idGeneratorFromModule(module)) + result = cgen.newModule(BModuleList(g.backend), module, g.config, idGeneratorForBackend(module)) proc finishModule(g: ModuleGraph; bmod: BModule) = # Finalize the module (this adds it to modulesClosed) @@ -81,6 +82,10 @@ proc generateCodeForModule(g: ModuleGraph; precomp: PrecompiledModule) = if bmod == nil: bmod = setupNifBackendModule(g, precomp.module) + # Apply the module's recorded C compile/link directives (passl/passc/...) + # before generating code: the link step needs them (e.g. math's -lm). + replayBackendActions(g, precomp.module, precomp.topLevel) + # Generate code for the module's top-level statements if precomp.topLevel != nil: cgen.genTopLevelStmt(bmod, precomp.topLevel) diff --git a/compiler/nim.nim b/compiler/nim.nim index a60e030118..fd33aca040 100644 --- a/compiler/nim.nim +++ b/compiler/nim.nim @@ -28,10 +28,12 @@ import commands, options, msgs, extccomp, main, idents, lineinfos, cmdlinehelper, pathutils, modulegraphs +from ast2nif import registerNifAstTags + from std/browsers import openDefaultBrowser from nodejs import findNodeJs -when hasTinyCBackend: +when defined(tinyc): # == hasTinyCBackend; spelled out for the IC dep scanner import tccgen when defined(profiler) or defined(memProfiler): @@ -96,6 +98,11 @@ proc getNimRunExe(conf: ConfigRef): string = result = "" proc handleCmdLine(cache: IdentCache; conf: ConfigRef) = + # NIF tag registration must not depend on module init order — the IC-built + # compiler orders module init calls differently and the top-level + # `registerTag` initializers then ran against a not-yet-initialized pool, + # corrupting every written NIF (see registerNifAstTags). + registerNifAstTags() let self = NimProg( supportsStdinFile: true, processCmdLine: processCmdLine diff --git a/compiler/semcall.nim b/compiler/semcall.nim index f0a4a1385b..a27531c186 100644 --- a/compiler/semcall.nim +++ b/compiler/semcall.nim @@ -732,6 +732,15 @@ proc indexTypesMatch(c: PContext, f, a: PType, arg: PNode): PNode = result = paramTypesMatch(m, f, a, arg, nil) if m.genericConverter and result != nil: instGenericConvertersArg(c, result, m) + when defined(icDbg): + if result == nil and f != nil and a != nil and f.kind == tyEnum: + echo "INDEXMISMATCH f=", typeToString(f), " itemId=", f.itemId, + " uniqueId=", f.uniqueId, " mod=", toFullPath(c.config, f.itemId.module.FileIndex), + " sym=", (if f.sym != nil: $f.sym.itemId else: "nil"), " state=", f.state + let a2 = a.skipTypes({tyRange}) + echo " a=", typeToString(a), " itemId=", a2.itemId, " uniqueId=", a2.uniqueId, + " mod=", toFullPath(c.config, a2.itemId.module.FileIndex), + " sym=", (if a2.sym != nil: $a2.sym.itemId else: "nil"), " state=", a2.state proc inferWithMetatype(c: PContext, formal: PType, arg: PNode, coerceDistincts = false): PNode = diff --git a/compiler/semfold.nim b/compiler/semfold.nim index 49526a948d..42285618ff 100644 --- a/compiler/semfold.nim +++ b/compiler/semfold.nim @@ -610,6 +610,11 @@ proc getConstExpr(m: PSym, n: PNode; idgen: IdGenerator; g: ModuleGraph): PNode var s = n.sym case s.kind of skEnumField: + when defined(icDbg): + if n.typ == nil: + echo "ENUMFIELD niltyp sym=", s.name.s, " symtyp=", + (if s.typ == nil: "nil" else: $s.typ.kind), " lazy=", nfLazyType in n.flags, + " symstate=", s.state, " symid=", s.itemId result = newIntNodeT(toInt128(s.position), n, idgen, g) of skConst: case s.magic diff --git a/compiler/seminst.nim b/compiler/seminst.nim index a34467636a..f8b19d79ac 100644 --- a/compiler/seminst.nim +++ b/compiler/seminst.nim @@ -245,7 +245,7 @@ proc instantiateProcType(c: PContext, pt: LayeredIdTable, let originalParams = result.n result.n = originalParams.shallowCopy for i in 1 ..< originalParams.len: - let resulti = originalParams[i].sym.typ + var resulti = originalParams[i].sym.typ # twrong_field_caching requires these 'resetIdTable' calls: if i > FirstParamAt: resetIdTable(cl.symMap) @@ -258,6 +258,11 @@ proc instantiateProcType(c: PContext, pt: LayeredIdTable, let needsStaticSkipping = resulti.kind == tyFromExpr let needsTypeDescSkipping = resulti.kind == tyTypeDesc and tfUnresolved in resulti.flags if resulti.kind == tyFromExpr: + if resulti.state == Sealed: + # The generic was loaded from a NIF; do not brand the shared original. + # A tyFromExpr is a placeholder that `replaceTypeVarsT` resolves away, + # so a copy carries no identity that later comparisons could miss. + resulti = copyType(resulti, c.idgen, resulti.owner) resulti.incl tfNonConstExpr var paramType = replaceTypeVarsT(cl, resulti) if needsStaticSkipping: @@ -283,6 +288,9 @@ proc instantiateProcType(c: PContext, pt: LayeredIdTable, if oldParam.ast != nil: var def = oldParam.ast.copyTree if def.typ.kind == tyFromExpr: + if def.typ.state == Sealed: + # `copyTree` shares types; see the `resulti` comment above. + def.typ = copyType(def.typ, c.idgen, def.typ.owner) def.typ.incl tfNonConstExpr if not isIntLit(def.typ): def = prepareNode(cl, def) diff --git a/compiler/semtypes.nim b/compiler/semtypes.nim index 2de2c0e163..6dcbeabc5c 100644 --- a/compiler/semtypes.nim +++ b/compiler/semtypes.nim @@ -512,11 +512,13 @@ proc semArrayIndex(c: PContext, n: PNode): PType = if c.inGenericContext > 0: result.incl tfUnresolved else: result = e.typ.skipTypes({tyTypeDesc}) - if result.state == Sealed: - # The index type was loaded from the IC cache and must not be mutated - # in place; work on a copy so we can mark it as an implicit static. - result = copyType(result, c.idgen, getCurrOwner(c)) - result.incl tfImplicitStatic + if result.state != Sealed: + # For a type loaded from the IC cache we skip the flag instead of + # mutating (or copying) the type: tfImplicitStatic has no readers in + # the compiler, and a copy would get a fresh itemId, breaking enum + # identity (`sameEnumTypes` compares ids) — `arr[enumVal]` on an + # `array[LoadedEnum, T]` would no longer typecheck. + result.incl tfImplicitStatic elif e.kind in (nkCallKinds + {nkBracketExpr}) and hasUnresolvedArgs(c, e): if not isOrdinalType(e.typ.skipTypes({tyStatic, tyAlias, tyGenericInst, tySink})): localError(c.config, n[1].info, errOrdinalTypeExpected % typeToString(e.typ, preferDesc)) @@ -1895,6 +1897,12 @@ proc semTypeExpr(c: PContext, n: PNode; prev: PType): PType = # by macros. Only macros can summon unnamed types # and cast spell upon AST. Here we need to give # it a name taken from left hand side's node + if result.state == Sealed: + # The unnamed type was loaded from a dependency's NIF and must not + # be mutated in place; attach the name to a fresh copy instead. + let orig = result + result = copyType(orig, c.idgen, getCurrOwner(c)) + copyTypeProps(c.graph, c.idgen.module, result, orig) result.sym = prev.sym result.sym.typ = result else: diff --git a/compiler/sighashes.nim b/compiler/sighashes.nim index 3b688920e4..67607b31ed 100644 --- a/compiler/sighashes.nim +++ b/compiler/sighashes.nim @@ -52,7 +52,17 @@ proc hashSym(c: var MD5Context, s: PSym) = c &= ":anon" else: var it = s + when defined(icDbgHash): + var ownerSteps = 0 while it != nil: + when defined(icDbgHash): + inc ownerSteps + if ownerSteps >= 1000 and ownerSteps <= 1030: + echo "OWNERLOOP(hashSym) n=", ownerSteps, " sym=", it.name.s, " kind=", it.kind, + " id=", it.itemId, " flags=", it.flags, " state=", it.state, + " start=", s.name.s, " startId=", s.itemId + elif ownerSteps == 1031: + raiseAssert "owner-chain cycle detected, see OWNERLOOP dump above" c &= it.name.s c &= "." it = it.owner @@ -65,7 +75,17 @@ proc hashTypeSym(c: var MD5Context, s: PSym; conf: ConfigRef) = else: var it = s c &= customPath(conf.toFullPath(s.info)) + when defined(icDbgHash): + var ownerSteps = 0 while it != nil: + when defined(icDbgHash): + inc ownerSteps + if ownerSteps >= 1000 and ownerSteps <= 1030: + echo "OWNERLOOP n=", ownerSteps, " sym=", it.name.s, " kind=", it.kind, + " id=", it.itemId, " flags=", it.flags, " state=", it.state, + " start=", s.name.s, " startId=", s.itemId + elif ownerSteps == 1031: + raiseAssert "owner-chain cycle detected, see OWNERLOOP dump above" if sfFromGeneric in it.flags and it.kind in routineKinds and it.typ != nil: hashType c, it.typ, {CoProc}, conf @@ -102,10 +122,28 @@ proc hashTree(c: var MD5Context, n: PNode; flags: set[ConsiderFlag]; conf: Confi else: for i in 0.. hashMaxDepth: hashMaxDepth = hashDepth + if hashCalls >= 500_000_000 and hashCalls <= 500_000_300: + echo "HASHLOOP n=", hashCalls, " d=", hashDepth, " kind=", t.kind, " id=", t.itemId, + " uniq=", t.uniqueId, " sym=", (if t.sym != nil: t.sym.name.s else: "NIL"), + " state=", t.state, " owner=", (if t.owner != nil: t.owner.name.s else: "NIL") + elif hashCalls == 500_000_301: + echo "HASHLOOP maxDepth=", hashMaxDepth + raiseAssert "hashType runaway detected, see HASHLOOP dump above" + defer: + dec hashDepth # Ensure type is fully loaded before hashing to avoid hash changing # as properties are accessed and trigger lazy loading. @@ -248,6 +286,29 @@ proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]; conf: Confi c.hashType(param.typ, flags, conf) c &= ',' c.hashType(t.returnType, flags, conf) + elif t.n != nil and t.n.kind == nkFormalParams: + # Under IC a loaded proc type stores its parameters only in `n`; `sons` + # holds just the return type. Hashing `t.signature` would silently drop + # every parameter, collapsing distinct proc types onto one hash, so the + # same logical type got different C struct names in different TUs + # ("incompatible type for argument" on closure args). Hash the return + # type first and then the parameter types from `n` — for from-source + # types `n`'s param types equal `sons[1..]`, so non-IC hashes are + # unchanged. (Same fix as typekeys' tyProc branch.) + c.hashType(t.returnType, flags, conf) + for i in 1..