From 3e95fcb4b038225deb108a26142d1b08cabd07d8 Mon Sep 17 00:00:00 2001 From: Araq Date: Sat, 13 Jun 2026 13:55:58 +0200 Subject: [PATCH] IC: cleaner design, updated design document --- compiler/ast2nif.nim | 18 +++ compiler/deps.nim | 88 +++++++---- compiler/options.nim | 4 +- compiler/pipelines.nim | 31 ++-- compiler/semdata.nim | 11 +- doc/ic.md | 352 ++++++++++++++++++++++++++--------------- 6 files changed, 332 insertions(+), 172 deletions(-) diff --git a/compiler/ast2nif.nim b/compiler/ast2nif.nim index d054f3bab8..0d665099a1 100644 --- a/compiler/ast2nif.nim +++ b/compiler/ast2nif.nim @@ -1214,6 +1214,24 @@ proc writeEdgesFile(config: ConfigRef; thisModule: int32; implDeps: seq[int]) = # edit was reverted). Nimony's analog is its always-written `.s.nif`. writeFile(dest, path) +proc writeSemDeps*(config: ConfigRef; thisModule: int32; importPaths: seq[string]) = + ## The module's REAL direct imports as `nim m` sem resolved them — static + ## plus any a macro generated — recorded as full source paths. `nim ic` reads + ## this `.s.deps.nif` to re-derive the build graph: imports the static scanner + ## missed become new nodes (replacing the old build-failure discovery loop), + ## and `when false` imports the scanner over-included are pruned. Always + ## written so it is current after every successful sem (like `.edges`). 
+ let selfSuffix = modname(thisModule, config) + var paths = importPaths + sort paths + var dest = createTokenBuf(4 + 2*paths.len) + dest.addParLe pool.tags.getOrIncl("semdeps"), NoLineInfo + for p in paths: + dest.addStrLit p + dest.addParRi + let path = toGeneratedFile(config, AbsoluteFile(selfSuffix), ".s.deps.nif").string + writeFile(dest, path) + proc writeNifModule*(config: ConfigRef; thisModule: int32; n: PNode; opsLog: seq[LogEntry]; replayActions: seq[PNode] = @[]; diff --git a/compiler/deps.nim b/compiler/deps.nim index ca3019a806..fb99586931 100644 --- a/compiler/deps.nim +++ b/compiler/deps.nim @@ -83,6 +83,26 @@ proc readNeedsImpl(c: DepContext; f: FilePair): seq[string] = finally: close s +proc semDepsFile(c: DepContext; f: FilePair): string = + getNimcacheDir(c.config).string / f.modname & ".s.deps.nif" + +proc readSemDeps(c: DepContext; f: FilePair): seq[string] = + ## The module's REAL direct imports (full source paths) as sem resolved them, + ## including macro-generated imports the static scanner missed + ## (ast2nif.writeSemDeps). Missing file (not yet semmed) -> empty. + result = @[] + if fileExists(c.semDepsFile(f)): + var s = nifstreams.open(c.semDepsFile(f)) + try: + discard processDirectives(s.r) + while true: + let t = next(s) + if t.kind == EofToken: break + if t.kind == StringLit: + result.add pool.strings[t.litId] + finally: + close s + proc findNifler(): string = # Look for nifler in common locations let nimDir = getAppDir() @@ -100,6 +120,10 @@ proc findNifmake(): string = proc runNifler(c: DepContext; nimFile: string): bool = ## Run nifler deps on a file if needed. Returns true on success. + ## NOTE: the `setLastModificationTime` coordination below is a known hack; its + ## clean removal lands with the Phase 2 frontend/backend split, which redefines + ## this pre-scan's role. (A naive switch to keying on the parsed file produced + ## a stale warm rebuild, so it's left intact until the restructure.) 
let pair = c.toPair(nimFile) let depsPath = c.depsFile(pair) @@ -933,21 +957,20 @@ proc commandIc*(conf: ConfigRef) = # Process dependencies traverseDeps(c, rootPair, rootNode) - # Discovery loop: imports GENERATED by macros (chronicles builds + # Discovery via `.s.deps`: imports GENERATED by macros (chronicles builds # `import chronicles/textlines` via parseStmt from the chronicles_sinks - # define) are invisible to the static scanner. A failing `nim m` child - # records "missing-path \t importer-path" in icmissing.txt; we add the - # module (and an edge from its importer) to the graph and rerun — - # nifmake's mtime pruning keeps completed work. - let missingFile = getNimcacheDir(conf).string & "/icmissing.txt" - removeFile missingFile + # define) are invisible to the static scanner. Each `nim m` records the + # imports it ACTUALLY resolved (static + macro-generated) into a + # `.s.deps.nif` sidecar (ast2nif.writeSemDeps); a child that fails on a + # not-yet-built import flushes it before erroring. We re-derive the graph + # from those sidecars — adding any module the scanner missed, plus the edge + # from its importer — and rerun; nifmake's mtime pruning keeps completed + # work. A round that discovers nothing new but still fails is a real error. var rounds = 0 while true: - # Generate build file let buildFile = generateBuildFile(c) rawMessage(conf, hintSuccess, "generated: " & buildFile) - # Automatically run nifmake let nifmake = findNifmake() if nifmake.len == 0: rawMessage(conf, hintSuccess, "run: nifmake run " & buildFile) @@ -956,31 +979,32 @@ proc commandIc*(conf: ConfigRef) = rawMessage(conf, hintExecuting, cmd) let exitCode = execShellCmd(cmd) if exitCode == 0: break + + # Re-derive from the post-sem deps of every node compiled so far. Imports + # the static scanner missed become new nodes; the importer->import edge + # the scanner could not see is added so the discovered module builds + # first. 
(Static-import edges are already present, so `notin deps` skips + # the redundant ones.) var discovered = false inc rounds - if rounds <= 20 and fileExists(missingFile): - for line in lines(missingFile): - let parts = line.split('\t') - if parts.len != 2 or parts[0].len == 0: continue - let pair = c.toPair(parts[0]) - let importerIdx = c.processedModules.getOrDefault( - c.toPair(parts[1]).modname, -1) - var idx = c.processedModules.getOrDefault(pair.modname, -1) - if idx == -1: - let newNode = Node(files: @[pair], id: c.nodes.len) - if c.systemNodeId >= 0: - newNode.deps.add c.systemNodeId - c.processedModules[pair.modname] = newNode.id - c.nodes.add newNode - idx = newNode.id - traverseDeps(c, pair, newNode) - discovered = true - if importerIdx >= 0 and idx >= 0 and idx notin c.nodes[importerIdx].deps: - # the build-graph edge the scanner could not see: forces the - # discovered module to be built before its importer re-sems - c.nodes[importerIdx].deps.add idx - discovered = true - removeFile missingFile + if rounds <= 20: + let n0 = c.nodes.len # snapshot: new nodes are traversed as they're added + for ni in 0 ..< n0: + for p in readSemDeps(c, c.nodes[ni].files[0]): + let pair = c.toPair(p) + var idx = c.processedModules.getOrDefault(pair.modname, -1) + if idx == -1: + let newNode = Node(files: @[pair], id: c.nodes.len) + if c.systemNodeId >= 0: + newNode.deps.add c.systemNodeId + c.processedModules[pair.modname] = newNode.id + c.nodes.add newNode + idx = newNode.id + traverseDeps(c, pair, newNode) + discovered = true + if idx != ni and idx notin c.nodes[ni].deps: + c.nodes[ni].deps.add idx + discovered = true if not discovered: rawMessage(conf, errGenerated, "nifmake failed with exit code: " & $exitCode) break diff --git a/compiler/options.nim b/compiler/options.nim index 70bbb415e8..1c78a77181 100644 --- a/compiler/options.nim +++ b/compiler/options.nim @@ -29,7 +29,7 @@ const nimEnableCovariance* = defined(nimEnableCovariance) - icFormatVersion* = "2" + 
icFormatVersion* = "3" ## Version of the IC cache format (the sem-NIF module layout written by ## ast2nif.nim plus the iface/impl/edges side files). Bump it whenever ## that layout changes: `commandIc` wipes a nimcache whose `ic.version` @@ -40,6 +40,8 @@ const ## body folding); body access now records a NeedsImpl edge instead. A v1 ## cache mixes body-sensitive and body-insensitive cookies, so it must be ## wiped rather than warm-rebuilt. + ## v3: added the `.s.deps` sidecar (real post-sem imports) and switched the + ## macro-generated-import discovery from `icmissing.txt` to it. type # please make sure we have under 32 options # (improves code efficiency a lot!) diff --git a/compiler/pipelines.nim b/compiler/pipelines.nim index 69fc3fd722..5849417151 100644 --- a/compiler/pipelines.nim +++ b/compiler/pipelines.nim @@ -271,6 +271,12 @@ proc processPipelineModule*(graph: ModuleGraph; module: PSym; idgen: IdGenerator for id in graph.icImplDeps: implDeps.add id writeNifModule(graph.config, module.position.int32, topLevelStmts, graph.opsLog, replayActions, implDeps, reexportedModuleSyms(graph, module)) + # The module's REAL direct imports (incl. macro-generated) for `nim ic`'s + # graph re-derivation; see ast2nif.writeSemDeps / semdata.addImportFileDep. + var semDepPaths: seq[string] = @[] + for f in graph.importDeps.getOrDefault(module.position.FileIndex, @[]): + semDepPaths.add toFullPath(graph.config, f).string + writeSemDeps(graph.config, module.position.int32, semDepPaths) result = true @@ -304,18 +310,19 @@ proc compilePipelineModule*(graph: ModuleGraph; fileIdx: FileIndex; flags: TSymF let precomp = moduleFromNifFile(graph, fileIdx) if precomp.module == nil: let nifPath = toNifFilename(graph.config, fileIdx) - # Record the miss for `nim ic`'s discovery loop: imports GENERATED - # by macros (e.g. chronicles' parseStmt("import chronicles/textlines"), - # driven by the chronicles_sinks define) are invisible to the static - # dependency scanner. 
The parent reads this file, adds the module — - # plus an edge from this importer — to the build graph and reruns. - try: - let f = open(getNimcacheDir(graph.config).string & "/icmissing.txt", fmAppend) - f.writeLine(toFullPath(graph.config, fileIdx) & "\t" & - graph.config.projectFull.string) - f.close() - except IOError, OSError: - discard + # Macro-generated imports (e.g. chronicles' parseStmt("import + # chronicles/textlines") driven by the chronicles_sinks define) are + # invisible to the static scanner, so this module's NIF was never + # built. The importer already recorded this import via + # addImportFileDep, so flush every module's `.s.deps`: `nim ic` reads + # it, re-derives the graph with the missing node + edge, and reruns + # the frontend. We still error — this process cannot finish sem + # without the import — but the discovery is structured data now, not + # a side-channel file. + for importer, deps in graph.importDeps.pairs: + var paths: seq[string] = @[] + for f in deps: paths.add toFullPath(graph.config, f).string + writeSemDeps(graph.config, importer.int32, paths) globalError(graph.config, unknownLineInfo, "nim m requires precompiled NIF for import: " & toFullPath(graph.config, fileIdx) & " (expected: " & nifPath & ")") diff --git a/compiler/semdata.nim b/compiler/semdata.nim index c837cad54e..89af5effd9 100644 --- a/compiler/semdata.nim +++ b/compiler/semdata.nim @@ -353,7 +353,16 @@ proc addIncludeFileDep*(c: PContext; f: FileIndex) = discard proc addImportFileDep*(c: PContext; f: FileIndex) = - discard + # Under `nim m` (the IC frontend) record the REAL direct imports of the + # current module as sem resolves them — including imports a macro generated + # (e.g. chronicles' `parseStmt("import chronicles/textlines")`), which the + # static dependency scanner never sees. 
`nim ic` writes this set as the + # module's `.s.deps` sidecar and re-derives the build graph from it, so the + # discovery is structured data instead of a build-failure side channel. + if c.config.cmd == cmdM: + let importer = c.module.position.FileIndex + var deps = addr c.graph.importDeps.mgetOrPut(importer, @[]) + if f notin deps[]: deps[].add f proc addPragmaComputation*(c: PContext; n: PNode) = # Also store for NIF-based IC (cmdM mode or optCompress) diff --git a/doc/ic.md b/doc/ic.md index 9fd7b55d2b..c38cf53782 100644 --- a/doc/ic.md +++ b/doc/ic.md @@ -2,165 +2,265 @@ Incremental Compilation (IC) ====================================== -The ``nim ic`` command provides incremental compilation support for Nim projects, -allowing faster rebuilds by reusing previously compiled intermediate representations -of modules that haven't changed. +The ``nim ic`` command provides incremental compilation for Nim projects. It +decomposes compilation into per-module steps whose results are cached as NIF +files, and uses the external ``nifmake`` build tool to re-run only the steps +whose inputs changed. + +This document describes **how `nim ic` works today**, including the edge cases +that shaped the current design, and ends with a **Plan** for the next backend +rewrite. Overview ======== -Incremental compilation works by decomposing the compilation process into several stages: +The pipeline has two halves driven by one process (`nim ic`, `commandIc` in +``compiler/deps.nim``) that constructs a dependency graph, writes a build file, +and hands it to ``nifmake``: -1. **Parsing** - Source files are parsed into an abstract syntax tree (AST) -2. **Semantic Analysis** - Symbols are resolved and type checking is performed -3. **Code Generation** - Platform-specific code is generated from the analyzed AST -4. **Linking** - The generated code is linked into an executable +1. 
**Frontend** — per module: + - ``nifler parse --deps`` turns ``.nim`` source into a parsed NIF + (``.p.nif``) plus a static dependency list (``.deps.nif``). + - ``nim m`` (the *semantic* step, `cmdM`) reads the parsed NIF + the + precompiled NIFs of the module's imports, type-checks, and writes the + **semmed NIF** (``.nif``) plus invalidation sidecars (see *Cookies*). +2. **Backend** — ``nim nifc`` (`cmdNifC`, ``compiler/nifbackend.nim``) reads the + semmed NIFs, generates C, compiles and links. -The IC mechanism caches the results of earlier stages in NIF files -(Nim intermediate format): ``.p.nif`` (parsed), ``.deps.nif`` (dependencies), -and ``.nif`` (semantically analyzed). When recompiling, only modules that have -changed need to be reprocessed through the semantic analysis and code generation -stages, significantly reducing compilation time for large projects. +``nifmake`` orders the steps by their input/output files: every `nim m` runs +before the `nim nifc` step that consumes its NIF, and a step re-fires only when +one of its inputs is newer than its outputs. -NIF File Format -=============== +Artifacts (the NIF zoo) +======================= -NIF (Nim Intermediate Format) files are text-based files that use a Lisp-like -syntax. They employ a hybrid format where byte offsets into the text are used for -efficient access, making them simultaneously human-readable and machine-efficient. -The text representation is particularly valuable for debugging and introspection. +Per module ``<suffix>`` (a content hash of the path; see *NIF symbols* below), +under the nimcache directory: -Each ``.nim`` module produces its own ``.nif`` file during compilation. -The NIF format contains: +| File | Producer | Purpose | +| ---- | -------- | ------- | +| ``.p.nif`` | nifler | parsed AST (syntactic) | +| ``.deps.nif`` | nifler | **static** import list (syntactic `import`s) | +| ``.s.deps.nif`` | `nim m` | **real** post-sem imports (incl. 
macro-generated); see *Discovery* | +| ``.nif`` | `nim m` | semmed module (symbols resolved, typed) | +| ``.iface.nif`` | `nim m` | **iface cookie**: hash of the importer-visible surface | +| ``.impl.nif`` | `nim m` | **impl cookie**: hash of the entire content (bodies included) | +| ``.edges.nif`` | `nim m` | **NeedsImpl edges**: modules whose bodies this sem consumed | +| ``.c.nif`` | `nim nifc` | the C text as a NIF, with def/ref markers for DCE & dedup | +| ``ic_config.cfg.nif`` | driver | precompiled config replayed by every child (`icconfig.nim`) | +| ``ic.version`` | driver | format stamp; a mismatch wipes the cache (`icFormatVersion`) | -- **Header** - Version information (e.g., `(.nif27)`) -- **Dependencies** - List of source files and dependencies -- **Interface** - Exported symbols and their indices -- **Body** - The intermediate representation of the module's code in Lisp-like syntax - -The NIF format is designed specifically for Nim and allows efficient serialization -and deserialization of the compiler's intermediate representation while remaining -readable and debuggable by tools and developers. - -The ``nim ic`` Switch -===================== - -The ``nim ic`` command initiates incremental compilation for a project. -It automatically manages the build process by: - -1. Parsing all source files into ``.nif`` format (using the ``nifler`` tool) -2. Performing semantic analysis on modified modules -3. Generating code only for modules with changes or dependencies on changed modules -4. Generating a build file (in NIFMake format) that orchestrates the compilation -5. Executing the build file through ``nifmake`` - -Prerequisites -------------- - -- **nifler** - Tool for parsing Nim source files into NIF format. The ``nim ic`` command uses ``nifler parse --deps`` to generate both parsed files (``.p.nif``) and dependency files (``.deps.nif``). 
-- **nifmake** - Build orchestration tool that follows dependencies and executes the build rules defined in ``.build.nif`` files. - -If these tools are not available, ``nim ic`` will display instructions on how to -obtain them. - -Key Modules for IC Logic +NIF symbols and ownership ========================= -The primary modules in the compiler that handle incremental compilation logic are: +(See ``../nifspec/doc/nif-spec.md``.) A global symbol is +``<ident>.<disamb>.<suffix>`` or, for a generic instantiation, +``<ident>.<disamb>.<key>.<suffix>`` where ``key`` is the instantiation +encoded by the *NIF-trees-as-identifiers* scheme. Two consequences drive the +backend: -- **deps.nim** - Dependency analysis and build file generation. Contains the - ``commandIc`` procedure which is the main entry point for the ``nim ic`` command. - This module orchestrates the incremental compilation process, handling dependency - traversal (via ``nifler deps``), build rule generation, and build file creation. - The build file is written to ``nifcache/`` directory. This module also explicitly - models ``system.nim`` as a dependency of all modules. +- **Instance names are content-addressed**: the same instantiation (`seq[Foo]`) + produced in different modules yields the *same* `key`, so a deterministic + dedup is possible *by name*. +- **The suffix names an owner module.** Today an instance's owner is the module + whose process minted it (`itemId.module`), which is process-local mint order — + the root of the *single-writer* hazard below. -- **ast2nif.nim** - Core mapping between AST and NIF. +The driver: graph construction (`commandIc`) +============================================ +1. Stamp/wipe the cache by ``icFormatVersion``. +2. Seed the graph with the root module and **`system.nim`**. `system`'s entire + import closure is folded into one node (one `nim m` invocation) — see + *single-writer* below. +3. ``traverseDeps`` runs ``nifler`` per module and reads ``.deps.nif`` to add + import edges. +4. 
**SCC grouping**: strongly-connected import cycles are collapsed (Tarjan). + A singleton compiles as ``nim m <module>``; a cycle compiles as one + ``nim m --icGroup:…`` that builds every member *from source* in + one process (resolving the recursion in memory) and writes each member's NIF. + Only edges *leaving* the component become build-graph inputs. +5. **Discovery fixpoint**: write the build file, run ``nifmake``; if it fails, + re-derive the graph from every module's ``.s.deps.nif`` (adding nodes/edges + for imports the static scanner missed), and retry. See *Discovery*. +6. The backend step (`nim nifc`) depends on every module's semmed NIF, so + ``nifmake`` runs it last. -**Code, Logic & Debugging** -=========================== +Invalidation: the cookie system +================================ -This section focuses on the compiler-side code paths, the logic you will -inspect while debugging IC, and a pragmatic manual workflow for bug hunting -using local invocations such as ``nim m --nimcache:nifcache``. +A dependent must re-sem only when a dependency's relevant surface changed. Two +hashes per module (``ast2nif.nim``): -Core places to inspect -- **`compiler/deps.nim`**: generates the NIF-based build file and implements - ``commandIc`` (entry point for ``nim ic``). Look for how build rules are - emitted (calls to the NIF builder) and how inputs/outputs are wired. -- **`compiler/modulegraphs.nim`** and **`compiler/pipelines.nim`**: - dependency graph and compilation pipeline integration — useful when a module - is rebuilt unexpectedly. +- **iface cookie** (``.iface.nif``): hashes only the *importer-visible* surface — + exported declarations' **signatures** (for *all* routine kinds: plain procs, + templates, macros, generics, `inline` procs alike), full content for + consts/types, plus import/export/replay/hook records. 
Routine **bodies are + excluded.** It also chains in the iface cookies of its own dependencies, so a + surface change anywhere in the import closure propagates. A `nim m` rule for a + module depends on its dependencies' iface cookies, so a body-only edit moves no + iface cookie and stops the re-sem cascade. +- **impl cookie** (``.impl.nif``): hashes the *entire* serialized content (private + defs and bodies included), with the module's own iface mixed in. -Understanding the NIF text -- NIF files are human-readable; open the per-module ``.nif`` files in - ``nifcache/`` to inspect parsed ASTs, dependency lists and interface tables. -- Because NIF uses textual nodes and byte offsets, tools can quickly seek to - positions in the file — but for debugging you usually only need to read the - file top-to-bottom. +**NeedsImpl edges** (``.edges.nif``): if a module *consumed another module's body* +during sem — a macro expansion, a generic instantiation, a `getImpl`, or a +compile-time call run in the VM — it records a strong edge. The dependent is then +gated on that dependency's **impl** cookie instead of its iface cookie, so e.g. +`const x = dep.foo()` re-sems when `foo`'s body changes. Recording sites: +`semExprs.semTemplateExpr` (templates), `seminst.generateInstance` (generics), +`vmgen.genProc` (VM/macros/CT procs), `vm.opcGetImpl` (`getImpl`). Inline +iterators and `inline` procs are *not* tracked — they are inlined at codegen, +where the backend's NIF-mtime invalidation re-codegens their users. -Manual bug-hunting workflow -- Prepare a clean nimcache directory (relative to your project): +Discovery of macro-generated imports +==================================== - ```bash - mkdir -p nifcache - ``` +The static scanner only sees syntactic `import`s. A macro can synthesize one +(chronicles does `parseStmt("import chronicles/textlines")` driven by the +`chronicles_sinks` define). Such an import is invisible until sem runs the macro. 
+Each `nim m` records the imports it *actually* resolved (via the +``semdata.addImportFileDep`` hook → ``graph.importDeps`` → ``ast2nif.writeSemDeps``) +into ``.s.deps.nif``; a child that fails on a not-yet-built import flushes it +before erroring. The driver re-derives the graph from those sidecars — adding the +missing node + the importer→import edge — and reruns to a fixpoint. (This replaced +an earlier `icmissing.txt` side channel.) -- Parse/semantic-check a single module and write NIF/sem artifacts: +The backend today: `commandNifC` +================================ - ```bash - nim m --nimcache:nifcache path/to/module.nim - ``` +The current backend is **whole-program and demand-driven**, run as one process +(``compiler/nifbackend.nim``): - - ``nim m`` runs the compiler up to the semantic checking stage for the - specified module and emits intermediate cache files into ``nifcache/``. - - Use this to reproduce and isolate failures in the semantic stage. +1. Load `system` then **all** modules' semmed NIFs in dependency order + (`loadModuleDependencies`), so all hooks/types are in memory. +2. **DCE**: `computeLiveSymbols` over all NIFs computes a global live set used to + filter the top-level routine listing. +3. **`computeModuleReuse`**: decide which modules' cached translation units + (``.c.nif``) can be reused — skip codegen for them and use their ``.c``/``.o`` + as is. The gate mirrors the m-step's cookie gating; a coarse fallback uses the + transitive NIF-mtime closure (`-d:icCoarseReuse`). +4. **Codegen**: for each non-reused module, `generateCodeForModule` runs `cgen`. + Codegen is **demand-driven**: emitting one module can *demand* entities + (generic instances, type-bound hooks, RTTI) that belong to other modules; a + demand whose home TU is reused is **redirected** into the demanding TU + (`redirectToLiveModule`), and reused TUs' definitions become prototypes. +5. **`enforceDefRetention`**: an un-reuse cascade. 
If a regenerated module would + stop emitting a definition that a still-reused TU references (the demand chain + that placed it no longer arises), the referencing TUs lose their reuse and + regenerate so the symbol does not vanish under them. +6. `emitMethodDispatchers`, then `finishModule` for every module (main module + **last**, so init-proc registration is complete before `genMainProc`). +7. Emit ``.c``, then `extccomp.callCCompiler` + link. -- Inspect the generated files for that module under ``nifcache/`` (look for - ``.nif``, sem/parsed artifacts). Because NIF is text-based you can open and - grep it directly: +Edge cases (and why the machinery exists) +========================================= - ```bash - sed -n '1,200p' nifcache/ModuleName.nif - grep -n "someSymbol" -n nifcache/ModuleName.nif - ``` +- **Single-writer.** Instance type-ids are minted in process-local order, so if + two `nim m` processes both write a module's NIF (e.g. a stdlib module pulled + into `system`'s from-source closure *and* given its own rule), the second + overwrites with different ids and every module checked against the first carries + dangling refs ("symbol has no offset"). Fixed by folding `system`'s closure into + one SCC and by **forwarding the project's defines** to every child so their + `when` bodies (hence import sets and NIF contents) match the scanner's. +- **`when … else: import`.** nifler emits `else`-branch imports unguarded, so a + dead `else: import` would be scheduled. The compiler's own sources were rewritten + to explicit negated `when`s; the vendored nifler later learned to negate prior + conditions for the `else`. +- **`nil` sons of loaded ASTs.** NIF dot-tokens load as `nil` where from-source + ASTs have `nkEmpty`; several passes gained `nil` guards. +- **Sealed loaded types.** Loaded types are `Sealed`; sem/transform mutate via + `unsealForTransform`/`exactReplica(idgen)` (the latter mints a fresh `uniqueId` + so serialized replicas don't collapse). 
+- **Methods/RTTI ownership.** `genTypeInfoV1` already routes a type's RTTI to + `t…itemId.module` when that module is open for codegen — an existing ownership + notion the rewrite can generalize. +- **Config cost.** Each child re-parsing `nim.cfg` + re-running `config.nims` in + the VM was ~80 ms; replaced by a precompiled `ic_config.cfg.nif` replayed in + `loadConfigs` (`compiler/icconfig.nim`). +- **`koch bootic`** bootstraps the compiler through `nim ic` (a 3-iteration + fixed-point check). It writes its binary to ``bin/nim_ic`` and never clobbers + ``bin/nim``. -- To reproduce a full incremental compilation of the project, generate the - build file and run it (``nim ic`` automates this). The build file is generated - in ``nifcache/`` directory. To debug an individual build step, run the command - that the build file would execute manually: - - Parsing step: ``nifler parse --deps input.nim`` (produces ``.p.nif`` and ``.deps.nif``) - - Semantic step: ``nim m --nimcache:nifcache input.nim`` (produces ``.nif``) - - Code generation: ``nim nifc --nimcache:nifcache input.nim`` (produces executable) +Known residual hacks (targets for the rewrite) +---------------------------------------------- -- Force a cache invalidation for a single module by removing its NIF/sem - artifact and re-running the semantic step: +- `deps.runNifler` uses `setLastModificationTime` to mark its scan up-to-date and + deletes a stale parsed file to coordinate with the nifmake nifler rule — the + driver duplicating nifmake's freshness logic. +- `computeModuleReuse` + `enforceDefRetention` + the redirect/cached-defs + machinery is a hand-rolled mini-`nifmake` *inside* the backend process, needed + only because one process reuses some TUs while regenerating others. - ```bash - rm nifcache/ModuleName.nif - nim m --nimcache:nifcache path/to/ModuleName.nim - ``` +These are legacy artifacts of a code generator that predates IC, not intrinsic +requirements. 
-- When investigating incorrect replayed state (pragmas, `{.compile: ...}`): - inspect the replay actions in ``compiler/ic/replayer.nim`` and open the - module's NIF to find the ``toReplay``/action entries that will be executed - during reload. +Plan: a nifmake-driven, per-module backend +========================================== -Tips for efficient debugging -- Use ``--path:...`` flags when invoking ``nim m`` to emulate the exact - search paths used in your project, e.g. ``--path:lib --path:vendor``. -- Compare two successive ``.nif`` files with ``diff`` to see what changed and - why a module was rebuilt. +Goal: the backend stops re-implementing `nifmake`. Each module's codegen becomes +its own build rule, so "which TUs rebuild" is just "which rules `nifmake` +re-fires from input mtimes" — exactly as the frontend already works. The reuse / +def-retention / redirect machinery then dissolves. -Where to change behavior -- Cache invalidation decisions and build-rule emission are implemented in - ``compiler/deps.nim``. When investigating surprising - rebuilds, instrument those modules to log the footprint/hash/comparison - outcome. +Target build graph (mirrors Nimony's ``src/nimony/deps.nim``): + +1. **Frontend split.** Generate a *frontend* build file (nifler + `nim m` rules), + run `nifmake`, run the `.s.deps` discovery fixpoint. Then re-derive the graph + from `.s.deps` (now complete; dead `when` imports can also be **pruned** here). +2. **Per-module codegen rule.** One ``nifc <module>`` rule per module: inputs are the + module's own ``.nif`` plus the ``.iface``/``.impl`` cookies of its + dependencies; output is its ``.c.nif``. The process loads `<module>` + its + import closure's NIFs (like `nim m`) and emits **only the entities it owns**, + referencing everything else `extern`. +3. **Static ownership** replaces runtime redirect. Every emittable entity — + generic instances, type-bound hooks, RTTI, lifted procs — gets a deterministic + owner module *by symbol suffix*. 
Because instance names are content-addressed + (``ident.disamb.key.owner``), the same instance demanded by several modules has + one name and one owner, so there is exactly one writer and no link-time + duplicate. (The precise owner rule — minting module vs. root-type's module — is + the open design decision; start from `itemId.module` and adjust where it forces + a downstream package to own stdlib code.) +4. **DCE as a rule.** A single rule reads all ``.c.nif``, computes the global live + set, and (Nimony: ``.live.nif`` / per-module ``.dce.nif``) drives per-module + ``.c`` emission filtered to live entities. +5. **Link rule.** Depends on every ``.o`` (each compiled by its own rule) and the + DCE output; produces the executable. +6. **Deletions.** `computeModuleReuse`, `enforceDefRetention`, + `redirectToLiveModule` and the cached-defs/claim bookkeeping go away. The + ``setLastModificationTime`` coordination in `runNifler` goes away with the + frontend split (the nifler rule owns parsed+deps; the driver's pre-scan only + reads `.deps` to build the graph). + +Validation bar: `koch bootic` must still reach its byte-identical fixed point, and +binary size must not regress (DCE parity), across the external-package CI set. + +Code, logic & debugging +======================== + +Core modules: +- **`compiler/deps.nim`** — graph construction, SCC grouping, discovery fixpoint, + build-file generation; `commandIc`. +- **`compiler/ast2nif.nim`** — AST↔NIF, the cookie hashes (`cookieSd`, + `writeIfaceCookie`, `writeImplCookie`, `writeEdgesFile`, `writeSemDeps`). +- **`compiler/nifbackend.nim`** — the backend (`commandNifC`) and its reuse + machinery. +- **`compiler/icconfig.nim`** — precompiled config. +- **`compiler/pipelines.nim`** / **`modulegraphs.nim`** — pipeline integration and + the graph state (`importDeps`, `icImplDeps`, `icReusedModules`, …). 
+ +Manual workflow: +- Frontend a module: ``nim m --nimcache:nifcache path/to/mod.nim`` (writes + ``.nif`` + cookies + ``.s.deps``). +- Backend: ``nim nifc --nimcache:nifcache main.nim``. +- NIF files are text — open/grep them directly; ``diff`` two successive ``.nif`` + to see why a module rebuilt. +- Force a re-sem: delete the module's ``.nif`` and rerun `nim m`. +- A stale-cache crash after editing the serialization layout means bumping + ``icFormatVersion`` (`compiler/options.nim`). See also ======== -- `nif-spec` - NIF format specification (text format and node grammar): - [nifspec/doc/nif-spec.md](../nifspec/doc/nif-spec.md) +- NIF format spec: [nifspec/doc/nif-spec.md](../nifspec/doc/nif-spec.md) +- NIFC (C-like target) spec: dist/nimony/doc/nifc-spec.md