From 3b2e0e08c52f2fb1552c98d0cbc270a8f1383bfe Mon Sep 17 00:00:00 2001 From: Araq Date: Sat, 13 Jun 2026 22:37:47 +0200 Subject: [PATCH] IC: per-module backend emit stage (Phase 2b, B3) --icBackendStage:emit --icBackendModule:SUFFIX renders one module's final .c from its .c.nif and ic.backend.merge.nif. cnif.renderCFromArtifact walks the artifact token stream: string literals verbatim, symbols by name, and a (cdef ...) body is dropped when the name is dead OR it is a 'u' unique definition this module does not own. The prototype lives in the surrounding raw text (cgen emits a forward declaration for every used proc regardless of where the body lands), so a dropped body keeps a valid declaration -- no synthesis needed. emit loads the module graph the same way cg does (factored into loadBackendModules/findTargetModule) so getCFile yields the identical path cg wrote to -- in particular the main module's source-vs-suffix aliasing. Validated end-to-end on a 3-module diamond (lib.shared demanded by siblings a and b at top level): cg all modules, merge, emit all, cc, link. The proc 'shared' lands in exactly one object (its assigned owner a) and is referenced (U) from the other -- proc dedup + ownership works at the object level. The only remaining link failures are DATA (RTTI NTIv2, const tables): those are emit-everywhere'd as cdata, which is not yet wrapped in a droppable directive nor given a guaranteed extern in non-owners -- the next unit (data ownership). koch ic thallo green. 
Co-Authored-By: Claude Opus 4.8 --- compiler/cnif.nim | 66 +++++++++++++++++++++++++++ compiler/nifbackend.nim | 98 ++++++++++++++++++++++++++++++----------- 2 files changed, 138 insertions(+), 26 deletions(-) diff --git a/compiler/cnif.nim b/compiler/cnif.nim index c0318c2067..02aebaaa6d 100644 --- a/compiler/cnif.nim +++ b/compiler/cnif.nim @@ -646,3 +646,69 @@ proc readMergeDecision*(f: string): MergeDecision = else: skip c endRead(c) + +proc renderCFromArtifact*(artifact: string; d: MergeDecision; ownerId: string; + dropped: var int): string = + ## The per-module backend's `emit` stage: render one module's final `.c` from + ## its `.c.nif` and the merge decision. String literals are emitted verbatim, + ## symbols by name; a `(cdef ...)` body is dropped when the name is dead, or + ## when it is a `'u'` unique definition this module does not own. The body's + ## prototype lives in the surrounding raw text (cgen emits a forward + ## declaration for every *used* proc, independent of where the body lands), so + ## a dropped body still leaves a valid declaration — no synthesis needed. The + ## head groups (meta/cdata/cref/cdeps) carry no C text. + result = "" + if not fileExists(artifact): return + var pool = newPool() + var tags = newTagPool() + let stmtsTag = tags.registerTag("stmts") + let cdefTag = tags.registerTag("cdef") + var buf = parseFromFile(artifact, 1000, pool, tags) + var c = beginRead(buf) + if c.kind != TagLit or c.cursorTagId != stmtsTag: + endRead(c) + return + c.loopInto: + case c.kind + of StrLit: + result.add strVal(c) + inc c + of Symbol, Ident: + result.add symOrIdentName(c) + inc c + of TagLit: + if c.cursorTagId == cdefTag: + # fixed head: SymbolDef, flags (Ident or empty), nifname StrLit; the + # rest is the definition's body text. `state` counts past the head. 
+ var name = "" + var isUnique = false + var keep = true + var state = 0 + c.loopInto: + if state == 0 and c.kind == SymbolDef: + name = symName(c) + state = 1 + inc c + elif state == 1: # the flags field (one token: Ident/Symbol or empty) + if c.kind in {Ident, Symbol}: + for ch in symOrIdentName(c): + if ch == 'u': isUnique = true + state = 2 + inc c + elif state == 2: # the NIF name (one StrLit) — decide keep here + keep = (name in d.live) and + not (isUnique and d.owners.getOrDefault(name, ownerId) != ownerId) + if not keep: inc dropped + state = 3 + inc c + else: # body tokens + if keep: + if c.kind == StrLit: result.add strVal(c) + elif c.kind in {Symbol, Ident}: result.add symOrIdentName(c) + inc c + else: + # head groups (meta/cdata/cref/cdeps) carry no C text + skip c + else: + inc c + endRead(c) diff --git a/compiler/nifbackend.nim b/compiler/nifbackend.nim index ab59a2e6d4..ca8712840c 100644 --- a/compiler/nifbackend.nim +++ b/compiler/nifbackend.nim @@ -505,6 +505,41 @@ proc generateCodeForModule(g: ModuleGraph; precomp: PrecompiledModule) = for t in g.icPreserveTypeInfos[moduleId]: discard genTypeInfo(g.config, bmod, t, unknownLineInfo) +proc loadBackendModules(g: ModuleGraph; mainFileIdx: FileIndex): + tuple[modules: seq[PrecompiledModule], precompSys: PrecompiledModule, + nifFiles: seq[string]] = + ## Shared by the per-module `cg` and `emit` stages: load system + the main + ## module's whole import closure and set up a `BModule` for each, so every + ## type/symbol resolves and `getCFile` yields the same path both stages use. + ## The main module is loaded by its source index (its NIF suffix is aliased to + ## it in `loadModuleDependencies`), so it gets exactly one `BModule`. 
+ resetForBackend(g) + var isKnownFile = false + let systemFileIdx = registerNifSuffix(g.config, "sysma2dyk", isKnownFile) + g.config.m.systemFileIdx = systemFileIdx + var precompSys = moduleFromNifFile(g, systemFileIdx, {LoadFullAst, AlwaysLoadInterface}) + g.systemModule = precompSys.module + var nifFiles: seq[string] = @[toNifFilename(g.config, systemFileIdx)] + let modules = loadModuleDependencies(g, mainFileIdx, nifFiles) + flushMethodReplays(g) + for m in modules: + discard setupNifBackendModule(g, m.module) + if precompSys.module != nil: + discard setupNifBackendModule(g, precompSys.module) + result = (modules, precompSys, nifFiles) + +proc findTargetModule(g: ModuleGraph; modules: seq[PrecompiledModule]; + precompSys: PrecompiledModule; suffix: string): PrecompiledModule = + ## The loaded module whose NIF suffix is `suffix` (the `--icBackendModule` + ## value), or a nil module if none matches. + result = PrecompiledModule(module: nil) + for m in modules: + if cachedModuleSuffix(g.config, FileIndex m.module.position) == suffix: + return m + if precompSys.module != nil and + cachedModuleSuffix(g.config, FileIndex precompSys.module.position) == suffix: + return precompSys + proc generateCgStage(g: ModuleGraph; mainFileIdx: FileIndex) = ## Per-module backend codegen (`--icBackendStage:cg --icBackendModule:`): ## generate C for the single module named by `icBackendModule` and write only @@ -515,16 +550,7 @@ proc generateCgStage(g: ModuleGraph; mainFileIdx: FileIndex) = ## the target module is code-generated; `findPendingModule` routes every demand ## into it (emit-everywhere). Loading only the target's import closure is a ## later optimization — correctness first. 
- resetForBackend(g) - var isKnownFile = false - let systemFileIdx = registerNifSuffix(g.config, "sysma2dyk", isKnownFile) - g.config.m.systemFileIdx = systemFileIdx - var precompSys = moduleFromNifFile(g, systemFileIdx, {LoadFullAst, AlwaysLoadInterface}) - g.systemModule = precompSys.module - - var nifFiles: seq[string] = @[toNifFilename(g.config, systemFileIdx)] - let modules = loadModuleDependencies(g, mainFileIdx, nifFiles) - flushMethodReplays(g) + let (modules, precompSys, nifFiles) = loadBackendModules(g, mainFileIdx) if modules.len == 0: rawMessage(g.config, errGenerated, "Cannot load NIF file for main module: " & toFullPath(g.config, mainFileIdx)) @@ -536,24 +562,10 @@ proc generateCgStage(g: ModuleGraph; mainFileIdx: FileIndex) = g.icDceEnabled = computeLiveSymbols(g.config, nifFiles, g.icLiveNames, dceStats, nifDeps) - for m in modules: - discard setupNifBackendModule(g, m.module) - if precompSys.module != nil: - discard setupNifBackendModule(g, precompSys.module) - - # Locate the target module by its NIF suffix. 
- let targetSuffix = g.config.icBackendModule - var target = PrecompiledModule(module: nil) - for m in modules: - if cachedModuleSuffix(g.config, FileIndex m.module.position) == targetSuffix: - target = m - break - if target.module == nil and precompSys.module != nil and - cachedModuleSuffix(g.config, FileIndex precompSys.module.position) == targetSuffix: - target = precompSys + let target = findTargetModule(g, modules, precompSys, g.config.icBackendModule) if target.module == nil: rawMessage(g.config, errGenerated, - "per-module codegen: module not found for suffix: " & targetSuffix) + "per-module codegen: module not found for suffix: " & g.config.icBackendModule) return generateCodeForModule(g, target) @@ -593,6 +605,37 @@ proc generateMergeStage(g: ModuleGraph) = " live: " & $decision.live.len & " defs: " & $decision.defs & " liveDefs: " & $decision.liveDefs & " owned: " & $decision.owners.len +proc generateEmitStage(g: ModuleGraph; mainFileIdx: FileIndex) = + ## Per-module backend emit (`--icBackendStage:emit --icBackendModule:`): + ## render the target module's final `.c` from its `.c.nif` and the merge + ## decision. Loads the module graph the same way `cg` does so `getCFile` + ## returns the identical path `cg` wrote to (the main module's source-vs-suffix + ## aliasing in particular); no codegen runs. 
+ let (modules, precompSys, _) = loadBackendModules(g, mainFileIdx) + if modules.len == 0: + rawMessage(g.config, errGenerated, + "Cannot load NIF file for main module: " & toFullPath(g.config, mainFileIdx)) + return + let target = findTargetModule(g, modules, precompSys, g.config.icBackendModule) + if target.module == nil: + rawMessage(g.config, errGenerated, + "per-module emit: module not found for suffix: " & g.config.icBackendModule) + return + let decision = readMergeDecision(getNimcacheDir(g.config).string / MergeDecisionFile) + if decision.broken: + rawMessage(g.config, errGenerated, + "per-module emit: missing or unparsable merge decision " & MergeDecisionFile) + return + let bmod = BModuleList(g.backend).mods[target.module.position] + let cfile = getCFile(bmod).string + let artifact = cfile & ".nif" + var dropped = 0 + let code = renderCFromArtifact(artifact, decision, extractFilename(artifact), dropped) + writeFile(cfile, code) + if isDefined(g.config, "icDceCheck"): + stderr.writeLine "[icEmit] " & extractFilename(cfile) & " dropped " & + $dropped & " bodies (" & $code.len & " bytes)" + proc generateCode*(g: ModuleGraph; mainFileIdx: FileIndex) = ## Main entry point for NIF-based C code generation. ## Traverses the module dependency graph and generates C code. @@ -602,6 +645,9 @@ proc generateCode*(g: ModuleGraph; mainFileIdx: FileIndex) = elif g.config.icBackendStage == "merge": generateMergeStage(g) return + elif g.config.icBackendStage == "emit": + generateEmitStage(g, mainFileIdx) + return elif g.config.icBackendStage.len > 0: rawMessage(g.config, errGenerated, "per-module backend stage not implemented yet: " & g.config.icBackendStage)