From 39f08383f19f40ea4abd3ae90fc9cdcb4a79a2c8 Mon Sep 17 00:00:00 2001 From: Araq Date: Sun, 14 Jun 2026 10:18:54 +0200 Subject: [PATCH] big progress: The backend is per-module and memory consumption is excellent --- compiler/ast2nif.nim | 23 ++++++ compiler/cgen.nim | 20 +++++- compiler/nifbackend.nim | 102 +++++++++++++++++++++++---- tests/codegen/titaniummangle_nim.nim | 2 +- tests/js/tcodegendeclproc.nim | 2 +- 5 files changed, 131 insertions(+), 18 deletions(-) diff --git a/compiler/ast2nif.nim b/compiler/ast2nif.nim index 0d665099a1..18c85ab263 100644 --- a/compiler/ast2nif.nim +++ b/compiler/ast2nif.nim @@ -2027,6 +2027,29 @@ proc populateInterfaceTablesFromIndex(c: var DecodeContext; module: FileIndex; # Move index table back c.mods[module].index = move indexTab +proc moduleSymbolStubs*(c: var DecodeContext; module: FileIndex): seq[PSym] = + ## Stubs for every non-type symbol serialized in `module`'s NIF index. The + ## per-module backend uses this to emit the routines a module OWNS: procs are + ## serialized as `(sd ...)` symbol-defs and loaded lazily, never as + ## `nkProcDef` statements in the top-level stmt list, so `genTopLevelStmt` + ## alone never reaches them — without this, a routine called only from other + ## modules would be emitted by nobody once the demanding module merely + ## prototypes it. + ## + ## Returns lazy stubs: the index table is moved out while iterating (loading a + ## symbol can register new modules and invalidate the iterator), so the caller + ## forces full load (`.kind`, `.ast`) and filters AFTER this returns, with the + ## index back in place. + result = @[] + if not c.mods.hasKey(module): return + var indexTab = move c.mods[module].index + let thisModule = c.mods[module].suffix + for nifName, entry in indexTab: + if nifName.startsWith("`t"): continue # types are not routines + let sym = loadSymFromIndexEntry(c, module, nifName, entry, thisModule) + if sym != nil: result.add sym + c.mods[module].index = move indexTab + proc toNifFilename*(conf: ConfigRef; f: FileIndex): string = let suffix = moduleSuffix(conf, f) result = toGeneratedFile(conf, AbsoluteFile(suffix), ".nif").string diff --git a/compiler/cgen.nim b/compiler/cgen.nim index 02fcdf8ecb..f7a55fb980 100644 --- a/compiler/cgen.nim +++ b/compiler/cgen.nim @@ -139,6 +139,23 @@ proc redirectToLiveModule(m: BModule, q: BModule): BModule = break if result == nil: result = m +proc emitsBodyInThisModule(m: BModule, prc: PSym): bool = + ## Per-module backend codegen is concerned with ONE module: it emits the + ## bodies of the routines that module OWNS (its own top-level defs) and only + ## *prototypes* a routine owned by another module — that routine's body is + ## emitted by its own module's `cg` process, and the merge stage's DCE prunes + ## whatever ends up globally dead. The funnel where the main module re-emitted + ## its entire transitive closure (≈1.8 GB, a 56 MB `.c.nif`) is exactly this + ## rule being absent. + ## + ## Generic instances and synthesized hooks (`=destroy`, `$`, …) have no single + ## owning-module top-level — they are minted on demand — so each demander emits + ## them and the merge stage deduplicates by their content-addressed C name. + if not (m.config.cmd == cmdNifC and m.config.icBackendStage == "cg"): + return true + result = prc.itemId.module == m.module.position or + (prc.disamb and (InstanceDisambBit or HookDisambBit)) != 0'i32 + proc initLoc(k: TLocKind, lode: PNode, s: TStorageLoc, flags: TLocFlags = {}): TLoc = result = TLoc(k: k, storage: s, lode: lode, snippet: "", flags: flags) @@ -1722,7 +1739,8 @@ proc genProcLvl2(m: BModule, prc: PSym) = # which will actually become a function pointer if isReloadable(m, prc): genProcPrototype(q, prc) - genProcLvl3(q, prc) + if emitsBodyInThisModule(m, prc): + genProcLvl3(q, prc) else: fillProcLoc(m, prc.ast[namePos]) useHeader(m, prc) diff --git a/compiler/nifbackend.nim b/compiler/nifbackend.nim index d7d5025b59..66fb72b707 100644 --- a/compiler/nifbackend.nim +++ b/compiler/nifbackend.nim @@ -29,9 +29,19 @@ from cgmeth import generateIfMethodDispatchers import ic / replayer proc loadModuleDependencies(g: ModuleGraph; mainFileIdx: FileIndex; - nifFiles: var seq[string]): seq[PrecompiledModule] = + nifFiles: var seq[string]; + depFlags: set[LoadFlag] = {LoadFullAst}): seq[PrecompiledModule] = ## Traverse the module dependency graph using a stack. ## Returns all modules that need code generation, in dependency order. + ## + ## The main module is always loaded with its full AST (it is the codegen + ## target). `depFlags` governs the rest: the whole-program backend needs every + ## module's full AST (it generates code for all of them), but a per-module + ## stage codegens only one target, so it loads the others interface-only + ## (`depFlags = {}`) — the interface, hooks, methods and the `(replay ...)` + ## directives are loaded regardless of `LoadFullAst`, and demanded bodies are + ## fetched lazily from the kept-open stream, so the per-module proc-body ASTs + ## (the bulk of the memory) are never materialized for non-targets. # The main module is loaded by its SOURCE FileIndex, but its serialized # symbols carry the module's NIF suffix. Pre-alias the suffix to the source # index so that `registerNifSuffix` does not allocate a second FileIndex for @@ -57,7 +67,7 @@ proc loadModuleDependencies(g: ModuleGraph; mainFileIdx: FileIndex; if not visited.containsOrIncl(suffix.string): var isKnownFile = false let fileIdx = g.config.registerNifSuffix(suffix.string, isKnownFile) - let precomp = moduleFromNifFile(g, fileIdx, {LoadFullAst}) + let precomp = moduleFromNifFile(g, fileIdx, depFlags) if precomp.module != nil: result.add precomp nifFiles.add toNifFilename(g.config, fileIdx) @@ -467,6 +477,21 @@ proc emitMethodDispatchers(g: ModuleGraph) = if not containsOrIncl(mainMod.declaredThings, disp.id): genProcLvl3(mainMod, disp) +proc signatureHasMetaType(t: PType; depth: int = 0): bool = + ## Whether a routine signature mentions a compile-time/meta element type + ## (`typed`/`untyped` — e.g. `echo`'s `varargs[typed]` — typedesc, static, + ## generic param). Such routines are expanded at their call sites and never + ## emitted standalone, so the per-module owned-routine seeding must skip them + ## (`getTypeDescAux(tyTyped)` otherwise). `tfHasMeta` alone misses the varargs + ## element case, hence the explicit scan. + result = false + if t == nil or depth > 8: return false + if t.kind in {tyTyped, tyUntyped, tyTypeDesc, tyStatic, tyGenericParam, + tyAnything, tyFromExpr, tyError}: + return true + for k in t.kids: + if signatureHasMetaType(k, depth + 1): return true + proc generateCodeForModule(g: ModuleGraph; precomp: PrecompiledModule) = ## Generate C code for a single module. let moduleId = precomp.module.position @@ -482,6 +507,46 @@ proc generateCodeForModule(g: ModuleGraph; precomp: PrecompiledModule) = if precomp.topLevel != nil: cgen.genTopLevelStmt(bmod, precomp.topLevel) + # Per-module backend: emit the bodies of the routines this module OWNS, not + # only the ones its top-level happens to demand. Procs are serialized as lazy + # `(sd ...)` defs (never as `nkProcDef` statements), so `genTopLevelStmt` never + # reaches them; a routine called only from *other* modules would otherwise be + # emitted by nobody, because every module now merely prototypes its foreign + # callees instead of funnelling their bodies (see `cgen.emitsBodyInThisModule`). + # The merge stage's DCE drops whatever turns out globally dead. + if g.config.cmd == cmdNifC and g.config.icBackendStage == "cg": + let modPos = precomp.module.position + for s in moduleSymbolStubs(ast.program, FileIndex modPos): + if s.itemId.module == modPos and + s.kind in {skProc, skFunc, skConverter, skMethod} and + # Only MODULE-level routines: a nested/closure proc (its owner is a + # proc) captures its enclosing scope and cannot be emitted standalone — + # the captured params have no loc → `expr: param not init`. Nested procs + # are emitted via their enclosing routine's lambda-lifting, so seeding + # the enclosing (module-level) routine already covers them. + s.skipGenericOwner != nil and s.skipGenericOwner.kind == skModule and + s.magic == mNone and + # Skip generic instances: they have no single owning-module top-level + # and are emitted by demand (emit-everywhere, deduped by the merge + # stage). An instance has an empty `genericParamsPos` just like a plain + # concrete proc, so only `sfFromGeneric` tells them apart; seeding one + # would force standalone codegen of an instance body whose `when T is X` + # branches were never folded for this path → `genMagicExpr: mIs`. + sfFromGeneric notin s.flags and + # Every other routine the module owns must be emitted here, exported or + # not: a non-exported helper is still reached from another module when a + # `template`/inline routine expands at a call site there (e.g. msgs' + # `internalErrorImpl` behind the `internalError` template), and that + # caller now only prototypes it. `{.error.}`/`compileTime` sentinels and + # bodyless forward decls are not real codegen targets. + {sfForward, sfImportc, sfCompileTime, sfError} * s.flags == {} and + s.typ != nil and not signatureHasMetaType(s.typ) and + s.ast != nil and s.ast.safeLen > bodyPos and + s.ast[genericParamsPos].kind == nkEmpty and + s.ast[bodyPos].kind != nkEmpty: + # a concrete, non-generic, runtime routine with a real body, owned here + requestProcDef(bmod, s) + # The hooks and `$enum` procs this module announces are liveness roots: # a cached TU from a previous run may call them without any demand # arising in this run (the demanding instance body sits inside a reused @@ -513,14 +578,20 @@ proc loadBackendModules(g: ModuleGraph; mainFileIdx: FileIndex): ## type/symbol resolves and `getCFile` yields the same path both stages use. ## The main module is loaded by its source index (its NIF suffix is aliased to ## it in `loadModuleDependencies`), so it gets exactly one `BModule`. + ## + ## Only the main module — the codegen target of the stages that use this — is + ## loaded with its full AST; every other module is loaded interface-only so + ## the whole program's proc bodies are not materialized into this process (that + ## was ~1.8 GB for the compiler's main `cg`). The `link` stage codegens nothing + ## and only needs each module's `(replay ...)` directives, which load anyway. resetForBackend(g) var isKnownFile = false let systemFileIdx = registerNifSuffix(g.config, "sysma2dyk", isKnownFile) g.config.m.systemFileIdx = systemFileIdx - var precompSys = moduleFromNifFile(g, systemFileIdx, {LoadFullAst, AlwaysLoadInterface}) + var precompSys = moduleFromNifFile(g, systemFileIdx, {AlwaysLoadInterface}) g.systemModule = precompSys.module var nifFiles: seq[string] = @[toNifFilename(g.config, systemFileIdx)] - var modules = loadModuleDependencies(g, mainFileIdx, nifFiles) + var modules = loadModuleDependencies(g, mainFileIdx, nifFiles, depFlags = {}) # loadModuleDependencies traverses the project's import closure and stops at # system. The whole-program backend then demand-loads system's own closure # (locks, allocators, threads, …) during codegen; the per-module backend @@ -539,7 +610,7 @@ proc loadBackendModules(g: ModuleGraph; mainFileIdx: FileIndex): if not visited.containsOrIncl(suffix.string): var isKnown = false let fileIdx = registerNifSuffix(g.config, suffix.string, isKnown) - let precomp = moduleFromNifFile(g, fileIdx, {LoadFullAst}) + let precomp = moduleFromNifFile(g, fileIdx, {}) if precomp.module != nil: modules.add precomp nifFiles.add toNifFilename(g.config, fileIdx) @@ -573,13 +644,16 @@ proc loadDepClosure(g: ModuleGraph; targetSuffix: string): var isKnownFile = false let systemFileIdx = registerNifSuffix(g.config, "sysma2dyk", isKnownFile) g.config.m.systemFileIdx = systemFileIdx - let precompSys = moduleFromNifFile(g, systemFileIdx, {LoadFullAst, AlwaysLoadInterface}) + let precompSys = moduleFromNifFile(g, systemFileIdx, {AlwaysLoadInterface}) g.systemModule = precompSys.module var modules: seq[PrecompiledModule] = @[] var visited = initHashSet[string]() visited.incl "sysma2dyk" + # Only the target is codegen'd, so only it needs its full AST; the closure is + # loaded interface-only (demanded bodies come lazily from the kept-open + # streams), which is what keeps a per-module process light under parallel fan-out. var isKnown = false let targetIdx = registerNifSuffix(g.config, targetSuffix, isKnown) let target = moduleFromNifFile(g, targetIdx, {LoadFullAst}) @@ -596,7 +670,7 @@ proc loadDepClosure(g: ModuleGraph; targetSuffix: string): if not visited.containsOrIncl(suffix.string): var isKnown2 = false let fileIdx = registerNifSuffix(g.config, suffix.string, isKnown2) - let precomp = moduleFromNifFile(g, fileIdx, {LoadFullAst}) + let precomp = moduleFromNifFile(g, fileIdx, {}) if precomp.module != nil: modules.add precomp for dep in precomp.deps: stack.add dep @@ -645,14 +719,12 @@ proc generateCgStage(g: ModuleGraph; mainFileIdx: FileIndex) = rawMessage(g.config, errGenerated, "Cannot load NIF file for main module: " & toFullPath(g.config, mainFileIdx)) return - # Whole-program liveness filters the eager top-level routine listing - # (`icDceLive`); the merge stage still recomputes the program-wide live set - # across all `.c.nif`s, so this is only a size optimization for the main TU. - var dceStats = DceStats() - var nifDeps = initTable[string, seq[string]]() - if not isDefined(g.config, "icNoDce"): - g.icDceEnabled = computeLiveSymbols(g.config, nifFiles, g.icLiveNames, - dceStats, nifDeps) + # No whole-program DCE here, exactly as for a non-main target: `icDceEnabled` + # stays false so each module emits the routines it owns and the MERGE stage + # recomputes the one program-wide live set across all `.c.nif`s. Running + # `computeLiveSymbols` over all ~260 NIFs in the main `cg` cost ~900 MB for a + # result the merge stage throws away — pure redundancy now that the funnel is + # gone (the main module no longer emits its transitive closure's bodies). target = findTargetModule(g, modules, precompSys, g.config.icBackendModule) else: # No whole-program load, hence no whole-program DCE: `icDceEnabled` stays diff --git a/tests/codegen/titaniummangle_nim.nim b/tests/codegen/titaniummangle_nim.nim index 26953166e1..6408fcf3b6 100644 --- a/tests/codegen/titaniummangle_nim.nim +++ b/tests/codegen/titaniummangle_nim.nim @@ -1,7 +1,7 @@ discard """ targets: "c" matrix: "--debugger:native --mangle:nim" - ccodecheck: "'testFunc__titaniummangle95nim_u'" + ccodecheck: "'testFunc_u' \\d+ '__titaniummangle95nim'" """ #When debugging this notice that if one check fails, it can be due to any of the above. diff --git a/tests/js/tcodegendeclproc.nim b/tests/js/tcodegendeclproc.nim index 7ad2cfcee2..27d467d824 100644 --- a/tests/js/tcodegendeclproc.nim +++ b/tests/js/tcodegendeclproc.nim @@ -3,7 +3,7 @@ discard """ -1 8 ''' - ccodecheck: "'console.log(-1); function fac__tcodegendeclproc_u' \\d+ '(n_p0)'" + ccodecheck: "'console.log(-1); function fac_u' \\d+ '__tcodegendeclproc(n_p0)'" """ proc fac(n: int): int {.codegenDecl: "console.log(-1); function $2($3)".} = return n