big progress: The backend is per-module and memory consumption is excellent

This commit is contained in:
Araq
2026-06-14 10:18:54 +02:00
parent 35a4d4da84
commit 39f08383f1
5 changed files with 131 additions and 18 deletions

View File

@@ -2027,6 +2027,29 @@ proc populateInterfaceTablesFromIndex(c: var DecodeContext; module: FileIndex;
# Move index table back
c.mods[module].index = move indexTab
proc moduleSymbolStubs*(c: var DecodeContext; module: FileIndex): seq[PSym] =
  ## Collects one stub per non-type symbol recorded in the NIF index of
  ## `module`.
  ##
  ## The per-module backend relies on this list to emit the routines a module
  ## OWNS. Procs are serialized as lazily loaded `(sd ...)` symbol-defs rather
  ## than as `nkProcDef` statements of the top-level stmt list, so
  ## `genTopLevelStmt` on its own never visits them; a routine that is called
  ## only from other modules would then be emitted by nobody, because the
  ## demanding module merely prototypes it.
  ##
  ## The returned symbols are lazy stubs. Loading a symbol can register new
  ## modules and invalidate the iterator, hence the index table is detached
  ## for the duration of the loop. Callers are expected to force the full load
  ## (`.kind`, `.ast`) and to filter only AFTER this proc has returned, when
  ## the index is back in place.
  ##
  ## Yields an empty seq when `module` is unknown to `c`.
  result = @[]
  if c.mods.hasKey(module):
    let ownSuffix = c.mods[module].suffix
    # Detach the index so that loads triggered below cannot disturb iteration.
    var detached = move c.mods[module].index
    for name, indexEntry in detached:
      # Names starting with "`t" denote types; types are not routines.
      if not name.startsWith("`t"):
        let stub = loadSymFromIndexEntry(c, module, name, indexEntry, ownSuffix)
        if stub != nil:
          result.add stub
    # Reattach the index table.
    c.mods[module].index = move detached
proc toNifFilename*(conf: ConfigRef; f: FileIndex): string =
  ## Returns the path of the generated `.nif` file for `f`: the suffix obtained
  ## from `moduleSuffix(conf, f)` is handed to `toGeneratedFile` together with
  ## the `.nif` extension, and the resulting path is converted to a `string`.
  let suffix = moduleSuffix(conf, f)
  result = toGeneratedFile(conf, AbsoluteFile(suffix), ".nif").string

View File

@@ -139,6 +139,23 @@ proc redirectToLiveModule(m: BModule, q: BModule): BModule =
break
if result == nil: result = m
proc emitsBodyInThisModule(m: BModule, prc: PSym): bool =
  ## Decides whether the body of `prc` is generated while compiling `m`.
  ##
  ## Outside the per-module `cg` stage (`cmdNifC` with `icBackendStage == "cg"`)
  ## the answer is always `true`.
  ##
  ## Inside that stage codegen deals with ONE module: it emits bodies only for
  ## the routines that module OWNS (its own top-level defs) and merely
  ## *prototypes* routines owned elsewhere. Such a routine's body comes from its
  ## own module's `cg` process, and the merge stage's DCE removes whatever is
  ## globally dead. Without this rule the main module re-emitted its entire
  ## transitive closure (≈1.8 GB, a 56 MB `.c.nif`).
  ##
  ## Generic instances and synthesized hooks (`=destroy`, `$`, …) are minted on
  ## demand and have no single owning-module top-level, so every demander emits
  ## them; the merge stage deduplicates them by their content-addressed C name.
  let perModuleCg = m.config.cmd == cmdNifC and m.config.icBackendStage == "cg"
  if not perModuleCg:
    result = true
  elif prc.itemId.module == m.module.position:
    # The routine is owned by the module being generated.
    result = true
  else:
    # Foreign routine: emitted here only if it is an instance or a hook.
    result = (prc.disamb and (InstanceDisambBit or HookDisambBit)) != 0'i32
proc initLoc(k: TLocKind, lode: PNode, s: TStorageLoc, flags: TLocFlags = {}): TLoc =
  ## Builds a `TLoc` of kind `k` with storage `s`, node `lode` and the given
  ## `flags`; its `snippet` field starts out as the empty string.
  result = TLoc(k: k, storage: s, lode: lode,
                snippet: "", flags: flags)
@@ -1722,7 +1739,8 @@ proc genProcLvl2(m: BModule, prc: PSym) =
# which will actually become a function pointer
if isReloadable(m, prc):
genProcPrototype(q, prc)
genProcLvl3(q, prc)
if emitsBodyInThisModule(m, prc):
genProcLvl3(q, prc)
else:
fillProcLoc(m, prc.ast[namePos])
useHeader(m, prc)

View File

@@ -29,9 +29,19 @@ from cgmeth import generateIfMethodDispatchers
import ic / replayer
proc loadModuleDependencies(g: ModuleGraph; mainFileIdx: FileIndex;
nifFiles: var seq[string]): seq[PrecompiledModule] =
nifFiles: var seq[string];
depFlags: set[LoadFlag] = {LoadFullAst}): seq[PrecompiledModule] =
## Traverse the module dependency graph using a stack.
## Returns all modules that need code generation, in dependency order.
##
## The main module is always loaded with its full AST (it is the codegen
## target). `depFlags` governs the rest: the whole-program backend needs every
## module's full AST (it generates code for all of them), but a per-module
## stage codegens only one target, so it loads the others interface-only
## (`depFlags = {}`) — the interface, hooks, methods and the `(replay ...)`
## directives are loaded regardless of `LoadFullAst`, and demanded bodies are
## fetched lazily from the kept-open stream, so the per-module proc-body ASTs
## (the bulk of the memory) are never materialized for non-targets.
# The main module is loaded by its SOURCE FileIndex, but its serialized
# symbols carry the module's NIF suffix. Pre-alias the suffix to the source
# index so that `registerNifSuffix` does not allocate a second FileIndex for
@@ -57,7 +67,7 @@ proc loadModuleDependencies(g: ModuleGraph; mainFileIdx: FileIndex;
if not visited.containsOrIncl(suffix.string):
var isKnownFile = false
let fileIdx = g.config.registerNifSuffix(suffix.string, isKnownFile)
let precomp = moduleFromNifFile(g, fileIdx, {LoadFullAst})
let precomp = moduleFromNifFile(g, fileIdx, depFlags)
if precomp.module != nil:
result.add precomp
nifFiles.add toNifFilename(g.config, fileIdx)
@@ -467,6 +477,21 @@ proc emitMethodDispatchers(g: ModuleGraph) =
if not containsOrIncl(mainMod.declaredThings, disp.id):
genProcLvl3(mainMod, disp)
proc signatureHasMetaType(t: PType; depth: int = 0): bool =
  ## Reports whether the routine signature `t` mentions a compile-time/meta
  ## type anywhere in its type tree: `typed`/`untyped` (e.g. the
  ## `varargs[typed]` of `echo`), typedesc, static, a generic param and the
  ## like.
  ##
  ## Routines with such signatures are expanded at their call sites and never
  ## emitted standalone, so the per-module owned-routine seeding has to skip
  ## them; otherwise codegen ends up in `getTypeDescAux(tyTyped)`. `tfHasMeta`
  ## alone misses the varargs element case, hence this explicit scan.
  ##
  ## A `nil` type, or anything nested deeper than 8 levels, counts as not meta.
  const metaKinds = {tyTyped, tyUntyped, tyTypeDesc, tyStatic, tyGenericParam,
                     tyAnything, tyFromExpr, tyError}
  if t == nil or depth > 8:
    result = false
  elif t.kind in metaKinds:
    result = true
  else:
    result = false
    # Descend into the component types; the first hit settles the answer.
    for child in t.kids:
      if signatureHasMetaType(child, depth + 1):
        result = true
        break
proc generateCodeForModule(g: ModuleGraph; precomp: PrecompiledModule) =
## Generate C code for a single module.
let moduleId = precomp.module.position
@@ -482,6 +507,46 @@ proc generateCodeForModule(g: ModuleGraph; precomp: PrecompiledModule) =
if precomp.topLevel != nil:
cgen.genTopLevelStmt(bmod, precomp.topLevel)
# Per-module backend: emit the bodies of the routines this module OWNS, not
# only the ones its top-level happens to demand. Procs are serialized as lazy
# `(sd ...)` defs (never as `nkProcDef` statements), so `genTopLevelStmt` never
# reaches them; a routine called only from *other* modules would otherwise be
# emitted by nobody, because every module now merely prototypes its foreign
# callees instead of funnelling their bodies (see `cgen.emitsBodyInThisModule`).
# The merge stage's DCE drops whatever turns out globally dead.
if g.config.cmd == cmdNifC and g.config.icBackendStage == "cg":
let modPos = precomp.module.position
for s in moduleSymbolStubs(ast.program, FileIndex modPos):
if s.itemId.module == modPos and
s.kind in {skProc, skFunc, skConverter, skMethod} and
# Only MODULE-level routines: a nested/closure proc (its owner is a
# proc) captures its enclosing scope and cannot be emitted standalone —
# the captured params have no loc → `expr: param not init`. Nested procs
# are emitted via their enclosing routine's lambda-lifting, so seeding
# the enclosing (module-level) routine already covers them.
s.skipGenericOwner != nil and s.skipGenericOwner.kind == skModule and
s.magic == mNone and
# Skip generic instances: they have no single owning-module top-level
# and are emitted by demand (emit-everywhere, deduped by the merge
# stage). An instance has an empty `genericParamsPos` just like a plain
# concrete proc, so only `sfFromGeneric` tells them apart; seeding one
# would force standalone codegen of an instance body whose `when T is X`
# branches were never folded for this path → `genMagicExpr: mIs`.
sfFromGeneric notin s.flags and
# Every other routine the module owns must be emitted here, exported or
# not: a non-exported helper is still reached from another module when a
# `template`/inline routine expands at a call site there (e.g. msgs'
# `internalErrorImpl` behind the `internalError` template), and that
# caller now only prototypes it. `{.error.}`/`compileTime` sentinels and
# bodyless forward decls are not real codegen targets.
{sfForward, sfImportc, sfCompileTime, sfError} * s.flags == {} and
s.typ != nil and not signatureHasMetaType(s.typ) and
s.ast != nil and s.ast.safeLen > bodyPos and
s.ast[genericParamsPos].kind == nkEmpty and
s.ast[bodyPos].kind != nkEmpty:
# a concrete, non-generic, runtime routine with a real body, owned here
requestProcDef(bmod, s)
# The hooks and `$enum` procs this module announces are liveness roots:
# a cached TU from a previous run may call them without any demand
# arising in this run (the demanding instance body sits inside a reused
@@ -513,14 +578,20 @@ proc loadBackendModules(g: ModuleGraph; mainFileIdx: FileIndex):
## type/symbol resolves and `getCFile` yields the same path both stages use.
## The main module is loaded by its source index (its NIF suffix is aliased to
## it in `loadModuleDependencies`), so it gets exactly one `BModule`.
##
## Only the main module — the codegen target of the stages that use this — is
## loaded with its full AST; every other module is loaded interface-only so
## the whole program's proc bodies are not materialized into this process (that
## was ~1.8 GB for the compiler's main `cg`). The `link` stage codegens nothing
## and only needs each module's `(replay ...)` directives, which load anyway.
resetForBackend(g)
var isKnownFile = false
let systemFileIdx = registerNifSuffix(g.config, "sysma2dyk", isKnownFile)
g.config.m.systemFileIdx = systemFileIdx
var precompSys = moduleFromNifFile(g, systemFileIdx, {LoadFullAst, AlwaysLoadInterface})
var precompSys = moduleFromNifFile(g, systemFileIdx, {AlwaysLoadInterface})
g.systemModule = precompSys.module
var nifFiles: seq[string] = @[toNifFilename(g.config, systemFileIdx)]
var modules = loadModuleDependencies(g, mainFileIdx, nifFiles)
var modules = loadModuleDependencies(g, mainFileIdx, nifFiles, depFlags = {})
# loadModuleDependencies traverses the project's import closure and stops at
# system. The whole-program backend then demand-loads system's own closure
# (locks, allocators, threads, …) during codegen; the per-module backend
@@ -539,7 +610,7 @@ proc loadBackendModules(g: ModuleGraph; mainFileIdx: FileIndex):
if not visited.containsOrIncl(suffix.string):
var isKnown = false
let fileIdx = registerNifSuffix(g.config, suffix.string, isKnown)
let precomp = moduleFromNifFile(g, fileIdx, {LoadFullAst})
let precomp = moduleFromNifFile(g, fileIdx, {})
if precomp.module != nil:
modules.add precomp
nifFiles.add toNifFilename(g.config, fileIdx)
@@ -573,13 +644,16 @@ proc loadDepClosure(g: ModuleGraph; targetSuffix: string):
var isKnownFile = false
let systemFileIdx = registerNifSuffix(g.config, "sysma2dyk", isKnownFile)
g.config.m.systemFileIdx = systemFileIdx
let precompSys = moduleFromNifFile(g, systemFileIdx, {LoadFullAst, AlwaysLoadInterface})
let precompSys = moduleFromNifFile(g, systemFileIdx, {AlwaysLoadInterface})
g.systemModule = precompSys.module
var modules: seq[PrecompiledModule] = @[]
var visited = initHashSet[string]()
visited.incl "sysma2dyk"
# Only the target is codegen'd, so only it needs its full AST; the closure is
# loaded interface-only (demanded bodies come lazily from the kept-open
# streams), which is what keeps a per-module process light under parallel fan-out.
var isKnown = false
let targetIdx = registerNifSuffix(g.config, targetSuffix, isKnown)
let target = moduleFromNifFile(g, targetIdx, {LoadFullAst})
@@ -596,7 +670,7 @@ proc loadDepClosure(g: ModuleGraph; targetSuffix: string):
if not visited.containsOrIncl(suffix.string):
var isKnown2 = false
let fileIdx = registerNifSuffix(g.config, suffix.string, isKnown2)
let precomp = moduleFromNifFile(g, fileIdx, {LoadFullAst})
let precomp = moduleFromNifFile(g, fileIdx, {})
if precomp.module != nil:
modules.add precomp
for dep in precomp.deps: stack.add dep
@@ -645,14 +719,12 @@ proc generateCgStage(g: ModuleGraph; mainFileIdx: FileIndex) =
rawMessage(g.config, errGenerated,
"Cannot load NIF file for main module: " & toFullPath(g.config, mainFileIdx))
return
# Whole-program liveness filters the eager top-level routine listing
# (`icDceLive`); the merge stage still recomputes the program-wide live set
# across all `.c.nif`s, so this is only a size optimization for the main TU.
var dceStats = DceStats()
var nifDeps = initTable[string, seq[string]]()
if not isDefined(g.config, "icNoDce"):
g.icDceEnabled = computeLiveSymbols(g.config, nifFiles, g.icLiveNames,
dceStats, nifDeps)
# No whole-program DCE here, exactly as for a non-main target: `icDceEnabled`
# stays false so each module emits the routines it owns and the MERGE stage
# recomputes the one program-wide live set across all `.c.nif`s. Running
# `computeLiveSymbols` over all ~260 NIFs in the main `cg` cost ~900 MB for a
# result the merge stage throws away — pure redundancy now that the funnel is
# gone (the main module no longer emits its transitive closure's bodies).
target = findTargetModule(g, modules, precompSys, g.config.icBackendModule)
else:
# No whole-program load, hence no whole-program DCE: `icDceEnabled` stays

View File

@@ -1,7 +1,7 @@
discard """
targets: "c"
matrix: "--debugger:native --mangle:nim"
ccodecheck: "'testFunc__titaniummangle95nim_u'"
ccodecheck: "'testFunc_u' \\d+ '__titaniummangle95nim'"
"""
# When debugging this, note that if one check fails, it can be due to any of the above.

View File

@@ -3,7 +3,7 @@ discard """
-1
8
'''
ccodecheck: "'console.log(-1); function fac__tcodegendeclproc_u' \\d+ '(n_p0)'"
ccodecheck: "'console.log(-1); function fac_u' \\d+ '__tcodegendeclproc(n_p0)'"
"""
proc fac(n: int): int {.codegenDecl: "console.log(-1); function $2($3)".} =
return n