mirror of
https://github.com/nim-lang/Nim.git
synced 2026-06-15 08:03:46 +00:00
big progress: The backend is per-module and memory consumption is excellent
This commit is contained in:
@@ -2027,6 +2027,29 @@ proc populateInterfaceTablesFromIndex(c: var DecodeContext; module: FileIndex;
|
||||
# Move index table back
|
||||
c.mods[module].index = move indexTab
|
||||
|
||||
proc moduleSymbolStubs*(c: var DecodeContext; module: FileIndex): seq[PSym] =
  ## Collects a symbol stub for every non-type entry in the NIF index of
  ## `module`; yields an empty seq when `module` is not present in `c.mods`.
  ##
  ## Why it exists: routines are serialized as lazily loaded `(sd ...)`
  ## symbol-defs rather than as `nkProcDef` statements of the top-level stmt
  ## list, so `genTopLevelStmt` on its own never visits them. The per-module
  ## backend walks these stubs to emit the routines a module OWNS; without
  ## that, a routine called only from other modules (which merely prototype
  ## it) would be emitted by nobody.
  ##
  ## The returned stubs are lazy. The index table is detached from `c.mods`
  ## for the duration of the walk because loading a symbol can register new
  ## modules and invalidate the iterator. Callers must therefore force the
  ## full load (`.kind`, `.ast`) and do their filtering only AFTER this proc
  ## has returned, when the index is back in place.
  result = @[]
  if c.mods.hasKey(module):
    var detached = move c.mods[module].index
    let ownSuffix = c.mods[module].suffix
    for key, indexEntry in detached:
      # Entries whose name carries the type prefix are not routines.
      if not key.startsWith("`t"):
        let stub = loadSymFromIndexEntry(c, module, key, indexEntry, ownSuffix)
        if stub != nil:
          result.add stub
    # Iteration is over: hand the index table back to the module record.
    c.mods[module].index = move detached
|
||||
|
||||
proc toNifFilename*(conf: ConfigRef; f: FileIndex): string =
|
||||
let suffix = moduleSuffix(conf, f)
|
||||
result = toGeneratedFile(conf, AbsoluteFile(suffix), ".nif").string
|
||||
|
||||
@@ -139,6 +139,23 @@ proc redirectToLiveModule(m: BModule, q: BModule): BModule =
|
||||
break
|
||||
if result == nil: result = m
|
||||
|
||||
proc emitsBodyInThisModule(m: BModule, prc: PSym): bool =
  ## Decides whether the body of `prc` is generated into `m`, or whether `m`
  ## only gets a prototype for it.
  ##
  ## Outside the per-module `cg` stage of `cmdNifC` the answer is always
  ## `true`. Inside it, codegen is concerned with ONE module: the bodies of
  ## the routines that module owns are emitted, while a routine owned by
  ## another module is only *prototyped* — its body comes from its own
  ## module's `cg` process, and the merge stage's DCE prunes whatever ends up
  ## globally dead. Lacking this rule, the main module re-emitted its entire
  ## transitive closure (≈1.8 GB, a 56 MB `.c.nif`).
  ##
  ## Generic instances and synthesized hooks (`=destroy`, `$`, …) are minted
  ## on demand and have no single owning-module top-level, so every demander
  ## emits them; the merge stage deduplicates them by their content-addressed
  ## C name.
  let perModuleCg =
    m.config.cmd == cmdNifC and m.config.icBackendStage == "cg"
  if not perModuleCg:
    result = true
  elif prc.itemId.module == m.module.position:
    # The routine is owned by the module being generated.
    result = true
  else:
    # Foreign routine: emit the body only for instances and hooks.
    result = (prc.disamb and (InstanceDisambBit or HookDisambBit)) != 0'i32
|
||||
|
||||
proc initLoc(k: TLocKind, lode: PNode, s: TStorageLoc, flags: TLocFlags = {}): TLoc =
|
||||
result = TLoc(k: k, storage: s, lode: lode,
|
||||
snippet: "", flags: flags)
|
||||
@@ -1722,7 +1739,8 @@ proc genProcLvl2(m: BModule, prc: PSym) =
|
||||
# which will actually become a function pointer
|
||||
if isReloadable(m, prc):
|
||||
genProcPrototype(q, prc)
|
||||
genProcLvl3(q, prc)
|
||||
if emitsBodyInThisModule(m, prc):
|
||||
genProcLvl3(q, prc)
|
||||
else:
|
||||
fillProcLoc(m, prc.ast[namePos])
|
||||
useHeader(m, prc)
|
||||
|
||||
@@ -29,9 +29,19 @@ from cgmeth import generateIfMethodDispatchers
|
||||
import ic / replayer
|
||||
|
||||
proc loadModuleDependencies(g: ModuleGraph; mainFileIdx: FileIndex;
|
||||
nifFiles: var seq[string]): seq[PrecompiledModule] =
|
||||
nifFiles: var seq[string];
|
||||
depFlags: set[LoadFlag] = {LoadFullAst}): seq[PrecompiledModule] =
|
||||
## Traverse the module dependency graph using a stack.
|
||||
## Returns all modules that need code generation, in dependency order.
|
||||
##
|
||||
## The main module is always loaded with its full AST (it is the codegen
|
||||
## target). `depFlags` governs the rest: the whole-program backend needs every
|
||||
## module's full AST (it generates code for all of them), but a per-module
|
||||
## stage codegens only one target, so it loads the others interface-only
|
||||
## (`depFlags = {}`) — the interface, hooks, methods and the `(replay ...)`
|
||||
## directives are loaded regardless of `LoadFullAst`, and demanded bodies are
|
||||
## fetched lazily from the kept-open stream, so the per-module proc-body ASTs
|
||||
## (the bulk of the memory) are never materialized for non-targets.
|
||||
# The main module is loaded by its SOURCE FileIndex, but its serialized
|
||||
# symbols carry the module's NIF suffix. Pre-alias the suffix to the source
|
||||
# index so that `registerNifSuffix` does not allocate a second FileIndex for
|
||||
@@ -57,7 +67,7 @@ proc loadModuleDependencies(g: ModuleGraph; mainFileIdx: FileIndex;
|
||||
if not visited.containsOrIncl(suffix.string):
|
||||
var isKnownFile = false
|
||||
let fileIdx = g.config.registerNifSuffix(suffix.string, isKnownFile)
|
||||
let precomp = moduleFromNifFile(g, fileIdx, {LoadFullAst})
|
||||
let precomp = moduleFromNifFile(g, fileIdx, depFlags)
|
||||
if precomp.module != nil:
|
||||
result.add precomp
|
||||
nifFiles.add toNifFilename(g.config, fileIdx)
|
||||
@@ -467,6 +477,21 @@ proc emitMethodDispatchers(g: ModuleGraph) =
|
||||
if not containsOrIncl(mainMod.declaredThings, disp.id):
|
||||
genProcLvl3(mainMod, disp)
|
||||
|
||||
proc signatureHasMetaType(t: PType; depth: int = 0): bool =
  ## Reports whether a routine signature mentions a compile-time/meta type
  ## anywhere in its type tree: `typed`/`untyped` (e.g. the `varargs[typed]`
  ## of `echo`), typedesc, static, a generic param, and similar kinds.
  ##
  ## Routines with such signatures are expanded at their call sites and never
  ## emitted standalone, so the per-module owned-routine seeding has to skip
  ## them (it would run into `getTypeDescAux(tyTyped)` otherwise). `tfHasMeta`
  ## alone misses the varargs element case, hence the explicit scan.
  ##
  ## `depth` is the current recursion level; a `nil` type, or a level beyond
  ## 8, is reported as "no meta type".
  const metaKinds = {tyTyped, tyUntyped, tyTypeDesc, tyStatic, tyGenericParam,
                     tyAnything, tyFromExpr, tyError}
  if t == nil or depth > 8:
    result = false
  elif t.kind in metaKinds:
    result = true
  else:
    result = false
    # Scan the child types; the first hit settles the answer.
    for child in t.kids:
      if signatureHasMetaType(child, depth + 1):
        result = true
        break
|
||||
|
||||
proc generateCodeForModule(g: ModuleGraph; precomp: PrecompiledModule) =
|
||||
## Generate C code for a single module.
|
||||
let moduleId = precomp.module.position
|
||||
@@ -482,6 +507,46 @@ proc generateCodeForModule(g: ModuleGraph; precomp: PrecompiledModule) =
|
||||
if precomp.topLevel != nil:
|
||||
cgen.genTopLevelStmt(bmod, precomp.topLevel)
|
||||
|
||||
# Per-module backend: emit the bodies of the routines this module OWNS, not
|
||||
# only the ones its top-level happens to demand. Procs are serialized as lazy
|
||||
# `(sd ...)` defs (never as `nkProcDef` statements), so `genTopLevelStmt` never
|
||||
# reaches them; a routine called only from *other* modules would otherwise be
|
||||
# emitted by nobody, because every module now merely prototypes its foreign
|
||||
# callees instead of funnelling their bodies (see `cgen.emitsBodyInThisModule`).
|
||||
# The merge stage's DCE drops whatever turns out globally dead.
|
||||
if g.config.cmd == cmdNifC and g.config.icBackendStage == "cg":
|
||||
let modPos = precomp.module.position
|
||||
for s in moduleSymbolStubs(ast.program, FileIndex modPos):
|
||||
if s.itemId.module == modPos and
|
||||
s.kind in {skProc, skFunc, skConverter, skMethod} and
|
||||
# Only MODULE-level routines: a nested/closure proc (its owner is a
|
||||
# proc) captures its enclosing scope and cannot be emitted standalone —
|
||||
# the captured params have no loc → `expr: param not init`. Nested procs
|
||||
# are emitted via their enclosing routine's lambda-lifting, so seeding
|
||||
# the enclosing (module-level) routine already covers them.
|
||||
s.skipGenericOwner != nil and s.skipGenericOwner.kind == skModule and
|
||||
s.magic == mNone and
|
||||
# Skip generic instances: they have no single owning-module top-level
|
||||
# and are emitted by demand (emit-everywhere, deduped by the merge
|
||||
# stage). An instance has an empty `genericParamsPos` just like a plain
|
||||
# concrete proc, so only `sfFromGeneric` tells them apart; seeding one
|
||||
# would force standalone codegen of an instance body whose `when T is X`
|
||||
# branches were never folded for this path → `genMagicExpr: mIs`.
|
||||
sfFromGeneric notin s.flags and
|
||||
# Every other routine the module owns must be emitted here, exported or
|
||||
# not: a non-exported helper is still reached from another module when a
|
||||
# `template`/inline routine expands at a call site there (e.g. msgs'
|
||||
# `internalErrorImpl` behind the `internalError` template), and that
|
||||
# caller now only prototypes it. `{.error.}`/`compileTime` sentinels and
|
||||
# bodyless forward decls are not real codegen targets.
|
||||
{sfForward, sfImportc, sfCompileTime, sfError} * s.flags == {} and
|
||||
s.typ != nil and not signatureHasMetaType(s.typ) and
|
||||
s.ast != nil and s.ast.safeLen > bodyPos and
|
||||
s.ast[genericParamsPos].kind == nkEmpty and
|
||||
s.ast[bodyPos].kind != nkEmpty:
|
||||
# a concrete, non-generic, runtime routine with a real body, owned here
|
||||
requestProcDef(bmod, s)
|
||||
|
||||
# The hooks and `$enum` procs this module announces are liveness roots:
|
||||
# a cached TU from a previous run may call them without any demand
|
||||
# arising in this run (the demanding instance body sits inside a reused
|
||||
@@ -513,14 +578,20 @@ proc loadBackendModules(g: ModuleGraph; mainFileIdx: FileIndex):
|
||||
## type/symbol resolves and `getCFile` yields the same path both stages use.
|
||||
## The main module is loaded by its source index (its NIF suffix is aliased to
|
||||
## it in `loadModuleDependencies`), so it gets exactly one `BModule`.
|
||||
##
|
||||
## Only the main module — the codegen target of the stages that use this — is
|
||||
## loaded with its full AST; every other module is loaded interface-only so
|
||||
## the whole program's proc bodies are not materialized into this process (that
|
||||
## was ~1.8 GB for the compiler's main `cg`). The `link` stage codegens nothing
|
||||
## and only needs each module's `(replay ...)` directives, which load anyway.
|
||||
resetForBackend(g)
|
||||
var isKnownFile = false
|
||||
let systemFileIdx = registerNifSuffix(g.config, "sysma2dyk", isKnownFile)
|
||||
g.config.m.systemFileIdx = systemFileIdx
|
||||
var precompSys = moduleFromNifFile(g, systemFileIdx, {LoadFullAst, AlwaysLoadInterface})
|
||||
var precompSys = moduleFromNifFile(g, systemFileIdx, {AlwaysLoadInterface})
|
||||
g.systemModule = precompSys.module
|
||||
var nifFiles: seq[string] = @[toNifFilename(g.config, systemFileIdx)]
|
||||
var modules = loadModuleDependencies(g, mainFileIdx, nifFiles)
|
||||
var modules = loadModuleDependencies(g, mainFileIdx, nifFiles, depFlags = {})
|
||||
# loadModuleDependencies traverses the project's import closure and stops at
|
||||
# system. The whole-program backend then demand-loads system's own closure
|
||||
# (locks, allocators, threads, …) during codegen; the per-module backend
|
||||
@@ -539,7 +610,7 @@ proc loadBackendModules(g: ModuleGraph; mainFileIdx: FileIndex):
|
||||
if not visited.containsOrIncl(suffix.string):
|
||||
var isKnown = false
|
||||
let fileIdx = registerNifSuffix(g.config, suffix.string, isKnown)
|
||||
let precomp = moduleFromNifFile(g, fileIdx, {LoadFullAst})
|
||||
let precomp = moduleFromNifFile(g, fileIdx, {})
|
||||
if precomp.module != nil:
|
||||
modules.add precomp
|
||||
nifFiles.add toNifFilename(g.config, fileIdx)
|
||||
@@ -573,13 +644,16 @@ proc loadDepClosure(g: ModuleGraph; targetSuffix: string):
|
||||
var isKnownFile = false
|
||||
let systemFileIdx = registerNifSuffix(g.config, "sysma2dyk", isKnownFile)
|
||||
g.config.m.systemFileIdx = systemFileIdx
|
||||
let precompSys = moduleFromNifFile(g, systemFileIdx, {LoadFullAst, AlwaysLoadInterface})
|
||||
let precompSys = moduleFromNifFile(g, systemFileIdx, {AlwaysLoadInterface})
|
||||
g.systemModule = precompSys.module
|
||||
|
||||
var modules: seq[PrecompiledModule] = @[]
|
||||
var visited = initHashSet[string]()
|
||||
visited.incl "sysma2dyk"
|
||||
|
||||
# Only the target is codegen'd, so only it needs its full AST; the closure is
|
||||
# loaded interface-only (demanded bodies come lazily from the kept-open
|
||||
# streams), which is what keeps a per-module process light under parallel fan-out.
|
||||
var isKnown = false
|
||||
let targetIdx = registerNifSuffix(g.config, targetSuffix, isKnown)
|
||||
let target = moduleFromNifFile(g, targetIdx, {LoadFullAst})
|
||||
@@ -596,7 +670,7 @@ proc loadDepClosure(g: ModuleGraph; targetSuffix: string):
|
||||
if not visited.containsOrIncl(suffix.string):
|
||||
var isKnown2 = false
|
||||
let fileIdx = registerNifSuffix(g.config, suffix.string, isKnown2)
|
||||
let precomp = moduleFromNifFile(g, fileIdx, {LoadFullAst})
|
||||
let precomp = moduleFromNifFile(g, fileIdx, {})
|
||||
if precomp.module != nil:
|
||||
modules.add precomp
|
||||
for dep in precomp.deps: stack.add dep
|
||||
@@ -645,14 +719,12 @@ proc generateCgStage(g: ModuleGraph; mainFileIdx: FileIndex) =
|
||||
rawMessage(g.config, errGenerated,
|
||||
"Cannot load NIF file for main module: " & toFullPath(g.config, mainFileIdx))
|
||||
return
|
||||
# Whole-program liveness filters the eager top-level routine listing
|
||||
# (`icDceLive`); the merge stage still recomputes the program-wide live set
|
||||
# across all `.c.nif`s, so this is only a size optimization for the main TU.
|
||||
var dceStats = DceStats()
|
||||
var nifDeps = initTable[string, seq[string]]()
|
||||
if not isDefined(g.config, "icNoDce"):
|
||||
g.icDceEnabled = computeLiveSymbols(g.config, nifFiles, g.icLiveNames,
|
||||
dceStats, nifDeps)
|
||||
# No whole-program DCE here, exactly as for a non-main target: `icDceEnabled`
|
||||
# stays false so each module emits the routines it owns and the MERGE stage
|
||||
# recomputes the one program-wide live set across all `.c.nif`s. Running
|
||||
# `computeLiveSymbols` over all ~260 NIFs in the main `cg` cost ~900 MB for a
|
||||
# result the merge stage throws away — pure redundancy now that the funnel is
|
||||
# gone (the main module no longer emits its transitive closure's bodies).
|
||||
target = findTargetModule(g, modules, precompSys, g.config.icBackendModule)
|
||||
else:
|
||||
# No whole-program load, hence no whole-program DCE: `icDceEnabled` stays
|
||||
|
||||
Reference in New Issue
Block a user