From 3df66836d8b360c298cd2fd4f115f0db9119271e Mon Sep 17 00:00:00 2001 From: Araq Date: Sat, 13 Jun 2026 22:49:13 +0200 Subject: [PATCH] IC: data ownership for the per-module backend (Phase 2b, B4) Consts and RTTI are demand-emitted, so under emit-everywhere `cg` the same external-linkage data definition lands in several modules' .c.nif as `cdata` (raw text, not droppable) -> multiple definition at link. Procs already deduped via the `'u'` cdef flag; data now gets the same droppable+owner treatment, with one difference: data is never DCE'd (RTTI needs pointer identity for `of`/exception checks; static-per-TU would break that), so it is always a liveness root and kept by its single owner regardless of liveness. - New `'d'` cdef flag = a data definition: the merge stage assigns it one owner (smallest claimant, like `'u'`) and roots it (so its body keeps the procs it references live); emit keeps the body only in the owner, every other module keeps just an always-emitted `extern` declaration (the data analogue of a proc prototype). - genConstDefinition (ccgexprs) and genTypeInfoV2Impl (ccgtypes) now, under cmdNifC, emit an extern declaration + wrap the definition in a `'d'` cdef directive. The RTTI forward decl becomes a real `extern` (was a tentative definition that would collide across TUs). - cnif: computeLiveFromCArtifacts, computeMergeDecision and renderCFromArtifact all handle `'d'`. icFormatVersion 4 -> 5 (old .c.nif lack the data wrappers). Validated on the 3-module diamond: the full per-module pipeline (cg all, merge, emit all, cc, link) now LINKS with no duplicate symbols -- RTTI (NTIv2) and const tables each land in exactly one object. Whole-program IC path unchanged (koch ic thallo/tconverter/tmiscs green). Remaining: NimMain init orchestration (a/b module inits not yet called from main's cg -> the linked exe runs but prints defaults), the next unit. Co-Authored-By: Claude Opus 4.8 --- compiler/ccgexprs.nim | 18 ++++++++++++++++-- compiler/ccgtypes.nim | 14 ++++++++++++-- compiler/cnif.nim | 25 +++++++++++++++++++------ compiler/options.nim | 6 +++++- 4 files changed, 52 insertions(+), 11 deletions(-) diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index f10f481881..2c1bf17590 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -3491,9 +3491,23 @@ proc genConstDefinition(q: BModule; p: BProc; sym: PSym) = data.addDeclWithVisibility(Private): data.addVarWithInitializer(Local, actualConstName, typ = td): genBracedInit(q.initProc, sym.astdef, isConst = true, sym.typ, data) - q.s[cfsData].add(extract(data)) if q.config.cmd == cmdNifC: - q.icDataDefs.add (stripCnifMarks(actualConstName), icNifName(q, sym)) + # Each `cg` process that demands this const emits its definition + # (emit-everywhere). Always declare it first (the data analogue of a proc + # prototype) so a TU whose copy the merge stage drops still has a valid + # declaration; wrap the definition as a droppable `'d'` unit the merge + # stage assigns to a single owner. + let cname = stripCnifMarks(actualConstName) + var decl = newBuilder("") + decl.addDeclWithVisibility(Extern): + decl.addVar(kind = Local, name = actualConstName, typ = td) + q.s[cfsData].add(extract(decl)) + q.s[cfsData].add(cnifDefDirective(cname, "d", icNifName(q, sym))) + q.s[cfsData].add(extract(data)) + q.s[cfsData].add(cnifEndDefs()) + q.icDataDefs.add (cname, icNifName(q, sym)) + else: + q.s[cfsData].add(extract(data)) if q.hcrOn: # generate the global pointer with the real name q.s[cfsVars].addVar(kind = Global, name = sym.loc.snippet, diff --git a/compiler/ccgtypes.nim b/compiler/ccgtypes.nim index 042941b745..a472711143 100644 --- a/compiler/ccgtypes.nim +++ b/compiler/ccgtypes.nim @@ -1911,7 +1911,12 @@ proc genTypeInfoV2OldImpl(m: BModule; t, origType: PType, name: Rope; info: TLin proc genTypeInfoV2Impl(m: BModule; t, origType: PType, name: Rope; info: TLineInfo) = cgsym(m, "TNimTypeV2") - m.s[cfsStrData].addDeclWithVisibility(Private): + # Under `nim nifc` every `cg` process that demands this type's RTTI emits its + # definition (emit-everywhere). The forward declaration must therefore be a + # real `extern` (not a tentative definition) so a TU whose copy the merge + # stage drops still only *declares* it; the definition itself is wrapped as a + # droppable `'d'` unit below and assigned to a single owner. + m.s[cfsStrData].addDeclWithVisibility(if m.config.cmd == cmdNifC: Extern else: Private): m.s[cfsStrData].addVar(kind = Local, name = name, typ = "TNimTypeV2") if m.config.cmd == cmdNifC: m.icDataDefs.add (name, icNifName(m, origType)) @@ -1975,7 +1980,12 @@ proc genTypeInfoV2Impl(m: BModule; t, origType: PType, name: Rope; info: TLineIn else: typeEntry.addField(typeInit, name = "flags"): typeEntry.addIntValue(flags) - m.s[cfsVars].add extract(typeEntry) + if m.config.cmd == cmdNifC: + m.s[cfsVars].add(cnifDefDirective(name, "d", icNifName(m, origType))) + m.s[cfsVars].add extract(typeEntry) + m.s[cfsVars].add(cnifEndDefs()) + else: + m.s[cfsVars].add extract(typeEntry) if t.kind == tyObject and t.baseClass != nil and optEnableDeepCopy in m.config.globalOptions: discard genTypeInfoV1(m, t, info) diff --git a/compiler/cnif.nim b/compiler/cnif.nim index 02aebaaa6d..85f53affb7 100644 --- a/compiler/cnif.nim +++ b/compiler/cnif.nim @@ -425,7 +425,9 @@ proc computeLiveFromCArtifacts*(files: openArray[string]): CnifLiveness = # the flags field right after the SymbolDef flagsSeen = true for ch in name: - if ch in {'x', 'c', 'm'}: + # 'd' marks a data definition (const/RTTI): never DCE'd, so it + # is a root whose body keeps its referenced procs live + if ch in {'x', 'c', 'm', 'd'}: roots.incl owner break else: @@ -526,7 +528,7 @@ proc computeMergeDecision*(files: openArray[string]): MergeDecision = elif c.cursorTagId == cdefTag: var ownerName = "" var flagsSeen = false - var isUnique = false + var needsOwner = false c.loopInto: case c.kind of SymbolDef: @@ -540,7 +542,13 @@ proc computeMergeDecision*(files: openArray[string]): MergeDecision = flagsSeen = true for ch in name: if ch in {'x', 'c', 'm'}: roots.incl ownerName - elif ch == 'u': isUnique = true + # 'u' = unique proc (DCE'd), 'd' = data (never DCE'd, hence a + # root); both need a single owner across the emit-everywhere + # processes + elif ch == 'u': needsOwner = true + elif ch == 'd': + needsOwner = true + roots.incl ownerName else: uses.mgetOrPut(ownerName, initHashSet[string]()).incl name inc c @@ -549,7 +557,7 @@ proc computeMergeDecision*(files: openArray[string]): MergeDecision = inc c else: skip c - if isUnique and ownerName.len > 0: + if needsOwner and ownerName.len > 0: # smallest claimant wins; ties impossible (one entry per name) let prev = result.owners.getOrDefault(ownerName, "") if prev.len == 0 or owner < prev: @@ -682,6 +690,7 @@ proc renderCFromArtifact*(artifact: string; d: MergeDecision; ownerId: string; # rest is the definition's body text. `state` counts past the head. var name = "" var isUnique = false + var isData = false var keep = true var state = 0 c.loopInto: @@ -693,11 +702,15 @@ proc renderCFromArtifact*(artifact: string; d: MergeDecision; ownerId: string; if c.kind in {Ident, Symbol}: for ch in symOrIdentName(c): if ch == 'u': isUnique = true + elif ch == 'd': isData = true state = 2 inc c elif state == 2: # the NIF name (one StrLit) — decide keep here - keep = (name in d.live) and - not (isUnique and d.owners.getOrDefault(name, ownerId) != ownerId) + let owned = d.owners.getOrDefault(name, ownerId) == ownerId + keep = + if isData: owned # data: kept by its owner only + elif isUnique: (name in d.live) and owned + else: name in d.live # inline/dispatcher: per-TU if not keep: inc dropped state = 3 inc c diff --git a/compiler/options.nim b/compiler/options.nim index 6e42fa7c8e..06c05a94e0 100644 --- a/compiler/options.nim +++ b/compiler/options.nim @@ -29,7 +29,7 @@ const nimEnableCovariance* = defined(nimEnableCovariance) - icFormatVersion* = "4" + icFormatVersion* = "5" ## Version of the IC cache format (the sem-NIF module layout written by ## ast2nif.nim plus the iface/impl/edges side files). Bump it whenever ## that layout changes: `commandIc` wipes a nimcache whose `ic.version` @@ -45,6 +45,10 @@ const ## v4: backend C-name scheme change — the module suffix is now the trailing ## token (`name_u__`, was `name___u`), so ## cached `.c.nif` artifacts hold incompatible names and must be wiped. + ## v5: data definitions (consts, RTTI) are now wrapped in droppable `'d'` + ## cdef directives with an always-present extern declaration, so the + ## per-module merge stage can assign them a single owner; old `.c.nif` + ## artifacts lack the wrappers. type # please make sure we have under 32 options # (improves code efficiency a lot!)