IC: data ownership for the per-module backend (Phase 2b, B4)

Consts and RTTI are demand-emitted, so under emit-everywhere `cg` the same
external-linkage data definition lands in several modules' .c.nif as `cdata`
(raw text, not droppable) -> multiple definition at link. Procs already
deduped via the `'u'` cdef flag; data now gets the same droppable+owner
treatment, with one difference: data is never DCE'd (RTTI needs pointer
identity for `of`/exception checks; static-per-TU would break that), so it is
always a liveness root and kept by its single owner regardless of liveness.

- New `'d'` cdef flag = a data definition: the merge stage assigns it one
  owner (smallest claimant, like `'u'`) and roots it (so its body keeps the
  procs it references live); emit keeps the body only in the owner, every
  other module keeps just an always-emitted `extern` declaration (the data
  analogue of a proc prototype).
- genConstDefinition (ccgexprs) and genTypeInfoV2Impl (ccgtypes) now, under
  cmdNifC, emit an extern declaration + wrap the definition in a `'d'` cdef
  directive. The RTTI forward decl becomes a real `extern` (was a tentative
  definition that would collide across TUs).
- cnif: computeLiveFromCArtifacts, computeMergeDecision and
  renderCFromArtifact all handle `'d'`.

icFormatVersion 4 -> 5 (old .c.nif lack the data wrappers).

Validated on the 3-module diamond: the full per-module pipeline (cg all,
merge, emit all, cc, link) now LINKS with no duplicate symbols -- RTTI
(NTIv2) and const tables each land in exactly one object. Whole-program IC
path unchanged (koch ic thallo/tconverter/tmiscs green). Remaining: NimMain
init orchestration (a/b module inits not yet called from main's cg -> the
linked exe runs but prints defaults), the next unit.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Araq
2026-06-13 22:49:13 +02:00
parent 3b2e0e08c5
commit 3df66836d8
4 changed files with 52 additions and 11 deletions

View File

@@ -3491,9 +3491,23 @@ proc genConstDefinition(q: BModule; p: BProc; sym: PSym) =
data.addDeclWithVisibility(Private):
data.addVarWithInitializer(Local, actualConstName, typ = td):
genBracedInit(q.initProc, sym.astdef, isConst = true, sym.typ, data)
q.s[cfsData].add(extract(data))
if q.config.cmd == cmdNifC:
q.icDataDefs.add (stripCnifMarks(actualConstName), icNifName(q, sym))
# Each `cg` process that demands this const emits its definition
# (emit-everywhere). Always declare it first (the data analogue of a proc
# prototype) so a TU whose copy the merge stage drops still has a valid
# declaration; wrap the definition as a droppable `'d'` unit the merge
# stage assigns to a single owner.
let cname = stripCnifMarks(actualConstName)
var decl = newBuilder("")
decl.addDeclWithVisibility(Extern):
decl.addVar(kind = Local, name = actualConstName, typ = td)
q.s[cfsData].add(extract(decl))
q.s[cfsData].add(cnifDefDirective(cname, "d", icNifName(q, sym)))
q.s[cfsData].add(extract(data))
q.s[cfsData].add(cnifEndDefs())
q.icDataDefs.add (cname, icNifName(q, sym))
else:
q.s[cfsData].add(extract(data))
if q.hcrOn:
# generate the global pointer with the real name
q.s[cfsVars].addVar(kind = Global, name = sym.loc.snippet,

View File

@@ -1911,7 +1911,12 @@ proc genTypeInfoV2OldImpl(m: BModule; t, origType: PType, name: Rope; info: TLin
proc genTypeInfoV2Impl(m: BModule; t, origType: PType, name: Rope; info: TLineInfo) =
cgsym(m, "TNimTypeV2")
m.s[cfsStrData].addDeclWithVisibility(Private):
# Under `nim nifc` every `cg` process that demands this type's RTTI emits its
# definition (emit-everywhere). The forward declaration must therefore be a
# real `extern` (not a tentative definition) so a TU whose copy the merge
# stage drops still only *declares* it; the definition itself is wrapped as a
# droppable `'d'` unit below and assigned to a single owner.
m.s[cfsStrData].addDeclWithVisibility(if m.config.cmd == cmdNifC: Extern else: Private):
m.s[cfsStrData].addVar(kind = Local, name = name, typ = "TNimTypeV2")
if m.config.cmd == cmdNifC:
m.icDataDefs.add (name, icNifName(m, origType))
@@ -1975,7 +1980,12 @@ proc genTypeInfoV2Impl(m: BModule; t, origType: PType, name: Rope; info: TLineIn
else:
typeEntry.addField(typeInit, name = "flags"):
typeEntry.addIntValue(flags)
m.s[cfsVars].add extract(typeEntry)
if m.config.cmd == cmdNifC:
m.s[cfsVars].add(cnifDefDirective(name, "d", icNifName(m, origType)))
m.s[cfsVars].add extract(typeEntry)
m.s[cfsVars].add(cnifEndDefs())
else:
m.s[cfsVars].add extract(typeEntry)
if t.kind == tyObject and t.baseClass != nil and optEnableDeepCopy in m.config.globalOptions:
discard genTypeInfoV1(m, t, info)

View File

@@ -425,7 +425,9 @@ proc computeLiveFromCArtifacts*(files: openArray[string]): CnifLiveness =
# the flags field right after the SymbolDef
flagsSeen = true
for ch in name:
if ch in {'x', 'c', 'm'}:
# 'd' marks a data definition (const/RTTI): never DCE'd, so it
# is a root whose body keeps its referenced procs live
if ch in {'x', 'c', 'm', 'd'}:
roots.incl owner
break
else:
@@ -526,7 +528,7 @@ proc computeMergeDecision*(files: openArray[string]): MergeDecision =
elif c.cursorTagId == cdefTag:
var ownerName = ""
var flagsSeen = false
var isUnique = false
var needsOwner = false
c.loopInto:
case c.kind
of SymbolDef:
@@ -540,7 +542,13 @@ proc computeMergeDecision*(files: openArray[string]): MergeDecision =
flagsSeen = true
for ch in name:
if ch in {'x', 'c', 'm'}: roots.incl ownerName
elif ch == 'u': isUnique = true
# 'u' = unique proc (DCE'd), 'd' = data (never DCE'd, hence a
# root); both need a single owner across the emit-everywhere
# processes
elif ch == 'u': needsOwner = true
elif ch == 'd':
needsOwner = true
roots.incl ownerName
else:
uses.mgetOrPut(ownerName, initHashSet[string]()).incl name
inc c
@@ -549,7 +557,7 @@ proc computeMergeDecision*(files: openArray[string]): MergeDecision =
inc c
else:
skip c
if isUnique and ownerName.len > 0:
if needsOwner and ownerName.len > 0:
# smallest claimant wins; ties impossible (one entry per name)
let prev = result.owners.getOrDefault(ownerName, "")
if prev.len == 0 or owner < prev:
@@ -682,6 +690,7 @@ proc renderCFromArtifact*(artifact: string; d: MergeDecision; ownerId: string;
# rest is the definition's body text. `state` counts past the head.
var name = ""
var isUnique = false
var isData = false
var keep = true
var state = 0
c.loopInto:
@@ -693,11 +702,15 @@ proc renderCFromArtifact*(artifact: string; d: MergeDecision; ownerId: string;
if c.kind in {Ident, Symbol}:
for ch in symOrIdentName(c):
if ch == 'u': isUnique = true
elif ch == 'd': isData = true
state = 2
inc c
elif state == 2: # the NIF name (one StrLit) — decide keep here
keep = (name in d.live) and
not (isUnique and d.owners.getOrDefault(name, ownerId) != ownerId)
let owned = d.owners.getOrDefault(name, ownerId) == ownerId
keep =
if isData: owned # data: kept by its owner only
elif isUnique: (name in d.live) and owned
else: name in d.live # inline/dispatcher: per-TU
if not keep: inc dropped
state = 3
inc c

View File

@@ -29,7 +29,7 @@ const
nimEnableCovariance* = defined(nimEnableCovariance)
icFormatVersion* = "4"
icFormatVersion* = "5"
## Version of the IC cache format (the sem-NIF module layout written by
## ast2nif.nim plus the iface/impl/edges side files). Bump it whenever
## that layout changes: `commandIc` wipes a nimcache whose `ic.version`
@@ -45,6 +45,10 @@ const
## v4: backend C-name scheme change — the module suffix is now the trailing
## token (`name_u<disamb>__<suffix>`, was `name__<suffix>_u<disamb>`), so
## cached `.c.nif` artifacts hold incompatible names and must be wiped.
## v5: data definitions (consts, RTTI) are now wrapped in droppable `'d'`
## cdef directives with an always-present extern declaration, so the
## per-module merge stage can assign them a single owner; old `.c.nif`
## artifacts lack the wrappers.
type # please make sure we have under 32 options
# (improves code efficiency a lot!)