Files
Nim/compiler/cnif.nim
Araq 3df66836d8 IC: data ownership for the per-module backend (Phase 2b, B4)
Consts and RTTI are demand-emitted, so under emit-everywhere `cg` the same
external-linkage data definition lands in several modules' .c.nif as `cdata`
(raw text, not droppable) -> multiple definition at link. Procs already
deduped via the `'u'` cdef flag; data now gets the same droppable+owner
treatment, with one difference: data is never DCE'd (RTTI needs pointer
identity for `of`/exception checks; static-per-TU would break that), so it is
always a liveness root and kept by its single owner regardless of liveness.

- New `'d'` cdef flag = a data definition: the merge stage assigns it one
  owner (smallest claimant, like `'u'`) and roots it (so its body keeps the
  procs it references live); emit keeps the body only in the owner, every
  other module keeps just an always-emitted `extern` declaration (the data
  analogue of a proc prototype).
- genConstDefinition (ccgexprs) and genTypeInfoV2Impl (ccgtypes) now, under
  cmdNifC, emit an extern declaration + wrap the definition in a `'d'` cdef
  directive. The RTTI forward decl becomes a real `extern` (was a tentative
  definition that would collide across TUs).
- cnif: computeLiveFromCArtifacts, computeMergeDecision and
  renderCFromArtifact all handle `'d'`.

icFormatVersion 4 -> 5 (old .c.nif lack the data wrappers).

Validated on the 3-module diamond: the full per-module pipeline (cg all,
merge, emit all, cc, link) now LINKS with no duplicate symbols -- RTTI
(NTIv2) and const tables each land in exactly one object. Whole-program IC
path unchanged (koch ic thallo/tconverter/tmiscs green). Remaining: NimMain
init orchestration (a/b module inits not yet called from main's cg -> the
linked exe runs but prints defaults), the next unit.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-13 22:49:13 +02:00

728 lines
26 KiB
Nim

#
#
# The Nim Compiler
# (c) Copyright 2026 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## The "cnif" artifact: the C code generator's output as a NIF file.
##
## This is deliberately *not* NIFC: the C text is kept verbatim (Nim's
## C-level machinery — exception handling in particular — is more refined
## than what NIFC models today; the gap can be closed incrementally later).
## The only structure the artifact adds is the part dead code elimination
## and generic-instance merging need:
##
## - raw C text as string literals
## - every *global* entity's C name as a `Symbol` token
## - every emitted proc definition as a `(cdef SymbolDef flags ...)` group
##
## The C generator marks names with control characters at the single place
## a global's C name is minted (`fillBackendName`) and emits a definition
## directive at the single place finished procs are appended; the marks then
## ride through all of the snippet composition untouched. This module turns
## the final marked module text into the `.c.nif` artifact and strips the
## marks for the actual `.c` output. Rendering C from the artifact is a
## plain token walk: string literals verbatim, symbols by name — which is
## also where a later merge step redirects losing generic instances.
##
## Marker scheme (cannot collide: C string literals escape control chars,
## and the `\1`/`\31`/`\23` of cgen's postprocess directives are distinct
## from the `\2`..`\5` marks; `\31` is only reused as the field separator
## *inside* a definition directive):
##
##   \2 name \3                     a global's C name
##   \4 name \31 flags \31 nif \5   start of the definition of `name`;
##                                  `nif` is the defining symbol's NIF name
##                                  (empty for backend-minted symbols) so a
##                                  later run can re-demand the definition
##   \4 \5                          end of the definitions section
import std / [tables, sets, os, assertions, syncio, algorithm]
import "../dist/nimony/src/lib" / [nifbuilder, nifcoreparse]
const
  # Control characters used to mark up the C text (see the marker scheme in
  # the module documentation).
  CnifSymStart* = '\2' # opens a marked global C name
  CnifSymEnd* = '\3' # closes a marked global C name
  CnifDefStart* = '\4' # opens a definition directive
  CnifDefSep* = '\31' # same separator char as cgen's postprocess directives
  CnifDefEnd* = '\5' # closes a definition directive
proc markCName*(name: string): string {.inline.} =
  ## Returns `name` enclosed in the symbol marks (`CnifSymStart` before,
  ## `CnifSymEnd` after).
  result = newStringOfCap(name.len + 2)
  result.add CnifSymStart
  result.add name
  result.add CnifSymEnd
proc hasCnifMarks*(s: string): bool =
  ## True when `s` contains at least one symbol mark or the start of a
  ## definition directive.
  result = false
  for ch in s:
    case ch
    of CnifSymStart, CnifSymEnd, CnifDefStart:
      return true
    else:
      discard
proc stripCnifMarks*(s: string): string =
  ## Produces plain C from marked text: symbol marks are deleted while the
  ## names they enclose stay, and definition directives are deleted in full.
  ## Text without any marks is returned unchanged.
  if not hasCnifMarks(s):
    return s
  result = newStringOfCap(s.len)
  var pos = 0
  while pos < s.len:
    let ch = s[pos]
    if ch == CnifDefStart:
      # jump over the whole directive, including its closing mark
      inc pos
      while pos < s.len and s[pos] != CnifDefEnd:
        inc pos
      inc pos
    else:
      if ch != CnifSymStart and ch != CnifSymEnd:
        result.add ch
      inc pos
const
  CnifVersion* = "4"
    ## Artifact format version, stored in the meta head. Artifacts written
    ## by an older compiler lack the NIF names and the cref group the
    ## def-retention check needs (v2), the cdeps group the fine-grained
    ## reuse gate needs (v3), or the type NIF names and cnif-marked extern
    ## RTTI references the typeinfo flavor of the def-retention check
    ## needs (v4); `readCnifHeads` reports them as invalid so their TUs
    ## simply regenerate once.
    # NOTE(review): this module also interprets a `'d'` (data definition)
    # flag on `(cdef ...)` groups; confirm whether artifacts written before
    # that flag existed are rejected by some other version gate or whether
    # this constant needs a bump as well.
proc cnifDefDirective*(name, flags, nifName: string): string =
  ## Builds the directive that opens the definition of `name`: the three
  ## fields joined by `CnifDefSep`, framed by `CnifDefStart`/`CnifDefEnd`.
  result = newStringOfCap(name.len + flags.len + nifName.len + 4)
  result.add CnifDefStart
  result.add name
  result.add CnifDefSep
  result.add flags
  result.add CnifDefSep
  result.add nifName
  result.add CnifDefEnd
proc cnifEndDefs*(): string =
  ## Builds the empty directive (start mark immediately followed by the end
  ## mark) that terminates a definition.
  result = newStringOfCap(2)
  result.add CnifDefStart
  result.add CnifDefEnd
proc writeCnifArtifact*(code: string; outfile: string;
                        initRequired = false; datInitRequired = false;
                        dataDefs: openArray[tuple[cname, nifname: string]] = [];
                        semmedNif = ""; moduleBase = "";
                        implDeps: openArray[string] = []) =
  ## Splits the marked module text into the `.c.nif` artifact.
  ## The artifact starts with a `(meta <flags> "semmedNif" "moduleBase"
  ## "version")` head — whether the module has an init/datInit proc
  ## ('i'/'d'), which semmed NIF it was generated from and the module's
  ## mangled base name (what `registerModuleToMain` and the reuse decision
  ## need when the TU is reused in a later run, possibly without the module
  ## ever being loaded again) — a `(cdata (SymbolDef StrLit)*)` group naming
  ## the data definitions (consts, globals, RTTI) the TU embeds together
  ## with their NIF names, a `(cref Ident*)` group naming every C name
  ## the TU references but does not define itself (what the def-retention
  ## check consults when some *other* TU regenerates), and a
  ## `(cdeps Ident*)` group naming the modules whose routine *bodies* this
  ## TU embeds (redirected defs, shared instances, hooks): the fine-grained
  ## reuse gate checks their `.impl.nif` cookies on top of the direct
  ## imports' `.iface.nif` cookies.
  ##
  ## After the head groups the body follows inside the same `stmts` tree:
  ## raw C text as string literals, marked names as symbols, and one
  ## `(cdef SymbolDef flags "nifName" ...)` group per definition directive.
  ## A group stays open until the next directive (empty or not) or the end
  ## of `code`.
  # pre-pass: every marked name is a use, every definition directive (and
  # every data def) is a definition; external references = uses - defs
  var uses = initHashSet[string]()
  var defs = initHashSet[string]()
  block prePass:
    var i = 0
    while i < code.len:
      case code[i]
      of CnifSymStart:
        inc i
        var name = ""
        while i < code.len and code[i] != CnifSymEnd:
          name.add code[i]
          inc i
        inc i
        uses.incl name
      of CnifDefStart:
        inc i
        var payload = ""
        while i < code.len and code[i] != CnifDefEnd:
          payload.add code[i]
          inc i
        inc i
        # the defined name is the payload up to the first separator; a
        # payload without separator is taken as the name itself.
        # NOTE(review): the main pass below tests `sep >= 0`, this one
        # `sep > 0`; the two only differ for a directive with an empty name.
        let sep = find(payload, CnifDefSep)
        if sep > 0: defs.incl payload[0..<sep]
        elif payload.len > 0: defs.incl payload
      else:
        inc i
  for d in dataDefs: defs.incl d.cname
  var crefs: seq[string] = @[]
  for u in uses:
    if u notin defs: crefs.add u
  # sorted so the group does not depend on hash-set iteration order
  sort crefs
  var b = nifbuilder.open(outfile)
  b.withTree "stmts":
    b.withTree "meta":
      var metaFlags = ""
      if initRequired: metaFlags.add 'i'
      if datInitRequired: metaFlags.add 'd'
      if metaFlags.len > 0: b.addIdent metaFlags
      else: b.addEmpty
      b.addStrLit semmedNif
      b.addStrLit moduleBase
      b.addStrLit CnifVersion
    b.withTree "cdata":
      for d in dataDefs:
        b.addSymbolDef d.cname
        b.addStrLit d.nifname
    b.withTree "cref":
      for r in crefs:
        b.addIdent r
    b.withTree "cdeps":
      for s in implDeps:
        b.addIdent s
    # main pass: `raw` accumulates plain C text between marks; `inDef` is
    # true while a `(cdef ...)` tree is open
    var raw = ""
    var inDef = false
    template flushRaw() =
      # emits the pending raw text (if any) as one string literal
      if raw.len > 0:
        b.addStrLit raw
        raw.setLen 0
    var i = 0
    while i < code.len:
      case code[i]
      of CnifSymStart:
        flushRaw()
        inc i
        var name = ""
        while i < code.len and code[i] != CnifSymEnd:
          name.add code[i]
          inc i
        inc i # skip CnifSymEnd
        b.addSymbol name, ""
      of CnifDefStart:
        flushRaw()
        inc i
        var payload = ""
        while i < code.len and code[i] != CnifDefEnd:
          payload.add code[i]
          inc i
        inc i # skip CnifDefEnd
        # any directive closes the definition that is currently open
        if inDef:
          b.endTree()
          inDef = false
        if payload.len > 0:
          # payload layout: name SEP flags SEP nifName
          let sep = find(payload, CnifDefSep)
          let name = if sep >= 0: payload[0..<sep] else: payload
          var flags = if sep >= 0: payload[sep+1..^1] else: ""
          var nifName = ""
          let sep2 = find(flags, CnifDefSep)
          if sep2 >= 0:
            nifName = flags[sep2+1..^1]
            flags = flags[0..<sep2]
          # fixed cdef head: SymbolDef, flags (Ident or empty), NIF name
          b.addTree "cdef"
          b.addSymbolDef name
          if flags.len > 0: b.addIdent flags
          else: b.addEmpty
          b.addStrLit nifName
          inDef = true
      else:
        raw.add code[i]
        inc i
    flushRaw()
    # close a definition that was still open at the end of the text
    if inDef:
      b.endTree()
  b.close()
proc renderMarkedC*(code: string; live: HashSet[string]; dropped: var int): string =
  ## Produces the final C text from the marked module text. Symbol marks
  ## are removed while the names stay; a definition directive whose name is
  ## missing from `live` causes all text up to the next directive to be
  ## discarded and `dropped` to be incremented. Text outside such a span is
  ## copied unchanged.
  result = newStringOfCap(code.len)
  var pos = 0
  while pos < code.len:
    let ch = code[pos]
    if ch == CnifSymStart or ch == CnifSymEnd:
      inc pos
    elif ch == CnifDefStart:
      inc pos
      let first = pos
      while pos < code.len and code[pos] != CnifDefEnd:
        inc pos
      let directive = code[first ..< pos]
      inc pos # step over CnifDefEnd
      if directive.len == 0:
        continue # an end directive: nothing to decide
      let cut = find(directive, CnifDefSep)
      let name = if cut < 0: directive else: directive[0 ..< cut]
      if name in live:
        continue
      inc dropped
      # discard the definition's text, i.e. everything before the next
      # directive
      while pos < code.len and code[pos] != CnifDefStart:
        inc pos
    else:
      result.add ch
      inc pos
# ---- Liveness over the artifact -------------------------------------------
proc symOrIdentName(c: Cursor): string {.inline.} =
  ## Name carried by the token under `c`: the string value for an `Ident`,
  ## the symbol name for anything else.
  if c.kind != Ident:
    return symName(c)
  result = strVal(c)
type
  CnifHeads* = object
    ## The cheap-to-parse part of an artifact that a later run needs in
    ## order to reuse the TU without regenerating it. Filled by
    ## `readCnifHeads`.
    valid*: bool              ## file parsed, carries the meta head and has
                              ## the current format version
    initRequired*: bool       ## meta flags contain 'i'
    datInitRequired*: bool    ## meta flags contain 'd'
    semmedNif*: string        ## the semmed NIF this TU was generated from
    moduleBase*: string       ## the module's mangled base name
    cdefs*: seq[tuple[cname, nifname: string]] ## the proc definitions
      ## (one entry per `(cdef ...)` group; `readCnifHeads` does not look
      ## at the flags, so `'d'`-flagged data groups are listed here too)
    cdata*: seq[tuple[cname, nifname: string]] ## the data definitions
      ## (entries of the `(cdata ...)` head group)
    crefs*: seq[string]       ## C names referenced but not defined here
    cdeps*: seq[string]       ## module suffixes whose routine bodies this
                              ## TU embeds (impl-cookie gated on reuse)
proc readCnifHeads*(f: string): CnifHeads =
  ## Reads `(meta ...)`, `(cdata ...)`, `(cref ...)`, `(cdeps ...)` and the
  ## `(cdef ...)` head names from an artifact. Artifacts written by an older
  ## compiler (no meta head or a different format version) report
  ## `valid=false`; so do a missing file and a file whose root is not a
  ## `stmts` tree.
  result = CnifHeads()
  if not fileExists(f): return
  var pool = newPool()
  var tags = newTagPool()
  let stmtsTag = tags.registerTag("stmts")
  let cdefTag = tags.registerTag("cdef")
  let cdataTag = tags.registerTag("cdata")
  let crefTag = tags.registerTag("cref")
  let cdepsTag = tags.registerTag("cdeps")
  let metaTag = tags.registerTag("meta")
  var buf = parseFromFile(f, 1000, pool, tags)
  var c = beginRead(buf)
  if c.kind != TagLit or c.cursorTagId != stmtsTag:
    endRead(c)
    return
  var version = ""
  var sawMeta = false
  c.loopInto:
    if c.kind == TagLit:
      if c.cursorTagId == metaTag:
        # (meta <flags> "semmedNif" "moduleBase" "version"): the string
        # literals are told apart by their position
        sawMeta = true
        var strIdx = 0
        c.loopInto:
          if c.kind == Ident:
            for ch in strVal(c):
              if ch == 'i': result.initRequired = true
              elif ch == 'd': result.datInitRequired = true
            inc c
          elif c.kind == StrLit:
            if strIdx == 0: result.semmedNif = strVal(c)
            elif strIdx == 1: result.moduleBase = strVal(c)
            elif strIdx == 2: version = strVal(c)
            inc strIdx
            inc c
          else:
            skip c
      elif c.cursorTagId == cdataTag:
        # (cdata (SymbolDef StrLit)*): each string literal is the NIF name
        # of the SymbolDef read last
        c.loopInto:
          if c.kind == SymbolDef:
            result.cdata.add (symName(c), "")
            inc c
          elif c.kind == StrLit:
            if result.cdata.len > 0:
              result.cdata[^1].nifname = strVal(c)
            inc c
          else:
            skip c
      elif c.cursorTagId == crefTag:
        c.loopInto:
          if c.kind in {Ident, Symbol, SymbolDef}:
            result.crefs.add symOrIdentName(c)
            inc c
          else:
            skip c
      elif c.cursorTagId == cdepsTag:
        c.loopInto:
          if c.kind in {Ident, Symbol, SymbolDef}:
            result.cdeps.add symOrIdentName(c)
            inc c
          else:
            skip c
      elif c.cursorTagId == cdefTag:
        # fixed head: SymbolDef, flags (Ident or empty), NIF name StrLit;
        # everything after that is the definition's body text
        # `state`: 0 = before the SymbolDef, 1 = at the flags field,
        # 2 = at the NIF name, 3 = in the body (ignored here)
        var state = 0
        c.loopInto:
          if c.kind == SymbolDef:
            result.cdefs.add (symName(c), "")
            state = 1
            inc c
          elif state == 1: # the flags field
            state = 2
            skip c
          elif state == 2: # the NIF name
            if c.kind == StrLit and result.cdefs.len > 0:
              result.cdefs[^1].nifname = strVal(c)
            state = 3
            skip c
          else:
            skip c
      else:
        skip c
    else:
      # top-level body tokens (raw text, names) are not part of the heads
      skip c
  endRead(c)
  result.valid = sawMeta and version == CnifVersion
type
  CnifLiveness* = object
    ## Result of `computeLiveFromCArtifacts`.
    defs*: int               ## proc definitions emitted across all modules
                             ## (distinct `(cdef ...)` names)
    liveDefs*: int           ## of those, reachable from the roots
    live*: HashSet[string]   ## live C names
    broken*: bool            ## an artifact was missing or did not start
                             ## with a `stmts` tree; the other fields are
                             ## then incomplete
proc computeLiveFromCArtifacts*(files: openArray[string]): CnifLiveness =
  ## dce1-style mark&sweep over the C-shaped artifacts: a `(cdef ...)`
  ## group is a definition (flags 'x'/'c'/'m' — exportc, compilerproc,
  ## method/dispatcher — and 'd' — data definition — make it a root), names
  ## at the top level (data, globals, init code) are roots, names inside a
  ## group are its uses.
  ## Because the artifact is *fully lowered* output, no conservative
  ## modelling is needed: every call the C code contains is a token here.
  ##
  ## NB: mangled C names contain no dots, so NIF's text reader classifies
  ## them as `Ident` rather than `Symbol`; the dialect therefore treats
  ## Ident tokens as name uses. Inside a `(cdef ...)` the flags ident is
  ## the one immediately following the SymbolDef; everything after is a use.
  ##
  ## Returns early with `broken=true` as soon as one file is missing or
  ## does not start with a `stmts` tree.
  result = CnifLiveness(live: initHashSet[string]())
  var pool = newPool()
  var tags = newTagPool()
  let stmtsTag = tags.registerTag("stmts")
  let cdefTag = tags.registerTag("cdef")
  let cdataTag = tags.registerTag("cdata")
  let crefTag = tags.registerTag("cref")
  let cdepsTag = tags.registerTag("cdeps")
  let metaTag = tags.registerTag("meta")
  var uses = initTable[string, HashSet[string]]() # definition -> names used
  var roots = initHashSet[string]()
  var defs = initHashSet[string]()
  for f in files:
    if not fileExists(f):
      result.broken = true
      return
    var buf = parseFromFile(f, 1000, pool, tags)
    var c = beginRead(buf)
    if c.kind != TagLit or c.cursorTagId != stmtsTag:
      result.broken = true
      endRead(c)
      return
    c.loopInto:
      case c.kind
      of Symbol, Ident:
        # a name outside of any definition: unconditionally live
        roots.incl symOrIdentName(c)
        inc c
      of TagLit:
        if c.cursorTagId == metaTag or c.cursorTagId == cdataTag or
            c.cursorTagId == crefTag or c.cursorTagId == cdepsTag:
          # bookkeeping for TU reuse, irrelevant for liveness
          skip c
        elif c.cursorTagId == cdefTag:
          var owner = ""
          var flagsSeen = false
          c.loopInto:
            case c.kind
            of SymbolDef:
              owner = symName(c)
              defs.incl owner
              flagsSeen = false
              inc c
            of Symbol, Ident:
              let name = symOrIdentName(c)
              if not flagsSeen:
                # the flags field right after the SymbolDef
                flagsSeen = true
                for ch in name:
                  # 'd' marks a data definition (const/RTTI): never DCE'd, so it
                  # is a root whose body keeps its referenced procs live
                  if ch in {'x', 'c', 'm', 'd'}:
                    roots.incl owner
                    break
              else:
                uses.mgetOrPut(owner, initHashSet[string]()).incl name
              inc c
            of DotToken:
              flagsSeen = true # empty flags field
              inc c
            else:
              skip c
        else:
          # any other tree: every name inside it is a root
          c.loopInto:
            if c.kind in {Symbol, Ident}:
              roots.incl symOrIdentName(c)
              inc c
            else:
              skip c
      else:
        skip c
    endRead(c)
  # mark & sweep
  var work = newSeqOfCap[string](roots.len)
  for r in roots: work.add r
  while work.len > 0:
    let s = work.pop()
    if not result.live.containsOrIncl(s):
      # first visit of `s`: queue what it uses
      if uses.hasKey(s):
        for dep in uses[s]:
          if dep notin result.live:
            work.add dep
  result.defs = defs.len
  for d in defs:
    if d in result.live: inc result.liveDefs
# ---- The merge stage: liveness + owner assignment -------------------------
type
  MergeDecision* = object
    ## What the per-module backend's `merge` stage computes from every
    ## module's `.c.nif` and what its `emit` stage consumes to render the
    ## final `.c` of one module.
    live*: HashSet[string]         ## globally reachable C names (dead cdefs
                                   ## are dropped from every module)
    owners*: Table[string, string] ## for each `'u'`-flagged (unique,
                                   ## externally-linked) definition and each
                                   ## `'d'`-flagged data definition, the
                                   ## single artifact base name allowed to
                                   ## embed its body; every other module
                                   ## only declares it
    broken*: bool                  ## an artifact was missing or unparsable —
                                   ## the caller should fall back / regenerate
    defs*, liveDefs*: int          ## number of distinct cdef names, and how
                                   ## many of them are in `live`
proc computeMergeDecision*(files: openArray[string]): MergeDecision =
  ## One pass over every `.c.nif`: the same mark&sweep as
  ## `computeLiveFromCArtifacts` plus, per definition, owner assignment.
  ##
  ## Each `cg` process emits the body of every definition it demands
  ## (emit-everywhere), so the same externally-linked definition appears in
  ## several artifacts. A `'u'` flag on the `(cdef ...)` marks those that need
  ## exactly one owner (the whole-program backend's `icSharedDefOwner`
  ## invariant, here recomputed across processes); the owner is the
  ## lexicographically smallest artifact that emits it — a pure function of the
  ## claimant set, hence stable across rebuilds. Definitions without `'u'`
  ## (inline procs, dispatchers) are `static`/main-only and emitted into every
  ## using TU, so they get no owner entry and are never deduplicated.
  ##
  ## A `'d'` flag (data definition) is treated like `'u'` for owner
  ## assignment and additionally makes the definition a liveness root.
  ##
  ## Owners are recorded as `extractFilename` of the artifact path. Returns
  ## early with `broken=true` as soon as one file is missing or does not
  ## start with a `stmts` tree.
  result = MergeDecision(live: initHashSet[string](),
                         owners: initTable[string, string]())
  var pool = newPool()
  var tags = newTagPool()
  let stmtsTag = tags.registerTag("stmts")
  let cdefTag = tags.registerTag("cdef")
  let cdataTag = tags.registerTag("cdata")
  let crefTag = tags.registerTag("cref")
  let cdepsTag = tags.registerTag("cdeps")
  let metaTag = tags.registerTag("meta")
  var uses = initTable[string, HashSet[string]]() # definition -> names used
  var roots = initHashSet[string]()
  var defs = initHashSet[string]()
  for f in files:
    if not fileExists(f):
      result.broken = true
      return
    let owner = extractFilename(f) # this artifact's id as a claimant
    var buf = parseFromFile(f, 1000, pool, tags)
    var c = beginRead(buf)
    if c.kind != TagLit or c.cursorTagId != stmtsTag:
      result.broken = true
      endRead(c)
      return
    c.loopInto:
      case c.kind
      of Symbol, Ident:
        # a name outside of any definition: unconditionally live
        roots.incl symOrIdentName(c)
        inc c
      of TagLit:
        if c.cursorTagId == metaTag or c.cursorTagId == cdataTag or
            c.cursorTagId == crefTag or c.cursorTagId == cdepsTag:
          # head groups carry no liveness information
          skip c
        elif c.cursorTagId == cdefTag:
          var ownerName = "" # C name of the definition being read
          var flagsSeen = false
          var needsOwner = false
          c.loopInto:
            case c.kind
            of SymbolDef:
              ownerName = symName(c)
              defs.incl ownerName
              flagsSeen = false
              inc c
            of Symbol, Ident:
              let name = symOrIdentName(c)
              if not flagsSeen:
                # the first name after the SymbolDef is the flags field
                flagsSeen = true
                # 'u' = unique proc (DCE'd), 'd' = data (never DCE'd, hence a
                # root); both need a single owner across the emit-everywhere
                # processes
                for ch in name:
                  if ch in {'x', 'c', 'm'}: roots.incl ownerName
                  elif ch == 'u': needsOwner = true
                  elif ch == 'd':
                    needsOwner = true
                    roots.incl ownerName
              else:
                uses.mgetOrPut(ownerName, initHashSet[string]()).incl name
              inc c
            of DotToken:
              flagsSeen = true # empty flags field
              inc c
            else:
              skip c
          if needsOwner and ownerName.len > 0:
            # smallest claimant wins; ties impossible (one entry per name)
            let prev = result.owners.getOrDefault(ownerName, "")
            if prev.len == 0 or owner < prev:
              result.owners[ownerName] = owner
        else:
          # any other tree: every name inside it is a root
          c.loopInto:
            if c.kind in {Symbol, Ident}:
              roots.incl symOrIdentName(c)
              inc c
            else:
              skip c
      else:
        skip c
    endRead(c)
  # mark & sweep from the roots over the use edges
  var work = newSeqOfCap[string](roots.len)
  for r in roots: work.add r
  while work.len > 0:
    let s = work.pop()
    if not result.live.containsOrIncl(s):
      if uses.hasKey(s):
        for dep in uses[s]:
          if dep notin result.live:
            work.add dep
  result.defs = defs.len
  for d in defs:
    if d in result.live: inc result.liveDefs
const MergeDecisionFile* = "ic.backend.merge.nif"
  ## Fixed name of the merge stage's output in the nimcache, read by `emit`.
  # NOTE(review): presumably the file written by `writeMergeDecision` and
  # parsed by `readMergeDecision`; both take the path as an argument, so
  # confirm at their call sites.
proc writeMergeDecision*(outfile: string; d: MergeDecision) =
  ## Writes `d` to `outfile` as `(merge (live Symbol*) (owners (own Symbol
  ## StrLit)*))`. Live names and owner keys are emitted in sorted order; the
  ## C names go out as symbols, the owning artifact base names as string
  ## literals.
  var liveNames = newSeqOfCap[string](d.live.len)
  for name in d.live:
    liveNames.add name
  liveNames.sort()
  var ownedNames = newSeqOfCap[string](d.owners.len)
  for name in d.owners.keys:
    ownedNames.add name
  ownedNames.sort()
  var b = nifbuilder.open(outfile)
  b.withTree "merge":
    b.withTree "live":
      for name in liveNames:
        b.addSymbol name, ""
    b.withTree "owners":
      for name in ownedNames:
        b.withTree "own":
          b.addSymbol name, ""
          b.addStrLit d.owners[name]
  b.close()
proc readMergeDecision*(f: string): MergeDecision =
  ## Parses a file produced by `writeMergeDecision` back into a
  ## `MergeDecision`. The result has `broken=true` when the file does not
  ## exist or its root is not a `merge` tree.
  result = MergeDecision(live: initHashSet[string](),
                         owners: initTable[string, string]())
  if not fileExists(f):
    result.broken = true
    return
  var pool = newPool()
  var tags = newTagPool()
  let mergeTag = tags.registerTag("merge")
  let liveTag = tags.registerTag("live")
  let ownersTag = tags.registerTag("owners")
  let ownTag = tags.registerTag("own")
  var buf = parseFromFile(f, 1000, pool, tags)
  var c = beginRead(buf)
  if c.kind != TagLit or c.cursorTagId != mergeTag:
    result.broken = true
    endRead(c)
    return
  c.loopInto:
    if c.kind != TagLit:
      skip c
    elif c.cursorTagId == liveTag:
      # (live Symbol*): every name is a live C name
      c.loopInto:
        case c.kind
        of Symbol, Ident:
          result.live.incl symOrIdentName(c)
          inc c
        else:
          skip c
    elif c.cursorTagId == ownersTag:
      # (owners (own Symbol StrLit)*): name followed by its owner
      c.loopInto:
        if c.kind != TagLit or c.cursorTagId != ownTag:
          skip c
        else:
          var definedName = ""
          c.loopInto:
            case c.kind
            of Symbol, Ident:
              definedName = symOrIdentName(c)
              inc c
            of StrLit:
              if definedName.len > 0:
                result.owners[definedName] = strVal(c)
              inc c
            else:
              skip c
    else:
      skip c
  endRead(c)
proc renderCFromArtifact*(artifact: string; d: MergeDecision; ownerId: string;
                          dropped: var int): string =
  ## The per-module backend's `emit` stage: render one module's final `.c` from
  ## its `.c.nif` and the merge decision. String literals are emitted verbatim,
  ## symbols by name; a `(cdef ...)` body is dropped when the name is dead, or
  ## when it is a `'u'` unique definition this module does not own. A `'d'`
  ## data definition is kept by its owner regardless of liveness and dropped
  ## everywhere else. The body's
  ## prototype lives in the surrounding raw text (cgen emits a forward
  ## declaration for every *used* proc, independent of where the body lands), so
  ## a dropped body still leaves a valid declaration — no synthesis needed. The
  ## head groups (meta/cdata/cref/cdeps) carry no C text.
  ##
  ## `ownerId` is compared against the values of `d.owners`; a name without
  ## an owner entry counts as owned by this module. `dropped` is incremented
  ## once per dropped body. Returns "" when `artifact` does not exist or
  ## does not start with a `stmts` tree.
  result = ""
  if not fileExists(artifact): return
  var pool = newPool()
  var tags = newTagPool()
  let stmtsTag = tags.registerTag("stmts")
  let cdefTag = tags.registerTag("cdef")
  var buf = parseFromFile(artifact, 1000, pool, tags)
  var c = beginRead(buf)
  if c.kind != TagLit or c.cursorTagId != stmtsTag:
    endRead(c)
    return
  c.loopInto:
    case c.kind
    of StrLit:
      result.add strVal(c)
      inc c
    of Symbol, Ident:
      result.add symOrIdentName(c)
      inc c
    of TagLit:
      if c.cursorTagId == cdefTag:
        # fixed head: SymbolDef, flags (Ident or empty), nifname StrLit; the
        # rest is the definition's body text. `state` counts past the head.
        var name = ""
        var isUnique = false
        var isData = false
        var keep = true
        var state = 0
        c.loopInto:
          if state == 0 and c.kind == SymbolDef:
            name = symName(c)
            state = 1
            inc c
          elif state == 1: # the flags field (one token: Ident/Symbol or empty)
            if c.kind in {Ident, Symbol}:
              for ch in symOrIdentName(c):
                if ch == 'u': isUnique = true
                elif ch == 'd': isData = true
            state = 2
            inc c
          elif state == 2: # the NIF name (one StrLit) — decide keep here
            # no owner entry means `getOrDefault` yields `ownerId` itself
            let owned = d.owners.getOrDefault(name, ownerId) == ownerId
            keep =
              if isData: owned # data: kept by its owner only
              elif isUnique: (name in d.live) and owned
              else: name in d.live # inline/dispatcher: per-TU
            if not keep: inc dropped
            state = 3
            inc c
          else: # body tokens
            if keep:
              if c.kind == StrLit: result.add strVal(c)
              elif c.kind in {Symbol, Ident}: result.add symOrIdentName(c)
            # advance even when the body is dropped
            inc c
      else:
        # head groups (meta/cdata/cref/cdeps) carry no C text
        skip c
    else:
      inc c
  endRead(c)