IC: beginnings of the backend porting

This commit is contained in:
Araq
2026-06-11 21:51:59 +02:00
parent efc52a5635
commit 2524b8a1b1
9 changed files with 783 additions and 21 deletions

View File

@@ -202,7 +202,7 @@ proc toNifSymName(w: var Writer; sym: PSym): string =
result.add modname(module, w.infos.config)
proc globalName(sym: PSym; config: ConfigRef): string =
proc globalName*(sym: PSym; config: ConfigRef): string =
result = sym.name.s
if sym.kindImpl == skPackage:
# stubs store the clean name; the NIF index is keyed by the marked one

View File

@@ -1980,6 +1980,14 @@ proc genAsgn(p: BProc, e: PNode, fastAsgn: bool) =
loadInto(p, le, ri, a)
proc genStmts(p: BProc, t: PNode) =
if p.config.cmd == cmdNifC and t.kind == nkSym and
t.sym.kind in {skProc, skFunc, skConverter, skIterator} and
not icDceLive(p.module, t.sym):
# Under IC a module's top-level routine definitions reappear as bare
# symbol statements in the loaded statement list and were generated
# eagerly. Skip the ones dce.nim proved unreachable; anything a live
# body references is still generated on demand via `genProc`.
return
var a: TLoc = default(TLoc)
let isPush = p.config.hasHint(hintExtendedContext)

View File

@@ -72,6 +72,40 @@ proc mangleProc(m: BModule; s: PSym; makeUnique: bool): string =
else:
m.g.mangledPrcs.incl(result)
proc sharedInstanceCName(m: BModule; s: PSym): string =
  ## Returns the module-free canonical C name of a content-keyed generic
  ## instance, or "" when `s` has to keep its module-suffixed name.
  ##
  ## A shared name lets every TU that instantiated the same generic with the
  ## same type arguments call one extern definition (the first claimant's TU
  ## embeds it, see `genProcLvl3`) instead of compiling a static copy each.
  ##
  ## The name is only program-unique as long as the 30-bit content hash does
  ## not collide for same-named instances of *different* instantiations
  ## across modules; the per-module probe in `setInstanceDisamb` cannot see
  ## such a collision. Every claimant therefore has to present the same
  ## signature: on a mismatch the later claimant keeps its module-suffixed
  ## name (no merge, still correct). What remains is the case of identical
  ## name and signature, different generic arguments, AND a 30-bit collision
  ## -- vanishingly unlikely; a full-typeKey verification channel can close
  ## that gap later.
  let eligible =
    m.config.cmd == cmdNifC and s.kind in routineKinds and
    (s.disamb and InstanceDisambBit) != 0'i32 and
    s.typ != nil and s.typ.callConv != ccInline and not m.hcrOn and
    {sfImportc, sfExportc, sfCodegenDecl} * s.flags == {}
  if not eligible:
    return ""
  let cname = s.name.s.mangle & "_i" & $s.disamb
  let signature = typeToString(s.typ)
  let claimed = m.g.graph.icSharedSigs.getOrDefault(cname)
  if claimed.len == 0:
    # first claimant: register the signature and take the shared name
    m.g.graph.icSharedSigs[cname] = signature
    return cname
  # later claimants may only share the name when the signature agrees
  result = if claimed == signature: cname else: ""
proc isSharedInstanceCName(m: BModule; s: PSym): bool =
  ## True when, under `cmdNifC`, the routine `s` has a content-derived disamb
  ## and its backend name (with the cnif marks removed) is exactly the
  ## module-free `<mangled name>_i<disamb>` form.
  if m.config.cmd != cmdNifC: return false
  if s.kind notin routineKinds: return false
  if (s.disamb and InstanceDisambBit) == 0'i32: return false
  let expected = s.name.s.mangle & "_i" & $s.disamb
  result = stripCnifMarks(s.loc.snippet) == expected
proc fillBackendName(m: BModule; s: PSym) =
if s.loc.snippet == "":
var result: Rope
@@ -79,13 +113,22 @@ proc fillBackendName(m: BModule; s: PSym) =
m.g.config.symbolFiles == disabledSf:
result = mangleProc(m, s, false).rope
else:
result = s.name.s.mangle.rope
result.add mangleProcNameExt(m.g.graph, s)
let shared = sharedInstanceCName(m, s)
if shared.len > 0:
result = shared.rope
else:
result = s.name.s.mangle.rope
result.add mangleProcNameExt(m.g.graph, s)
if m.hcrOn:
result.add '_'
result.add(idOrSig(s, m.module.name.s.mangle, m.sigConflicts, m.config))
backendEnsureMutable s
s.locImpl.snippet = result
if m.config.cmd == cmdNifC:
# mark the name so the cnif artifact writer can turn every occurrence
# into a Symbol token; stripped from the actual C output in genModule
s.locImpl.snippet = markCName(result)
else:
s.locImpl.snippet = result
proc fillParamName(m: BModule; s: PSym) =
if s.loc.snippet == "":
@@ -1273,7 +1316,9 @@ proc genProcHeader(m: BModule; prc: PSym; result: var Builder; visibility: var D
elif prc.typ.callConv == ccInline or isNonReloadable(m, prc):
visibility = StaticProc
elif sfImportc notin prc.flags:
visibility = Private
if not isSharedInstanceCName(m, prc):
visibility = Private
# else: plain extern — the definition is shared across TUs
if asPtr:
result.addProcVar(m, prc, name, params, rettype, isStatic = isStaticVar, ignoreAttributes = true)
else:

View File

@@ -19,6 +19,8 @@ import
mangleutils, cbuilderbase, modulegraphs
from expanddefaults import caseObjDefaultBranch
from ast2nif import globalName
import cnif
import pipelineutils
@@ -853,6 +855,16 @@ proc initLocExprSingleUse(p: BProc, e: PNode): TLoc =
result.flags.incl lfSingleUse
expr(p, e, result)
proc icDceLive(m: BModule; sym: PSym): bool =
  ## Liveness filter for the eagerly emitted top-level routine listing under
  ## `nim nifc`: answers whether dce.nim's result contains `sym`. Everything
  ## counts as live while the analysis is disabled, and so do symbols whose
  ## item id is backend-minted. Symbols generated on demand (`genProc` from a
  ## use site) never consult this.
  let graph = m.g.graph
  if graph.icDceEnabled and not sym.itemId.isBackendMinted:
    result = globalName(sym, m.config) in graph.icLiveNames
  else:
    result = true
include ccgcalls, "ccgstmts.nim"
proc initFrame(p: BProc, procname, filename: Rope): Rope =
@@ -1316,6 +1328,16 @@ proc genProcBody(p: BProc; procBody: PNode) =
p.blocks[0].sections[cpsInit].addCall(cgsymValue(p.module, "nimErrorFlag"))
proc genProcLvl3*(m: BModule, prc: PSym) =
if m.config.cmd == cmdNifC:
fillBackendName(m, prc)
if isSharedInstanceCName(m, prc):
# one definition program-wide: the first claimant's TU embeds it,
# everyone else declares it
let key = stripCnifMarks(prc.loc.snippet)
if m.g.graph.icSharedDefOwner.hasKeyOrPut(key, prc.itemId) and
m.g.graph.icSharedDefOwner[key] != prc.itemId:
genProcPrototype(m, prc)
return
var p = newProc(prc, m)
var header = newBuilder("")
let isCppMember = m.config.backend == backendCpp and sfCppMember * prc.flags != {}
@@ -1436,7 +1458,26 @@ proc genProcLvl3*(m: BModule, prc: PSym) =
generatedProc.add(extract(p.s(cpsStmts)))
if optStackTrace in prc.options: generatedProc.add(deinitFrame(p))
generatedProc.add(returnStmt)
m.s[cfsProcs].add(extract(generatedProc))
if m.config.cmd == cmdNifC:
# definition directive for the cnif artifact: groups the proc's text
# under its name and carries the root-relevant flags. The end directive
# right after the text makes the definition self-delimiting, so raw
# cfsProcs emitters (NimMain block, trav markers, ...) never end up
# inside a definition's span.
var defFlags = ""
if sfExportc in prc.flags or sfConstructor in prc.flags: defFlags.add 'x'
if sfCompilerProc in prc.flags: defFlags.add 'c'
if prc.kind == skMethod or sfDispatcher in prc.flags: defFlags.add 'm'
if not hasCnifMarks(prc.loc.snippet):
# The C name was not minted through `fillBackendName` (e.g. set by an
# `extern`/`rtl` pragma at sem time), so its uses are invisible to the
# artifact's liveness walk — conservatively keep the definition.
defFlags.add 'x'
m.s[cfsProcs].add(cnifDefDirective(stripCnifMarks(prc.loc.snippet), defFlags))
m.s[cfsProcs].add(extract(generatedProc))
m.s[cfsProcs].add(cnifEndDefs())
else:
m.s[cfsProcs].add(extract(generatedProc))
if isReloadable(m, prc):
m.s[cfsDynLibInit].add('\t')
m.s[cfsDynLibInit].addAssignmentWithValue(prc.loc.snippet):
@@ -1482,10 +1523,15 @@ proc genProcPrototype(m: BModule, sym: PSym) =
var header = newBuilder("")
var visibility: DeclVisibility = None
genProcHeader(m, sym, header, visibility, asPtr = asPtr, addAttributes = true)
# A prototype is not a *use*: strip the cnif name marks so the artifact's
# liveness walk does not see every forward-declared proc as referenced.
var headerText = extract(header)
if m.config.cmd == cmdNifC:
headerText = stripCnifMarks(headerText)
if asPtr:
m.s[cfsProcHeaders].addDeclWithVisibility(visibility):
# genProcHeader would give variable declaration, add it directly
m.s[cfsProcHeaders].add(extract(header))
m.s[cfsProcHeaders].add(headerText)
else:
let extraVis =
if sym.typ.callConv != ccInline and requiresExternC(m, sym):
@@ -1494,7 +1540,7 @@ proc genProcPrototype(m: BModule, sym: PSym) =
None
m.s[cfsProcHeaders].addDeclWithVisibility(extraVis):
m.s[cfsProcHeaders].addDeclWithVisibility(visibility):
m.s[cfsProcHeaders].add(extract(header))
m.s[cfsProcHeaders].add(headerText)
m.s[cfsProcHeaders].finishProcHeaderAsProto()
include inliner
@@ -1594,6 +1640,13 @@ proc isActivated(prc: PSym): bool = prc.typ != nil
proc genProc(m: BModule, prc: PSym) =
if sfBorrow in prc.flags or not isActivated(prc): return
if m.config.cmd == cmdNifC and m.g.graph.icDceEnabled and
sfImportc notin prc.flags and not icDceLive(m, prc):
# Stage-2 readiness check: in the current single-process backend, demand
# always wins over the liveness analysis (we generate the proc anyway).
# But per-module codegen will have to trust the analysis, so a proc that
# is demanded yet not marked live is an analysis bug — report it.
m.g.graph.icDceMisses.incl globalName(prc, m.config)
if sfForward in prc.flags:
addForwardedProc(m, prc)
fillProcLoc(m, prc.ast[namePos])
@@ -2309,6 +2362,11 @@ proc genModule(m: BModule, cfile: Cfile): Rope =
moduleIsEmpty = false
res.add(extract(m.s[i]))
if m.config.cmd == cmdNifC:
# close the definitions section: the init procs that follow belong to
# the artifact's top level (always-run code, hence liveness roots)
res.add(cnifEndDefs())
if m.s[cfsInitProc].buf.len > 0:
moduleIsEmpty = false
res.add(extract(m.s[cfsInitProc]))
@@ -2331,6 +2389,13 @@ proc genModule(m: BModule, cfile: Cfile): Rope =
postprocessCode(m.config, result)
if m.config.cmd == cmdNifC and result.len > 0:
let artifact = cfile.cname.string & ".nif"
writeCnifArtifact(result, artifact)
m.g.graph.icCnifFiles.add artifact
# NB: under cmdNifC the returned text still carries the cnif marks; the
# caller renders it (dropping dead definitions) or strips it.
proc initProcOptions(m: BModule): TOptions =
let opts = m.config.options
if sfSystemModule in m.module.flags: opts-{optStackTrace} else: opts
@@ -2413,7 +2478,10 @@ proc writeHeader(m: BModule) =
result.finishProcHeaderAsProto()
if m.config.cppCustomNamespace.len > 0: closeNamespaceNim(result)
result.addf("#endif /* $1 */$n", [guard])
if not writeRope(extract(result), m.filename):
var headerText = extract(result)
if m.config.cmd == cmdNifC:
headerText = stripCnifMarks(headerText)
if not writeRope(headerText, m.filename):
rawMessage(m.config, errCannotOpenFile, m.filename.string)
proc getCFile(m: BModule): AbsoluteFile =
@@ -2510,8 +2578,9 @@ proc shouldRecompile(m: BModule; code: Rope, cfile: Cfile): bool =
rawMessage(m.config, errCannotOpenFile, cfile.cname.string)
result = true
proc writeModule(m: BModule) =
let cfile = getCFile(m)
proc genModuleCode(m: BModule; cf: var Cfile): string =
## First half of `writeModule`: finalizes the module and produces its code
## text. Under cmdNifC the text still carries the cnif marks.
if moduleHasChanged(m.g.graph, m.module):
genInitCode(m)
@@ -2526,9 +2595,11 @@ proc writeModule(m: BModule) =
m.s[cfsProcHeaders].add(extract(m.g.mainModProcs))
generateThreadVarsSize(m)
var cf = Cfile(nimname: m.module.name.s, cname: cfile,
obj: completeCfilePath(m.config, toObjFile(m.config, cfile)), flags: {})
var code = genModule(m, cf)
result = genModule(m, cf)
proc registerModuleCode(m: BModule; cf: var Cfile; code: string) =
## Second half of `writeModule`: writes the .c file if it changed and
## registers it for compilation.
if code != "" or m.config.symbolFiles != disabledSf:
when hasTinyCBackend:
if m.config.cmd == cmdTcc:
@@ -2538,6 +2609,15 @@ proc writeModule(m: BModule) =
if not shouldRecompile(m, code, cf): cf.flags = {CfileFlag.Cached}
addFileToCompile(m.config, cf)
proc writeModule(m: BModule) =
  ## Generates the module's code and registers it for compilation in one go.
  ## Under `cmdNifC` the generated text still carries the cnif marks, so they
  ## are stripped before the code is handed on.
  let cpath = getCFile(m)
  var cf = Cfile(
    nimname: m.module.name.s,
    cname: cpath,
    obj: completeCfilePath(m.config, toObjFile(m.config, cpath)),
    flags: {})
  let marked = genModuleCode(m, cf)
  let code =
    if m.config.cmd == cmdNifC: stripCnifMarks(marked)
    else: marked
  registerModuleCode(m, cf, code)
proc updateCachedModule(m: BModule) =
let cfile = getCFile(m)
var cf = Cfile(nimname: m.module.name.s, cname: cfile,
@@ -2654,7 +2734,35 @@ proc cgenWriteModules*(backend: RootRef, config: ConfigRef) =
# order anyway)
genForwardedProcs(g)
for m in cgenModules(g):
m.writeModule()
if config.cmd == cmdNifC and not isDefined(config, "icNoCDce"):
# Two-phase write: produce every module's marked text and artifact
# first, then compute global liveness over the artifacts and render
# the .c files with dead definitions dropped. Demand-driven codegen
# over-approximates (it cannot retract a definition once some path
# requested it); this is where the surplus is removed.
var mods: seq[BModule] = @[]
var cfs: seq[Cfile] = @[]
var codes: seq[string] = @[]
for m in cgenModules(g):
let cfile = getCFile(m)
var cf = Cfile(nimname: m.module.name.s, cname: cfile,
obj: completeCfilePath(m.config, toObjFile(m.config, cfile)), flags: {})
let code = genModuleCode(m, cf)
mods.add m
cfs.add cf
codes.add code
let cl = computeLiveFromCArtifacts(g.graph.icCnifFiles)
var dropped = 0
for i in 0..<mods.len:
let rendered =
if cl.broken: stripCnifMarks(codes[i])
else: renderMarkedC(codes[i], cl.live, dropped)
registerModuleCode(mods[i], cfs[i], rendered)
g.graph.icCDefs = cl.defs
g.graph.icCLiveDefs = cl.liveDefs
g.graph.icCDropped = dropped
else:
for m in cgenModules(g):
m.writeModule()
writeMapping(config, g.mapping)
if g.generatedHeader != nil: writeHeader(g.generatedHeader)

256
compiler/cnif.nim Normal file
View File

@@ -0,0 +1,256 @@
#
#
# The Nim Compiler
# (c) Copyright 2026 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## The "cnif" artifact: the C code generator's output as a NIF file.
##
## This is deliberately *not* NIFC: the C text is kept verbatim (Nim's
## C-level machinery — exception handling in particular — is more refined
## than what NIFC models today; the gap can be closed incrementally later).
## The only structure the artifact adds is the part dead code elimination
## and generic-instance merging need:
##
## - raw C text as string literals
## - every *global* entity's C name as a `Symbol` token
## - every emitted proc definition as a `(cdef SymbolDef flags ...)` group
##
## The C generator marks names with control characters at the single place
## a global's C name is minted (`fillBackendName`) and emits a definition
## directive at the single place finished procs are appended; the marks then
## ride through all of the snippet composition untouched. This module turns
## the final marked module text into the `.c.nif` artifact and strips the
## marks for the actual `.c` output. Rendering C from the artifact is a
## plain token walk: string literals verbatim, symbols by name — which is
## also where a later merge step redirects losing generic instances.
##
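## Marker scheme (cannot collide: C string literals escape control chars,
## and the mark characters `\2`..`\5` differ from the `\1`/`\31`/`\23` of
## cgen's postprocess directives; `\31` is reused, but only as the
## separator inside a `\4 ... \5` directive):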
## \2 name \3 a global's C name
## \4 name \31 flags \5 start of the definition of `name`
## \4 \5 end of the definitions section
import std / [tables, sets, os, assertions, syncio]
import "../dist/nimony/src/lib" / [nifbuilder, nifcoreparse]
const
  CnifSymStart* = '\2' ## opens a marked global C name
  CnifSymEnd* = '\3'   ## closes a marked global C name
  CnifDefStart* = '\4' ## opens a definition / end-of-definitions directive
  CnifDefSep* = '\31'  # same separator char as cgen's postprocess directives
  CnifDefEnd* = '\5'   ## closes a directive

proc markCName*(name: string): string {.inline.} =
  ## Wraps `name` between the symbol start and end marks.
  result = newStringOfCap(name.len + 2)
  result.add CnifSymStart
  result.add name
  result.add CnifSymEnd

proc hasCnifMarks*(s: string): bool =
  ## True when `s` contains a symbol mark or the start of a directive.
  result = false
  for ch in s:
    if ch == CnifSymStart or ch == CnifSymEnd or ch == CnifDefStart:
      return true

proc stripCnifMarks*(s: string): string =
  ## Produces plain C from marked text: the symbol marks are removed while
  ## the names between them stay; directives are removed entirely,
  ## including their payload.
  if not hasCnifMarks(s): return s
  result = newStringOfCap(s.len)
  let n = s.len
  var pos = 0
  while pos < n:
    let ch = s[pos]
    inc pos
    if ch == CnifDefStart:
      # swallow the directive's payload and its terminator
      while pos < n and s[pos] != CnifDefEnd: inc pos
      inc pos
    elif ch != CnifSymStart and ch != CnifSymEnd:
      result.add ch

proc cnifDefDirective*(name, flags: string): string =
  ## Builds the directive that starts the definition of `name`:
  ## start mark, name, separator, flags, end mark.
  result = newStringOfCap(name.len + flags.len + 3)
  result.add CnifDefStart
  result.add name
  result.add CnifDefSep
  result.add flags
  result.add CnifDefEnd

proc cnifEndDefs*(): string =
  ## Builds the empty directive that ends a definition.
  result = newStringOfCap(2)
  result.add CnifDefStart
  result.add CnifDefEnd
proc writeCnifArtifact*(code: string; outfile: string) =
  ## Splits the marked module text into the `.c.nif` artifact.
  ##
  ## `code` is scanned once and written to `outfile` inside a `stmts` tree:
  ## - unmarked text is accumulated and emitted as string literals
  ## - a marked name is emitted as a symbol
  ## - a definition directive closes the `cdef` tree that is still open (if
  ##   any) and opens a new one holding the SymbolDef and the flags (an
  ##   ident, or the empty token when there are no flags)
  ## - a directive with an empty payload only closes the open `cdef` tree
  var b = nifbuilder.open(outfile)
  b.withTree "stmts":
    var raw = ""       # pending unmarked text, not yet written
    var inDef = false  # a `cdef` tree is currently open
    template flushRaw() =
      # write out the pending text as one string literal
      if raw.len > 0:
        b.addStrLit raw
        raw.setLen 0
    var i = 0
    while i < code.len:
      case code[i]
      of CnifSymStart:
        flushRaw()
        inc i
        # collect the name up to the closing mark
        var name = ""
        while i < code.len and code[i] != CnifSymEnd:
          name.add code[i]
          inc i
        inc i # skip CnifSymEnd
        b.addSymbol name, ""
      of CnifDefStart:
        flushRaw()
        inc i
        # collect the directive payload: `name sep flags`, or nothing
        var payload = ""
        while i < code.len and code[i] != CnifDefEnd:
          payload.add code[i]
          inc i
        inc i # skip CnifDefEnd
        # any directive terminates the definition that is still open
        if inDef:
          b.endTree()
          inDef = false
        if payload.len > 0:
          let sep = find(payload, CnifDefSep)
          let name = if sep >= 0: payload[0..<sep] else: payload
          let flags = if sep >= 0: payload[sep+1..^1] else: ""
          b.addTree "cdef"
          b.addSymbolDef name
          if flags.len > 0: b.addIdent flags
          else: b.addEmpty
          inDef = true
      else:
        raw.add code[i]
        inc i
    flushRaw()
    # the text may end while a definition is still open
    if inDef:
      b.endTree()
  b.close()
proc renderMarkedC*(code: string; live: HashSet[string]; dropped: var int): string =
  ## Produces the final C text from the marked module text.
  ##
  ## Symbol marks are removed while the names stay (a later merge step can
  ## substitute them here). A definition whose name is not in `live` is
  ## dropped together with its text, and `dropped` is incremented once per
  ## dropped definition. Dropping skips up to the next directive, so it
  ## relies on every definition being followed by an end directive; text
  ## outside of a definition's span is always kept.
  result = newStringOfCap(code.len)
  let n = code.len
  var pos = 0
  while pos < n:
    let ch = code[pos]
    inc pos
    if ch == CnifSymStart or ch == CnifSymEnd:
      discard # the mark vanishes, the name around it is ordinary text
    elif ch == CnifDefStart:
      # payload is `name sep flags`, or empty for an end directive
      let payloadStart = pos
      while pos < n and code[pos] != CnifDefEnd: inc pos
      let payload = code[payloadStart ..< pos]
      inc pos # step over CnifDefEnd
      if payload.len > 0:
        let sepAt = find(payload, CnifDefSep)
        let defName = if sepAt < 0: payload else: payload[0 ..< sepAt]
        if defName notin live:
          inc dropped
          # skip the definition's text: everything up to the next directive
          while pos < n and code[pos] != CnifDefStart: inc pos
    else:
      result.add ch
# ---- Liveness over the artifact -------------------------------------------
proc symOrIdentName(c: Cursor): string {.inline.} =
  ## The name carried by the token at `c`: the string value for an `Ident`,
  ## the symbol name for anything else.
  case c.kind
  of Ident: result = strVal(c)
  else: result = symName(c)
type
  CnifLiveness* = object
    ## Result of `computeLiveFromCArtifacts`.
    defs*: int ## proc definitions emitted across all modules
    liveDefs*: int ## of those, reachable from the roots
    live*: HashSet[string] ## live C names
    broken*: bool ## set when an artifact file was missing or did not start
                  ## with the `stmts` tag; the analysis stopped early then
proc computeLiveFromCArtifacts*(files: openArray[string]): CnifLiveness =
  ## dce1-style mark&sweep over the C-shaped artifacts: a `(cdef ...)`
  ## group is a definition (flags 'x'/'c'/'m' — exportc, compilerproc,
  ## method/dispatcher — make it a root), names at the top level (data,
  ## globals, init code) are roots, names inside a group are its uses.
  ## Because the artifact is *fully lowered* output, no conservative
  ## modelling is needed: every call the C code contains is a token here.
  ##
  ## NB: mangled C names contain no dots, so NIF's text reader classifies
  ## them as `Ident` rather than `Symbol`; the dialect therefore treats
  ## Ident tokens as name uses. Inside a `(cdef ...)` the flags ident is
  ## the one immediately following the SymbolDef; everything after is a use.
  ##
  ## Returns early with `broken = true` (and the remaining fields not filled
  ## in) when a file does not exist or does not start with the `stmts` tag.
  result = CnifLiveness(live: initHashSet[string]())
  var pool = newPool()
  var tags = newTagPool()
  let stmtsTag = tags.registerTag("stmts")
  let cdefTag = tags.registerTag("cdef")
  var uses = initTable[string, HashSet[string]]() # definition -> names used inside it
  var roots = initHashSet[string]()               # names alive by construction
  var defs = initHashSet[string]()                # names of all definitions seen
  for f in files:
    if not fileExists(f):
      result.broken = true
      return
    var buf = parseFromFile(f, 1000, pool, tags)
    var c = beginRead(buf)
    if c.kind != TagLit or c.cursorTagId != stmtsTag:
      result.broken = true
      endRead(c)
      return
    c.loopInto:
      case c.kind
      of Symbol, Ident:
        # a name at the top level is a root
        roots.incl symOrIdentName(c)
        inc c
      of TagLit:
        if c.cursorTagId == cdefTag:
          var owner = ""
          var flagsSeen = false
          c.loopInto:
            case c.kind
            of SymbolDef:
              owner = symName(c)
              defs.incl owner
              flagsSeen = false
              inc c
            of Symbol, Ident:
              let name = symOrIdentName(c)
              if not flagsSeen:
                # the flags field right after the SymbolDef
                flagsSeen = true
                for ch in name:
                  if ch in {'x', 'c', 'm'}:
                    roots.incl owner
                    break
              else:
                # any later name is a use made by `owner`
                uses.mgetOrPut(owner, initHashSet[string]()).incl name
              inc c
            of DotToken:
              flagsSeen = true # empty flags field
              inc c
            else:
              skip c
        else:
          # any other tree at the top level: its direct names are roots
          c.loopInto:
            if c.kind in {Symbol, Ident}:
              roots.incl symOrIdentName(c)
              inc c
            else:
              skip c
      else:
        skip c
    endRead(c)
  # mark & sweep
  var work = newSeqOfCap[string](roots.len)
  for r in roots: work.add r
  while work.len > 0:
    let s = work.pop()
    # `containsOrIncl` is false the first time a name is reached
    if not result.live.containsOrIncl(s):
      if uses.hasKey(s):
        for dep in uses[s]:
          if dep notin result.live:
            work.add dep
  result.defs = defs.len
  for d in defs:
    if d in result.live: inc result.liveDefs

255
compiler/dce.nim Normal file
View File

@@ -0,0 +1,255 @@
#
#
# The Nim Compiler
# (c) Copyright 2026 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Dead code analysis over per-module NIF files — a port of Nimony's
## `hexer/dce1.nim`/`dce2.nim` ideas onto the `nifcore` API.
##
## Per module we collect, in a single token walk over its `.nif` file:
## - `roots`: symbols that are alive by construction — anything referenced
## from top-level init code (every module's init proc is always emitted),
## plus flag-based entry points (see below)
## - `uses`: edges `definition -> symbols referenced inside its body`
##
## A global mark&sweep over the union of all modules' graphs then yields the
## set of live symbols.
##
## The NIF files contain *semchecked* (unlowered) AST, so uses that only
## materialize during the backend's lowering passes are invisible to the
## token walk. Those are covered by conservative roots instead:
## - registered hooks and `$enum` procs (the `(rep* "key" sym)` entries):
## `injectdestructors` and magic lowering insert calls to them at codegen
## - `{.compilerproc.}` symbols: requested by name via `cgsym`
## - `{.exportc.}` symbols, methods and dispatchers: external entry points
## resp. reachable through dynamic dispatch only
##
## In the current single-process backend the result is consumed as a skip
## filter for the eagerly generated top-level routine listing
## (`ccgstmts.genStmts`); cgen's demand-driven `genProc` remains in place,
## so an analysis miss can only cost code size, never correctness. The same
## analysis is the building block for per-module incremental codegen later,
## where it has to stand on its own.
import std / [tables, sets, os, assertions]
from std / strutils import rfind
import "../dist/nimony/src/lib" / nifcoreparse
import ast, options, pathutils
import ic / enum2nif
type
  DceContext = object
    ## State of one analysis run, accumulated over all analyzed NIF files.
    pool: Pool # shared literal pool: same name <=> same SymId everywhere
    tags: TagPool # shared tag pool: tag ids fixed by the registrations below
    uses: Table[SymId, HashSet[SymId]] # definition -> symbols referenced inside it
    roots: HashSet[SymId] # symbols that are alive by construction
    stmtsTag, sdefTag, implTag, replayTag, importTag, includeTag: TagId
    methodKindTag: TagId
    hookTags: HashSet[TagId] # tags of the `rep*` hook entries
    routineKindTags: HashSet[TagId] # tags of the routine symbol kinds
    offers: HashSet[SymId] # generic routine instances defined by the modules
    broken: bool # a module failed to parse; the result must not be used
  DceStats* = object
    ## Instance duplication statistics filled in by `computeLiveSymbols`.
    instances*: int ## routine instance definitions across all modules
    uniqueInstances*: int ## distinct instantiation keys (name.disamb)
      ## `instances - uniqueInstances` = definitions a merge step would drop
const
  NoSym = SymId(0) # pool ids start at 1
proc symIdAt(c: Cursor): SymId {.inline.} =
  # All symbols in our NIFs carry their `.disamb.modulesuffix`, which makes
  # every name longer than nifcore's 3-byte inline-string cutoff. The name
  # therefore lives in the (shared) pool, and its pool id is a stable
  # identity across the token buffers of all modules.
  assert not isInlineLit(c), "unexpectedly short NIF symbol name"
  let payload = combinedPayload(c)
  result = SymId(payload shr 1)
proc recordUse(ctx: var DceContext; sym, owner: SymId) =
  ## Registers a reference to `sym`: as an edge `owner -> sym` when there is
  ## an owning definition, as a root when there is none (`NoSym`).
  if owner != NoSym:
    ctx.uses.mgetOrPut(owner, initHashSet[SymId]()).incl sym
  else:
    ctx.roots.incl sym
proc walkDef(ctx: var DceContext; c: var Cursor; owner: SymId; declarative: bool)
proc walk(ctx: var DceContext; c: var Cursor; owner: SymId; declarative: bool) =
  ## Generic token walk.
  ##
  ## - `owner == NoSym and not declarative`: init-code context, symbol uses
  ##   become roots.
  ## - with an owner: symbol uses become `uses` edges of that owner.
  ## - declarative context (the listing after the `(implementation)`
  ##   marker): bare uses record nothing, only the definitions found inside
  ##   contribute.
  if c.kind == TagLit:
    if c.cursorTagId != ctx.sdefTag:
      # ordinary tree: descend into every child
      c.loopInto:
        walk(ctx, c, owner, declarative)
    else:
      walkDef(ctx, c, owner, declarative)
  elif c.kind == Symbol:
    if not declarative:
      recordUse(ctx, symIdAt(c), owner)
    inc c
  else:
    skip c
proc walkDef(ctx: var DceContext; c: var Cursor; owner: SymId; declarative: bool) =
  ## Walks one symbol definition (`sd`) tree. Records the definition's own
  ## reference (unless in declarative context), decides from its kind and
  ## flags whether it is a root or an offered generic instance, and walks
  ## the remaining fields with the definition itself as the owner.
  # Layout (ast2nif.writeSymDef):
  # (sd SymbolDef <x|.> (symkind ...) magic flags options offset ...)
  # NB: no `return` inside `into` — it would skip the cursor rescoping.
  c.into:
    if c.kind == SymbolDef:
      let self = symIdAt(c)
      # An sdef is emitted at the symbol's *first reference*; in use
      # positions that reference counts like a plain symbol use.
      if not declarative:
        recordUse(ctx, self, owner)
      inc c
      if c.hasMore: skip c # export marker: "x" or dot
      var rooted = false
      var isRoutine = false
      if c.hasMore and c.kind == TagLit: # symbol kind tree
        if c.cursorTagId == ctx.methodKindTag:
          rooted = true # reachable via dynamic dispatch
        isRoutine = c.cursorTagId in ctx.routineKindTags
        c.loopInto:
          walk(ctx, c, self, false) # guard sym/bitsize for vars
      if c.hasMore: skip c # magic: ident or dot
      if c.hasMore: # flags: ident or dot
        if c.kind == Ident:
          let fl = parse(TSymFlag, strVal(c))
          if sfExportc in fl or sfCompilerProc in fl or sfDispatcher in fl:
            rooted = true
          # a routine that came from a generic is an instance on offer
          if isRoutine and sfFromGeneric in fl:
            ctx.offers.incl self
        skip c
      if rooted: ctx.roots.incl self
      # rest: options, offset, position, lib, type, owner, ast, loc,
      # constraint, instantiatedFrom — all walked as the definition's body
      while c.hasMore:
        walk(ctx, c, self, false)
    else:
      # malformed sdef; consume defensively
      while c.hasMore:
        walk(ctx, c, owner, declarative)
proc rootHookSyms(ctx: var DceContext; c: var Cursor) =
  ## Roots every symbol that is a direct child of a hook entry such as
  ## `(repdestroy "typekey" hookSym)`; all other children are skipped.
  c.loopInto:
    if c.kind != Symbol:
      skip c
    else:
      ctx.roots.incl symIdAt(c)
      inc c
proc analyzeNifFile(ctx: var DceContext; filename: string;
                    imports: var seq[string]) =
  ## Collects the roots and use edges of one module's NIF file into `ctx`
  ## and appends the string literals of its `(import ...)` entries to
  ## `imports`. Sets `ctx.broken` when the file does not exist or does not
  ## start with the `stmts` tag.
  if not fileExists(filename):
    ctx.broken = true
    return
  var buf = parseFromFile(filename, 1000, ctx.pool, ctx.tags)
  var c = beginRead(buf)
  if c.kind == TagLit and c.cursorTagId == ctx.stmtsTag:
    var declarative = false # becomes true once the implementation marker is seen
    c.loopInto:
      case c.kind
      of TagLit:
        let tag = c.cursorTagId
        if tag == ctx.implTag:
          # marks the start of the declarative listing (routines, type
          # sections, consts); everything before it is init code
          declarative = true
          skip c
        elif tag == ctx.importTag:
          # (import . . "modsuffix") — the analysis discovers the module
          # closure itself; the backend's own module list omits modules that
          # are only reached through system or through demand-driven codegen
          c.loopInto:
            if c.kind == StrLit:
              imports.add strVal(c)
              inc c
            else:
              skip c
        elif tag == ctx.replayTag or tag == ctx.includeTag:
          skip c # compile directives and include info
        elif tag in ctx.hookTags:
          rootHookSyms(ctx, c)
        elif tag == ctx.sdefTag:
          # a definition listed at section level (globals before the marker,
          # announced hooks after it): a declaration, not a use
          walkDef(ctx, c, NoSym, true)
        else:
          # any other tree: init code before the marker, listing after it
          walk(ctx, c, NoSym, declarative)
      of Symbol:
        inc c # bare re-listing of a written definition
      else:
        skip c # the stmts wrapper's flag/type dots
  else:
    ctx.broken = true
  endRead(c)
proc markLive(ctx: DceContext): HashSet[SymId] =
  ## Mark phase: returns every symbol reachable from `ctx.roots` by following
  ## the `ctx.uses` edges.
  result = initHashSet[SymId]()
  var pending = newSeqOfCap[SymId](ctx.roots.len)
  for root in ctx.roots: pending.add root
  while pending.len > 0:
    let current = pending.pop()
    # already marked: nothing left to do for this symbol
    if result.containsOrIncl(current): continue
    if not ctx.uses.hasKey(current): continue
    for dep in ctx.uses[current]:
      if dep notin result:
        pending.add dep
proc computeLiveSymbols*(conf: ConfigRef; seedFiles: openArray[string];
                         live: var HashSet[string]; stats: var DceStats): bool =
  ## Global liveness over a program's NIF modules: the seed files plus the
  ## transitive closure of their `(import ...)` entries (the closure is only
  ## followed when `conf` is not nil).
  ##
  ## On success `live` receives the NIF name (`name.disamb.modsuffix`) of
  ## every reachable symbol, `stats` the instance duplication numbers, and
  ## the result is true. The result is false as soon as one module could not
  ## be analyzed; the caller must then treat everything as live.
  const
    hookTagNames = ["repdestroy", "repcopy", "repwasmoved", "repdup", "repsink",
                    "reptrace", "repdeepcopy", "repenumtostr"]
    routineTagNames = ["proc", "func", "iterator", "converter", "method"]

  var ctx = DceContext(pool: newPool(), tags: newTagPool())
  ctx.stmtsTag = ctx.tags.registerTag("stmts")
  ctx.sdefTag = ctx.tags.registerTag("sd")
  ctx.implTag = ctx.tags.registerTag("implementation")
  ctx.replayTag = ctx.tags.registerTag("replay")
  ctx.importTag = ctx.tags.registerTag("import")
  ctx.includeTag = ctx.tags.registerTag("include")
  ctx.methodKindTag = ctx.tags.registerTag("method")
  for tagName in hookTagNames:
    ctx.hookTags.incl ctx.tags.registerTag(tagName)
  for tagName in routineTagNames:
    ctx.routineKindTags.incl ctx.tags.registerTag(tagName)

  # work through the seeds first, then through whatever they import
  var pending = @seedFiles
  var visited = initHashSet[string]()
  var next = 0
  while next < pending.len:
    let nifFile = pending[next]
    inc next
    if visited.containsOrIncl(nifFile): continue
    var imported: seq[string] = @[]
    analyzeNifFile(ctx, nifFile, imported)
    if ctx.broken: return false
    if conf != nil:
      for suffix in imported:
        pending.add toGeneratedFile(conf, AbsoluteFile(suffix), ".nif").string

  let reachable = markLive(ctx)
  live = initHashSet[string](reachable.len)
  for symId in reachable:
    live.incl ctx.pool.syms[symId]

  # Instance duplication stats: with content-derived instance disambs the
  # NIF name without its module suffix is the instantiation key, so one
  # instantiation made by several modules counts as a single unique one.
  stats = DceStats(instances: ctx.offers.len)
  var instKeys = initHashSet[string]()
  for symId in ctx.offers:
    let nifName = ctx.pool.syms[symId]
    let lastDot = rfind(nifName, '.')
    if lastDot >= 0:
      instKeys.incl nifName[0 ..< lastDot]
    else:
      instKeys.incl nifName
  stats.uniqueInstances = instKeys.len
  result = true

View File

@@ -11,7 +11,7 @@
## represents a complete Nim project. Single modules can either be kept in RAM
## or stored in a rod-file.
import std/[intsets, tables, hashes, strtabs, os, strutils, parseutils]
import std/[intsets, tables, hashes, strtabs, os, strutils, parseutils, sets]
import ../dist/checksums/src/checksums/md5
import ast, astalgo, options, lineinfos,idents, btrees, ropes, msgs, pathutils, packages, suggestsymdb
@@ -68,6 +68,20 @@ type
enumToStringProcs*: Table[ItemId, PSym]
loadedEnumToStringProcs: Table[string, PSym]
emittedTypeInfo*: Table[string, FileIndex]
icLiveNames*: HashSet[string] # NIF names of reachable symbols (dce.nim);
# filters the top-level listing under `nim nifc`
icDceEnabled*: bool
icDceMisses*: HashSet[string] # demand-generated but not marked live:
# analysis bugs that per-module codegen would hit
instDisambs: Table[(int, int32), ItemId] # (name id, content disamb) ->
# instance, for collision probing in
# `setInstanceDisamb`
icCnifFiles*: seq[string] # `.c.nif` artifacts written by this run
icCDefs*, icCLiveDefs*, icCDropped*: int # render-time DCE stats
icSharedSigs*: Table[string, string] # shared instance C name -> signature
# (collision guard for the 30-bit hash)
icSharedDefOwner*: Table[string, ItemId] # shared instance C name ->
# the symbol whose TU embeds the definition
packageSyms*: TStrTable
deps*: IntSet # the dependency graph or potentially its transitive closure.
@@ -405,6 +419,50 @@ proc logGenericInstance*(g: ModuleGraph; inst: PSym) =
let ownerModule = inst.itemId.module.int
g.opsLog.add LogEntry(kind: GenericInstEntry, module: ownerModule, sym: inst)
const
InstanceDisambBit* = 0x4000_0000'i32
## Set in the `disamb` of routine instances whose value is content-derived
## (see `setInstanceDisamb`); keeps them disjoint from the small counter
## range ordinary symbols draw from, so the NIF name `name.disamb.module`
## stays collision-free within a module.
## This is bit 30: it sits directly above the 30 hash bits (0..29) that
## `setInstanceDisamb` packs below it, and leaves the value a positive
## `int32` (at most `high(int32)`).
proc setInstanceDisamb*(g: ModuleGraph; inst, generic: PSym;
                        concreteTypes: openArray[PType]) =
  ## Gives the routine instance `inst` a content-derived `disamb` in place of
  ## its counter-based one. Only active for `cmdNifC` / `cmdM` and when the
  ## `icNoInstKey` define is absent; otherwise `inst` is left untouched.
  ##
  ## The value is built from an MD5 over
  ## `<generic name>.<generic disamb>.<generic module name>` followed by
  ## `|<typeKey>` for each entry of `concreteTypes`. Thirty bits of that
  ## digest are kept and `InstanceDisambBit` is set on top, so the same
  ## instantiation made by different modules yields NIF names
  ## (`name.disamb.modsuffix`) that differ only in the module suffix. That is
  ## the prerequisite for cross-module generic-instance merging and supplies
  ## the `offers` keys of the dce analysis. The hash is never recomputed
  ## later: it travels in the serialized `disamb` field.
  if (g.config.cmd notin {cmdNifC, cmdM}) or
      isDefined(g.config, "icNoInstKey"):
    return

  # Identity of the instantiation: the generic itself plus every concrete
  # type argument.
  var key = generic.name.s & '.' & $generic.disamb & '.'
  key.add modname(generic.itemId.module, g.config)
  for concrete in concreteTypes:
    key.add '|'
    key.add typeKey(concrete, g.config, loadTypeCallback, loadSymCallback)

  # Fold the first four digest bytes into 30 bits (little-endian, top two
  # bits of the fourth byte dropped) and tag the result.
  let digest = toMD5(key)
  let low24 = int32(digest[0]) or (int32(digest[1]) shl 8) or
              (int32(digest[2]) shl 16)
  let top6 = int32(digest[3] and 0x3F'u8) shl 24
  var candidate = InstanceDisambBit or top6 or low24

  # Claim `(name, candidate)` for this instance. A slot already held by
  # another instance of the same name is a hash collision inside this
  # process: move on to the next value, wrapping within the tagged range.
  # The loser stays correct (its name keeps the module suffix); it merely
  # will not merge cross-module. A slot that is free, or already ours, ends
  # the search.
  while g.instDisambs.mgetOrPut((inst.name.id, candidate),
                                inst.itemId) != inst.itemId:
    candidate =
      if candidate == high(int32): InstanceDisambBit
      else: candidate + 1
  inst.disamb = candidate
proc hasDisabledAsgn*(g: ModuleGraph; t: PType): bool =
## True when `getAttachedOp` yields a non-nil `attachedAsgn` op for `t` and
## that op carries `sfError` in its flags; false otherwise.
## NOTE(review): presumably `sfError` marks an assignment operator that was
## disabled via `{.error.}` -- confirm against the flag's definition.
let op = getAttachedOp(g, t, attachedAsgn)
result = op != nil and sfError in op.flags

View File

@@ -17,16 +17,18 @@
## 1. Compile modules to NIF: nim m mymodule.nim
## 2. Generate C from NIF: nim nifc myproject.nim
import std/[intsets, tables, sets, os]
import std/[intsets, tables, sets, os, algorithm, syncio]
when defined(nimPreviewSlimSystem):
import std/assertions
import ast, options, lineinfos, modulegraphs, cgendata, cgen,
pathutils, extccomp, msgs, modulepaths, idents, types, ast2nif, typekeys
pathutils, extccomp, msgs, modulepaths, idents, types, ast2nif, typekeys, dce,
cnif
import ic / replayer
proc loadModuleDependencies(g: ModuleGraph; mainFileIdx: FileIndex): seq[PrecompiledModule] =
proc loadModuleDependencies(g: ModuleGraph; mainFileIdx: FileIndex;
nifFiles: var seq[string]): seq[PrecompiledModule] =
## Traverse the module dependency graph using a stack.
## Returns all modules that need code generation, in dependency order.
# The main module is loaded by its SOURCE FileIndex, but its serialized
@@ -36,6 +38,7 @@ proc loadModuleDependencies(g: ModuleGraph; mainFileIdx: FileIndex): seq[Precomp
# units (top-level globals in one, procs in the other → undeclared symbols).
g.config.m.filenameToIndexTbl[cachedModuleSuffix(g.config, mainFileIdx)] = mainFileIdx
let mainModule = moduleFromNifFile(g, mainFileIdx, {LoadFullAst})
nifFiles.add toNifFilename(g.config, mainFileIdx)
var stack: seq[ModuleSuffix] = @[]
result = @[]
@@ -56,6 +59,7 @@ proc loadModuleDependencies(g: ModuleGraph; mainFileIdx: FileIndex): seq[Precomp
let precomp = moduleFromNifFile(g, fileIdx, {LoadFullAst})
if precomp.module != nil:
result.add precomp
nifFiles.add toNifFilename(g.config, fileIdx)
for dep in precomp.deps:
if not visited.contains(dep.string):
stack.add dep
@@ -116,12 +120,20 @@ proc generateCode*(g: ModuleGraph; mainFileIdx: FileIndex) =
# Load all modules in dependency order using stack traversal
# This must happen BEFORE any code generation so that hooks are loaded into loadedOps
let modules = loadModuleDependencies(g, mainFileIdx)
var nifFiles: seq[string] = @[toNifFilename(g.config, systemFileIdx)]
let modules = loadModuleDependencies(g, mainFileIdx, nifFiles)
if modules.len == 0:
rawMessage(g.config, errGenerated,
"Cannot load NIF file for main module: " & toFullPath(g.config, mainFileIdx))
return
# Compute the global live set so that the top-level routine listing can be
# filtered (see `ccgstmts.genStmts`). On analysis failure everything stays
# alive — demand-driven `genProc` makes this a size optimization only.
var dceStats = DceStats()
if not isDefined(g.config, "icNoDce"):
g.icDceEnabled = computeLiveSymbols(g.config, nifFiles, g.icLiveNames, dceStats)
# Set up backend modules for all modules that need code generation
for m in modules:
discard setupNifBackendModule(g, m.module)
@@ -157,9 +169,25 @@ proc generateCode*(g: ModuleGraph; mainFileIdx: FileIndex) =
if mainModule != nil:
finishModule g, mainModule
if g.icDceEnabled and isDefined(g.config, "icDceCheck"):
var misses: seq[string] = @[]
for n in g.icDceMisses: misses.add n
sort misses
for n in misses:
stderr.writeLine "[icDce] MISS (generated on demand, not marked live): " & n
stderr.writeLine "[icDce] live: " & $g.icLiveNames.len & " misses: " & $misses.len &
" modules: " & $nifFiles.len
stderr.writeLine "[icDce] instances: " & $dceStats.instances &
" unique: " & $dceStats.uniqueInstances &
" mergeable: " & $(dceStats.instances - dceStats.uniqueInstances)
# Write C files
cgenWriteModules(g.backend, g.config)
if isDefined(g.config, "icDceCheck") and g.icCnifFiles.len > 0:
stderr.writeLine "[icDceC] cdefs: " & $g.icCDefs & " live: " & $g.icCLiveDefs &
" dropped: " & $g.icCDropped
# Run C compiler
if g.config.cmd != cmdTcc:
extccomp.callCCompiler(g.config)

View File

@@ -463,6 +463,10 @@ proc generateInstance(c: PContext, fn: PSym, pt: LayeredIdTable,
# This is needed for cyclic module dependencies where generic instances
# may be created in one module but referenced from another.
logGenericInstance(c.graph, result)
# Under IC the instance's NIF name must be canonical across modules:
# derive its `disamb` from the instantiation identity (generic +
# concrete types) instead of the per-module counter.
setInstanceDisamb(c.graph, result, fn, entry.concreteTypes)
# bug #12985 bug #22913
# TODO: use the context of the declaration of generic functions instead
# TODO: consider fixing options as well