Files
Nim/compiler/cnif.nim
2026-06-12 06:05:31 +02:00

349 lines
12 KiB
Nim

#
#
# The Nim Compiler
# (c) Copyright 2026 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## The "cnif" artifact: the C code generator's output as a NIF file.
##
## This is deliberately *not* NIFC: the C text is kept verbatim (Nim's
## C-level machinery — exception handling in particular — is more refined
## than what NIFC models today; the gap can be closed incrementally later).
## The only structure the artifact adds is the part dead code elimination
## and generic-instance merging need:
##
## - raw C text as string literals
## - every *global* entity's C name as a `Symbol` token
## - every emitted proc definition as a `(cdef SymbolDef flags ...)` group
##
## The C generator marks names with control characters at the single place
## a global's C name is minted (`fillBackendName`) and emits a definition
## directive at the single place finished procs are appended; the marks then
## ride through all of the snippet composition untouched. This module turns
## the final marked module text into the `.c.nif` artifact and strips the
## marks for the actual `.c` output. Rendering C from the artifact is a
## plain token walk: string literals verbatim, symbols by name — which is
## also where a later merge step redirects losing generic instances.
##
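## Marker scheme (cannot collide: C string literals escape control chars,
## and the mark characters `\2`..`\5` are distinct from the `\1`/`\31`/`\23`
## of cgen's postprocess directives; `\31` is reused only as the separator
## inside a definition directive):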
## \2 name \3 a global's C name
## \4 name \31 flags \5 start of the definition of `name`
## \4 \5 end of the definitions section
import std / [tables, sets, os, assertions, syncio]
import "../dist/nimony/src/lib" / [nifbuilder, nifcoreparse]
const
  # Control characters used to mark up the generated C text.
  CnifSymStart* = '\2' ## opens a marked global C name
  CnifSymEnd* = '\3' ## closes a marked global C name
  CnifDefStart* = '\4' ## opens a definition directive
  CnifDefSep* = '\31' # same separator char as cgen's postprocess directives
  CnifDefEnd* = '\5' ## closes a definition directive
proc markCName*(name: string): string {.inline.} =
  ## Returns `name` enclosed in the symbol marks
  ## (`CnifSymStart` in front, `CnifSymEnd` behind).
  result = newStringOfCap(name.len + 2)
  result.add CnifSymStart
  result.add name
  result.add CnifSymEnd
proc hasCnifMarks*(s: string): bool =
  ## True when `s` contains at least one symbol mark or the start of a
  ## definition directive.
  result = false
  for ch in s:
    case ch
    of CnifSymStart, CnifSymEnd, CnifDefStart:
      return true
    else:
      discard
proc stripCnifMarks*(s: string): string =
  ## Produces plain C from marked text: the symbol marks are deleted while
  ## the names between them stay, and definition directives are deleted as
  ## a whole. Text without any marks is returned unchanged.
  if not hasCnifMarks(s):
    return s
  result = newStringOfCap(s.len)
  var pos = 0
  while pos < s.len:
    let ch = s[pos]
    if ch == CnifDefStart:
      # run forward to the directive's terminator; the shared `inc` below
      # then steps over the terminator itself
      while pos < s.len and s[pos] != CnifDefEnd:
        inc pos
    elif ch notin {CnifSymStart, CnifSymEnd}:
      result.add ch
    inc pos
proc cnifDefDirective*(name, flags: string): string =
  ## Builds the directive that starts the definition of `name`:
  ## `CnifDefStart`, the name, `CnifDefSep`, the flags, `CnifDefEnd`.
  result = newStringOfCap(name.len + flags.len + 3)
  result.add CnifDefStart
  result.add name
  result.add CnifDefSep
  result.add flags
  result.add CnifDefEnd
proc cnifEndDefs*(): string =
  ## Builds the directive with an empty payload (`CnifDefStart` directly
  ## followed by `CnifDefEnd`), which ends a definition.
  result = newStringOfCap(2)
  result.add CnifDefStart
  result.add CnifDefEnd
proc writeCnifArtifact*(code: string; outfile: string;
                        initRequired = false; datInitRequired = false;
                        dataDefs: openArray[string] = [];
                        semmedNif = ""; moduleBase = "") =
  ## Converts the marked module text `code` into the `.c.nif` artifact
  ## written to `outfile`.
  ##
  ## The produced `(stmts ...)` tree consists of:
  ## - a `(meta <flags> "semmedNif" "moduleBase")` head, where the flags
  ##   are an ident made of 'i' (`initRequired`) and/or 'd'
  ##   (`datInitRequired`), or the empty token when neither is set. It
  ##   records which semmed NIF the TU was generated from and the module's
  ##   mangled base name, i.e. what is needed when the TU is reused in a
  ##   later run without the module being loaded again.
  ## - a `(cdata <SymbolDef>*)` group with one entry per element of
  ##   `dataDefs` (the data definitions the TU embeds).
  ## - the body: raw C text as string literals, every marked name as a
  ##   symbol, and every definition directive with a non-empty payload as
  ##   the start of a `(cdef SymbolDef flags ...)` group. A group lasts
  ##   until the next directive or the end of `code`.
  var b = nifbuilder.open(outfile)
  b.withTree "stmts":
    b.withTree "meta":
      var headFlags = ""
      if initRequired: headFlags.add 'i'
      if datInitRequired: headFlags.add 'd'
      if headFlags.len == 0: b.addEmpty
      else: b.addIdent headFlags
      b.addStrLit semmedNif
      b.addStrLit moduleBase
    b.withTree "cdata":
      for dataName in dataDefs:
        b.addSymbolDef dataName

    var pending = ""       # raw C text collected but not yet written
    var groupOpen = false  # true while a `(cdef ...)` group is open

    template emitPending() =
      # writes the collected raw text as one string literal, if any
      if pending.len > 0:
        b.addStrLit pending
        pending.setLen 0

    var pos = 0
    while pos < code.len:
      let ch = code[pos]
      if ch == CnifSymStart:
        emitPending()
        inc pos
        let first = pos
        while pos < code.len and code[pos] != CnifSymEnd:
          inc pos
        let symName = code[first ..< pos]
        inc pos # step over CnifSymEnd
        b.addSymbol symName, ""
      elif ch == CnifDefStart:
        emitPending()
        inc pos
        let first = pos
        while pos < code.len and code[pos] != CnifDefEnd:
          inc pos
        let payload = code[first ..< pos]
        inc pos # step over CnifDefEnd
        # every directive closes the group opened by the previous one
        if groupOpen:
          b.endTree()
          groupOpen = false
        if payload.len > 0:
          # payload is `name SEP flags`; without a separator it is all name
          let cut = find(payload, CnifDefSep)
          var defName = payload
          var defFlags = ""
          if cut >= 0:
            defName = payload[0 ..< cut]
            defFlags = payload[cut + 1 .. ^1]
          b.addTree "cdef"
          b.addSymbolDef defName
          if defFlags.len == 0: b.addEmpty
          else: b.addIdent defFlags
          groupOpen = true
      else:
        pending.add ch
        inc pos
    emitPending()
    if groupOpen:
      b.endTree()
  b.close()
proc renderMarkedC*(code: string; live: HashSet[string];
                    dropped: var int): string =
  ## Produces the final C text from the marked module text `code`.
  ##
  ## Symbol marks are removed while the names they enclose are kept (this
  ## is where a later merge step would substitute names). Definition
  ## directives are never copied to the output. A definition whose name is
  ## missing from `live` is left out completely and `dropped` is
  ## incremented once for it.
  ##
  ## Definitions are self-delimiting (genProcAux emits an end directive
  ## right after the proc's text), so text written by other emitters lies
  ## outside every definition's span and is always kept.
  result = newStringOfCap(code.len)
  var pos = 0
  while pos < code.len:
    let ch = code[pos]
    if ch == CnifSymStart or ch == CnifSymEnd:
      inc pos
    elif ch == CnifDefStart:
      inc pos
      let first = pos
      while pos < code.len and code[pos] != CnifDefEnd:
        inc pos
      let payload = code[first ..< pos]
      inc pos # step over CnifDefEnd
      if payload.len == 0:
        continue # an end directive: nothing to decide
      let cut = find(payload, CnifDefSep)
      let defName = if cut < 0: payload else: payload[0 ..< cut]
      if defName in live:
        continue # keep: the following text is copied by the main loop
      inc dropped
      # discard the definition's text, i.e. everything before the next
      # directive (its end directive)
      while pos < code.len and code[pos] != CnifDefStart:
        inc pos
    else:
      result.add ch
      inc pos
# ---- Liveness over the artifact -------------------------------------------
proc symOrIdentName(c: Cursor): string {.inline.} =
  ## Name carried by the token under `c`: its string value when the token
  ## is an `Ident`, its symbol name otherwise.
  case c.kind
  of Ident:
    result = strVal(c)
  else:
    result = symName(c)
type
  CnifHeads* = object
    ## The cheap-to-parse part of an artifact that a later run needs in
    ## order to reuse the TU without regenerating it.
    ## Filled in by `readCnifHeads`.
    valid*: bool ## file parsed and carries the meta head
    initRequired*: bool ## the meta flags contain 'i'
    datInitRequired*: bool ## the meta flags contain 'd'
    semmedNif*: string ## the semmed NIF this TU was generated from
    moduleBase*: string ## the module's mangled base name
    cdefs*: seq[string] ## C names of the proc definitions
    cdata*: seq[string] ## C names of the data definitions
proc readCnifHeads*(f: string): CnifHeads =
  ## Reads `(meta ...)`, `(cdata ...)` and the `(cdef ...)` head names from
  ## an artifact. Artifacts written before the meta head report `valid=false`.
  ##
  ## A missing file, or a file whose root is not a `stmts` tree, yields a
  ## default `CnifHeads` (all fields empty, `valid=false`).
  result = CnifHeads()
  if not fileExists(f): return
  var pool = newPool()
  var tags = newTagPool()
  # register the tags of interest so they can be compared by id below
  let stmtsTag = tags.registerTag("stmts")
  let cdefTag = tags.registerTag("cdef")
  let cdataTag = tags.registerTag("cdata")
  let metaTag = tags.registerTag("meta")
  var buf = parseFromFile(f, 1000, pool, tags)
  var c = beginRead(buf)
  if c.kind != TagLit or c.cursorTagId != stmtsTag:
    endRead(c)
    return
  # walk the direct children of `(stmts ...)`; only trees are of interest
  c.loopInto:
    if c.kind == TagLit:
      if c.cursorTagId == metaTag:
        result.valid = true
        # position among the string literals of the meta head:
        # 0 = semmedNif, 1 = moduleBase
        var strIdx = 0
        c.loopInto:
          if c.kind == Ident:
            # the flags ident: one character per flag
            for ch in strVal(c):
              if ch == 'i': result.initRequired = true
              elif ch == 'd': result.datInitRequired = true
            inc c
          elif c.kind == StrLit:
            if strIdx == 0: result.semmedNif = strVal(c)
            elif strIdx == 1: result.moduleBase = strVal(c)
            inc strIdx
            inc c
          else:
            skip c
      elif c.cursorTagId == cdataTag:
        # every SymbolDef inside `(cdata ...)` names a data definition
        c.loopInto:
          if c.kind == SymbolDef:
            result.cdata.add symName(c)
            inc c
          else:
            skip c
      elif c.cursorTagId == cdefTag:
        # only the SymbolDef of the group is recorded; the rest of its
        # content is stepped over
        c.loopInto:
          if c.kind == SymbolDef:
            result.cdefs.add symName(c)
            inc c
          else:
            skip c
      else:
        skip c
    else:
      skip c
  endRead(c)
type
  CnifLiveness* = object
    ## Result of `computeLiveFromCArtifacts`.
    defs*: int ## distinct proc definition names found across all files
    liveDefs*: int ## of those, reachable from the roots
    live*: HashSet[string] ## live C names
    broken*: bool ## an input file was missing or is not a `(stmts ...)`
                  ## tree; the other fields keep their initial values
proc computeLiveFromCArtifacts*(files: openArray[string]): CnifLiveness =
  ## dce1-style mark&sweep over the C-shaped artifacts: a `(cdef ...)`
  ## group is a definition (flags 'x'/'c'/'m' — exportc, compilerproc,
  ## method/dispatcher — make it a root), names at the top level (data,
  ## globals, init code) are roots, names inside a group are its uses.
  ## Because the artifact is *fully lowered* output, no conservative
  ## modelling is needed: every call the C code contains is a token here.
  ##
  ## NB: mangled C names contain no dots, so NIF's text reader classifies
  ## them as `Ident` rather than `Symbol`; the dialect therefore treats
  ## Ident tokens as name uses. Inside a `(cdef ...)` the flags ident is
  ## the one immediately following the SymbolDef; everything after is a use.
  ##
  ## If a file is missing or its root is not a `stmts` tree, the proc
  ## returns at once with `broken` set and nothing else computed.
  result = CnifLiveness(live: initHashSet[string]())
  var pool = newPool()
  var tags = newTagPool()
  let stmtsTag = tags.registerTag("stmts")
  let cdefTag = tags.registerTag("cdef")
  let cdataTag = tags.registerTag("cdata")
  let metaTag = tags.registerTag("meta")
  var uses = initTable[string, HashSet[string]]() # definition -> names it uses
  var roots = initHashSet[string]() # names that are live unconditionally
  var defs = initHashSet[string]() # names of all `(cdef ...)` groups seen
  for f in files:
    if not fileExists(f):
      result.broken = true
      return
    var buf = parseFromFile(f, 1000, pool, tags)
    var c = beginRead(buf)
    if c.kind != TagLit or c.cursorTagId != stmtsTag:
      result.broken = true
      endRead(c)
      return
    c.loopInto:
      case c.kind
      of Symbol, Ident:
        # a name at the top level of the artifact
        roots.incl symOrIdentName(c)
        inc c
      of TagLit:
        if c.cursorTagId == metaTag or c.cursorTagId == cdataTag:
          # bookkeeping for TU reuse, irrelevant for liveness
          skip c
        elif c.cursorTagId == cdefTag:
          var owner = "" # name of the definition being read
          var flagsSeen = false # whether the flags field was consumed
          c.loopInto:
            case c.kind
            of SymbolDef:
              owner = symName(c)
              defs.incl owner
              flagsSeen = false
              inc c
            of Symbol, Ident:
              let name = symOrIdentName(c)
              if not flagsSeen:
                # the flags field right after the SymbolDef
                flagsSeen = true
                for ch in name:
                  if ch in {'x', 'c', 'm'}:
                    roots.incl owner
                    break
              else:
                uses.mgetOrPut(owner, initHashSet[string]()).incl name
              inc c
            of DotToken:
              flagsSeen = true # empty flags field
              inc c
            else:
              skip c
        else:
          # any other tree: the names directly inside it count as roots
          c.loopInto:
            if c.kind in {Symbol, Ident}:
              roots.incl symOrIdentName(c)
              inc c
            else:
              skip c
      else:
        skip c
    endRead(c)
  # mark & sweep
  var work = newSeqOfCap[string](roots.len)
  for r in roots: work.add r
  while work.len > 0:
    let s = work.pop()
    # `containsOrIncl` is false only the first time `s` is reached, so each
    # name's uses are pushed once
    if not result.live.containsOrIncl(s):
      if uses.hasKey(s):
        for dep in uses[s]:
          if dep notin result.live:
            work.add dep
  result.defs = defs.len
  for d in defs:
    if d in result.live: inc result.liveDefs