IC: progress

This commit is contained in:
Araq
2026-06-08 13:51:02 +02:00
parent d9e28aac8e
commit 69280d6d75
7 changed files with 187 additions and 71 deletions

View File

@@ -36,6 +36,13 @@ proc setupProgram*(config: ConfigRef; cache: IdentCache) =
when not defined(nimKochBootstrap):
program = createDecodeContext(config, cache)
proc setIcMainModule*(fileIdx: FileIndex) =
  ## Registers `fileIdx` with the IC loader as the module that is being
  ## compiled fresh. The loader uses this so that symbols of that module which
  ## a dependency re-exports are not loaded a second time as duplicate stubs.
  ##
  ## Does nothing when built with `nimKochBootstrap` defined.
  when defined(nimKochBootstrap):
    discard
  else:
    ast2nif.setMainModule(program, fileIdx)
template loadSym(s: PSym) =
## Loads a symbol from NIF file if it's in Partial state.
when not defined(nimKochBootstrap):

View File

@@ -161,14 +161,18 @@ type
#writtenSyms: seq[PSym] # symbols written in this module, to be unloaded later
writtenPackages: HashSet[string]
const
# Symbol kinds that are always local to a proc and should never have module suffix
skLocalSymKinds = {skParam, skForVar, skResult, skTemp}
proc isLocalSym(sym: PSym): bool {.inline.} =
sym.kindImpl in skLocalSymKinds or
(sym.kindImpl in {skVar, skLet} and {sfGlobal, sfThread} * sym.flagsImpl == {} and
(sym.ownerFieldImpl == nil or sym.ownerFieldImpl.kindImpl != skModule))
## Params (and generic params, see `writeSymDef`) are emitted as *global*
## (module-suffixed) names so their `sdef` gets an index entry and is
## resolvable by index lookup even when referenced from a different index entry
## than the one that physically contains the definition: generic params of a
## forward declaration vs its implementation, and proc-type params shared
## between an enclosing proc and a nested object's proc-type field. The
## per-module `disamb` counter keeps `name.disamb.module` unique, so this is
## clash-free. The kinds below are only ever used within the single index entry
## of their owning routine, so they stay local (smaller index, and the codegen
## name mangler only handles real module owners).
false
proc toNifSymName(w: var Writer; sym: PSym): string =
## Generate NIF name for a symbol: local names are `ident.disamb`,
@@ -301,24 +305,6 @@ proc writeLib(w: var Writer; dest: var TokenBuf; lib: PLib) =
dest.addStrLit lib.name
writeNode w, dest, lib.path
proc collectGenericParams(w: var Writer; n: PNode) =
  ## Walks a generic-parameter subtree and records every symbol found in
  ## `w.locals` before the owning type is written, so that generic params get
  ## consistent short names and their sdefs are written in the type (where
  ## lazy loading can find them).
  ##
  ## Symbols are only recorded while `w.inProc > 0`; a `nil` node is ignored.
  if n != nil:
    case n.kind
    of nkGenericParams:
      # Every child of the parameter list is inspected.
      for entry in n:
        collectGenericParams(w, entry)
    of nkIdentDefs, nkVarTuple:
      # Only the leading children are visited; the last two are skipped.
      let stop = n.len - 2
      var idx = 0
      while idx < stop:
        collectGenericParams(w, n[idx])
        inc idx
    of nkSym:
      if w.inProc > 0 and n.sym != nil:
        w.locals.incl(n.sym.itemId)
    else:
      discard
proc writeSymDef(w: var Writer; dest: var TokenBuf; sym: PSym) =
dest.addParLe sdefTag, trLineInfo(w, sym.infoImpl)
dest.addSymDef pool.syms.getOrIncl(w.toNifSymName(sym)), NoLineInfo
@@ -353,14 +339,12 @@ proc writeSymDef(w: var Writer; dest: var TokenBuf; sym: PSym) =
writeLib(w, dest, sym.annexImpl)
# For routine symbols, pre-collect generic params into w.locals before writing
# the type. This ensures they get consistent short names, and their sdefs are
# written in the type where lazy loading can find them via extractLocalSymsFromTree.
if sym.kindImpl in routineKinds and sym.astImpl != nil and sym.astImpl.len > genericParamsPos:
inc w.inProc
collectGenericParams(w, sym.astImpl[genericParamsPos])
dec w.inProc
# Generic params are written as *global* symbols (with a module suffix) so that
# they get their own index entries and can be looked up lazily. This matters for
# generic routines that have a separate forward declaration and implementation:
# the two share the same generic param symbols, but each is serialized as its own
# index entry. If the params were local, a reference from the implementation's
# entry could not resolve the sdef emitted in the forward declaration's entry.
writeType(w, dest, sym.typImpl)
writeSym(w, dest, sym.ownerFieldImpl)
# Store the AST for routine symbols and constants
@@ -433,9 +417,10 @@ template withNode(w: var Writer; dest: var TokenBuf; n: PNode; body: untyped) =
dest.addParRi
proc addLocalSym(w: var Writer; n: PNode) =
## Add symbol from a node to locals set if it's a symbol node
if n != nil and n.kind == nkSym and n.sym != nil and w.inProc > 0:
w.locals.incl(n.sym.itemId)
## Previously forced proc-local symbols to be written without a module suffix.
## All symbols are now emitted as global (see `isLocalSym`), so `w.locals` is
## intentionally left empty.
discard
proc addLocalSyms(w: var Writer; n: PNode) =
case n.kind
@@ -819,6 +804,8 @@ type
symCounter: int32
index: Table[string, NifIndexEntry] # Simple embedded index for offsets
suffix: string
contentStart: int # stream offset of the module body, so a full-AST load can
# rewind after lazy symbol loads moved the cursor
DecodeContext* = object
infos: LineInfoWriter
@@ -827,11 +814,20 @@ type
syms: Table[string, (PSym, NifIndexEntry)]
mods: Table[FileIndex, NifModule]
cache: IdentCache
mainModuleSuffix: string
## Mangled module name of the module being compiled fresh (cmdM). Symbols
## belonging to it that are re-exported by a dependency must NOT be loaded
## as stubs, otherwise they collide with the freshly compiled originals.
proc createDecodeContext*(config: ConfigRef; cache: IdentCache): DecodeContext =
  ## Builds a new `DecodeContext` whose line-info writer is bound to `config`
  ## and which uses `cache` as its identifier cache. All other fields keep
  ## their default values. The returned context is supposed to be stored in a
  ## global variable.
  let lineInfos = LineInfoWriter(config: config)
  DecodeContext(infos: lineInfos, cache: cache)
proc setMainModule*(c: var DecodeContext; fileIdx: FileIndex) =
  ## Remembers which module is being compiled fresh by storing its module name
  ## (as computed by `modname`) in `c.mainModuleSuffix`. This lets the loader
  ## avoid turning re-exports of that module's own symbols by dependencies
  ## into duplicate stubs.
  let mainSuffix = modname(int(fileIdx), c.infos.config)
  c.mainModuleSuffix = mainSuffix
proc cursorFromIndexEntry(c: var DecodeContext; module: FileIndex; entry: NifIndexEntry;
buf: var TokenBuf): Cursor =
let s = addr c.mods[module].stream
@@ -895,7 +891,10 @@ proc moduleId(c: var DecodeContext; suffix: string; flags: set[LoadFlag] = {}):
"whose NIF file hasn't been written yet."
var stream = nifstreams.open(modFile)
let index = readEmbeddedIndex(stream)
c.mods[result] = NifModule(stream: stream, index: index, suffix: suffix)
# `readEmbeddedIndex` leaves the cursor at the start of the module body.
let contentStart = offset(stream.r)
c.mods[result] = NifModule(stream: stream, index: index, suffix: suffix,
contentStart: contentStart)
proc getOffset(c: var DecodeContext; module: FileIndex; nifName: string): NifIndexEntry =
let ii = addr c.mods[module].index
@@ -927,8 +926,19 @@ proc createTypeStub(c: var DecodeContext; t: SymId): PType =
if i < name.len and name[i] == '.': inc i
let suffix = name.substr(i)
let id = ItemId(module: moduleId(c, suffix).int32, item: itemId)
let offs = c.getOffset(id.module.FileIndex, name)
result = PType(itemId: id, uniqueId: id, kind: TTypeKind(k), state: Partial)
let ii = addr c.mods[id.module.FileIndex].index
let offs = ii[].getOrDefault(name)
if offs.offset == 0 and k == ord(tyNone):
# A `tyNone` placeholder (e.g. the type of a symbol-choice node) that is
# not present in its owning module's index. Such types are copied during
# template/generic instantiation in another module but keep their original
# owner, so the owner never serialised them. They carry no information, so
# synthesise a fresh, fully-loaded empty type instead of failing.
result = PType(itemId: id, uniqueId: id, kind: TTypeKind(k), state: Complete)
else:
if offs.offset == 0:
raiseAssert "symbol has no offset: " & name
result = PType(itemId: id, uniqueId: id, kind: TTypeKind(k), state: Partial)
c.types[name] = (result, offs)
proc extractLocalSymsFromTree(c: var DecodeContext; n: var Cursor; thisModule: string;
@@ -1544,6 +1554,25 @@ proc loadImport(c: var DecodeContext; s: var Stream; deps: var seq[ModuleSuffix]
else:
raiseAssert "expected ParRi but got " & $tok.kind
proc addReexportedEnumFields(c: var DecodeContext; sym: PSym; interf: var TStrTable) =
  ## Makes the fields of a (re-)exported non-pure enum type visible to
  ## importers by adding them to `interf`.
  ##
  ## In a from-source build this happens through `rawImportSymbol`'s enum
  ## handling when the type is imported; the lazy IC importer never runs that,
  ## so the fields are materialised here while the export list is processed.
  ##
  ## `sym` is loaded first. Nothing is added unless it is an `skType` symbol
  ## without `sfPure` whose type is a `tyEnum` or `tyBool` with a field list.
  loadSym(c, sym)
  if sym.kindImpl == skType and sfPure notin sym.flagsImpl:
    let enumType = sym.typImpl
    if enumType != nil:
      # The type itself may still be a stub; load it before inspecting it.
      loadType(c, enumType)
      if enumType.kind in {tyEnum, tyBool}:
        let fieldList = enumType.nImpl
        if fieldList != nil:
          for idx in 0 ..< fieldList.len:
            let entry = fieldList[idx]
            # Only proper symbol nodes contribute an interface entry.
            if entry != nil and entry.kind == nkSym and entry.sym != nil:
              strTableAdd(interf, entry.sym)
proc processTopLevel(c: var DecodeContext; s: var Stream; flags: set[LoadFlag];
interf: var TStrTable; suffix: string; module: int): PrecompiledModule =
result = PrecompiledModule(topLevel: newNode(nkStmtList))
@@ -1601,9 +1630,15 @@ proc processTopLevel(c: var DecodeContext; s: var Stream; flags: set[LoadFlag];
while true:
if t.kind == Symbol:
let symAsStr = pool.syms[t.symId]
let sym = resolveSym(c, symAsStr, false)
if sym != nil:
strTableAdd(interf, sym)
# Skip symbols that are re-exported by this dependency but actually
# belong to the module we are compiling fresh: loading them as stubs
# would shadow/collide with the freshly compiled originals.
if c.mainModuleSuffix.len == 0 or
parseSymName(symAsStr).module != c.mainModuleSuffix:
let sym = resolveSym(c, symAsStr, false)
if sym != nil:
strTableAdd(interf, sym)
addReexportedEnumFields(c, sym, interf)
t = next(s)
elif t.kind == ParRi:
break
@@ -1636,8 +1671,11 @@ proc loadNifModule*(c: var DecodeContext; suffix: ModuleSuffix; interf, interfHi
let module = moduleId(c, string(suffix), flags)
# Load the module AST (or just replay actions if loadFullAst is false)
# processTopLevel also collects export instructions
# processTopLevel also collects export instructions.
# Lazy symbol loading may have moved the stream cursor since the module was
# opened, so rewind to the start of the module body before reading it.
let s = addr c.mods[module].stream
s[].r.jumpTo(c.mods[module].contentStart)
var t = next(s[])
if t.kind == ParLe and pool.tags[t.tagId] == toNifTag(nkStmtList):
t = next(s[]) # skip (stmts

View File

@@ -302,6 +302,51 @@ proc whenMarkerHolds(c: DepContext; s: var Stream): bool =
if v == "false": result = false
# else (true / unknown ident): keep result
proc parseImportPath(s: var Stream; t: var PackedToken): seq[string] =
  ## Parses one import path expression and returns the module paths it refers
  ## to.
  ##
  ## Supported forms:
  ## * a plain ident (`foo`) or a string literal: one path;
  ## * an `infix` node such as `std/foo`, including nested ones like
  ##   `std/private/since`: the left side is prefixed to every path of the
  ##   right side, joined with `/`;
  ## * an `infix` node whose operator is `as` (`foo as bar`): only the left
  ##   side is a module path, the alias on the right is dropped;
  ## * a `bracket` node such as `[bitops, fenv]`: one path per element, so
  ##   `std/[bitops, fenv]` expands to several imports;
  ## * any other subtree is skipped and yields no path.
  ##
  ## On entry `t` is the first token of the expression; on exit `t` is the
  ## token immediately following the whole expression. When `t` is already a
  ## closing `ParRi` or `EofToken` there is no expression: nothing is consumed
  ## and the result is empty, so the caller still sees the terminator of its
  ## own node.
  result = @[]
  case t.kind
  of Ident, StringLit:
    result.add pool.strings[t.litId]
    t = next(s)
  of ParRi, EofToken:
    # Not part of a path expression; leave it for the enclosing node.
    discard
  of ParLe:
    let tag = pool.tags[t.tagId]
    if tag == "infix":
      t = next(s) # skip 'infix' tag
      var op = ""
      if t.kind == Ident:
        op = pool.strings[t.litId]
        t = next(s) # skip the operator (`/`, `as`, ...)
      let left = parseImportPath(s, t)
      let right = parseImportPath(s, t)
      if op == "as":
        # `module as alias`: the alias is a local name, not a path component.
        result = left
      else:
        let prefix = if left.len == 1: left[0] else: ""
        for r in right:
          if prefix.len > 0: result.add prefix & "/" & r
          else: result.add r
      if t.kind == ParRi: t = next(s) # skip closing ')'
    elif tag == "bracket":
      t = next(s) # skip 'bracket' tag
      while t.kind != ParRi and t.kind != EofToken:
        result.add parseImportPath(s, t)
      if t.kind == ParRi: t = next(s) # skip closing ')'
    else:
      # Unknown subtree: skip it entirely, tracking nesting so that `t` ends
      # up on the token after the subtree's own closing ')'.
      var depth = 1
      t = next(s)
      while depth > 0 and t.kind != EofToken:
        if t.kind == ParLe: inc depth
        elif t.kind == ParRi: dec depth
        if depth == 0: break
        t = next(s)
      if t.kind == ParRi: t = next(s)
  else:
    # Any other single token is not a path; step over it.
    t = next(s)
proc readDepsFile(c: var DepContext; pair: FilePair; current: Node) =
## Read a .deps.nif file and process imports/includes
let depsPath = c.depsFile(pair)
@@ -344,33 +389,32 @@ proc readDepsFile(c: var DepContext; pair: FilePair; current: Node) =
elif n.kind == EofToken: break
t = next(s)
continue
# Handle path expression (could be ident, string, or infix like std/foo)
var importPath = ""
if t.kind == Ident:
importPath = pool.strings[t.litId]
elif t.kind == StringLit:
importPath = pool.strings[t.litId]
elif t.kind == ParLe and pool.tags[t.tagId] == "infix":
# Handle std / foo style imports
t = next(s) # skip infix tag
if t.kind == Ident: # operator (/)
t = next(s)
if t.kind == Ident: # first part (std)
importPath = pool.strings[t.litId]
t = next(s)
if t.kind == Ident: # second part (foo)
importPath = importPath & "/" & pool.strings[t.litId]
if importPath.len > 0:
if tag == "include":
processInclude(c, importPath, current)
else:
processImport(c, importPath, current)
# Skip to end of node
# Process the path expression(s). Each path supports plain idents,
# string literals, `std/foo` infixes (possibly nested, e.g.
# `std/private/since`) and bracketed groups like `std/[bitops, fenv]`
# that expand to several imports. A plain `import a, b, c` lists several
# modules as siblings; a `fromimport` has a single path followed by the
# imported symbol list, which must not be treated as modules.
if tag == "fromimport":
for importPath in parseImportPath(s, t):
if importPath.len > 0:
processImport(c, importPath, current)
else:
while t.kind != ParRi and t.kind != EofToken:
for importPath in parseImportPath(s, t):
if importPath.len > 0:
if tag == "include":
processInclude(c, importPath, current)
else:
processImport(c, importPath, current)
# Drain any remaining tokens of this node (e.g. the symbol list of a
# `fromimport`), up to and including the node's closing ')'.
var depth = 1
while depth > 0:
t = next(s)
while depth > 0 and t.kind != EofToken:
if t.kind == ParLe: inc depth
elif t.kind == ParRi: dec depth
if depth == 0: break
t = next(s)
else:
# Skip unknown node
var depth = 1

View File

@@ -54,6 +54,19 @@ proc mangleParamExt*(s: PSym): string =
proc mangleProcNameExt*(graph: ModuleGraph, s: PSym): string =
result = "__"
result.add graph.ifaces[s.itemId.module].uniqueName
# Under incremental compilation the main module is registered at its source
# file index, but its symbols are keyed by the NIF-suffix file index, which has
# no `ifaces` slot. Only the main module has this gap (dependencies are
# registered at their suffix index), so omitting the unique name for it stays
# collision-free: the mangled base name plus `disamb` already disambiguate.
if s.itemId.module >= 0 and s.itemId.module < graph.ifaces.len:
result.add graph.ifaces[s.itemId.module].uniqueName
result.add "_u"
result.addInt s.itemId.item # s.disamb #
# Use `disamb` rather than `itemId.item`: under incremental compilation a
# symbol loaded from a NIF file gets a fresh, load-order-dependent `itemId.item`
# (from the per-module symbol counter), which is neither stable across the
# processes that compile vs. use a module nor guaranteed distinct from another
# loaded symbol's. `disamb` is assigned deterministically per (module, name)
# and, together with the already-prepended mangled name, yields a unique and
# stable C identifier.
result.addInt s.disamb

View File

@@ -375,6 +375,9 @@ proc compilePipelineProject*(graph: ModuleGraph; projectFileIdx = InvalidFileIdx
elif graph.config.cmd == cmdM:
# For cmdM: load system.nim from NIF first, then compile the main module
connectPipelineCallbacks(graph)
# Record the main module so the IC loader won't materialise duplicate stubs
# for its own symbols when a dependency (e.g. system) re-exports them.
setIcMainModule(projectFile)
graph.config.m.systemFileIdx = fileInfoIdx(graph.config,
graph.config.libpath / RelativeFile"system.nim")
when not defined(nimKochBootstrap):

View File

@@ -512,6 +512,10 @@ proc semArrayIndex(c: PContext, n: PNode): PType =
if c.inGenericContext > 0: result.incl tfUnresolved
else:
result = e.typ.skipTypes({tyTypeDesc})
if result.state == Sealed:
# The index type was loaded from the IC cache and must not be mutated
# in place; work on a copy so we can mark it as an implicit static.
result = copyType(result, c.idgen, getCurrOwner(c))
result.incl tfImplicitStatic
elif e.kind in (nkCallKinds + {nkBracketExpr}) and hasUnresolvedArgs(c, e):
if not isOrdinalType(e.typ.skipTypes({tyStatic, tyAlias, tyGenericInst, tySink})):

View File

@@ -808,7 +808,11 @@ proc replaceTypeVarsTAux(cl: var TReplTypeVars, t: PType, isInstValue = false):
if t.kind == tyRef and t.hasElementType and t.elementType.kind == tyObject and t.elementType.n != nil:
discard replaceObjBranches(cl, t.elementType.n)
elif result.n != nil and t.kind == tyObject:
elif result.n != nil and t.kind == tyObject and result.state != Sealed:
# A type loaded from the IC cache already had its object branches
# resolved when it was originally compiled, and must not be mutated in
# place (nor copied, which would break object-inheritance identity), so
# only non-Sealed types are processed here.
# Invalidate the type size as we may alter its structure
result.size = -1
result.n = replaceObjBranches(cl, result.n)
@@ -860,7 +864,10 @@ proc recomputeFieldPositions*(t: PType; obj: PNode; currPosition: var int) =
for i in 1..<obj.len:
recomputeFieldPositions(nil, lastSon(obj[i]), currPosition)
of nkSym:
obj.sym.position = currPosition
# A field loaded from the IC cache is already at its final position and must
# not be mutated; only freshly instantiated fields need (re)positioning.
if obj.sym.state != Sealed:
obj.sym.position = currPosition
inc currPosition
else: discard "cannot happen"