From 4c8de3bcb8866bc07172087be48039d52ce1aa20 Mon Sep 17 00:00:00 2001 From: Araq Date: Tue, 9 Jun 2026 16:12:38 +0200 Subject: [PATCH] progress --- compiler/ast.nim | 10 ++- compiler/ast2nif.nim | 13 +++- compiler/ccgexprs.nim | 6 +- compiler/commands.nim | 11 +++- compiler/deps.nim | 144 ++++++++++++++++++++++++++++++++++------- compiler/options.nim | 7 ++ compiler/parser.nim | 5 +- compiler/pipelines.nim | 22 +++++-- compiler/typekeys.nim | 29 ++++++++- compiler/vmops.nim | 4 +- koch.nim | 37 +++++++++++ 11 files changed, 249 insertions(+), 39 deletions(-) diff --git a/compiler/ast.nim b/compiler/ast.nim index a1c6b797e2..d9cfe11300 100644 --- a/compiler/ast.nim +++ b/compiler/ast.nim @@ -455,9 +455,13 @@ var gconfig {.threadvar.}: Gconfig proc setUseIc*(useIc: bool) = gconfig.useIc = useIc proc comment*(n: PNode): string = - if nfHasComment in n.flags and not gconfig.useIc: - # IC doesn't track comments, see `packed_ast`, so this could fail - result = gconfig.comments[n.nodeId] + if nfHasComment in n.flags: + # NIF-based IC doesn't serialize comments, but the comment table is keyed by + # the node's address (`nodeId`), which is unique among live nodes; a loaded + # node that carries `nfHasComment` simply has no entry here (its comment was + # set in another process), so `getOrDefault` safely returns "" for it while + # in-process VM macro nodes (e.g. newCommentStmtNode) still round-trip. 
+ result = gconfig.comments.getOrDefault(n.nodeId) else: result = "" diff --git a/compiler/ast2nif.nim b/compiler/ast2nif.nim index 8fff523a0e..2ac6963a02 100644 --- a/compiler/ast2nif.nim +++ b/compiler/ast2nif.nim @@ -615,9 +615,16 @@ proc writeNode(w: var Writer; dest: var TokenBuf; n: PNode; forAst = false) = w.withNode dest, ast: for i in 0 ..< ast.len: if i == paramsPos and skipParams: - # Parameter are redundant with s.typ.n and even dangerous as for generic instances - # we do not adapt the symbols properly - addDotToken(dest) + # Parameters are redundant with s.typ.n (and re-emitting their syms + # is dangerous for generic instances — we do not adapt the symbols + # properly). Emit an `nkEmpty` placeholder rather than a dot token: + # a dot loads back as a `nil` son, but ast children must be real + # nodes — the loaded routine ast is walked by passes (lambdalifting, + # liftdestructors, transf) that dereference `ast[paramsPos]`, and + # `nkEmpty` is the canonical empty slot. The actual params are + # recovered from `sym.typ.n` where needed. + dest.addParLe pool.tags.getOrIncl(toNifTag(nkEmpty)), NoLineInfo + dest.addParRi else: writeNode(w, dest, ast[i], forAst) dec w.inProc diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 32e39e5f9f..a0d74be6c2 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -3555,12 +3555,16 @@ proc expr(p: BProc, n: PNode, d: var TLoc) = of skProc, skConverter, skIterator, skFunc: #if sym.kind == skIterator: # echo renderTree(sym.getBody, {renderIds}) - if isGenericRoutineStrict(sym) or sfCompileTime in sym.flags: + if isGenericRoutineStrict(sym) or sfCompileTime in sym.flags or + (sym.kind == skIterator and sym.typ.callConv == ccInline): # Under IC a module's top-level routine definitions are serialized as bare # symbol references that reappear in the loaded statement list. Uninstantiated # generic routines (incl. 
those with type-class params like `tuple`) and # `.compileTime` routines have no run-time code, so skip them here. (A real # run-time *use* of a `.compileTime` proc is still rejected at the call site.) + # Inline iterators likewise have no standalone code — they are always inlined + # at their for-loop call sites by the transformer (only closure iterators get + # a standalone C function), so a bare serialized def reference is a no-op. return if delayedCodegen(p.module) and sym.typ.callConv != ccInline: fillProcLoc(p.module, n) diff --git a/compiler/commands.nim b/compiler/commands.nim index 64821a4743..9e9349f1d1 100644 --- a/compiler/commands.nim +++ b/compiler/commands.nim @@ -24,7 +24,7 @@ bootSwitch(usedMarkAndSweep, defined(gcmarkandsweep), "--gc:markAndSweep") bootSwitch(usedGoGC, defined(gogc), "--gc:go") bootSwitch(usedNoGC, defined(nogc), "--gc:none") -import std/[setutils, os, strutils, parseutils, parseopt, sequtils, strtabs, enumutils] +import std/[setutils, sets, os, strutils, parseutils, parseopt, sequtils, strtabs, enumutils] import msgs, options, nversion, condsyms, extccomp, platform, wordrecg, nimblecmd, lineinfos, pathutils @@ -928,6 +928,15 @@ proc processSwitch*(switch, arg: string, pass: TCmdLinePass, info: TLineInfo; # real entry point so that `isMainModule` and `when isMainModule:` resolve # correctly even though every module is compiled with `sfMainModule` set. conf.isMainModule = switchOn(arg) + of "icgroup": + # `nim m` only: register a module that belongs to the current strongly- + # connected import group, so it is compiled from source (not loaded from a + # precompiled NIF) and gets its own NIF written. `deps.nim` emits one + # `--icGroup:` per member of a dependency cycle. The argument is an + # absolute .nim path produced by the dependency scanner. 
+ expectArg(conf, switch, arg, pass, info) + if pass in {passCmd2, passPP}: + conf.icGroup.incl(canonicalizePath(conf, AbsoluteFile arg).string) of "import": expectArg(conf, switch, arg, pass, info) if pass in {passCmd2, passPP}: diff --git a/compiler/deps.nim b/compiler/deps.nim index 1e1db907b5..0c7c2bb517 100644 --- a/compiler/deps.nim +++ b/compiler/deps.nim @@ -10,7 +10,7 @@ ## Generate a .build.nif file for nifmake from a Nim project. ## This enables incremental and parallel compilation using the `m` switch. -import std / [os, tables, sets, times, osproc] +import std / [os, tables, sets, times, osproc, algorithm] import options, msgs, lineinfos, pathutils import "../dist/nimony/src/lib" / [nifstreams, bitabs, nifreader, nifbuilder] @@ -225,6 +225,19 @@ proc evalCondExpr(c: DepContext; s: var Stream): bool = if n.kind == ParLe: inc depth elif n.kind == ParRi: dec depth elif n.kind == EofToken: return + of "par": + # a parenthesised grouping such as `(defined(a) or defined(b))`: evaluate + # the inner expression. Without this, `par` fell through to the `else` + # branch below and evaluated to `true`, which silently inverted conditions + # like `not (defined(macosx) or defined(bsd))` and dropped real imports + # (e.g. `cpuinfo`'s conditional `import std/posix`). + result = evalCondExpr(c, s) + var depth = 1 + while depth > 0: + let n = next(s) + if n.kind == ParLe: inc depth + elif n.kind == ParRi: dec depth + elif n.kind == EofToken: return else: skipSubtree(s, t) result = true @@ -434,6 +447,58 @@ proc traverseDeps(c: var DepContext; pair: FilePair; current: Node) = return readDepsFile(c, pair, current) +proc computeSCCs(c: DepContext): seq[seq[int]] = + ## Tarjan's strongly-connected-components over the module dependency graph + ## (`node.deps`). Each returned component is a list of node indices; a module + ## that is not part of any import cycle yields a singleton component. 
Tarjan + ## emits components in reverse-topological order (a component's external + ## dependencies come out before it), which is exactly the order `nifmake` + ## needs for the per-group `nim m` build rules. + type Frame = object + v, pi: int + let n = c.nodes.len + var index = newSeq[int](n) + var lowlink = newSeq[int](n) + var onStack = newSeq[bool](n) + var visited = newSeq[bool](n) + var stack: seq[int] = @[] + var counter = 0 + result = @[] + + # Iterative Tarjan (explicit work stack) so a deep module-dependency chain + # cannot overflow the call stack. + for start in 0..<n: + if visited[start]: continue + var work = @[Frame(v: start, pi: 0)] + while work.len > 0: + let v = work[^1].v + if work[^1].pi == 0: + visited[v] = true + index[v] = counter + lowlink[v] = counter + inc counter + stack.add v + onStack[v] = true + if work[^1].pi < c.nodes[v].deps.len: + let w = c.nodes[v].deps[work[^1].pi] + inc work[^1].pi + if not visited[w]: + work.add Frame(v: w, pi: 0) + elif onStack[w]: + lowlink[v] = min(lowlink[v], index[w]) + else: + if lowlink[v] == index[v]: + var comp: seq[int] = @[] + while true: + let w = stack.pop() + onStack[w] = false + comp.add w + if w == v: break + result.add comp + work.setLen work.len - 1 + if work.len > 0: + lowlink[work[^1].v] = min(lowlink[work[^1].v], lowlink[v]) + proc generateBuildFile(c: DepContext): string = ## Generate the .build.nif file for nifmake let nimcache = getNimcacheDir(c.config).string @@ -506,34 +571,67 @@ proc generateBuildFile(c: DepContext): string = b.endTree() b.endTree() - # Build rules for semantic checking (nim m) - for i in countdown(c.nodes.len - 1, 0): - let node = c.nodes[i] - let pair = node.files[0] + # Build rules for semantic checking (nim m). + # + # Modules are grouped into strongly-connected components: a module that is not + # in an import cycle is its own singleton group and compiles in its own + # `nim m <file>` invocation as before.
A cycle (A imports B, B imports A) cannot + # be ordered for separate per-module compilation, so the whole component is + # handed to a single `nim m` invocation: the first member is the project file, + # every member is passed via `--icGroup:` so the compiler compiles them + # all from source in one process (resolving the recursion in-memory) and writes + # a NIF for each. Only dependencies *outside* the component become build-graph + # inputs — intra-component edges are produced by this very rule and listing + # them would reintroduce the cycle nifmake just rejected. + let sccs = computeSCCs(c) + var sccOf = newSeq[int](c.nodes.len) + for sccId, comp in sccs: + for nodeIdx in comp: sccOf[nodeIdx] = sccId + for comp in sccs: + # Representative (project file for this invocation) = smallest node id, so a + # component containing the root (node 0) is driven by the root. + var members = comp + members.sort() + let repPair = c.nodes[members[0]].files[0] + let isGroup = members.len > 1 b.addTree "do" b.addIdent "nim_m" + b.addTree "args" # The root module (node 0) is the program's real entry point; mark it so # `isMainModule` resolves to true only for it (every module otherwise gets # `sfMainModule` for NIF writing under `nim m`). - if i == 0: - b.withTree "args": - b.addStrLit "--isMainModule:on" - # Input: all parsed files for this module - b.withTree "input": - b.addStrLit node.files[0].nimFile - for f in node.files: - b.addTree "input" - b.addStrLit c.parsedFile(f) - b.endTree() - # Also depend on semmed files of dependencies - for depIdx in node.deps: - b.addTree "input" - b.addStrLit c.semmedFile(c.nodes[depIdx].files[0]) - b.endTree() - # Output: semmed file - b.addTree "output" - b.addStrLit c.semmedFile(pair) + if members[0] == 0: + b.addStrLit "--isMainModule:on" + # For a real cycle, tell the compiler which modules form the group so it + # compiles them all from source and writes each one's NIF. 
+ if isGroup: + for m in members: + b.addStrLit "--icGroup:" & c.nodes[m].files[0].nimFile b.endTree() + # Input 0 (the project file passed to `nim m`): the representative's .nim. + b.withTree "input": + b.addStrLit repPair.nimFile + # All parsed files of every member (nifler outputs this group consumes). + for m in members: + for f in c.nodes[m].files: + b.addTree "input" + b.addStrLit c.parsedFile(f) + b.endTree() + # Depend only on the semmed files of dependencies *outside* this component. + var seenDep = initHashSet[string]() + for m in members: + for depIdx in c.nodes[m].deps: + if sccOf[depIdx] == sccOf[m]: continue # intra-component edge + let depSem = c.semmedFile(c.nodes[depIdx].files[0]) + if not seenDep.containsOrIncl(depSem): + b.addTree "input" + b.addStrLit depSem + b.endTree() + # Output: one semmed NIF per member. + for m in members: + b.addTree "output" + b.addStrLit c.semmedFile(c.nodes[m].files[0]) + b.endTree() b.endTree() # Final compilation step: generate executable from main module diff --git a/compiler/options.nim b/compiler/options.nim index b9715b7803..434f30f47f 100644 --- a/compiler/options.nim +++ b/compiler/options.nim @@ -380,6 +380,12 @@ type lastCmdTime*: float # when caas is enabled, we measure each command symbolFiles*: SymbolFilesOption ic*: bool # whether ic is enabled + icGroup*: HashSet[string] # under `nim m`: absolute paths of the modules in + # this strongly-connected import group. They are all + # compiled from source in one process (so mutual + # recursion resolves in-memory) and each gets its NIF + # written, instead of being loaded from a precompiled + # NIF. See `compiler/deps.nim` (SCC grouping). 
spellSuggestMax*: int # max number of spelling suggestions for typos cppDefines*: HashSet[string] # (*) @@ -588,6 +594,7 @@ proc newConfigRef*(): ConfigRef = arcToExpand: newStringTable(modeStyleInsensitive), m: initMsgConfig(), cppDefines: initHashSet[string](), + icGroup: initHashSet[string](), headerFile: "", features: {}, legacyFeatures: {}, configVars: newStringTable(modeStyleInsensitive), symbols: newStringTable(modeStyleInsensitive), diff --git a/compiler/parser.nim b/compiler/parser.nim index 32bf4b3d52..38f556efb2 100644 --- a/compiler/parser.nim +++ b/compiler/parser.nim @@ -54,7 +54,10 @@ import when not defined(nimCustomAst): import ast -else: +when defined(nimCustomAst): + # NOTE: explicit negated `when` rather than `else:` — nifler's dep scanner + # guards `when`/`elif` imports with their condition but emits `else:` imports + # unconditionally, which would wrongly schedule this module under `nim ic`. import plugins / customast import std/strutils diff --git a/compiler/pipelines.nim b/compiler/pipelines.nim index 8fe918a0a1..501b922b00 100644 --- a/compiler/pipelines.nim +++ b/compiler/pipelines.nim @@ -15,7 +15,7 @@ import ../dist/checksums/src/checksums/sha1 when not defined(leanCompiler): import jsgen, docgen2 -import std/[syncio, objectdollar, assertions, tables, strutils, strtabs] +import std/[syncio, objectdollar, assertions, tables, strutils, strtabs, sets] import renderer import ic/replayer @@ -243,9 +243,14 @@ proc processPipelineModule*(graph: ModuleGraph; module: PSym; idgen: IdGenerator when not defined(nimKochBootstrap): # For cmdM: only write NIF for the main module, not for imported modules - # (imported modules should be loaded from existing NIF files) + # (imported modules should be loaded from existing NIF files). Members of the + # current strongly-connected import group (`--icGroup`) are the exception: + # they are compiled from source here, so each must write its own NIF. 
let shouldWriteNif = (optCompress in graph.config.globalOptions) or - (graph.config.cmd == cmdM and sfMainModule in module.flags) + (graph.config.cmd == cmdM and + (sfMainModule in module.flags or + (graph.config.icGroup.len > 0 and + toFullPath(graph.config, module.position.FileIndex) in graph.config.icGroup))) if shouldWriteNif and not graph.config.isDefined("nimscript"): topLevelStmts.add finalNode # Collect replay actions from both pragma computations and VM state diff @@ -278,11 +283,18 @@ proc compilePipelineModule*(graph: ModuleGraph; fileIdx: FileIndex; flags: TSymF if result == nil: when not defined(nimKochBootstrap): # For cmdM: load imports from NIF files (but compile the main module from source) - # Skip when withinSystem is true (compiling system.nim itself) + # Skip when withinSystem is true (compiling system.nim itself). + # Also skip for members of the current strongly-connected import group + # (`--icGroup`): those are mutually recursive with the main module and have + # no precompiled NIF yet, so they must be compiled from source in this same + # process (falling through below) — that resolves the cycle in-memory, the + # same way the non-incremental compiler handles recursive module imports. if graph.config.cmd == cmdM and sfMainModule notin flags and not graph.withinSystem and - not graph.config.isDefined("nimscript"): + not graph.config.isDefined("nimscript") and + (graph.config.icGroup.len == 0 or + toFullPath(graph.config, fileIdx) notin graph.config.icGroup): let precomp = moduleFromNifFile(graph, fileIdx) if precomp.module == nil: let nifPath = toNifFilename(graph.config, fileIdx) diff --git a/compiler/typekeys.nim b/compiler/typekeys.nim index f23915fb82..26716e4a37 100644 --- a/compiler/typekeys.nim +++ b/compiler/typekeys.nim @@ -227,10 +227,37 @@ proc typeKey(c: var Context; t: PType; flags: set[ConsiderFlag]; conf: ConfigRef assert inst.kind == tyGenericInst c.typeKey inst.sonsImpl[0], flags, conf for i in 1.. 
0: + # Hack to prevent endless recursion (a field may reference this type). + let oldFlags = symWithFlags.flagsImpl + symWithFlags.flagsImpl.excl {sfAnon, sfGenSym} + c.treeKey(t.nImpl, flags + {CoHashTypeInsideNode}, conf) + symWithFlags.flagsImpl = oldFlags + else: + c.m.addIdent "´empty" + # Object inheritance is part of identity: key the base class too. + if t.kind == tyObject and t.sonsImpl.len > 0 and t.sonsImpl[0] != nil: + c.typeKey t.sonsImpl[0], flags, conf else: c.m.addIdent "`bug" of tyFromExpr: diff --git a/compiler/vmops.nim b/compiler/vmops.nim index f3d349e803..b526916d3c 100644 --- a/compiler/vmops.nim +++ b/compiler/vmops.nim @@ -36,7 +36,9 @@ from std/osproc import nil when defined(nimPreviewSlimSystem): import std/syncio -else: +when not defined(nimPreviewSlimSystem): + # explicit negated `when` rather than `else:` so nifler's dep scanner guards + # this import with its condition (it emits `else:` imports unconditionally). from std/formatfloat import addFloatRoundtrip, addFloatSprintf diff --git a/koch.nim b/koch.nim index ae1d6557c0..6fff0de5af 100644 --- a/koch.nim +++ b/koch.nim @@ -76,6 +76,7 @@ Options: --skipIntegrityCheck skips integrity check when booting the compiler Possible Commands: boot [options] bootstraps with given command line options + bootic [options] bootstraps via the incremental compiler (`nim ic`) distrohelper [bindir] helper for distro packagers tools builds Nim related tools toolsNoExternal builds Nim related tools (except external tools, @@ -406,6 +407,41 @@ proc boot(args: string, skipIntegrityCheck: bool) = if not skipIntegrityCheck: echo "[Warning] executables are still not equal" +proc bootic(args: string, skipIntegrityCheck: bool) = + ## Like `boot`, but bootstraps the compiler through the NIF-based incremental + ## compiler (`nim ic`) instead of `nim c`. 
Differences from `boot`: + ## * It starts from an already-bootstrapped Nim (found via `findStartNim`): the + ## csources compiler is far too old to provide the `ic` command, and the + ## `-d:nimKochBootstrap` define used by `boot`'s first stage *disables* + ## `commandIc`, so neither can be used here. + ## * `nim ic` drives the per-module build and the final link itself (via + ## `nifmake`), so there is no `--compileOnly` + `jsonscript` split. + ## The 3-step fixed-point check is kept: a successful run proves the compiler + ## can compile itself under IC and reproduces a stable binary. + var output = "compiler" / "nim".exe + var finalDest = "bin" / "nim".exe + let smartNimcache = (if "release" in args or "danger" in args: "nimcache/ric_" else: "nimcache/dic_") & + hostOS & "_" & hostCPU + + bundleChecksums(false) + + let nimStart = findStartNim().quoteShell() + let times = 2 - ord(skipIntegrityCheck) + for i in 0..times: + echo "iteration: ", i+1 + let nimi = if i == 0: nimStart else: i.thVersion + exec "$# ic --nimcache:$# $# compiler" / "nim.nim" % + [nimi, smartNimcache, args] + if sameFileContent(output, i.thVersion): + copyExe(output, finalDest) + echo "executables are equal: SUCCESS!" + return + copyExe(output, (i+1).thVersion) + copyExe(output, finalDest) + when not defined(windows): + if not skipIntegrityCheck: + echo "[Warning] executables are still not equal" + # -------------- clean -------------------------------------------------------- const @@ -744,6 +780,7 @@ when isMainModule: of cmdArgument: case normalize(op.key) of "boot": boot(op.cmdLineRest, skipIntegrityCheck) + of "bootic": bootic(op.cmdLineRest, skipIntegrityCheck) of "clean": clean(op.cmdLineRest) of "doc", "docs": buildDocs(op.cmdLineRest & " --d:nimPreviewSlimSystem " & paCode, localDocsOnly, localDocsOut) of "doc0", "docs0":