diff --git a/compiler/ast2nif.nim b/compiler/ast2nif.nim index cc30b116bc..66d1c2be95 100644 --- a/compiler/ast2nif.nim +++ b/compiler/ast2nif.nim @@ -859,7 +859,11 @@ proc writeNifModule*(config: ConfigRef; thisModule: int32; n: PNode; dest.addParRi() - writeFile(dest, d) + # OnlyIfChanged keeps the mtime of content-identical rewrites: nifmake's + # mtime-based `needsRebuild` then prunes the rebuild cascade level by + # level, and the nifc backend can trust "semmed NIF older than the cnif + # artifact" as an honest per-module unchanged stamp. + writeFile(dest, d, OnlyIfChanged) # --------------------------- Loader (lazy!) ----------------------------------------------- diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 49ade45a08..9a90a72858 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -3492,6 +3492,8 @@ proc genConstDefinition(q: BModule; p: BProc; sym: PSym) = data.addVarWithInitializer(Local, actualConstName, typ = td): genBracedInit(q.initProc, sym.astdef, isConst = true, sym.typ, data) q.s[cfsData].add(extract(data)) + if q.config.cmd == cmdNifC: + q.icDataDefs.add stripCnifMarks(actualConstName) if q.hcrOn: # generate the global pointer with the real name q.s[cfsVars].addVar(kind = Global, name = sym.loc.snippet, diff --git a/compiler/ccgtypes.nim b/compiler/ccgtypes.nim index a9be5cd15a..075d67ba05 100644 --- a/compiler/ccgtypes.nim +++ b/compiler/ccgtypes.nim @@ -1398,6 +1398,8 @@ proc genTypeInfoAuxBase(m: BModule; typ, origType: PType; else: m.s[cfsStrData].addDeclWithVisibility(Private): m.s[cfsStrData].addVar(kind = Local, name = name, typ = "TNimType") + if m.config.cmd == cmdNifC: + m.icDataDefs.add name proc genTypeInfoAux(m: BModule; typ, origType: PType, name: Rope; info: TLineInfo) = @@ -1825,6 +1827,8 @@ proc genTypeInfoV2OldImpl(m: BModule; t, origType: PType, name: Rope; info: TLin cgsym(m, "TNimTypeV2") m.s[cfsStrData].addDeclWithVisibility(Private): m.s[cfsStrData].addVar(kind = Local, name = name, typ = 
"TNimTypeV2") + if m.config.cmd == cmdNifC: + m.icDataDefs.add name var flags = 0 if not canFormAcycle(m.g.graph, t): flags = flags or 1 @@ -1889,6 +1893,8 @@ proc genTypeInfoV2Impl(m: BModule; t, origType: PType, name: Rope; info: TLineIn cgsym(m, "TNimTypeV2") m.s[cfsStrData].addDeclWithVisibility(Private): m.s[cfsStrData].addVar(kind = Local, name = name, typ = "TNimTypeV2") + if m.config.cmd == cmdNifC: + m.icDataDefs.add name var flags = 0 if not canFormAcycle(m.g.graph, t): flags = flags or 1 @@ -1988,6 +1994,12 @@ proc genTypeInfoV2(m: BModule; t: PType; info: TLineInfo): Rope = m.typeInfoMarkerV2[sig] = result let owner = t.skipTypes(typedescPtrs).itemId.module + if m.config.cmd == cmdNifC and result in m.g.graph.icCachedDataDefs: + # already defined inside a reused TU from the previous run + cgsym(m, "TNimTypeV2") + declareNimType(m, "TNimTypeV2", result, owner) + m.g.typeInfoMarkerV2[sig] = (str: result, owner: owner) + return prefixTI(result) if owner != m.module.position and myModuleOpenForCodegen(m, FileIndex owner): # make sure the type info is created in the owner module discard genTypeInfoV2(m.g.mods[owner], origType, info) @@ -2065,6 +2077,15 @@ proc genTypeInfoV1(m: BModule; t: PType; info: TLineInfo): Rope = result = "NTI$1$2_" % [rope(typeToC(t)), rope($sig)] m.typeInfoMarker[sig] = result + if m.config.cmd == cmdNifC and result in m.g.graph.icCachedDataDefs: + # already defined inside a reused TU from the previous run + cgsym(m, "TNimType") + cgsym(m, "TNimNode") + declareNimType(m, "TNimType", result, t.skipTypes(typedescPtrs).itemId.module) + m.g.typeInfoMarker[sig] = (str: result, + owner: t.skipTypes(typedescPtrs).itemId.module) + return prefixTI(result) + let old = m.g.graph.emittedTypeInfo.getOrDefault($result) if old != FileIndex(0): cgsym(m, "TNimType") diff --git a/compiler/cgen.nim b/compiler/cgen.nim index 3caf252aa3..d18a8c5399 100644 --- a/compiler/cgen.nim +++ b/compiler/cgen.nim @@ -19,7 +19,7 @@ import mangleutils, cbuilderbase, 
modulegraphs from expanddefaults import caseObjDefaultBranch -from ast2nif import globalName +from ast2nif import globalName, toNifFilename import cnif import pipelineutils @@ -63,6 +63,7 @@ proc addForwardedProc(m: BModule, prc: PSym) = m.g.forwardedProcs.add(prc) proc newModule*(g: BModuleList; module: PSym; conf: ConfigRef; idgen: IdGenerator): BModule +proc getCFile*(m: BModule): AbsoluteFile proc findPendingModule(m: BModule, s: PSym): BModule = # TODO fixme @@ -82,6 +83,33 @@ proc findPendingModule(m: BModule, s: PSym): BModule = var ms = getModule(s) result = m.g.mods[ms.position] +proc isReusedTU(m: BModule): bool = + ## Whether this module's cached translation unit is reused — either as a + ## loaded backend module or purely at the file level (a module the + ## backend never loaded whose BModule demand-driven codegen created). + m.config.cmd == cmdNifC and + (m.module.position in m.g.graph.icReusedModules or + (m.g.graph.icFileReusedCnames.len > 0 and + getCFile(m).string in m.g.graph.icFileReusedCnames)) + +proc redirectToLiveModule(m: BModule, q: BModule): BModule = + ## A module whose cached translation unit is reused never generates code, + ## so a definition that `findPendingModule` routes into it must be emitted + ## elsewhere: into the demanding module, or — when the demander is itself + ## reused (demands raised while wiring up a reused module's init call) — + ## into the main module, which is always regenerated. 
+ result = q + if q != nil and m.config.cmd == cmdNifC and isReusedTU(q): + if not isReusedTU(m): + result = m + else: + result = nil + for cand in m.g.mods: + if cand != nil and sfMainModule in cand.module.flags: + result = cand + break + if result == nil: result = m + proc initLoc(k: TLocKind, lode: PNode, s: TStorageLoc, flags: TLocFlags = {}): TLoc = result = TLoc(k: k, storage: s, lode: lode, snippet: "", flags: flags) @@ -126,8 +154,6 @@ proc useHeader(m: BModule, sym: PSym) = proc cgsym(m: BModule, name: string) proc cgsymValue(m: BModule, name: string): Rope -proc getCFile(m: BModule): AbsoluteFile - proc getModuleDllPath(m: BModule): Rope = let (dir, name, ext) = splitFile(getCFile(m)) let filename = strutils.`%`(platform.OS[m.g.config.target.targetOS].dllFrmt, [name & ext]) @@ -758,6 +784,8 @@ proc assignGlobalVar(p: BProc, n: PNode; value: Rope) = useHeader(p.module, s) if lfNoDecl in s.loc.flags: return if not containsOrIncl(p.module.declaredThings, s.id): + if p.config.cmd == cmdNifC and sfImportc notin s.flags: + p.module.icDataDefs.add stripCnifMarks(s.loc.snippet) if sfThread in s.flags: declareThreadVar(p.module, s, sfImportc in s.flags) if value != "": @@ -1330,14 +1358,26 @@ proc genProcBody(p: BProc; procBody: PNode) = proc genProcLvl3*(m: BModule, prc: PSym) = if m.config.cmd == cmdNifC: fillBackendName(m, prc) - if isSharedInstanceCName(m, prc): - # one definition program-wide: the first claimant's TU embeds it, - # everyone else declares it + # inline procs are emitted into every using TU; they are never shared + # across translation units, so cached/cross-TU dedup must not touch them let key = stripCnifMarks(prc.loc.snippet) - if m.g.graph.icSharedDefOwner.hasKeyOrPut(key, prc.itemId) and - m.g.graph.icSharedDefOwner[key] != prc.itemId: - genProcPrototype(m, prc) - return + if prc.typ == nil or prc.typ.callConv != ccInline: + if key in m.g.graph.icCachedCDefs: + # already defined inside a reused TU from the previous run + genProcPrototype(m, 
prc) + return + if isSharedInstanceCName(m, prc) or + prc.itemId.module != m.module.position: + # one definition program-wide: shared instances by design; otherwise a + # definition redirected away from a reused TU — the first claimant's + # TU embeds it, everyone else declares it. The claim records the TU + # as well: with redirects the same symbol can be demanded into + # several TUs. + let claim = (sym: prc.itemId, tu: m.module.position) + if m.g.graph.icSharedDefOwner.hasKeyOrPut(key, claim) and + m.g.graph.icSharedDefOwner[key] != claim: + genProcPrototype(m, prc) + return var p = newProc(prc, m) var header = newBuilder("") let isCppMember = m.config.backend == backendCpp and sfCppMember * prc.flags != {} @@ -1589,7 +1629,7 @@ proc genProcLvl2(m: BModule, prc: PSym) = genProcLvl3(m, prcCopy) else: let m2 = if m.config.symbolFiles != disabledSf: m - else: findPendingModule(m, prc) + else: redirectToLiveModule(m, findPendingModule(m, prc)) fillProcLoc(m2, prc.ast[namePos]) #elif {sfExportc, sfImportc} * prc.flags == {}: # # reset name to restore consistency in case of hashing collisions: @@ -1599,7 +1639,7 @@ proc genProcLvl2(m: BModule, prc: PSym) = genProcPrototype(m, prc) genProcLvl3(m, prc) elif sfImportc notin prc.flags: - var q = findPendingModule(m, prc) + var q = redirectToLiveModule(m, findPendingModule(m, prc)) fillProcLoc(q, prc.ast[namePos]) # generate a getProc call to initialize the pointer for this # externally-to-the-current-module defined proc, also important @@ -1629,7 +1669,22 @@ proc requestConstImpl(p: BProc, sym: PSym) = let m = p.module # declare implementation: var q = findPendingModule(m, sym) - if q != nil and not containsOrIncl(q.declaredThings, sym.id): + var defineIt = true + if m.config.cmd == cmdNifC: + if stripCnifMarks(sym.loc.snippet) in m.g.graph.icCachedDataDefs: + # already defined inside a reused TU from the previous run + defineIt = false + else: + let q2 = redirectToLiveModule(m, q) + if q2 != q: + # redirected definition: 
one TU program-wide embeds it + q = q2 + let key = stripCnifMarks(sym.loc.snippet) + let claim = (sym: sym.itemId, tu: q2.module.position) + if m.g.graph.icSharedDefOwner.hasKeyOrPut(key, claim) and + m.g.graph.icSharedDefOwner[key] != claim: + defineIt = false + if defineIt and q != nil and not containsOrIncl(q.declaredThings, sym.id): assert q.initProc.module == q genConstDefinition(q, p, sym) # declare header: @@ -1659,6 +1714,12 @@ proc genProc(m: BModule, prc: PSym) = if not containsOrIncl(m.g.generatedHeader.declaredThings, prc.id): genProcLvl3(m.g.generatedHeader, prc) +proc requestProcDef*(m: BModule, prc: PSym) = + ## Public demand entry: request `prc`'s definition; it is routed to the + ## module that owns it and generated once, exactly as if some generated + ## code had referenced it. + genProc(m, prc) + proc genVarPrototype(m: BModule, n: PNode) = #assert(sfGlobal in sym.flags) let sym = n.sym @@ -2115,6 +2176,59 @@ proc registerModuleToMain(g: BModuleList; m: BModule) = else: g.otherModsInit.addCallStmt(init) +proc registerReusedInit*(g: BModuleList; moduleBase: string; + initRequired, datInitRequired: bool) = + ## init/datInit registration for a translation unit that is reused purely + ## from its cached files (the module is not even loaded); the names are + ## reconstructed from the module's mangled base name recorded in the + ## artifact's meta head. 
+ if datInitRequired: + let datInit = moduleBase & "DatInit000" + g.mainModProcs.addDeclWithVisibility(Private): + g.mainModProcs.addProcHeader(ccNimCall, datInit, CVoid, cProcParams()) + g.mainModProcs.finishProcHeaderAsProto() + g.mainDatInit.addCallStmt(datInit) + if initRequired: + let init = moduleBase & "Init000" + g.mainModProcs.addDeclWithVisibility(Private): + g.mainModProcs.addProcHeader(ccNimCall, init, CVoid, cProcParams()) + g.mainModProcs.finishProcHeaderAsProto() + g.otherModsInit.addCallStmt(init) + +proc registerReusedModuleToMain(g: BModuleList; m: BModule; + initRequired, datInitRequired: bool) = + ## `registerModuleToMain` for a module whose cached translation unit is + ## reused: the init/datInit presence comes from the artifact's meta head + ## instead of the (never generated) sections. Mirrors the non-hcr path of + ## `registerModuleToMain` — reuse is disabled when hcr is on. + let + init = m.getInitName + datInit = m.getDatInitName + + if datInitRequired: + g.mainModProcs.addDeclWithVisibility(Private): + g.mainModProcs.addProcHeader(ccNimCall, datInit, CVoid, cProcParams()) + g.mainModProcs.finishProcHeaderAsProto() + g.mainDatInit.addCallStmt(datInit) + + if sfSystemModule in m.module.flags: + if emulatedThreadVars(m.config) and m.config.target.targetOS != osStandalone: + g.mainDatInit.addCallStmt(cgsymValue(m, "initThreadVarsEmulation")) + if m.config.target.targetOS != osStandalone and m.config.selectedGC notin {gcNone, gcArc, gcAtomicArc, gcOrc, gcYrc}: + g.mainDatInit.addCallStmt(cgsymValue(m, "initStackBottomWith"), + cCast(CPointer, cAddr("inner"))) + + if initRequired: + g.mainModProcs.addDeclWithVisibility(Private): + g.mainModProcs.addProcHeader(ccNimCall, init, CVoid, cProcParams()) + g.mainModProcs.finishProcHeaderAsProto() + if sfMainModule in m.module.flags: + g.mainModInit.addCallStmt(init) + elif sfSystemModule in m.module.flags: + g.mainDatInit.addCallStmt(init) # systemInit right after systemDatInit + else: + 
g.otherModsInit.addCallStmt(init) + proc genDatInitCode(m: BModule) = ## this function is called in cgenWriteModules after all modules are closed, ## it means raising dependency on the symbols is too late as it will not propagate @@ -2362,6 +2476,11 @@ proc genModule(m: BModule, cfile: Cfile): Rope = moduleIsEmpty = false res.add(extract(m.s[i])) + # what `registerModuleToMain` will announce for this module; recorded in + # the artifact's meta head so a later run can reuse the TU + let initRequired = m.s[cfsInitProc].buf.len > 0 + let datInitRequired = m.s[cfsDatInitProc].buf.len > 0 + if m.config.cmd == cmdNifC: # close the definitions section: the init procs that follow belong to # the artifact's top level (always-run code, hence liveness roots) @@ -2391,7 +2510,10 @@ proc genModule(m: BModule, cfile: Cfile): Rope = if m.config.cmd == cmdNifC and result.len > 0: let artifact = cfile.cname.string & ".nif" - writeCnifArtifact(result, artifact) + writeCnifArtifact(result, artifact, initRequired, datInitRequired, + m.icDataDefs, + semmedNif = toNifFilename(m.config, FileIndex m.module.position), + moduleBase = getSomeNameForModule(m)) m.g.graph.icCnifFiles.add artifact # NB: under cmdNifC the returned text still carries the cnif marks; the # caller renders it (dropping dead definitions) or strips it. 
@@ -2484,7 +2606,7 @@ proc writeHeader(m: BModule) = if not writeRope(headerText, m.filename): rawMessage(m.config, errCannotOpenFile, m.filename.string) -proc getCFile(m: BModule): AbsoluteFile = +proc getCFile*(m: BModule): AbsoluteFile = let ext = if m.compileToCpp: ".nim.cpp" elif m.config.backend == backendObjc or sfCompileToObjc in m.module.flags: ".nim.m" @@ -2717,14 +2839,33 @@ proc genForwardedProcs(g: BModuleList) = # a second pass here # Note: ``genProcLvl2`` may add to ``forwardedProcs`` while g.forwardedProcs.len > 0: - let - prc = g.forwardedProcs.pop() - m = g.mods[prc.itemId.module] + let prc = g.forwardedProcs.pop() + var m = g.mods[prc.itemId.module] + if isReusedTU(m): + # the home TU is reused; emit through the main module instead + for cand in g.mods: + if cand != nil and sfMainModule in cand.module.flags: + m = cand + break if sfForward in prc.flags: internalError(m.config, prc.info, "still forwarded: " & prc.name.s) genProcLvl2(m, prc) +proc reuseCachedModule(g: BModuleList; m: BModule) = + ## The module's cached `.c`/`.o`/artifact are reused: register the cached + ## object file for the link, keep the cached artifact in the liveness + ## inputs and replay the module's init registration from the artifact's + ## meta head. 
+ let cfile = getCFile(m) + var cf = Cfile(nimname: m.module.name.s, cname: cfile, + obj: completeCfilePath(m.config, toObjFile(m.config, cfile)), + flags: {CfileFlag.Cached}) + addFileToCompile(m.config, cf) + g.graph.icCnifFiles.add cfile.string & ".nif" + let meta = g.graph.icReusedMeta.getOrDefault(m.module.position) + registerReusedModuleToMain(g, m, meta.initRequired, meta.datInitRequired) + proc cgenWriteModules*(backend: RootRef, config: ConfigRef) = let g = BModuleList(backend) g.config = config @@ -2734,6 +2875,18 @@ proc cgenWriteModules*(backend: RootRef, config: ConfigRef) = # order anyway) genForwardedProcs(g) + # translation units reused purely from cached files (modules the backend + # never loaded): link their objects, keep their artifacts in the liveness + # inputs, replay their init registration. NB: these TUs rarely have init + # code (they were demand-only in the producing run as well). + for fr in g.graph.icFileReused: + var cf = Cfile(nimname: splitFile(fr.cname).name, cname: AbsoluteFile fr.cname, + obj: completeCfilePath(config, toObjFile(config, AbsoluteFile fr.cname)), + flags: {CfileFlag.Cached}) + addFileToCompile(config, cf) + g.graph.icCnifFiles.add fr.cname & ".nif" + registerReusedInit(g, fr.moduleBase, fr.initRequired, fr.datInitRequired) + if config.cmd == cmdNifC and not isDefined(config, "icNoCDce"): # Two-phase write: produce every module's marked text and artifact # first, then compute global liveness over the artifacts and render @@ -2744,6 +2897,11 @@ proc cgenWriteModules*(backend: RootRef, config: ConfigRef) = var cfs: seq[Cfile] = @[] var codes: seq[string] = @[] for m in cgenModules(g): + if m.module.position in g.graph.icReusedModules: + reuseCachedModule(g, m) + continue + if isReusedTU(m): + continue # file-level reused: registered before this loop already let cfile = getCFile(m) var cf = Cfile(nimname: m.module.name.s, cname: cfile, obj: completeCfilePath(m.config, toObjFile(m.config, cfile)), flags: {}) @@ -2763,6 
+2921,11 @@ proc cgenWriteModules*(backend: RootRef, config: ConfigRef) = g.graph.icCDropped = dropped else: for m in cgenModules(g): - m.writeModule() + if config.cmd == cmdNifC and m.module.position in g.graph.icReusedModules: + reuseCachedModule(g, m) + elif isReusedTU(m): + discard # file-level reused: registered before this loop already + else: + m.writeModule() writeMapping(config, g.mapping) if g.generatedHeader != nil: writeHeader(g.generatedHeader) diff --git a/compiler/cgendata.nim b/compiler/cgendata.nim index fb8f2086cb..5aea698962 100644 --- a/compiler/cgendata.nim +++ b/compiler/cgendata.nim @@ -176,6 +176,9 @@ type extensionLoaders*: array['0'..'9', Builder] # special procs for the # OpenGL wrapper sigConflicts*: CountTable[SigHash] + icDataDefs*: seq[string] # C names of data definitions (consts, globals, + # RTTI) this TU embeds; recorded in the cnif + # artifact so a later run can reuse the TU g*: BModuleList template config*(m: BModule): ConfigRef = m.g.config diff --git a/compiler/cnif.nim b/compiler/cnif.nim index 48061154cc..eaf60d2045 100644 --- a/compiler/cnif.nim +++ b/compiler/cnif.nim @@ -75,10 +75,31 @@ proc cnifDefDirective*(name, flags: string): string = proc cnifEndDefs*(): string = CnifDefStart & CnifDefEnd -proc writeCnifArtifact*(code: string; outfile: string) = +proc writeCnifArtifact*(code: string; outfile: string; + initRequired = false; datInitRequired = false; + dataDefs: openArray[string] = []; + semmedNif = ""; moduleBase = "") = ## Splits the marked module text into the `.c.nif` artifact. 
+ ## The artifact starts with a `(meta <flags> "semmedNif" "moduleBase")` + ## head — whether the module has an init/datInit proc ('i'/'d'), which + ## semmed NIF it was generated from and the module's mangled base name + ## (what `registerModuleToMain` and the reuse decision need when the TU + ## is reused in a later run, possibly without the module ever being + ## loaded again) — and a `(cdata <name>*)` group naming the data + ## definitions (consts, globals, RTTI) the TU embeds. var b = nifbuilder.open(outfile) b.withTree "stmts": + b.withTree "meta": + var metaFlags = "" + if initRequired: metaFlags.add 'i' + if datInitRequired: metaFlags.add 'd' + if metaFlags.len > 0: b.addIdent metaFlags + else: b.addEmpty + b.addStrLit semmedNif + b.addStrLit moduleBase + b.withTree "cdata": + for d in dataDefs: + b.addSymbolDef d var raw = "" var inDef = false template flushRaw() = @@ -161,6 +182,72 @@ proc renderMarkedC*(code: string; live: HashSet[string]; dropped: var int): stri proc symOrIdentName(c: Cursor): string {.inline.} = if c.kind == Ident: strVal(c) else: symName(c) +type + CnifHeads* = object + ## The cheap-to-parse part of an artifact that a later run needs in + ## order to reuse the TU without regenerating it. + valid*: bool ## file parsed and carries the meta head + initRequired*: bool + datInitRequired*: bool + semmedNif*: string ## the semmed NIF this TU was generated from + moduleBase*: string ## the module's mangled base name + cdefs*: seq[string] ## C names of the proc definitions + cdata*: seq[string] ## C names of the data definitions + +proc readCnifHeads*(f: string): CnifHeads = + ## Reads `(meta ...)`, `(cdata ...)` and the `(cdef ...)` head names from + ## an artifact. Artifacts written before the meta head report `valid=false`.
+ result = CnifHeads() + if not fileExists(f): return + var pool = newPool() + var tags = newTagPool() + let stmtsTag = tags.registerTag("stmts") + let cdefTag = tags.registerTag("cdef") + let cdataTag = tags.registerTag("cdata") + let metaTag = tags.registerTag("meta") + var buf = parseFromFile(f, 1000, pool, tags) + var c = beginRead(buf) + if c.kind != TagLit or c.cursorTagId != stmtsTag: + endRead(c) + return + c.loopInto: + if c.kind == TagLit: + if c.cursorTagId == metaTag: + result.valid = true + var strIdx = 0 + c.loopInto: + if c.kind == Ident: + for ch in strVal(c): + if ch == 'i': result.initRequired = true + elif ch == 'd': result.datInitRequired = true + inc c + elif c.kind == StrLit: + if strIdx == 0: result.semmedNif = strVal(c) + elif strIdx == 1: result.moduleBase = strVal(c) + inc strIdx + inc c + else: + skip c + elif c.cursorTagId == cdataTag: + c.loopInto: + if c.kind == SymbolDef: + result.cdata.add symName(c) + inc c + else: + skip c + elif c.cursorTagId == cdefTag: + c.loopInto: + if c.kind == SymbolDef: + result.cdefs.add symName(c) + inc c + else: + skip c + else: + skip c + else: + skip c + endRead(c) + type CnifLiveness* = object defs*: int ## proc definitions emitted across all modules @@ -185,6 +272,8 @@ proc computeLiveFromCArtifacts*(files: openArray[string]): CnifLiveness = var tags = newTagPool() let stmtsTag = tags.registerTag("stmts") let cdefTag = tags.registerTag("cdef") + let cdataTag = tags.registerTag("cdata") + let metaTag = tags.registerTag("meta") var uses = initTable[string, HashSet[string]]() var roots = initHashSet[string]() var defs = initHashSet[string]() @@ -204,7 +293,10 @@ proc computeLiveFromCArtifacts*(files: openArray[string]): CnifLiveness = roots.incl symOrIdentName(c) inc c of TagLit: - if c.cursorTagId == cdefTag: + if c.cursorTagId == metaTag or c.cursorTagId == cdataTag: + # bookkeeping for TU reuse, irrelevant for liveness + skip c + elif c.cursorTagId == cdefTag: var owner = "" var flagsSeen = false 
c.loopInto: diff --git a/compiler/dce.nim b/compiler/dce.nim index 5617e1ca58..499f681cbf 100644 --- a/compiler/dce.nim +++ b/compiler/dce.nim @@ -205,12 +205,16 @@ proc markLive(ctx: DceContext): HashSet[SymId] = work.add dep proc computeLiveSymbols*(conf: ConfigRef; seedFiles: openArray[string]; - live: var HashSet[string]; stats: var DceStats): bool = + live: var HashSet[string]; stats: var DceStats; + nifDeps: var Table[string, seq[string]]): bool = ## Global liveness over a program's NIF modules: the seeds plus the ## transitive closure of their `(import ...)` entries. On success fills ## `live` with the NIF names (`name.disamb.modsuffix`) of every reachable ## symbol and returns true. Returns false when any module could not be ## analyzed — the caller must then treat everything as live. + ## `nifDeps` receives the import graph over NIF file paths — the full + ## closure including the modules the backend's own module list omits; + ## the artifact-reuse decision needs it for transitive invalidation. 
var ctx = DceContext(pool: newPool(), tags: newTagPool()) ctx.stmtsTag = ctx.tags.registerTag("stmts") ctx.sdefTag = ctx.tags.registerTag("sd") @@ -236,8 +240,12 @@ proc computeLiveSymbols*(conf: ConfigRef; seedFiles: openArray[string]; analyzeNifFile(ctx, f, imports) if ctx.broken: return false if conf != nil: + var depFiles = newSeqOfCap[string](imports.len) for suffix in imports: - queue.add toGeneratedFile(conf, AbsoluteFile(suffix), ".nif").string + let depFile = toGeneratedFile(conf, AbsoluteFile(suffix), ".nif").string + depFiles.add depFile + queue.add depFile + nifDeps[f] = depFiles let liveIds = markLive(ctx) live = initHashSet[string](liveIds.len) for s in liveIds: diff --git a/compiler/modulegraphs.nim b/compiler/modulegraphs.nim index 679a59f930..2a36ba072b 100644 --- a/compiler/modulegraphs.nim +++ b/compiler/modulegraphs.nim @@ -80,8 +80,23 @@ type icCDefs*, icCLiveDefs*, icCDropped*: int # render-time DCE stats icSharedSigs*: Table[string, string] # shared instance C name -> signature # (collision guard for the 30-bit hash) - icSharedDefOwner*: Table[string, ItemId] # shared instance C name -> - # the symbol whose TU embeds the definition + icSharedDefOwner*: Table[string, tuple[sym: ItemId, tu: int]] + # shared instance C name -> the symbol and + # the TU (module position) embedding the + # single program-wide definition + icReusedModules*: IntSet # module positions whose cached `.c`/`.o` + # is reused: codegen is skipped for them + icCachedCDefs*: HashSet[string] # C names of proc definitions inside + # reused TUs (from their artifacts' cdef heads) + icCachedDataDefs*: HashSet[string] # C names of data definitions (consts, + # globals, RTTI) inside reused TUs + icReusedMeta*: Table[int, tuple[initRequired, datInitRequired: bool]] + icFileReused*: seq[tuple[cname, moduleBase: string; + initRequired, datInitRequired: bool]] + # TUs reused purely from cached files: modules the backend never + # loads (reached only through system or demand-driven 
codegen) + icFileReusedCnames*: HashSet[string] # their .c paths, so demand-created + # BModules for them never write anything packageSyms*: TStrTable deps*: IntSet # the dependency graph or potentially its transitive closure. @@ -670,7 +685,10 @@ proc getModule*(g: ModuleGraph; fileIdx: FileIndex): PSym = result = nil proc moduleOpenForCodegen*(g: ModuleGraph; m: FileIndex): bool {.inline.} = - result = true + ## A module whose cached translation unit is reused does not accept new + ## definitions: anything that would be emitted into it must be emitted + ## into the demanding module instead. + result = m.int notin g.icReusedModules proc dependsOn(a, b: int): int {.inline.} = (a shl 15) + b diff --git a/compiler/nifbackend.nim b/compiler/nifbackend.nim index 92639a25b9..68805730c5 100644 --- a/compiler/nifbackend.nim +++ b/compiler/nifbackend.nim @@ -17,7 +17,7 @@ ## 1. Compile modules to NIF: nim m mymodule.nim ## 2. Generate C from NIF: nim nifc myproject.nim -import std/[intsets, tables, sets, os, algorithm, syncio] +import std/[intsets, tables, sets, os, algorithm, syncio, times, strutils] when defined(nimPreviewSlimSystem): import std/assertions @@ -75,6 +75,129 @@ proc setupNifBackendModule(g: ModuleGraph; module: PSym): BModule = g.backend = cgendata.newModuleList(g) result = cgen.newModule(BModuleList(g.backend), module, g.config, idGeneratorForBackend(module)) +proc computeModuleReuse(g: ModuleGraph; modules: seq[PrecompiledModule]; + precompSys: PrecompiledModule; + nifDeps: Table[string, seq[string]]) = + ## Decides which modules' cached translation units can be reused: codegen + ## is skipped for them and their `.c`/`.o`/artifact files are used as is. 
+ ## + ## A module is reusable when the newest semmed NIF in its transitive + ## import closure is older than its `.c.nif` artifact — so neither the + ## module itself nor anything that can influence its generated C (type + ## layouts of dependencies in particular) has changed — and the cached + ## artifact, `.c` and `.o` files are all present. The main module is + ## always regenerated: it carries NimMain's init-call list and the method + ## dispatchers, which depend on the whole program. + ## + ## A regenerated module may still demand entities that live in a reused + ## TU: definitions already inside the cached TU become prototypes (see + ## `genProcLvl3`/`genTypeInfo*` and the artifact's cdef/cdata heads), + ## fresh demands are redirected into the demanding TU + ## (`redirectToLiveModule`). + if not g.icDceEnabled or isDefined(g.config, "icNoReuse") or + g.config.hcrOn or g.config.symbolFiles != disabledSf: + return + let icDebug = isDefined(g.config, "icTimings") + # newest mtime in every NIF file's transitive import closure, via + # fixpoint iteration (the import graph can contain cycles). The implicit + # system import is not part of the NIF import lists, so system counts as + # a dependency of every module. 
+ let systemNif = toNifFilename(g.config, g.config.m.systemFileIdx) + var maxTime = initTable[string, Time]() + for f in nifDeps.keys: + maxTime[f] = getLastModificationTime(f) + var changed = true + while changed: + changed = false + for f, deps in nifDeps: + var newest = maxTime[f] + if systemNif in maxTime and maxTime[systemNif] > newest and f != systemNif: + newest = maxTime[systemNif] + for d in deps: + if d in maxTime and maxTime[d] > newest: newest = maxTime[d] + if newest > maxTime[f]: + maxTime[f] = newest + changed = true + + let bl = BModuleList(g.backend) + var handledArtifacts = initHashSet[string]() + for i in 0..modules.len: + let pm = if i < modules.len: modules[i] else: precompSys + if pm.module == nil: continue + let pos = pm.module.position + let bmod = bl.mods[pos] + if bmod == nil: continue + let artifact = getCFile(bmod).string & ".nif" + # claimed by a loaded module — regenerated or reused, but never + # eligible for the file-level reuse below + handledArtifacts.incl artifact + if sfMainModule in pm.module.flags: continue + let nifFile = toNifFilename(g.config, FileIndex pos) + template reject(reason: string) = + if icDebug: + stderr.writeLine "[icReuse] regen " & cachedModuleSuffix(g.config, FileIndex pos) & + ": " & reason + continue + if nifFile notin maxTime: reject("not in dce closure: " & nifFile) + let cfile = getCFile(bmod) + let obj = completeCfilePath(g.config, toObjFile(g.config, cfile)) + if not fileExists(artifact): reject("no artifact " & artifact) + if not fileExists(cfile.string): reject("no C file") + if not fileExists(obj.string): reject("no object file") + if maxTime[nifFile] > getLastModificationTime(artifact): + reject("dependency closure newer than artifact") + let heads = readCnifHeads(artifact) + if not heads.valid: reject("artifact has no meta head") + g.icReusedModules.incl pos + g.icReusedMeta[pos] = (heads.initRequired, heads.datInitRequired) + for d in heads.cdefs: g.icCachedCDefs.incl d + for d in heads.cdata: 
g.icCachedDataDefs.incl d + + # Translation units of modules the backend module list does not even + # contain (reached only through system's imports or demand-driven + # codegen): their artifacts are self-describing, so they can be reused + # purely at the file level. When one of them is stale, its import + # closure is stale too, so every TU that could reference it regenerates + # and demand recreates the definitions. + for artifact in walkFiles(getNimcacheDir(g.config).string / "*.c.nif"): + if artifact in handledArtifacts: continue + let heads = readCnifHeads(artifact) + if not heads.valid or heads.semmedNif.len == 0 or heads.moduleBase.len == 0: + continue + if heads.semmedNif notin maxTime: + continue # not part of this program (e.g. a removed module) + let cname = artifact[0..^5] # strip ".nif" + let obj = completeCfilePath(g.config, toObjFile(g.config, AbsoluteFile cname)) + if not (fileExists(cname) and fileExists(obj.string)): continue + if maxTime[heads.semmedNif] > getLastModificationTime(artifact): + continue + g.icFileReused.add (cname, heads.moduleBase, + heads.initRequired, heads.datInitRequired) + g.icFileReusedCnames.incl cname + for d in heads.cdefs: g.icCachedCDefs.incl d + for d in heads.cdata: g.icCachedDataDefs.incl d + if icDebug and g.icFileReused.len > 0: + stderr.writeLine "[icReuse] file-level reused TUs: " & $g.icFileReused.len + +proc isMetaIter(t: PType, closure: RootRef): bool = + # openArray/varargs hooks are sem bookkeeping: no real flow ever demands + # them, and generating one pollutes the TU's type cache with a struct + # descriptor for what must remain a (ptr, len) parameter expansion + t.kind in tyMetaTypes + {tyTyped, tyUntyped, tyNone, tyVarargs, tyOpenArray} + +proc eagerHookCandidate(sym: PSym): bool = + ## Announced hooks that can actually be code-generated: generic hook + ## announcements and meta-typed ones (`varargs[typed]` etc.) are replay + ## information for sem, not code. 
+ let typ = sym.typ + if typ == nil or containsGenericType(typ): return false + if typ.n == nil: return false + for i in 1.. 0: stderr.writeLine "[icDceC] cdefs: " & $g.icCDefs & " live: " & $g.icCLiveDefs & @@ -191,5 +358,6 @@ proc generateCode*(g: ModuleGraph; mainFileIdx: FileIndex) = # Run C compiler if g.config.cmd != cmdTcc: extccomp.callCCompiler(g.config) + phaseDone "cc+link" if not g.config.hcrOn: extccomp.writeJsonBuildInstructions(g.config, g.cachedFiles) diff --git a/compiler/pipelineutils.nim b/compiler/pipelineutils.nim index 75ba33f14d..1f2bb0a8dd 100644 --- a/compiler/pipelineutils.nim +++ b/compiler/pipelineutils.nim @@ -1,3 +1,4 @@ +import std/intsets import ast, options, lineinfos, pathutils, msgs, modulegraphs, packages proc skipCodegen*(config: ConfigRef; n: PNode): bool {.inline.} = @@ -22,5 +23,6 @@ proc prepareConfigNotes*(graph: ModuleGraph; module: PSym) = graph.config.notes = graph.config.foreignPackageNotes proc moduleHasChanged*(graph: ModuleGraph; module: PSym): bool {.inline.} = - result = true - #module.id >= 0 or isDefined(graph.config, "nimBackendAssumesChange") + # under `nim nifc` a module whose cached translation unit is reused + # does not generate code; the set is empty for every other command + result = module.position notin graph.icReusedModules