Files
Nim/compiler/mangleutils.nim
Araq b4c71af517 IC: module suffix is now the trailing token of mangled C names
Reorder mangleProcNameExt and makeUnique so the module suffix comes LAST:
name_u<disamb>__<suffix> (was name__<suffix>_u<disamb>). The suffix is now a
strippable trailing token, so content-addressed cross-module merging (the
per-module backend's instance/hook dedup) can recover a mint-site-independent
name by chopping everything from the final "__" -- no reference rewriting.

Also drops the main-module special case in mangleProcNameExt: it omitted the
suffix because the main module's symbols key on its NIF-suffix file index. But
the backend already aliases that suffix to the main's source index
(nifbackend.loadModuleDependencies), so graph.ifaces[s.itemId.module] is
populated for the main module too -- the guard was redundant. Main-module
procs now mangle uniformly (e.g. mainProc_u0__<mainname>).

icFormatVersion 3 -> 4: cached .c.nif artifacts hold the old name scheme and
must be wiped.

Validated: koch boot (non-IC self-host) reaches fixed point; koch ic thallo
tconverter timp tmiscs tparseutils all green; a 3-module diamond IC build
runs correctly.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-13 22:09:16 +02:00

82 lines
2.9 KiB
Nim

import std/strutils
import ast, modulegraphs
proc mangle*(name: string): string =
  ## Rewrites `name` so that the result consists only of ASCII letters,
  ## digits and underscores.
  ##
  ## * A leading digit is prefixed with `X`.
  ## * Operator characters are spelled out (`$` -> `dollar`, `=` -> `eq`,
  ##   ...); every other non-alphanumeric character becomes `X` followed by
  ##   its two-digit uppercase hex code.
  ## * An underscore that is neither the first nor the last character and is
  ##   directly followed by a digit is dropped.
  ## * If at least one character was spelled out or hex-encoded, a single
  ##   trailing `_` is appended.
  ##
  ## An empty `name` yields an empty string.
  result = newStringOfCap(name.len)
  var start = 0
  # Check the length first: indexing `name[0]` on an empty string would raise
  # an IndexDefect.
  if name.len > 0 and name[0] in Digits:
    result.add 'X'
    result.add name[0]
    start = 1
  var requiresUnderscore = false
  template special(x) =
    # Append the spelled-out replacement and remember that the result needs
    # the terminating underscore.
    result.add x
    requiresUnderscore = true
  for i in start..<name.len:
    let c = name[i]
    case c
    of 'a'..'z', '0'..'9', 'A'..'Z':
      result.add(c)
    of '_':
      # we generate names like 'foo_9' for scope disambiguations and so
      # disallow this here:
      if i > 0 and i < name.len-1 and name[i+1] in Digits:
        discard
      else:
        result.add(c)
    of '$': special "dollar"
    of '%': special "percent"
    of '&': special "amp"
    of '^': special "roof"
    of '!': special "emark"
    of '?': special "qmark"
    of '*': special "star"
    of '+': special "plus"
    of '-': special "minus"
    of '/': special "slash"
    of '\\': special "backslash"
    of '=': special "eq"
    of '<': special "lt"
    of '>': special "gt"
    of '~': special "tilde"
    of ':': special "colon"
    of '.': special "dot"
    of '@': special "at"
    of '|': special "bar"
    else:
      # Anything else is encoded as `X` plus its two-digit hex value.
      result.add 'X'
      result.add toHex(ord(c), 2)
      requiresUnderscore = true
  if requiresUnderscore:
    result.add "_"
proc mangleParamExt*(s: PSym): string =
  ## Returns the name extension for a parameter symbol: `_p` followed by the
  ## decimal value of `s.position`.
  "_p" & $s.position
proc mangleProcNameExt*(graph: ModuleGraph, s: PSym): string =
  ## Builds the extension appended to a mangled proc name. Its shape is
  ## `_u<disamb>__<uniqueName>` for ordinary symbols and
  ## `_c<item>__<uniqueName>` for backend-minted ones, where `<uniqueName>`
  ## is taken from `graph.ifaces[s.itemId.module]`.
  ##
  ## The discriminator is emitted first and the module suffix LAST, which
  ## makes the suffix a strippable trailing token: content-addressed
  ## cross-module merging can cut everything from the final `__` to obtain a
  ## name that does not depend on the mint site.
  let id = s.itemId
  if not id.isBackendMinted:
    # Regular symbol: discriminate by `disamb`, not by `itemId.item`. Under
    # incremental compilation a symbol loaded from a NIF file receives a
    # fresh, load-order-dependent `itemId.item` (from the per-module symbol
    # counter); that value is neither stable across the processes that
    # compile vs. use a module nor guaranteed to differ from another loaded
    # symbol's. `disamb` is assigned deterministically per (module, name)
    # and, combined with the mangled name that precedes this extension,
    # gives a unique and stable C identifier.
    result = "_u"
    result.addInt s.disamb
  else:
    # Symbol minted during IC codegen (`idGeneratorForBackend`). Its idgen
    # begins with an EMPTY per-name disamb table, so `disamb` restarts at 0
    # and would clash with same-named sem-time symbols loaded from NIFs (two
    # `=destroy` hooks both mangling to `_u2` → "conflicting types for ..."
    # in the generated C). Such symbols never cross a process boundary (nifc
    # lifts, emits and compiles them in one run), so the per-module-unique
    # item id is a safe, deterministic discriminator; the `_c` marker keeps
    # this namespace disjoint from `_u<disamb>`.
    result = "_c"
    result.addInt id.item
  result.add "__"
  result.add graph.ifaces[id.module].uniqueName