From 4780b08b9d2b06d20ce45167b2b4d11a816518af Mon Sep 17 00:00:00 2001 From: Andreas Rumpf Date: Sun, 11 Apr 2021 17:37:32 +0200 Subject: [PATCH] IC: integrity checking (#17695) * IC: integrity checking: the plumbing code * progress * progress + bugfix (yes, the code already found a bug) * implemented integrity checking --- compiler/ic/bitabs.nim | 4 + compiler/ic/ic.nim | 3 +- compiler/ic/integrity.nim | 151 ++++++++++++++++++++++++++++++++++++++ compiler/main.nim | 4 +- compiler/semdata.nim | 3 + compiler/semstmts.nim | 2 +- koch.nim | 2 +- testament/categories.nim | 2 +- 8 files changed, 166 insertions(+), 5 deletions(-) create mode 100644 compiler/ic/integrity.nim diff --git a/compiler/ic/bitabs.nim b/compiler/ic/bitabs.nim index 1f75b77592..0bce30b5da 100644 --- a/compiler/ic/bitabs.nim +++ b/compiler/ic/bitabs.nim @@ -36,6 +36,10 @@ const template idToIdx(x: LitId): int = x.int - idStart +proc hasLitId*[T](t: BiTable[T]; x: LitId): bool = + let idx = idToIdx(x) + result = idx >= 0 and idx < t.vals.len + proc enlarge[T](t: var BiTable[T]) = var n: seq[LitId] newSeq(n, len(t.keys) * 2) diff --git a/compiler/ic/ic.nim b/compiler/ic/ic.nim index 46bb63e07a..6769c6ffee 100644 --- a/compiler/ic/ic.nim +++ b/compiler/ic/ic.nim @@ -31,7 +31,7 @@ type moduleFlags: TSymFlags includes: seq[(LitId, string)] # first entry is the module filename itself imports: seq[LitId] # the modules this module depends on - toReplay: PackedTree # pragmas and VM specific state to replay. + toReplay*: PackedTree # pragmas and VM specific state to replay. topLevel*: PackedTree # top level statements bodies*: PackedTree # other trees. Referenced from typ.n and sym.ast by their position. #producedGenerics*: Table[GenericKey, SymId] @@ -167,6 +167,7 @@ proc addExported*(c: var PackedEncoder; m: var PackedModule; s: PSym) = m.exports.add((nameId, s.itemId.item)) proc addConverter*(c: var PackedEncoder; m: var PackedModule; s: PSym) = + assert c.thisModule == s.itemId.module m.converters.add(s.itemId.item) proc addTrmacro*(c: var PackedEncoder; m: var PackedModule; s: PSym) = diff --git a/compiler/ic/integrity.nim b/compiler/ic/integrity.nim new file mode 100644 index 0000000000..b4545de94a --- /dev/null +++ b/compiler/ic/integrity.nim @@ -0,0 +1,151 @@ +# +# +# The Nim Compiler +# (c) Copyright 2021 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Integrity checking for a set of .rod files. +## The set must cover a complete Nim project. + +import std / sets +import ".." / [ast, modulegraphs] +import packed_ast, bitabs, ic + +type + CheckedContext = object + g: ModuleGraph + thisModule: int32 + checkedSyms: HashSet[ItemId] + checkedTypes: HashSet[ItemId] + +proc checkType(c: var CheckedContext; typeId: PackedItemId) +proc checkForeignSym(c: var CheckedContext; symId: PackedItemId) +proc checkNode(c: var CheckedContext; tree: PackedTree; n: NodePos) + +proc checkTypeObj(c: var CheckedContext; typ: PackedType) = + for child in typ.types: + checkType(c, child) + if typ.n != emptyNodeId: + checkNode(c, c.g.packed[c.thisModule].fromDisk.bodies, NodePos typ.n) + if typ.sym != nilItemId: + checkForeignSym(c, typ.sym) + if typ.owner != nilItemId: + checkForeignSym(c, typ.owner) + checkType(c, typ.typeInst) + +proc checkType(c: var CheckedContext; typeId: PackedItemId) = + if typeId == nilItemId: return + let itemId = translateId(typeId, c.g.packed, c.thisModule, c.g.config) + if not c.checkedTypes.containsOrIncl(itemId): + let oldThisModule = c.thisModule + c.thisModule = itemId.module + checkTypeObj c, c.g.packed[itemId.module].fromDisk.sh.types[itemId.item] + c.thisModule = oldThisModule + +proc checkSym(c: var CheckedContext; s: PackedSym) = + if s.name != LitId(0): + assert c.g.packed[c.thisModule].fromDisk.sh.strings.hasLitId s.name + checkType c, s.typ + if s.ast != emptyNodeId: + checkNode(c, c.g.packed[c.thisModule].fromDisk.bodies, NodePos s.ast) + if s.owner != nilItemId: + checkForeignSym(c, s.owner) + +proc checkLocalSym(c: var CheckedContext; item: int32) = + let itemId = ItemId(module: c.thisModule, item: item) + if not c.checkedSyms.containsOrIncl(itemId): + checkSym c, c.g.packed[c.thisModule].fromDisk.sh.syms[item] + +proc checkForeignSym(c: var CheckedContext; symId: PackedItemId) = + let itemId = translateId(symId, c.g.packed, c.thisModule, c.g.config) + if not c.checkedSyms.containsOrIncl(itemId): + let oldThisModule = c.thisModule + c.thisModule = itemId.module + checkSym c, c.g.packed[itemId.module].fromDisk.sh.syms[itemId.item] + c.thisModule = oldThisModule + +proc checkNode(c: var CheckedContext; tree: PackedTree; n: NodePos) = + if tree[n.int].typeId != nilItemId: + checkType(c, tree[n.int].typeId) + case n.kind + of nkEmpty, nkNilLit, nkType, nkNilRodNode: + discard + of nkIdent: + assert c.g.packed[c.thisModule].fromDisk.sh.strings.hasLitId n.litId + of nkSym: + checkLocalSym(c, tree.nodes[n.int].operand) + of directIntLit: + discard + of externIntLit: + assert c.g.packed[c.thisModule].fromDisk.sh.integers.hasLitId n.litId + of nkStrLit..nkTripleStrLit: + assert c.g.packed[c.thisModule].fromDisk.sh.strings.hasLitId n.litId + of nkFloatLit..nkFloat128Lit: + assert c.g.packed[c.thisModule].fromDisk.sh.floats.hasLitId n.litId + of nkModuleRef: + let (n1, n2) = sons2(tree, n) + assert n1.kind == nkInt32Lit + assert n2.kind == nkInt32Lit + checkForeignSym(c, PackedItemId(module: n1.litId, item: tree.nodes[n2.int].operand)) + else: + for n0 in sonsReadonly(tree, n): + checkNode(c, tree, n0) + +proc checkTree(c: var CheckedContext; t: PackedTree) = + for p in allNodes(t): checkNode(c, t, p) + +proc checkLocalSymIds(c: var CheckedContext; m: PackedModule; symIds: seq[int32]) = + for symId in symIds: + assert symId >= 0 and symId < m.sh.syms.len, $symId & " " & $m.sh.syms.len + +proc checkModule(c: var CheckedContext; m: PackedModule) = + # We check that: + # - Every symbol references existing types and symbols. + # - Every tree node references existing types and symbols. + for i in 0..high(m.sh.syms): + checkLocalSym c, int32(i) + + checkTree c, m.toReplay + checkTree c, m.topLevel + + for e in m.exports: + assert e[1] >= 0 and e[1] < m.sh.syms.len + assert e[0] == m.sh.syms[e[1]].name + + for e in m.compilerProcs: + assert e[1] >= 0 and e[1] < m.sh.syms.len + assert e[0] == m.sh.syms[e[1]].name + + checkLocalSymIds c, m, m.converters + checkLocalSymIds c, m, m.methods + checkLocalSymIds c, m, m.trmacros + checkLocalSymIds c, m, m.pureEnums + #[ + To do: Check all these fields: + + reexports*: seq[(LitId, PackedItemId)] + macroUsages*: seq[(PackedItemId, PackedLineInfo)] + + typeInstCache*: seq[(PackedItemId, PackedItemId)] + procInstCache*: seq[PackedInstantiation] + attachedOps*: seq[(TTypeAttachedOp, PackedItemId, PackedItemId)] + methodsPerType*: seq[(PackedItemId, int, PackedItemId)] + enumToStringProcs*: seq[(PackedItemId, PackedItemId)] + ]# + +proc checkIntegrity*(g: ModuleGraph) = + var c = CheckedContext(g: g) + for i in 0..high(g.packed): + # case statement here to enforce exhaustive checks. + case g.packed[i].status + of undefined: + discard "nothing to do" + of loading: + assert false, "cannot check integrity: Module still loading" + of stored, storing, outdated, loaded: + c.thisModule = int32 i + checkModule(c, g.packed[i].fromDisk) + diff --git a/compiler/main.nim b/compiler/main.nim index 26a19063ff..3c50185461 100644 --- a/compiler/main.nim +++ b/compiler/main.nim @@ -21,7 +21,7 @@ import modules, modulegraphs, tables, lineinfos, pathutils, vmprofiler -import ic / cbackend +import ic / [cbackend, integrity] from ic / ic import rodViewer when not defined(leanCompiler): @@ -93,6 +93,8 @@ proc commandCompileToC(graph: ModuleGraph) = if conf.symbolFiles == disabledSf: cgenWriteModules(graph.backend, conf) else: + if isDefined(conf, "nimIcIntegrityChecks"): + checkIntegrity(graph) generateCode(graph) # graph.backend can be nil under IC when nothing changed at all: if graph.backend != nil: diff --git a/compiler/semdata.nim b/compiler/semdata.nim index 51b2cdea16..daf1265e87 100644 --- a/compiler/semdata.nim +++ b/compiler/semdata.nim @@ -341,6 +341,9 @@ proc addConverter*(c: PContext, conv: LazySym) = assert conv.sym != nil if inclSym(c.converters, conv.sym): add(c.graph.ifaces[c.module.position].converters, conv) + +proc addConverterDef*(c: PContext, conv: LazySym) = + addConverter(c, conv) if c.config.symbolFiles != disabledSf: addConverter(c.encoder, c.packedRepr, conv.sym) diff --git a/compiler/semstmts.nim b/compiler/semstmts.nim index bbfd49e2a6..4c6a745f65 100644 --- a/compiler/semstmts.nim +++ b/compiler/semstmts.nim @@ -2093,7 +2093,7 @@ proc semConverterDef(c: PContext, n: PNode): PNode = var t = s.typ if t[0] == nil: localError(c.config, n.info, errXNeedsReturnType % "converter") if t.len != 2: localError(c.config, n.info, "a converter takes exactly one argument") - addConverter(c, LazySym(sym: s)) + addConverterDef(c, LazySym(sym: s)) proc semMacroDef(c: PContext, n: PNode): PNode = checkSonsLen(n, bodyPos + 1, c.config) diff --git a/koch.nim b/koch.nim index 3b00d3564b..44d38589be 100644 --- a/koch.nim +++ b/koch.nim @@ -492,7 +492,7 @@ proc icTest(args: string) = for fragment in content.split("#!EDIT!#"): let file = inp.replace(".nim", "_temp.nim") writeFile(file, fragment) - var cmd = nimExe & " cpp --ic:on --listcmd " + var cmd = nimExe & " cpp --ic:on -d:nimIcIntegrityChecks --listcmd " if i == 0: cmd.add "-f " cmd.add quoteShell(file) diff --git a/testament/categories.nim b/testament/categories.nim index af6331dde3..6180462871 100644 --- a/testament/categories.nim +++ b/testament/categories.nim @@ -488,7 +488,7 @@ proc icTests(r: var TResults; testsDir: string, cat: Category, options: string) tooltests = ["compiler/nim.nim", "tools/nimgrep.nim"] writeOnly = " --incremental:writeonly " readOnly = " --incremental:readonly " - incrementalOn = " --incremental:on " + incrementalOn = " --incremental:on -d:nimIcIntegrityChecks " template test(x: untyped) = testSpecWithNimcache(r, makeRawTest(file, x & options, cat), nimcache)