WIP: an API for VM replay global state support

This commit is contained in:
Andreas Rumpf
2018-06-03 20:18:43 +02:00
parent 9ab92824f6
commit b5194f592c
8 changed files with 288 additions and 35 deletions

View File

@@ -634,14 +634,18 @@ type
mNaN, mInf, mNegInf,
mCompileOption, mCompileOptionArg,
mNLen, mNChild, mNSetChild, mNAdd, mNAddMultiple, mNDel,
mNKind, mNSymKind
mNKind, mNSymKind,
mNccValue, mNccInc, mNcsAdd, mNcsIncl, mNcsLen, mNcsAt,
mNctPut, mNctLen, mNctGet, mNctHasNext, mNctNext,
mNIntVal, mNFloatVal, mNSymbol, mNIdent, mNGetType, mNStrVal, mNSetIntVal,
mNSetFloatVal, mNSetSymbol, mNSetIdent, mNSetType, mNSetStrVal, mNLineInfo,
mNNewNimNode, mNCopyNimNode, mNCopyNimTree, mStrToIdent,
mNBindSym, mLocals, mNCallSite,
mEqIdent, mEqNimrodNode, mSameNodeType, mGetImpl,
mEqIdent, mEqNimrodNode, mSameNodeType, mGetImpl, mNGenSym,
mNHint, mNWarning, mNError,
mInstantiationInfo, mGetTypeInfo, mNGenSym,
mInstantiationInfo, mGetTypeInfo,
mNimvm, mIntDefine, mStrDefine, mRunnableExamples,
mException, mBuiltinType

View File

@@ -0,0 +1,49 @@
#
#
# The Nim Compiler
# (c) Copyright 2018 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements helpers for the macro cache.
import lineinfos, ast, modulegraphs, vmdef, magicsys
proc genCall3(g: ModuleGraph; m: TMagic; s: string; a, b, c: PNode): PNode =
newTree(nkStaticStmt, newTree(nkCall, createMagic(g, s, m).newSymNode, a, b, c))
proc genCall2(g: ModuleGraph; m: TMagic; s: string; a, b: PNode): PNode =
newTree(nkStaticStmt, newTree(nkCall, createMagic(g, s, m).newSymNode, a, b))
template nodeFrom(s: string): PNode =
var res = newStrNode(s, info)
res.typ = getSysType(g, info, tyString)
res
template nodeFrom(i: BiggestInt): PNode =
var res = newIntNode(i, info)
res.typ = getSysType(g, info, tyInt)
res
template nodeFrom(n: PNode): PNode = copyTree(n)
template record(call) =
g.recordStmt(g, c.module, call)
proc recordInc*(c: PCtx; info: TLineInfo; key: string; by: BiggestInt) =
let g = c.graph
record genCall2(mNccInc, "inc", nodeFrom key, nodeFrom by)
proc recordPut*(c: PCtx; info: TLineInfo; key: string; k: string; val: PNode) =
let g = c.graph
record genCall3(mNctPut, "[]=", nodeFrom key, nodeFrom k, nodeFrom val)
proc recordAdd*(c: PCtx; info: TLineInfo; key: string; val: PNode) =
let g = c.graph
record genCall2(mNcsAdd, "add", nodeFrom key, nodeFrom val)
proc recordIncl*(c: PCtx; info: TLineInfo; key: string; val: PNode) =
let g = c.graph
record genCall2(mNcsIncl, "incl", nodeFrom key, nodeFrom val)

View File

@@ -10,16 +10,12 @@
## This module implements the new compilation cache.
import strutils, os, intsets, tables, ropes, db_sqlite, msgs, options, types,
renderer, rodutils, idents, astalgo, btrees, magicsys, cgmeth, extccomp
renderer, rodutils, idents, astalgo, btrees, magicsys, cgmeth, extccomp, vm
## Todo:
## - Implement the 'import' replay logic so that the codegen runs over
## dependent modules.
## - Make conditional symbols and the configuration part of a module's
## dependencies.
## - Test multi methods.
## - Implement the limited VM support based on replays.
## - Depencency computation should use *signature* hashes in order to
## dependencies. Also include the rodfile "version".
## - Dependency computation should use *signature* hashes in order to
## avoid recompiling dependent modules.
template db(): DbConn = g.incr.db
@@ -157,8 +153,6 @@ proc encodeLoc(g: ModuleGraph; loc: TLoc, result: var string) =
if loc.lode != nil:
add(result, '^')
encodeNode(g, unknownLineInfo(), loc.lode, result)
#encodeVInt(cast[int32](loc.t.id), result)
#pushType(w, loc.t)
if loc.r != nil:
add(result, '!')
encodeStr($loc.r, result)
@@ -765,11 +759,9 @@ proc loadModuleSymTab(g; module: PSym) =
proc replay(g: ModuleGraph; module: PSym; n: PNode) =
case n.kind
of nkStaticStmt:
#evalStaticStmt()
discard "XXX to implement"
of nkVarSection, nkLetSection:
#setupCompileTimeVar()
discard "XXX to implement"
evalStaticStmt(module, g, n[0], module)
#of nkVarSection, nkLetSection:
# nkVarSections are already covered by the vmgen which produces nkStaticStmt
of nkMethodDef:
methodDef(g, n[namePos].sym, fromCache=true)
of nkCommentStmt:
@@ -798,10 +790,9 @@ proc replay(g: ModuleGraph; module: PSym; n: PNode) =
internalAssert g.config, false
of nkImportStmt:
for x in n:
if x.kind == nkStrLit:
# XXX check that importModuleCallback implements the right logic
let imported = g.importModuleCallback(g, module, fileInfoIdx(g.config, n[0].strVal))
internalAssert g.config, imported.id < 0
internalAssert g.config, x.kind == nkStrLit
let imported = g.importModuleCallback(g, module, fileInfoIdx(g.config, n[0].strVal))
internalAssert g.config, imported.id < 0
of nkStmtList, nkStmtListExpr:
for x in n: replay(g, module, x)
else: discard "nothing to do for this node"

View File

@@ -19,7 +19,7 @@ import ast except getstr
import
strutils, astalgo, msgs, vmdef, vmgen, nimsets, types, passes,
parser, vmdeps, idents, trees, renderer, options, transf, parseutils,
vmmarshal, gorgeimpl, lineinfos
vmmarshal, gorgeimpl, lineinfos, tables, btrees, macrocacheimpl
from semfold import leValueConv, ordinalValToString
from evaltempl import evalTemplate
@@ -1572,6 +1572,95 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame): TFullReg =
var sym = newSym(k.TSymKind, getIdent(c.cache, name), c.module.owner, c.debug[pc])
incl(sym.flags, sfGenSym)
regs[ra].node = newSymNode(sym)
of opcNccValue:
decodeB(rkInt)
let destKey = regs[rb].node.strVal
regs[ra].intVal = getOrDefault(c.cacheCounters, destKey)
of opcNccInc:
let rb = instr.regB
let destKey = regs[ra].node.strVal
let by = regs[rb].intVal
let v = getOrDefault(c.cacheCounters, destKey)
c.cacheCounters[destKey] = v+by
recordInc(c, c.debug[pc], destKey, by)
of opcNcsAdd:
let destKey = regs[ra].node.strVal
let val = regs[instr.regB].node
if not contains(c.cacheSeqs, destKey):
c.cacheSeqs[destKey] = newTree(nkStmtList, val)
# newNodeI(nkStmtList, c.debug[pc])
else:
c.cacheSeqs[destKey].add val
recordAdd(c, c.debug[pc], destKey, val)
of opcNcsIncl:
let destKey = regs[ra].node.strVal
let val = regs[instr.regB].node
if not contains(c.cacheSeqs, destKey):
c.cacheSeqs[destKey] = newTree(nkStmtList, val)
else:
block search:
for existing in c.cacheSeqs[destKey]:
if exprStructuralEquivalent(existing, val, strictSymEquality=true):
break search
c.cacheSeqs[destKey].add val
recordIncl(c, c.debug[pc], destKey, val)
of opcNcsLen:
decodeB(rkInt)
let destKey = regs[rb].node.strVal
regs[ra].intVal =
if contains(c.cacheSeqs, destKey): c.cacheSeqs[destKey].len else: 0
of opcNcsAt:
decodeBC(rkNode)
let idx = regs[rc].intVal
let destKey = regs[rb].node.strVal
if contains(c.cacheSeqs, destKey) and idx <% c.cacheSeqs[destKey].len:
regs[ra].node = c.cacheSeqs[destKey][idx.int]
else:
stackTrace(c, tos, pc, errIndexOutOfBounds)
of opcNctPut:
let destKey = regs[ra].node.strVal
let key = regs[instr.regB].node.strVal
let val = regs[instr.regC].node
if not contains(c.cacheTables, destKey):
c.cacheTables[destKey] = initBTree[string, PNode]()
if not contains(c.cacheTables[destKey], key):
c.cacheTables[destKey].add(key, val)
recordPut(c, c.debug[pc], destKey, key, val)
else:
stackTrace(c, tos, pc, "key already exists: " & key)
of opcNctLen:
decodeB(rkInt)
let destKey = regs[rb].node.strVal
regs[ra].intVal =
if contains(c.cacheTables, destKey): c.cacheTables[destKey].len else: 0
of opcNctGet:
decodeBC(rkNode)
let destKey = regs[rb].node.strVal
let key = regs[rc].node.strVal
if contains(c.cacheTables, destKey):
if contains(c.cacheTables[destKey], key):
regs[ra].node = getOrDefault(c.cacheTables[destKey], key)
else:
stackTrace(c, tos, pc, "key does not exist: " & key)
else:
stackTrace(c, tos, pc, "key does not exist: " & destKey)
of opcNctHasNext:
decodeBC(rkInt)
let destKey = regs[rb].node.strVal
regs[ra].intVal =
btrees.hasNext(c.cacheTables[destKey], regs[rc].intVal.int) else: 0
of opcNctNext:
decodeBC(rkNode)
let destKey = regs[rb].node.strVal
let index = regs[rc].intVal
if contains(c.cacheTables, destKey):
let (k, v, nextIndex) = btrees.next(c.cacheTables[destKey], index.int)
regs[ra].node = newTree(nkTupleConstr, newStrNode(k, c.debug[pc]), v,
newIntNode(nkIntLit, nextIndex))
else:
stackTrace(c, tos, pc, "key does not exist: " & destKey)
of opcTypeTrait:
# XXX only supports 'name' for now; we can use regC to encode the
# type trait operation

View File

@@ -10,7 +10,8 @@
## This module contains the type definitions for the new evaluation engine.
## An instruction is 1-3 int32s in memory, it is a register based VM.
import ast, passes, msgs, idents, intsets, options, modulegraphs, lineinfos
import ast, passes, msgs, idents, intsets, options, modulegraphs, lineinfos,
tables, btrees
const
byteExcess* = 128 # we use excess-K for immediates
@@ -91,6 +92,9 @@ type
opcNSetFloatVal, opcNSetSymbol, opcNSetIdent, opcNSetType, opcNSetStrVal,
opcNNewNimNode, opcNCopyNimNode, opcNCopyNimTree, opcNDel, opcGenSym,
opcNccValue, opcNccInc, opcNcsAdd, opcNcsIncl, opcNcsLen, opcNcsAt,
opcNctPut, opcNctLen, opcNctGet, opcNctHasNext, opcNctNext,
opcSlurp,
opcGorge,
opcParseExprToAst,
@@ -209,6 +213,9 @@ type
config*: ConfigRef
graph*: ModuleGraph
oldErrorCount*: int
cacheSeqs*: Table[string, PNode]
cacheCounters*: Table[string, BiggestInt]
cacheTables*: Table[string, BTree[string, PNode]]
TPosition* = distinct int
@@ -219,7 +226,10 @@ proc newCtx*(module: PSym; cache: IdentCache; g: ModuleGraph): PCtx =
globals: newNode(nkStmtListExpr), constants: newNode(nkStmtList), types: @[],
prc: PProc(blocks: @[]), module: module, loopIterations: MaxLoopIterations,
comesFromHeuristic: unknownLineInfo(), callbacks: @[], errorFlag: "",
cache: cache, config: g.config, graph: g)
cache: cache, config: g.config, graph: g,
cacheSeqs: initTable[string, PNode]()
cacheCounters: initTable[string, BiggestInt]()
cacheTables: initTable[string, BTree[string, PNode]]())
proc refresh*(c: PCtx, module: PSym) =
c.module = module

View File

@@ -804,6 +804,17 @@ proc genIntCast(c: PCtx; n: PNode; dest: var TDest) =
else:
globalError(c.config, n.info, "VM is only allowed to 'cast' between integers of same size")
proc genVoidABC(c: PCtx, n: PNode, dest: TRegister, opcode: TOpcode)
unused(c, n, dest)
var
tmp1 = c.genx(n.sons[1])
tmp2 = c.genx(n.sons[2])
tmp3 = c.genx(n.sons[3])
c.gABC(n, opcode, tmp1, tmp2, tmp3)
c.freeTemp(tmp1)
c.freeTemp(tmp2)
c.freeTemp(tmp3)
proc genMagic(c: PCtx; n: PNode; dest: var TDest; m: TMagic) =
case m
of mAnd: c.genAndOr(n, opcFJmp, dest)
@@ -1067,20 +1078,25 @@ proc genMagic(c: PCtx; n: PNode; dest: var TDest; m: TMagic) =
of mNLen: genUnaryABI(c, n, dest, opcLenSeq, nimNodeFlag)
of mGetImpl: genUnaryABC(c, n, dest, opcGetImpl)
of mNChild: genBinaryABC(c, n, dest, opcNChild)
of mNSetChild, mNDel:
unused(c, n, dest)
var
tmp1 = c.genx(n.sons[1])
tmp2 = c.genx(n.sons[2])
tmp3 = c.genx(n.sons[3])
c.gABC(n, if m == mNSetChild: opcNSetChild else: opcNDel, tmp1, tmp2, tmp3)
c.freeTemp(tmp1)
c.freeTemp(tmp2)
c.freeTemp(tmp3)
of mNSetChild: genVoidABC(c, n, dest, opcNSetChild)
of mNDel: genVoidABC(c, n, dest, opcNDel)
of mNAdd: genBinaryABC(c, n, dest, opcNAdd)
of mNAddMultiple: genBinaryABC(c, n, dest, opcNAddMultiple)
of mNKind: genUnaryABC(c, n, dest, opcNKind)
of mNSymKind: genUnaryABC(c, n, dest, opcNSymKind)
of mNccValue: genUnaryABC(c, n, dest, opcNccValue)
of mNccInc: genBinaryABC(c, n, dest, opcNccInc)
of mNcsAdd: genBinaryABC(c, n, dest, opcNcsAdd
of mNcsIncl: genBinaryABC(c, n, dest, opcNcsIncl
of mNcsLen: genUnaryABC(c, n, dest, opcNcsLen)
of mNcsAt: genBinaryABC(c, n, dest, opcNcsAt)
of mNctPut: genVoidABC(c, n, dest, opcNctPut)
of mNctLen: genUnaryABC(c, n, dest, opcNctLen)
of mNctGet: genBinaryABC(c, n, dest, opcNctGet)
of mNctHasNext: genBinaryABC(c, n, dest, opcNctHasNext)
of mNctNext: genBinaryABC(c, n, dest, opcNctNext)
of mNIntVal: genUnaryABC(c, n, dest, opcNIntVal)
of mNFloatVal: genUnaryABC(c, n, dest, opcNFloatVal)
of mNSymbol: genUnaryABC(c, n, dest, opcNSymbol)
@@ -1795,6 +1811,8 @@ proc gen(c: PCtx; n: PNode; dest: var TDest; flags: TGenFlags = {}) =
if s.magic != mNone:
genMagic(c, n, dest, s.magic)
elif matches(s, "stdlib", "marshal", "to"):
# XXX marshal load&store should not be opcodes, but use the
# general callback mechanisms.
genMarshalLoad(c, n, dest)
elif matches(s, "stdlib", "marshal", "$$"):
genMarshalStore(c, n, dest)

View File

@@ -268,7 +268,52 @@ severe way. Plenty of different solutions have been proposed:
Since we adopt the "replay the top level statements" idea, the natural
solution to this problem is to emit pseudo top level statements that
reflect the mutations done to the global variable.
reflect the mutations done to the global variable. However, this is
MUCH harder than it sounds, for example ``squeaknim`` uses this
snippet:
.. code-block:: nim
apicall.add(") module: '" & dllName & "'>\C" &
"\t^self externalCallFailed\C!\C\C")
stCode.add(st & "\C\t\"Generated by NimSqueak\"\C\t" & apicall)
We can "replay" ``stCode.add`` only if the values of ``st``
and ``apicall`` are known. And even then a hash table's ``add`` with its
hashing mechanism is too hard to replay.
In practice, things are worse still, consider ``someGlobal[i][j].add arg``.
We only know the root is ``someGlobal`` but the concrete path to the data
is unknown as is the value that is added. We could compute a "diff" between
the global states and use that to compute a symbol patchset, but this is
quite some work, expensive to do at runtime (it would need to run after
every module has been compiled) and also would break for hash tables.
We need an API that hides the complex aliasing problems by not relying
on Nim's global variables. The obvious solution is to use string keys
instead of global variables:
.. code-block:: nim
proc cachePut*(key: string; value: string)
proc cacheGet*(key: string): string
However, the values being strings/json is quite problematic: Many
lookup tables that are built at compiletime embed *proc vars* and
types which have no obvious string representation... Seems like
AST diffing is still the best idea as it will not require to use
an alien API and works with some existing Nimble packages, at least.
On the other hand, in Nim's future I would like to replace the VM
by native code. A diff algorithm wouldn't work for that.
Instead the native code would work with an API like ``put``, ``get``:
.. code-block:: nim
proc cachePut*(key: string; value: NimNode)
proc cacheGet*(key: string): NimNode
The API should embrace the AST diffing notion: See the
module ``macrocache`` for the final details.

47
lib/core/macrocache.nim Normal file
View File

@@ -0,0 +1,47 @@
#
#
# Nim's Runtime Library
# (c) Copyright 2018 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module provides an API for macros that need to collect compile
## time information across module boundaries in global variables.
## Starting with version 0.19 of Nim this is not directly supported anymore
## as it breaks incremental compilations.
## Instead the API here needs to be used. See XXX (wikipedia page) for a
## theoretical foundation behind this.
type
CacheSeq* = distinct string
CacheTable* = distinct string
CacheCounter* = distinct string
proc value*(c: CacheCounter): int {.magic: "NccValue".}
proc inc*(c: CacheCounter; by = 1) {.magic: "NccInc".}
proc add*(s: CacheSeq; value: NimNode) {.magic: "NcsAdd".}
proc incl*(s: CacheSeq; value: NimNode) {.magic: "NcsIncl".}
proc len*(s: CacheSeq): int {.magic: "NcsLen".}
proc `[]`*(s: CacheSeq; i: int): NimNode {.magic: "NcsAt".}
iterator items*(s: CacheSeq): NimNode =
for i in 0 ..< len(s): yield s[i]
proc `[]=`*(t: CacheTable; key: string, value: NimNode) {.magic: "NctPut".}
## 'key' has to be unique!
proc len*(t: CacheTable): int {.magic: "NctLen".}
proc `[]`*(t: CacheTable; key: string): NimNode {.magic: "NctGet".}
proc hasNext(t: CacheTable; iter: int): bool {.magic: "NctHasNext".}
proc next(t: CacheTable; iter: int): (string, NimNode, int) {.magic: "NctNext".}
iterator pairs*(t: CacheTable): (string, NimNode) =
var h = 0
while hasNext(t, h):
let (a, b, h2) = next(t, h)
yield (a, b)
h = h2