Compiler plugin for implementing incremental computation in user space (#10819)

This plugin provides an essential building block for implementing incremental computations in your programs. The idea behind incremental computation is that if you perform the same calculation multiple times with slightly different inputs, you should not have to recompute everything from scratch. You also do not want to adopt special algorithms; you would like to write your code in the standard from-scratch manner and get incrementality for free whenever possible.

The plugin computes the digest of the proc bodies, recursively hashing all called procs as well. Such a digest, combined with the digest of the argument values, gives a good "name" for the result. The terminology loosely follows the paper "Incremental Computation with Names" (link below). It works well if your computations have no side effects. If your computations depend on global state, you will need problem-specific workarounds to represent that global state in the set of "names". Side-effect tracking in Nim is also useful here.

Classical examples:

Dashboard with ticking data. New data arrives non stop and you would like to update the dashboard recomputing only changed outputs.
Excel spreadsheet where user changes one cell and you would like to recompute all cells that are affected by the change, but do not want to recompute every cell in the spreadsheet.
This commit is contained in:
cooldome
2019-04-11 22:09:11 +01:00
committed by Andreas Rumpf
parent de02fd0b89
commit 041d15392a
14 changed files with 387 additions and 82 deletions

View File

@@ -12,7 +12,7 @@
import
ast, astalgo, ropes, options, strutils, nimlexbase, msgs, cgendata, rodutils,
intsets, platform, llstream, tables, sighashes, pathutils
intsets, platform, llstream, tables, sighashes, modulegraphs, pathutils
# Careful! Section marks need to contain a tabulator so that they cannot
# be part of C string literals.

View File

@@ -11,7 +11,7 @@
# ------------------------- Name Mangling --------------------------------
import sighashes
import sighashes, modulegraphs
from lowerings import createObj
proc genProcHeader(m: BModule, prc: PSym, asPtr: bool = false): Rope

View File

@@ -11,9 +11,7 @@
import
ast, astalgo, ropes, passes, options, intsets, platform, sighashes,
tables, ndi, lineinfos, pathutils
from modulegraphs import ModuleGraph, PPassContext
tables, ndi, lineinfos, pathutils, modulegraphs
type
TLabel* = Rope # for the C generator a label is just a rope

View File

@@ -32,7 +32,9 @@ import
ast, astalgo, strutils, hashes, trees, platform, magicsys, extccomp, options,
nversion, nimsets, msgs, std / sha1, bitsets, idents, types, os, tables,
times, ropes, math, passes, ccgutils, wordrecg, renderer,
intsets, cgmeth, lowerings, sighashes, lineinfos, rodutils, pathutils, transf
intsets, cgmeth, lowerings, sighashes, modulegraphs, lineinfos, rodutils,
pathutils, transf
from modulegraphs import ModuleGraph, PPassContext

View File

@@ -26,9 +26,11 @@
##
import ast, intsets, tables, options, lineinfos, hashes, idents,
incremental, btrees, sighashes
incremental, btrees, md5
type
SigHash* = distinct Md5Digest
ModuleGraph* = ref object
modules*: seq[PSym] ## indexed by int32 fileIdx
packageSyms*: TStrTable
@@ -58,6 +60,7 @@ type
emptyNode*: PNode
incr*: IncrementalCtx
canonTypes*: Table[SigHash, PType]
symBodyHashes*: Table[int, SigHash] # symId to digest mapping
importModuleCallback*: proc (graph: ModuleGraph; m: PSym, fileIdx: FileIndex): PSym {.nimcall.}
includeFileCallback*: proc (graph: ModuleGraph; m: PSym, fileIdx: FileIndex): PNode {.nimcall.}
recordStmt*: proc (graph: ModuleGraph; m: PSym; n: PNode) {.nimcall.}
@@ -81,6 +84,52 @@ type
close: TPassClose,
isFrontend: bool]
const
  # 64-entry lookup table indexed by a 6-bit value. The last three entries
  # (indices 61..63) are two-character strings, so the output may be longer
  # than four characters per three input bytes.
  cb64 = [
    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
    "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n",
    "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9a",
    "9b", "9c"]

proc toBase64a(s: cstring, len: int): string =
  ## Encodes the first `len` bytes of `s` with the `cb64` table and returns
  ## the result prefixed with `_`. No padding characters are emitted for a
  ## trailing group of one or two bytes.
  result = newStringOfCap(((len + 2) div 3) * 4)
  result.add '_'
  var pos = 0
  # Complete 3-byte groups: 24 bits become four 6-bit table lookups.
  while pos + 2 < len:
    let
      b0 = ord(s[pos])
      b1 = ord(s[pos + 1])
      b2 = ord(s[pos + 2])
    result.add cb64[b0 shr 2]
    result.add cb64[((b0 and 3) shl 4) or ((b1 and 0xF0) shr 4)]
    result.add cb64[((b1 and 0x0F) shl 2) or ((b2 and 0xC0) shr 6)]
    result.add cb64[b2 and 0x3F]
    pos += 3
  # Leftover bytes (at most two) after the last complete group.
  let remaining = len - pos
  if remaining == 2:
    let
      b0 = ord(s[pos])
      b1 = ord(s[pos + 1])
    result.add cb64[b0 shr 2]
    result.add cb64[((b0 and 3) shl 4) or ((b1 and 0xF0) shr 4)]
    result.add cb64[((b1 and 0x0F) shl 2)]
  elif remaining == 1:
    let b0 = ord(s[pos])
    result.add cb64[b0 shr 2]
    result.add cb64[(b0 and 3) shl 4]
proc `$`*(u: SigHash): string =
  ## String form of a `SigHash`: the raw bytes of `u` run through
  ## `toBase64a`.
  result = toBase64a(cast[cstring](unsafeAddr u), sizeof(u))
proc `==`*(a, b: SigHash): bool =
  ## Two hashes are equal when their underlying bytes are identical.
  equalMem(unsafeAddr a, unsafeAddr b, sizeof(a))
proc hash*(u: SigHash): Hash =
  ## Table hash for a `SigHash`: packs the first four digest bytes into an
  ## integer, byte 0 ending up in the most significant position.
  let digest = u.MD5Digest
  result = 0
  for idx in 0..3:
    result = (result shl 8) or digest[idx].int
proc hash*(x: FileIndex): Hash {.borrow.}
when defined(nimfind):
@@ -140,6 +189,7 @@ proc newModuleGraph*(cache: IdentCache; config: ConfigRef): ModuleGraph =
result.cacheCounters = initTable[string, BiggestInt]()
result.cacheTables = initTable[string, BTree[string, PNode]]()
result.canonTypes = initTable[SigHash, PType]()
result.symBodyHashes = initTable[int, SigHash]()
proc resetAllModules*(g: ModuleGraph) =
initStrTable(g.packageSyms)

View File

@@ -9,81 +9,26 @@
## Computes hash values for routine (proc, method etc) signatures.
import ast, md5, tables, ropes
import ast, tables, ropes, md5, modulegraphs
from hashes import Hash
from astalgo import debug
import types
from strutils import startsWith, contains
when false:
type
SigHash* = uint32 ## a hash good enough for a filename or a proc signature
proc `&=`(c: var MD5Context, s: string) =
  ## Feeds the characters of `s` into the MD5 context.
  md5Update(c, s, s.len)

proc `&=`(c: var MD5Context, ch: char) =
  ## Feeds a single character into the MD5 context.
  md5Update(c, unsafeAddr ch, 1)

proc `&=`(c: var MD5Context, r: Rope) =
  ## Feeds every leaf string of the rope into the MD5 context, in
  ## `leaves` iteration order.
  for leaf in leaves(r):
    md5Update(c, leaf, leaf.len)

proc `&=`(c: var MD5Context, i: BiggestInt) =
  ## Feeds the in-memory bytes of the integer into the MD5 context.
  md5Update(c, cast[cstring](unsafeAddr i), sizeof(i))

proc `&=`(c: var MD5Context, f: BiggestFloat) =
  ## Feeds the in-memory bytes of the float into the MD5 context.
  md5Update(c, cast[cstring](unsafeAddr f), sizeof(f))

proc `&=`(c: var MD5Context, s: SigHash) =
  ## Feeds the bytes of an already computed digest into the MD5 context.
  md5Update(c, cast[cstring](unsafeAddr s), sizeof(s))
# Feeds the in-memory bytes of `v` into an MD5 context. Note that `c` is not
# a template parameter: it has to be visible where the template is expanded.
template lowlevel(v) =
md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v))
# NOTE(review): these three definitions appear to belong to the `when false:`
# branch above (indentation was lost in this view), i.e. the variant where
# `SigHash` is a `uint32` -- confirm against the real file.
# One step of the string hash: combines the running `hash` with character `c`.
proc sdbmHash(hash: SigHash, c: char): SigHash {.inline.} =
return SigHash(c) + (hash shl 6) + (hash shl 16) - hash
# Mixes a single character into `x` in place.
template `&=`*(x: var SigHash, c: char) = x = sdbmHash(x, c)
# Mixes every character of `s` into `x` in place, in order.
template `&=`*(x: var SigHash, s: string) =
for c in s: x = sdbmHash(x, c)
else:
type
SigHash* = distinct Md5Digest
# 64-entry lookup table indexed by a 6-bit value; the last three entries are
# two-character strings.
const
cb64 = [
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
"O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n",
"o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9a",
"9b", "9c"]
proc toBase64a(s: cstring, len: int): string =
## encodes `s` into base64 representation.
# The result always starts with '_'. Only the first `len` bytes are read.
result = newStringOfCap(((len + 2) div 3) * 4)
result.add '_'
var i = 0
# Complete 3-byte groups: four table lookups per group.
while i < len - 2:
let a = ord(s[i])
let b = ord(s[i+1])
let c = ord(s[i+2])
result.add cb64[a shr 2]
result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
result.add cb64[((b and 0x0F) shl 2) or ((c and 0xC0) shr 6)]
result.add cb64[c and 0x3F]
inc(i, 3)
# Two leftover bytes: three lookups, no padding is appended.
if i < len-1:
let a = ord(s[i])
let b = ord(s[i+1])
result.add cb64[a shr 2]
result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
result.add cb64[((b and 0x0F) shl 2)]
# One leftover byte: two lookups, no padding is appended.
elif i < len:
let a = ord(s[i])
result.add cb64[a shr 2]
result.add cb64[(a and 3) shl 4]
# String form of a `SigHash`: its raw bytes encoded by `toBase64a`.
proc `$`*(u: SigHash): string =
toBase64a(cast[cstring](unsafeAddr u), sizeof(u))
# `&=` overloads: each feeds its right-hand operand into the MD5 context `c`.
# String: all characters of `s`.
proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len)
# Single character.
proc `&=`(c: var MD5Context, ch: char) = md5Update(c, unsafeAddr ch, 1)
# Rope: every leaf string, in `leaves` iteration order.
proc `&=`(c: var MD5Context, r: Rope) =
for l in leaves(r): md5Update(c, l, l.len)
# Integer: its in-memory bytes.
proc `&=`(c: var MD5Context, i: BiggestInt) =
md5Update(c, cast[cstring](unsafeAddr i), sizeof(i))
# Feeds the in-memory bytes of `v`; `c` is not a parameter and must be
# visible where the template is expanded.
template lowlevel(v) =
md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v))
# Equality of two hashes, decided by comparing their bytes.
proc `==`*(a, b: SigHash): bool =
# {.borrow.}
result = equalMem(unsafeAddr a, unsafeAddr b, sizeof(a))
# Table hash for a `SigHash`: packs digest bytes 0..3 into an integer, with
# byte 0 shifted into the most significant position.
proc hash*(u: SigHash): Hash =
result = 0
for x in 0..3:
result = (result shl 8) or u.MD5Digest[x].int
type
ConsiderFlag* = enum
CoProc
@@ -359,6 +304,75 @@ proc sigHash*(s: PSym): SigHash =
else:
result = hashNonProc(s)
proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash
proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode)
proc hashVarSymBody(graph: ModuleGraph, c: var MD5Context, s: PSym) =
  ## Mixes a variable-like symbol (param, result, var, let, const, for-var)
  ## into `c`. Non-global symbols contribute their kind and name; global
  ## symbols contribute `hashNonProc(s)` plus the hash of their definition.
  assert: s.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}
  if sfGlobal in s.flags:
    c &= hashNonProc(s)
    # Hashing the definition works for let and const but not for var: true
    # variables can change value later on. It is the user's responsibility
    # to hash their global state if required.
    let definition =
      if s.ast != nil and s.ast.kind == nkIdentDefs: s.ast[^1]
      else: s.ast
    hashBodyTree(graph, c, definition)
  else:
    c &= char(s.kind)
    c &= s.name.s
proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode) =
  ## Hashes the tree `n` into `c`, following symbols:
  ## * routine symbols contribute their `symBodyDigest`,
  ## * variable-like symbols are handled by `hashVarSymBody`,
  ## * any other symbol contributes `hashNonProc`.
  ## A nil node is hashed as the string "nil".
  if n == nil:
    c &= "nil"
    return
  c &= char(n.kind)
  case n.kind
  of nkEmpty, nkNilLit, nkType: discard
  of nkIdent:
    c &= n.ident.s
  of nkSym:
    if n.sym.kind in skProcKinds:
      c &= symBodyDigest(graph, n.sym)
    elif n.sym.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}:
      hashVarSymBody(graph, c, n.sym)
    else:
      c &= hashNonProc(n.sym)
  of nkProcDef, nkFuncDef, nkTemplateDef, nkMacroDef:
    discard # we track usage of proc symbols not their definition
  of nkCharLit..nkUInt64Lit:
    c &= n.intVal
  of nkFloatLit..nkFloat64Lit:
    c &= n.floatVal
  of nkStrLit..nkTripleStrLit:
    c &= n.strVal
  else:
    # Recurse with hashBodyTree (not hashTree) so that symbols nested below
    # this node still go through the symbol-aware branches above.
    for i in 0..<n.len:
      hashBodyTree(graph, c, n.sons[i])
proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash =
  ## compute unique digest of the proc/func/method symbols
  ## recursing into invoked symbols as well
  ##
  ## Results are memoized in `graph.symBodyHashes`, keyed by `sym.id`.
  assert(sym.kind in skProcKinds, $sym.kind)

  graph.symBodyHashes.withValue(sym.id, value):
    return value[]

  # First pass: a digest of the signature and symbol kind only. It is stored
  # right away so that a recursive reference to `sym` from its own body finds
  # a cached value instead of recursing forever.
  var c: MD5Context
  md5Init(c)
  c.hashType(sym.typ, {CoProc})
  c &= char(sym.kind)
  c.md5Final(result.Md5Digest)
  graph.symBodyHashes[sym.id] = result # protect from recursion in the body

  if sym.ast != nil:
    # Second pass: seed with the signature digest, then hash the body with
    # hashBodyTree so that every symbol used in the body is followed.
    md5Init(c)
    c.md5Update(cast[cstring](result.addr), sizeof(result))
    hashBodyTree(graph, c, sym.ast[bodyPos])
    c.md5Final(result.Md5Digest)
    graph.symBodyHashes[sym.id] = result
proc idOrSig*(s: PSym, currentModule: string,
sigCollisions: var CountTable[SigHash]): Rope =
if s.kind in routineKinds and s.typ != nil:

View File

@@ -16,13 +16,11 @@ import
strutils, astalgo, msgs, vmdef, vmgen, nimsets, types, passes,
parser, vmdeps, idents, trees, renderer, options, transf, parseutils,
vmmarshal, gorgeimpl, lineinfos, tables, btrees, macrocacheimpl,
sighashes
modulegraphs, sighashes
from semfold import leValueConv, ordinalValToString
from evaltempl import evalTemplate
from modulegraphs import ModuleGraph, PPassContext
const
traceCode = defined(nimVMDebug)

View File

@@ -14,6 +14,8 @@ from math import sqrt, ln, log10, log2, exp, round, arccos, arcsin,
floor, ceil, `mod`
from os import getEnv, existsEnv, dirExists, fileExists, putEnv, walkDir, getAppFilename
from md5 import getMD5
from sighashes import symBodyDigest
# Registers `<op>Wrapper` as the VM callback named "stdlib.math.<op>".
# Dirty template: `c` is taken from the expansion site.
template mathop(op) {.dirty.} =
registerCallback(c, "stdlib.math." & astToStr(op), `op Wrapper`)
@@ -24,9 +26,15 @@ template osop(op) {.dirty.} =
# Each template below registers `<op>Wrapper` as a VM callback under a
# module-specific name prefix. They are dirty templates: `c` is taken from
# the expansion site.
# Prefix "stdlib.system.".
template systemop(op) {.dirty.} =
registerCallback(c, "stdlib.system." & astToStr(op), `op Wrapper`)
# Prefix "stdlib.io.".
template ioop(op) {.dirty.} =
registerCallback(c, "stdlib.io." & astToStr(op), `op Wrapper`)
# Prefix "stdlib.macros.".
template macrosop(op) {.dirty.} =
registerCallback(c, "stdlib.macros." & astToStr(op), `op Wrapper`)
# Prefix "stdlib.md5.".
template md5op(op) {.dirty.} =
registerCallback(c, "stdlib.md5." & astToStr(op), `op Wrapper`)
template wrap1f_math(op) {.dirty.} =
proc `op Wrapper`(a: VmArgs) {.nimcall.} =
setResult(a, op(getFloat(a, 0)))
@@ -52,6 +60,11 @@ template wrap2s(op, modop) {.dirty.} =
setResult(a, op(getString(a, 0), getString(a, 1)))
modop op
# Defines `<op>Wrapper`, a VM callback that calls `op` with argument 0 read
# as a string and argument 1 read as an int and stores the result, then
# passes `op` to `modop` (one of the registration templates).
template wrap2si(op, modop) {.dirty.} =
proc `op Wrapper`(a: VmArgs) {.nimcall.} =
setResult(a, op(getString(a, 0), getInt(a, 1)))
modop op
template wrap1svoid(op, modop) {.dirty.} =
proc `op Wrapper`(a: VmArgs) {.nimcall.} =
op(getString(a, 0))
@@ -62,9 +75,6 @@ template wrap2svoid(op, modop) {.dirty.} =
op(getString(a, 0), getString(a, 1))
modop op
# Registers `<op>Wrapper` as the VM callback named "stdlib.io.<op>".
# Dirty template: `c` is taken from the expansion site.
template ioop(op) {.dirty.} =
registerCallback(c, "stdlib.io." & astToStr(op), `op Wrapper`)
proc getCurrentExceptionMsgWrapper(a: VmArgs) {.nimcall.} =
  ## VM callback: stores the message of the current exception as the
  ## result, or the empty string when no exception is set.
  if a.currentException.isNil:
    setResult(a, "")
  else:
    setResult(a, a.currentException.sons[3].skipColon.strVal)
@@ -106,6 +116,8 @@ proc registerAdditionalOps*(c: PCtx) =
wrap1f_math(floor)
wrap1f_math(ceil)
wrap1s(getMD5, md5op)
proc `mod Wrapper`(a: VmArgs) {.nimcall.} =
setResult(a, `mod`(getFloat(a, 0), getFloat(a, 1)))
registerCallback(c, "stdlib.math.mod", `mod Wrapper`)
@@ -118,6 +130,7 @@ proc registerAdditionalOps*(c: PCtx) =
wrap1s(fileExists, osop)
wrap2svoid(writeFile, ioop)
wrap1s(readFile, ioop)
wrap2si(readLines, ioop)
systemop getCurrentExceptionMsg
registerCallback c, "stdlib.*.staticWalkDir", proc (a: VmArgs) {.nimcall.} =
setResult(a, staticWalkDirImpl(getString(a, 0), getBool(a, 1)))
@@ -126,3 +139,8 @@ proc registerAdditionalOps*(c: PCtx) =
registerCallback c, "stdlib.os.getCurrentCompilerExe", proc (a: VmArgs) {.nimcall.} =
setResult(a, getAppFilename())
# VM callback behind `macros.symBodyHash`: argument 0 must be a symbol node;
# the result is the string form of that symbol's body digest.
registerCallback c, "stdlib.macros.symBodyHash", proc (a: VmArgs) {.nimcall.} =
let n = getNode(a, 0)
# Any other node kind is rejected with a ValueError.
if n.kind != nkSym: raise newException(ValueError, "node is not a symbol")
setResult(a, $symBodyDigest(c.graph, n.sym))

View File

@@ -360,6 +360,13 @@ when defined(nimHasSignatureHashInMacro):
## the owning module of the symbol and others. The same identifier is
## used in the back-end to produce the mangled symbol name.
proc symBodyHash*(s: NimNode): string {.noSideEffect.} =
## Returns a stable digest for symbols derived not only from type signature
## and owning module, but also implementation body. All procs/variables used in
## the implementation of this symbol are hashed recursively as well, including
## magics from system module.
# Placeholder body: the compiler registers a VM callback named
# "stdlib.macros.symBodyHash" that supplies the actual result.
discard
proc getTypeImpl*(n: typedesc): NimNode {.magic: "NGetType", noSideEffect.}
## Version of ``getTypeImpl`` which takes a ``typedesc``.

View File

@@ -197,7 +197,9 @@ proc `$`*(d: MD5Digest): string =
add(result, digits[d[i].int and 0xF])
proc getMD5*(s: string): string =
## Computes an MD5 value of `s` and returns its string representation.
## Computes an MD5 value of `s` and returns its string representation.
## .. note::
## available at compile time
##
## See also:
## * `toMD5 proc <#toMD5,string>`_ which returns the `MD5Digest` of a string

View File

@@ -612,6 +612,25 @@ proc writeFile*(filename, content: string) {.tags: [WriteIOEffect], benign.} =
else:
sysFatal(IOError, "cannot open: " & filename)
proc readLines*(filename: string, n = 1.Natural): seq[TaintedString] =
  ## Reads the first `n` lines of the file named `filename`. Raises an IO
  ## exception if the file cannot be opened and raises EOF if the file holds
  ## fewer than `n` lines. Available at compile time. A line of text may be
  ## delimited by ``LF`` or ``CRLF``; the newline character(s) are not part
  ## of the returned strings.
  var f: File
  if not open(f, filename):
    sysFatal(IOError, "cannot open: " & filename)
  else:
    try:
      result = newSeq[TaintedString](n)
      for lineNo in 0 ..< n:
        if not readLine(f, result[lineNo]):
          raiseEOF()
    finally:
      # The file is closed on both the success and the EOF path.
      close(f)
iterator lines*(filename: string): TaintedString {.tags: [ReadIOEffect].} =
## Iterates over any line in the file named `filename`.
##

View File

@@ -0,0 +1,150 @@
discard """
output: '''heavy_calc_impl is called
sub_calc1_impl is called
sub_calc2_impl is called
** no changes recompute effectively
** change one input and recompute effectively
heavy_calc_impl is called
sub_calc2_impl is called'''
"""
# sample incremental
import tables
import macros
# Runtime values of the graph's leaf inputs, keyed by input name.
var inputs = initTable[string, float]()
# Memoized results of graph nodes, keyed by the node's key string.
var cache = initTable[string, float]()
# Compile-time map filled by `graph_discovery`: key of a used graph node ->
# key of the node that uses it.
var dep_tree {.compileTime.} = initTable[string, string]()
# Expands to a string literal holding `symBodyHash` of the given symbol.
macro symHash(s: typed{nkSym}): string =
result = newStrLitNode(symBodyHash(s))
#######################################################################################
# User-defined pragma that marks a routine as a graph node and carries its key.
template graph_node(key: string) {.pragma.}
proc tag(n: NimNode): NimNode =
  ## Returns the argument of the `graph_node` pragma attached to the
  ## proc/func definition `n`, or nil when `n` carries no such pragma.
  expectKind(n, {nnkProcDef, nnkFuncDef})
  result = nil
  for prag in n.pragma:
    # Only call-like pragmas whose head is the `graph_node` symbol count.
    if prag.len > 0 and prag[0] == bindSym"graph_node":
      return prag[1]
# Expands to a string literal holding `symBodyHash` of `n`; used as cache key.
macro graph_node_key(n: typed{nkSym}): untyped =
result = newStrLitNode(n.symBodyHash)
macro graph_discovery(n: typed{nkSym}): untyped =
# discovers graph dependency tree and updated dep_tree global var
# Walks the implementation body of `n`. For every proc/func symbol met for
# the first time: if its definition carries a `graph_node` tag, records
# `dep_tree[tag] = body hash of n`; otherwise descends into that routine's
# body. The macro itself expands to an empty node.
let mytag = newStrLitNode(n.symBodyHash)
# Symbols already handled, so each routine is processed once.
var visited: seq[NimNode]
proc discover(n: NimNode) =
case n.kind:
# Leaf node kinds other than symbols carry nothing to follow.
of nnkNone..pred(nnkSym), succ(nnkSym)..nnkNilLit: discard
of nnkSym:
if n.symKind in {nskFunc, nskProc}:
if n notin visited:
visited.add n
let tag = n.getImpl.tag
if tag != nil:
dep_tree[tag.strVal] = mytag.strVal
else:
discover(n.getImpl.body)
else:
for child in n:
discover(child)
discover(n.getImpl.body)
result = newEmptyNode()
#######################################################################################
macro incremental_input(key: static[string], n: untyped{nkFuncDef}): untyped =
# mark leaf nodes of the graph
# Rewrites the func `n`: its pragma list becomes `graph_node(key)` and its
# body becomes a lookup of `inputs[key]`.
template getInput(key) {.dirty.} =
# The table read is wrapped in a noSideEffect block.
{.noSideEffect.}:
inputs[key]
result = n
result.pragma = nnkPragma.newTree(nnkCall.newTree(bindSym"graph_node", newStrLitNode(key)))
result.body = getAst(getInput(key))
macro incremental(n: untyped{nkFuncDef}): untyped =
## incrementalize side effect free computation
## wraps function into caching layer, mark caching function as a graph_node
## injects dependency discovery between graph nodes
# Body of the generated caching function: run dependency discovery, then
# return the cached value or compute, announce and store it.
template cache_func_body(func_name, func_name_str, func_call) {.dirty.} =
{.noSideEffect.}:
graph_discovery(func_name)
let key = graph_node_key(func_name)
if key in cache:
result = cache[key]
else:
echo func_name_str & " is called"
result = func_call
cache[key] = result
# The original implementation is renamed to `<name>_impl`.
let func_name = n.name.strVal & "_impl"
# Call to the implementation, forwarding the first name of each parameter
# definition.
let func_call = nnkCall.newTree(ident func_name)
for i in 1..<n.params.len:
func_call.add n.params[i][0]
# The caching function keeps the original name and signature.
let cache_func = n.copyNimTree
cache_func.body = getAst(cache_func_body(ident func_name, func_name, func_call))
# Its graph_node key is the body hash of the implementation.
cache_func.pragma = nnkPragma.newTree(newCall(bindSym"graph_node",
newCall(bindSym"symHash", ident func_name)))
n.name = ident(func_name)
# Emit the implementation first, then the caching function.
result = nnkStmtList.newTree(n, cache_func)
###########################################################################
### Example
###########################################################################
# Leaf inputs: the bodies are generated by `incremental_input` and read
# `inputs["a1"]` / `inputs["a2"]`.
func input1(): float {.incremental_input("a1").}
func input2(): float {.incremental_input("a2").}
# Intermediate nodes, each depending on one input.
func sub_calc1(a: float): float {.incremental.} =
a + input1()
func sub_calc2(b: float): float {.incremental.} =
b + input2()
# Top node combining both intermediate nodes.
func heavy_calc(a: float, b: float): float {.incremental.} =
sub_calc1(a) + sub_calc2(b)
###########################################################################
## graph finalize and inputs
###########################################################################
macro finalize_dep_tree(): untyped =
  ## Expands to `toTable({key: val, ...})` built from the entries that the
  ## compile-time `dep_tree` holds at the point of expansion.
  let entries = nnkTableConstr.newNimNode
  for key, val in dep_tree:
    entries.add nnkExprColonExpr.newTree(newStrLitNode key, newStrLitNode val)
  result = nnkCall.newTree(bindSym"toTable", entries)
const dep_tree_final = finalize_dep_tree()
proc set_input(key: string, val: float) =
  ## Stores `val` as the value of input `key`, then follows the chain
  ## `key -> dep_tree_final[key] -> ...` and removes each successor's entry
  ## from `cache`, so that those nodes are recomputed on next use.
  inputs[key] = val
  var node = key
  while node.len > 0:
    # A missing entry yields "", which ends the walk after one last `del`.
    let successor = dep_tree_final.getOrDefault(node, "")
    cache.del(successor)
    node = successor
###########################################################################
## demo
###########################################################################
# Initial inputs, followed by a first full computation.
set_input("a1", 5)
set_input("a2", 2)
discard heavy_calc(5.0, 10.0)
# Same call again: per the expected output above, nothing is recomputed.
echo "** no changes recompute effectively"
discard heavy_calc(5.0, 10.0)
# Changing "a2": per the expected output, only heavy_calc and sub_calc2 rerun.
echo "** change one input and recompute effectively"
set_input("a2", 10)
discard heavy_calc(5.0, 10.0)

27
tests/vm/tfile_rw.nim Normal file
View File

@@ -0,0 +1,27 @@
discard """
output: '''ok'''
"""
# test file read write in vm
import os, strutils
# Scratch file placed next to this source file.
const filename = splitFile(currentSourcePath).dir / "tfile_rw.txt"
const mytext = "line1\nline2\nline3"
# Write the file at compile time so the reads below have something to load.
static:
writeFile(filename, mytext)
# Three compile-time ways of reading the file back.
const myfile_str = staticRead(filename)
const myfile_str2 = readFile(filename)
const myfile_str_seq = readLines(filename, 3)
# Compile-time checks that every read matches what was written, then clean up.
static:
doAssert myfile_str == mytext
doAssert myfile_str2 == mytext
doAssert myfile_str_seq[0] == "line1"
doAssert myfile_str_seq[1] == "line2"
doAssert myfile_str_seq.join("\n") == mytext
removeFile(filename)
echo "ok"

View File

@@ -0,0 +1,20 @@
# test sym digest is computable at compile time
import macros, algorithm
import md5
# Single symbol: MD5 of the signature hash and the body hash joined by " - ".
macro testmacro(s: typed{nkSym}): string =
let s = getMD5(signaturehash(s) & " - " & symBodyHash(s))
result = newStrLitNode(s)
# Overloaded symbol (sym choice): MD5 of the concatenated body hashes of all
# candidates.
macro testmacro(s: typed{nkOpenSymChoice|nkClosedSymChoice}): string =
var str = ""
for sym in s:
str &= symBodyHash(sym)
result = newStrLitNode(getMD5(str))
# something recursive and/or generic
discard testmacro(testmacro)
discard testmacro(`[]`)
discard testmacro(binarySearch)
discard testmacro(sort)