From 041d15392aaf732665abab290f0cf5993d909efc Mon Sep 17 00:00:00 2001 From: cooldome Date: Thu, 11 Apr 2019 22:09:11 +0100 Subject: [PATCH] Compiler plugin for implementing incremental computation in user space (#10819) This plugin provides an essential building block for implementing incremental computations in your programs. The idea behind incremental computation is that if you do the same calculation multiple times with slightly different inputs, you don't have to recompute everything from scratch. You also don't want to adopt special algorithms; you would like to write your code in the standard from-scratch manner and get incrementality for free when it is possible. The plugin computes the digest of the proc bodies, recursively hashing all called procs as well. Such a digest, combined with the digest of the argument values, gives a good "name" for the result. The terminology loosely follows the paper "Incremental Computation with Names" (link below). It works well if you have no side effects in your computations. If you have global state in your computations, then you will need problem-specific workarounds to represent the global state in the set of "names". SideEffect tracking in Nim is also useful for this. Classical examples: A dashboard with ticking data: new data arrives non-stop and you would like to update the dashboard, recomputing only the changed outputs. An Excel spreadsheet where the user changes one cell and you would like to recompute all cells that are affected by the change, but do not want to recompute every cell in the spreadsheet. 
--- compiler/ccgmerge.nim | 2 +- compiler/ccgtypes.nim | 2 +- compiler/cgendata.nim | 4 +- compiler/jsgen.nim | 4 +- compiler/modulegraphs.nim | 52 +++++++++++- compiler/sighashes.nim | 150 +++++++++++++++++++--------------- compiler/vm.nim | 4 +- compiler/vmops.nim | 24 +++++- lib/core/macros.nim | 7 ++ lib/pure/md5.nim | 4 +- lib/system/io.nim | 19 +++++ tests/macros/tincremental.nim | 150 ++++++++++++++++++++++++++++++++++ tests/vm/tfile_rw.nim | 27 ++++++ tests/vm/tsignaturehash.nim | 20 +++++ 14 files changed, 387 insertions(+), 82 deletions(-) create mode 100644 tests/macros/tincremental.nim create mode 100644 tests/vm/tfile_rw.nim create mode 100644 tests/vm/tsignaturehash.nim diff --git a/compiler/ccgmerge.nim b/compiler/ccgmerge.nim index 3aca741cc6..a0aa1b05d0 100644 --- a/compiler/ccgmerge.nim +++ b/compiler/ccgmerge.nim @@ -12,7 +12,7 @@ import ast, astalgo, ropes, options, strutils, nimlexbase, msgs, cgendata, rodutils, - intsets, platform, llstream, tables, sighashes, pathutils + intsets, platform, llstream, tables, sighashes, modulegraphs, pathutils # Careful! Section marks need to contain a tabulator so that they cannot # be part of C string literals. 
diff --git a/compiler/ccgtypes.nim b/compiler/ccgtypes.nim index bccb59b6d6..fb0f7dbf40 100644 --- a/compiler/ccgtypes.nim +++ b/compiler/ccgtypes.nim @@ -11,7 +11,7 @@ # ------------------------- Name Mangling -------------------------------- -import sighashes +import sighashes, modulegraphs from lowerings import createObj proc genProcHeader(m: BModule, prc: PSym, asPtr: bool = false): Rope diff --git a/compiler/cgendata.nim b/compiler/cgendata.nim index 9d12a5740f..4cd66b3330 100644 --- a/compiler/cgendata.nim +++ b/compiler/cgendata.nim @@ -11,9 +11,7 @@ import ast, astalgo, ropes, passes, options, intsets, platform, sighashes, - tables, ndi, lineinfos, pathutils - -from modulegraphs import ModuleGraph, PPassContext + tables, ndi, lineinfos, pathutils, modulegraphs type TLabel* = Rope # for the C generator a label is just a rope diff --git a/compiler/jsgen.nim b/compiler/jsgen.nim index b1a7d12f31..5f93cbbbd6 100644 --- a/compiler/jsgen.nim +++ b/compiler/jsgen.nim @@ -32,7 +32,9 @@ import ast, astalgo, strutils, hashes, trees, platform, magicsys, extccomp, options, nversion, nimsets, msgs, std / sha1, bitsets, idents, types, os, tables, times, ropes, math, passes, ccgutils, wordrecg, renderer, - intsets, cgmeth, lowerings, sighashes, lineinfos, rodutils, pathutils, transf + intsets, cgmeth, lowerings, sighashes, modulegraphs, lineinfos, rodutils, + pathutils, transf + from modulegraphs import ModuleGraph, PPassContext diff --git a/compiler/modulegraphs.nim b/compiler/modulegraphs.nim index 82b9750b6f..1ed939e7b8 100644 --- a/compiler/modulegraphs.nim +++ b/compiler/modulegraphs.nim @@ -26,9 +26,11 @@ ## import ast, intsets, tables, options, lineinfos, hashes, idents, - incremental, btrees, sighashes + incremental, btrees, md5 type + SigHash* = distinct Md5Digest + ModuleGraph* = ref object modules*: seq[PSym] ## indexed by int32 fileIdx packageSyms*: TStrTable @@ -58,6 +60,7 @@ type emptyNode*: PNode incr*: IncrementalCtx canonTypes*: Table[SigHash, PType] + 
symBodyHashes*: Table[int, SigHash] # symId to digest mapping importModuleCallback*: proc (graph: ModuleGraph; m: PSym, fileIdx: FileIndex): PSym {.nimcall.} includeFileCallback*: proc (graph: ModuleGraph; m: PSym, fileIdx: FileIndex): PNode {.nimcall.} recordStmt*: proc (graph: ModuleGraph; m: PSym; n: PNode) {.nimcall.} @@ -81,6 +84,52 @@ type close: TPassClose, isFrontend: bool] + +const + cb64 = [ + "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", + "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", + "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", + "0", "1", "2", "3", "4", "5", "6", "7", "8", "9a", + "9b", "9c"] + +proc toBase64a(s: cstring, len: int): string = + ## encodes `s` into base64 representation. + result = newStringOfCap(((len + 2) div 3) * 4) + result.add '_' + var i = 0 + while i < len - 2: + let a = ord(s[i]) + let b = ord(s[i+1]) + let c = ord(s[i+2]) + result.add cb64[a shr 2] + result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)] + result.add cb64[((b and 0x0F) shl 2) or ((c and 0xC0) shr 6)] + result.add cb64[c and 0x3F] + inc(i, 3) + if i < len-1: + let a = ord(s[i]) + let b = ord(s[i+1]) + result.add cb64[a shr 2] + result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)] + result.add cb64[((b and 0x0F) shl 2)] + elif i < len: + let a = ord(s[i]) + result.add cb64[a shr 2] + result.add cb64[(a and 3) shl 4] + +proc `$`*(u: SigHash): string = + toBase64a(cast[cstring](unsafeAddr u), sizeof(u)) + +proc `==`*(a, b: SigHash): bool = + result = equalMem(unsafeAddr a, unsafeAddr b, sizeof(a)) + +proc hash*(u: SigHash): Hash = + result = 0 + for x in 0..3: + result = (result shl 8) or u.MD5Digest[x].int + proc hash*(x: FileIndex): Hash {.borrow.} when defined(nimfind): @@ -140,6 +189,7 @@ proc newModuleGraph*(cache: IdentCache; config: ConfigRef): ModuleGraph = result.cacheCounters = initTable[string, BiggestInt]() 
result.cacheTables = initTable[string, BTree[string, PNode]]() result.canonTypes = initTable[SigHash, PType]() + result.symBodyHashes = initTable[int, SigHash]() proc resetAllModules*(g: ModuleGraph) = initStrTable(g.packageSyms) diff --git a/compiler/sighashes.nim b/compiler/sighashes.nim index f88e45dbe4..8e4458942a 100644 --- a/compiler/sighashes.nim +++ b/compiler/sighashes.nim @@ -9,81 +9,26 @@ ## Computes hash values for routine (proc, method etc) signatures. -import ast, md5, tables, ropes +import ast, tables, ropes, md5, modulegraphs from hashes import Hash from astalgo import debug import types from strutils import startsWith, contains -when false: - type - SigHash* = uint32 ## a hash good enough for a filename or a proc signature +proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len) +proc `&=`(c: var MD5Context, ch: char) = md5Update(c, unsafeAddr ch, 1) +proc `&=`(c: var MD5Context, r: Rope) = + for l in leaves(r): md5Update(c, l, l.len) +proc `&=`(c: var MD5Context, i: BiggestInt) = + md5Update(c, cast[cstring](unsafeAddr i), sizeof(i)) +proc `&=`(c: var MD5Context, f: BiggestFloat) = + md5Update(c, cast[cstring](unsafeAddr f), sizeof(f)) +proc `&=`(c: var MD5Context, s: SigHash) = + md5Update(c, cast[cstring](unsafeAddr s), sizeof(s)) +template lowlevel(v) = + md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v)) - proc sdbmHash(hash: SigHash, c: char): SigHash {.inline.} = - return SigHash(c) + (hash shl 6) + (hash shl 16) - hash - template `&=`*(x: var SigHash, c: char) = x = sdbmHash(x, c) - template `&=`*(x: var SigHash, s: string) = - for c in s: x = sdbmHash(x, c) - -else: - type - SigHash* = distinct Md5Digest - - const - cb64 = [ - "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", - "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", - "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", - "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", - "0", "1", "2", "3", "4", "5", "6", 
"7", "8", "9a", - "9b", "9c"] - - proc toBase64a(s: cstring, len: int): string = - ## encodes `s` into base64 representation. - result = newStringOfCap(((len + 2) div 3) * 4) - result.add '_' - var i = 0 - while i < len - 2: - let a = ord(s[i]) - let b = ord(s[i+1]) - let c = ord(s[i+2]) - result.add cb64[a shr 2] - result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)] - result.add cb64[((b and 0x0F) shl 2) or ((c and 0xC0) shr 6)] - result.add cb64[c and 0x3F] - inc(i, 3) - if i < len-1: - let a = ord(s[i]) - let b = ord(s[i+1]) - result.add cb64[a shr 2] - result.add cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)] - result.add cb64[((b and 0x0F) shl 2)] - elif i < len: - let a = ord(s[i]) - result.add cb64[a shr 2] - result.add cb64[(a and 3) shl 4] - - proc `$`*(u: SigHash): string = - toBase64a(cast[cstring](unsafeAddr u), sizeof(u)) - proc `&=`(c: var MD5Context, s: string) = md5Update(c, s, s.len) - proc `&=`(c: var MD5Context, ch: char) = md5Update(c, unsafeAddr ch, 1) - proc `&=`(c: var MD5Context, r: Rope) = - for l in leaves(r): md5Update(c, l, l.len) - proc `&=`(c: var MD5Context, i: BiggestInt) = - md5Update(c, cast[cstring](unsafeAddr i), sizeof(i)) - - template lowlevel(v) = - md5Update(c, cast[cstring](unsafeAddr(v)), sizeof(v)) - - proc `==`*(a, b: SigHash): bool = - # {.borrow.} - result = equalMem(unsafeAddr a, unsafeAddr b, sizeof(a)) - - proc hash*(u: SigHash): Hash = - result = 0 - for x in 0..3: - result = (result shl 8) or u.MD5Digest[x].int type ConsiderFlag* = enum CoProc @@ -359,6 +304,75 @@ proc sigHash*(s: PSym): SigHash = else: result = hashNonProc(s) +proc symBodyDigest*(graph: ModuleGraph, sym: PSym): SigHash + +proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode) + +proc hashVarSymBody(graph: ModuleGraph, c: var MD5Context, s: PSym) = + assert: s.kind in {skParam, skResult, skVar, skLet, skConst, skForVar} + if sfGlobal notin s.flags: + c &= char(s.kind) + c &= s.name.s + else: + c &= hashNonProc(s) + # this one 
works for let and const but not for var. True variables can change value + # later on. it is user resposibility to hash his global state if required + if s.ast != nil and s.ast.kind == nkIdentDefs: + hashBodyTree(graph, c, s.ast[^1]) + else: + hashBodyTree(graph, c, s.ast) + +proc hashBodyTree(graph: ModuleGraph, c: var MD5Context, n: PNode) = + # hash Nim tree recursing into simply + if n == nil: + c &= "nil" + return + c &= char(n.kind) + case n.kind + of nkEmpty, nkNilLit, nkType: discard + of nkIdent: + c &= n.ident.s + of nkSym: + if n.sym.kind in skProcKinds: + c &= symBodyDigest(graph, n.sym) + elif n.sym.kind in {skParam, skResult, skVar, skLet, skConst, skForVar}: + hashVarSymBody(graph, c, n.sym) + else: + c &= hashNonProc(n.sym) + of nkProcDef, nkFuncDef, nkTemplateDef, nkMacroDef: + discard # we track usage of proc symbols not their definition + of nkCharLit..nkUInt64Lit: + c &= n.intVal + of nkFloatLit..nkFloat64Lit: + c &= n.floatVal + of nkStrLit..nkTripleStrLit: + c &= n.strVal + else: + for i in 0..`_ which returns the `MD5Digest` of a string diff --git a/lib/system/io.nim b/lib/system/io.nim index e93f602ae2..4497b1b0ba 100644 --- a/lib/system/io.nim +++ b/lib/system/io.nim @@ -612,6 +612,25 @@ proc writeFile*(filename, content: string) {.tags: [WriteIOEffect], benign.} = else: sysFatal(IOError, "cannot open: " & filename) + +proc readLines*(filename: string, n = 1.Natural): seq[TaintedString] = + ## read `n` lines from the file named `filename`. Raises an IO exception + ## in case of an error. Raises EOF if file does not contain at least `n` lines. + ## Available at compile time. A line of text may be delimited by ``LF`` or ``CRLF``. + ## The newline character(s) are not part of the returned strings. + var f: File + if open(f, filename): + try: + result = newSeq[TaintedString](n) + for i in 0 .. 
n - 1: + if not readLine(f, result[i]): + raiseEOF() + finally: + close(f) + else: + sysFatal(IOError, "cannot open: " & filename) + + iterator lines*(filename: string): TaintedString {.tags: [ReadIOEffect].} = ## Iterates over any line in the file named `filename`. ## diff --git a/tests/macros/tincremental.nim b/tests/macros/tincremental.nim new file mode 100644 index 0000000000..401d6f3f84 --- /dev/null +++ b/tests/macros/tincremental.nim @@ -0,0 +1,150 @@ +discard """ + output: '''heavy_calc_impl is called +sub_calc1_impl is called +sub_calc2_impl is called +** no changes recompute effectively +** change one input and recompute effectively +heavy_calc_impl is called +sub_calc2_impl is called''' +""" + +# sample incremental + +import tables +import macros + +var inputs = initTable[string, float]() +var cache = initTable[string, float]() +var dep_tree {.compileTime.} = initTable[string, string]() + +macro symHash(s: typed{nkSym}): string = + result = newStrLitNode(symBodyHash(s)) + +####################################################################################### + +template graph_node(key: string) {.pragma.} + +proc tag(n: NimNode): NimNode = + ## returns graph node unique name of a function or nil if it is not a graph node + expectKind(n, {nnkProcDef, nnkFuncDef}) + for p in n.pragma: + if p.len > 0 and p[0] == bindSym"graph_node": + return p[1] + return nil + +macro graph_node_key(n: typed{nkSym}): untyped = + result = newStrLitNode(n.symBodyHash) + +macro graph_discovery(n: typed{nkSym}): untyped = + # discovers graph dependency tree and updated dep_tree global var + let mytag = newStrLitNode(n.symBodyHash) + var visited: seq[NimNode] + proc discover(n: NimNode) = + case n.kind: + of nnkNone..pred(nnkSym), succ(nnkSym)..nnkNilLit: discard + of nnkSym: + if n.symKind in {nskFunc, nskProc}: + if n notin visited: + visited.add n + let tag = n.getImpl.tag + if tag != nil: + dep_tree[tag.strVal] = mytag.strVal + else: + discover(n.getImpl.body) + else: + for 
child in n: + discover(child) + discover(n.getImpl.body) + result = newEmptyNode() + +####################################################################################### + +macro incremental_input(key: static[string], n: untyped{nkFuncDef}): untyped = + # mark leaf nodes of the graph + template getInput(key) {.dirty.} = + {.noSideEffect.}: + inputs[key] + result = n + result.pragma = nnkPragma.newTree(nnkCall.newTree(bindSym"graph_node", newStrLitNode(key))) + result.body = getAst(getInput(key)) + +macro incremental(n: untyped{nkFuncDef}): untyped = + ## incrementalize side effect free computation + ## wraps function into caching layer, mark caching function as a graph_node + ## injects dependency discovery between graph nodes + template cache_func_body(func_name, func_name_str, func_call) {.dirty.} = + {.noSideEffect.}: + graph_discovery(func_name) + let key = graph_node_key(func_name) + if key in cache: + result = cache[key] + else: + echo func_name_str & " is called" + result = func_call + cache[key] = result + + let func_name = n.name.strVal & "_impl" + let func_call = nnkCall.newTree(ident func_name) + for i in 1..