new .rod file implementation; part 1: writing of the file

This commit is contained in:
Araq
2018-02-15 15:42:14 +01:00
parent 89782460a3
commit f3b8d92216
8 changed files with 594 additions and 8 deletions

View File

@@ -16,7 +16,7 @@ import
cgen, jsgen, json, nversion,
platform, nimconf, importer, passaux, depends, vm, vmdef, types, idgen,
docgen2, service, parser, modules, ccgutils, sigmatch, ropes,
modulegraphs, tables
modulegraphs, tables, rod
from magicsys import systemModule, resetSysTypes
@@ -157,6 +157,7 @@ proc mainCommand*(graph: ModuleGraph; cache: IdentCache) =
when SimulateCaasMemReset:
gGlobalOptions.incl(optCaasEnabled)
setupModuleCache()
# In "nim serve" scenario, each command must reset the registered passes
clearPasses()
gLastCmdTime = epochTime()

View File

@@ -11,7 +11,7 @@
import
ast, astalgo, magicsys, std / sha1, rodread, msgs, cgendata, sigmatch, options,
idents, os, lexer, idgen, passes, syntaxes, llstream, modulegraphs
idents, os, lexer, idgen, passes, syntaxes, llstream, modulegraphs, rod
when false:
type
@@ -125,7 +125,7 @@ proc newModule(graph: ModuleGraph; fileIdx: int32): PSym =
# We cannot call ``newSym`` here, because we have to circumvent the ID
# mechanism, which we do in order to assign each module a persistent ID.
new(result)
result.id = - 1 # for better error checking
result.id = -1 # for better error checking
result.kind = skModule
let filename = fileIdx.toFullPath
result.name = getIdent(splitFile(filename).name)
@@ -175,7 +175,7 @@ proc compileModule*(graph: ModuleGraph; fileIdx: int32; cache: IdentCache, flags
internalError("handleSymbolFile should have set the module's ID")
return
else:
result.id = getID()
result.id = getModuleId(toFullPath(fileIdx))
discard processModule(graph, result,
if sfMainModule in flags and gProjectIsStdin: stdin.llStreamOpen else: nil,
rd, cache)

127
compiler/rod.nim Normal file
View File

@@ -0,0 +1,127 @@
#
#
# The Nim Compiler
# (c) Copyright 2017 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements the canonicalization for the various caching mechanisms.
import ast, idgen
when defined(nimSymbolfiles):
  # The real implementation lives in rodimpl.nim and is textually included.
  include rodimpl
else:
  # Without -d:nimSymbolfiles the caching entry points collapse to no-ops,
  # and module ids are simply drawn from the regular id generator.
  template setupModuleCache* =
    discard

  template storeNode*(module: PSym; n: PNode) =
    discard

  template getModuleId*(fullpath: string): int =
    getID()
when false:
type
  BlobWriter* = object
    ## Growable string buffer paired with a position.
    buf: string
    pos: int

  SerializationAction = enum
    ## Direction selector used by the `field` template.
    acRead,
    acWrite
# Varint implementation inspired by SQLite.
proc rdVaruint64(z: ptr UncheckedArray[byte]; n: int; pResult: var uint64): int =
  ## Decodes one variable-length unsigned integer from `z`.
  ##
  ## * `z` - start of the encoded bytes.
  ## * `n` - number of bytes that may be read from `z`.
  ## * `pResult` - receives the decoded value on success.
  ##
  ## Returns the number of bytes consumed (1..9), or 0 when fewer than the
  ## required number of bytes are available; `pResult` is left untouched then.
  ##
  ## Layout, selected by the first byte `A0`:
  ## * `0..240`   - the value is `A0` itself.
  ## * `241..248` - `240 + 256 * (A0 - 241) + A1`.
  ## * `249`      - `2288 + 256 * A1 + A2`.
  ## * `250..255` - a big-endian integer of `A0 - 247` bytes follows.
  if n < 1: return 0
  # Widen the tag once so none of the arithmetic below happens in byte width.
  let tag = int(z[0])
  if tag <= 240:
    pResult = uint64(tag)
    return 1
  if tag <= 248:
    if n < 2: return 0
    pResult = uint64(tag - 241) * 256 + uint64(z[1]) + 240
    return 2
  # From here on the encoding occupies `tag - 246` bytes in total.
  if n < tag - 246: return 0
  if tag == 249:
    pResult = 2288'u64 + 256'u64 * uint64(z[1]) + uint64(z[2])
    return 3
  # Tags 250..255 carry 3..8 payload bytes, most significant byte first.
  let payloadLen = tag - 247
  var value = 0'u64
  for i in 1 .. payloadLen:
    value = (value shl 8) or uint64(z[i])
  pResult = value
  return payloadLen + 1
proc varintWrite32(z: ptr UncheckedArray[byte]; y: uint32) =
  ## Stores `y` into `z[0..3]`, most significant byte first.
  var shift = 24
  for i in 0 .. 3:
    z[i] = uint8(y shr shift)
    dec shift, 8
proc sqlite4PutVarint64(z: ptr UncheckedArray[byte], x: uint64): int =
  ## Writes `x` into `z` as a variable-length integer and returns the number
  ## of bytes used (1..9). The buffer `z` must be at least 9 bytes long to
  ## accommodate the largest possible encoding.
  ##
  ## Layout, selected by the magnitude of `x`:
  ## * `x <= 240`   - one byte holding `x`.
  ## * `x <= 2287`  - two bytes: `241 + (x - 240) div 256`, `(x - 240) mod 256`.
  ## * `x <= 67823` - three bytes: `249`, then `x - 2288` as 16 bit big-endian.
  ## * otherwise    - a tag byte `247 + k` followed by the `k` significant
  ##   bytes of `x` (3 <= k <= 8), most significant byte first.
  if x <= 240:
    z[0] = byte(x)
    return 1
  if x <= 2287:
    let y = x - 240
    z[0] = byte((y shr 8) + 241)
    z[1] = byte(y and 255)
    return 2
  if x <= 67823:
    let y = x - 2288
    z[0] = 249
    z[1] = byte(y shr 8)
    z[2] = byte(y and 255)
    return 3
  # Count how many bytes are needed to hold `x`; never fewer than three
  # because smaller values were handled by the compact forms above.
  var payloadLen = 3
  while payloadLen < 8 and (x shr (8 * payloadLen)) != 0:
    inc payloadLen
  z[0] = byte(247 + payloadLen)
  for i in 1 .. payloadLen:
    # Mask explicitly so the narrowing conversion never sees a wide value.
    z[i] = byte((x shr (8 * (payloadLen - i))) and 255)
  return payloadLen + 1
# Serialization helper that picks, at compile time (`when`), between reading
# into `x` and writing, depending on `action`.
# NOTE(review): `readBiggestInt` / `writeBiggestInt` are not defined in the
# visible code, and this template appears to belong to the disabled
# `when false:` section above - confirm before enabling.
template field(x: BiggestInt; action: SerializationAction) =
when action == acRead:
readBiggestInt(x)
else:
# NOTE(review): called without `x`, unlike the read branch; looks unfinished.
writeBiggestInt()

452
compiler/rodimpl.nim Normal file
View File

@@ -0,0 +1,452 @@
#
#
# The Nim Compiler
# (c) Copyright 2018 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements the canonicalization for the various caching mechanisms.
import strutils, os, intsets, ropes, db_sqlite, msgs, options, types,
renderer, rodutils, std / sha1
# Connection to the cache database. It is assigned in `setupModuleCache` and
# used by every `db.exec` / `db.getRow` / `db.getValue` call in this file.
var db: DbConn
proc getModuleId*(fullpath: string): int =
  ## Returns the persistent id of the module stored at `fullpath`.
  ##
  ## * When `gSymbolFiles` is not `v2Sf`, a fresh id from `getID()` is
  ##   returned and the database is not touched.
  ## * A path that is not yet in the `modules` table is inserted together
  ##   with the hash of the file; the id of the new row is returned.
  ## * A known path whose stored hash equals the current file hash yields the
  ##   **negated** id, signalling that the cached data is still usable.
  ## * A known path whose hash changed yields the positive id; the stored
  ##   hash is updated and all rows of that module are removed from `types`,
  ##   `syms`, `toplevelstmts` and `statics`.
  if gSymbolFiles != v2Sf: return getID()
  let module = db.getRow(
    sql"select id, fullHash from modules where fullpath = ?", fullpath)
  let currentFullhash = $secureHashFile(fullpath)
  if module[0].len == 0:
    # Three columns are listed and three values are passed, so the statement
    # needs three placeholders; with only two the insert cannot succeed.
    result = int db.insertID(
      sql"insert into modules(fullpath, interfHash, fullHash) values (?, ?, ?)",
      fullpath, "", currentFullhash)
  else:
    result = parseInt(module[0])
    if currentFullhash == module[1]:
      # not changed, so use the cached AST (even if it might be wrong
      # due to its dependencies):
      doAssert(result != 0)
      result = -result
    else:
      # The file changed: remember the new hash and drop everything that was
      # stored for the previous version of this module.
      db.exec(sql"update modules set fullHash = ? where id = ?",
        currentFullhash, module[0])
      db.exec(sql"delete from types where module = ?", module[0])
      db.exec(sql"delete from syms where module = ?", module[0])
      db.exec(sql"delete from toplevelstmts where module = ?", module[0])
      db.exec(sql"delete from statics where module = ?", module[0])
type
  TRodWriter = object
    ## Bookkeeping for serializing one module.
    module: PSym        # module whose data is currently being written
    sstack: seq[PSym]   # symbols that still have to be stored
    tstack: seq[PType]  # types that still have to be stored
    tmarks: IntSet      # ids of types that were already pushed
    smarks: IntSet      # ids of symbols that were already pushed

  PRodWriter = var TRodWriter
proc initRodWriter(module: PSym): TRodWriter =
  ## Creates a writer for `module` with empty work stacks and empty mark sets.
  result.module = module
  result.sstack = @[]
  result.tstack = @[]
  result.tmarks = initIntSet()
  result.smarks = initIntSet()
when false:
  # Disabled helpers. `addInclDep` refers to `w.inclDeps` and `fileIdx`,
  # neither of which exists in the visible part of this file.
  proc getDefines(): string =
    ## Joins all defined symbol names with single spaces.
    result = ""
    for name in definedSymbolNames():
      if result.len != 0:
        result.add(" ")
      result.add(name)

  proc addInclDep(w: PRodWriter, dep: string; info: TLineInfo) =
    ## Appends one line to `w.inclDeps`: the file index of the resolved
    ## include, a space, the hash of that file and `rodNL`.
    let resolved = dep.findModule(info.toFullPath)
    encodeVInt(fileIdx(w, resolved), w.inclDeps)
    w.inclDeps.add(" ")
    encodeStr($secureHashFile(resolved), w.inclDeps)
    w.inclDeps.add(rodNL)
# Line terminator string. In the visible code it is referenced only by the
# `addInclDep` helper above.
const
rodNL = "\L"
proc pushType(w: PRodWriter, t: PType) =
  ## Queues `t` on the type stack unless its id was already recorded in
  ## `w.tmarks`; the id is recorded as a side effect of the check.
  let seenBefore = containsOrIncl(w.tmarks, t.id)
  if seenBefore: return
  w.tstack.add(t)
proc pushSym(w: PRodWriter, s: PSym) =
  ## Queues `s` on the symbol stack unless its id was already recorded in
  ## `w.smarks`; the id is recorded as a side effect of the check.
  let seenBefore = containsOrIncl(w.smarks, s.id)
  if seenBefore: return
  w.sstack.add(s)
proc encodeNode(w: PRodWriter, fInfo: TLineInfo, n: PNode,
                result: var string) =
  ## Appends the textual encoding of the AST `n` to `result`.
  ##
  ## `fInfo` is the line information of the enclosing node; only the parts of
  ## `n.info` that differ from it are written. Types and symbols referenced by
  ## the node are queued on `w` so that they get stored as well.
  if n == nil:
    # nil nodes have to be stored too:
    result.add("()")
    return
  result.add('(')
  encodeVInt(ord(n.kind), result)
  # we do not write comments for now
  # Line information takes easily 20% or more of the filesize! Therefore only
  # the components that differ from the parent's line information are
  # written: column alone, column and line, or column, line and file index.
  let
    fileDiffers = fInfo.fileIndex != n.info.fileIndex
    lineDiffers = fileDiffers or fInfo.line != n.info.line
  if lineDiffers or fInfo.col != n.info.col:
    result.add('?')
    encodeVInt(n.info.col, result)
    if lineDiffers:
      result.add(',')
      encodeVInt(n.info.line, result)
      if fileDiffers:
        result.add(',')
        encodeVInt(n.info.fileIndex, result)
  let persistent = n.flags * PersistentNodeFlags
  if persistent != {}:
    result.add('$')
    encodeVInt(cast[int32](persistent), result)
  if n.typ != nil:
    result.add('^')
    encodeVInt(n.typ.id, result)
    pushType(w, n.typ)
  # Payload: literals and identifiers carry a value after '!', a value equal
  # to the zero/empty default is omitted; all other kinds carry their sons.
  case n.kind
  of nkCharLit..nkUInt64Lit:
    if n.intVal != 0:
      result.add('!')
      encodeVBiggestInt(n.intVal, result)
  of nkFloatLit..nkFloat64Lit:
    if n.floatVal != 0.0:
      result.add('!')
      encodeStr($n.floatVal, result)
  of nkStrLit..nkTripleStrLit:
    if n.strVal != "":
      result.add('!')
      encodeStr(n.strVal, result)
  of nkIdent:
    result.add('!')
    encodeStr(n.ident.s, result)
  of nkSym:
    result.add('!')
    encodeVInt(n.sym.id, result)
    pushSym(w, n.sym)
  else:
    for i in 0 ..< sonsLen(n):
      encodeNode(w, n.info, n.sons[i], result)
  result.add(')')
proc encodeLoc(w: PRodWriter, loc: TLoc, result: var string) =
  ## Appends `loc` to `result`, wrapped in `<` and `>`. Fields holding their
  ## default value are skipped; if nothing at all had to be written the
  ## opening `<` is removed again so that `result` is left unchanged.
  let startLen = result.len
  result.add('<')
  if loc.k != low(loc.k):
    encodeVInt(ord(loc.k), result)
  if loc.storage != low(loc.storage):
    result.add('*')
    encodeVInt(ord(loc.storage), result)
  if loc.flags != {}:
    result.add('$')
    encodeVInt(cast[int32](loc.flags), result)
  if loc.lode != nil:
    result.add('^')
    encodeNode(w, unknownLineInfo(), loc.lode, result)
    #encodeVInt(cast[int32](loc.t.id), result)
    #pushType(w, loc.t)
  if loc.r != nil:
    result.add('!')
    encodeStr($loc.r, result)
  if result.len == startLen + 1:
    # no data was necessary, so remove the '<' again:
    setLen(result, startLen)
  else:
    result.add('>')
proc encodeType(w: PRodWriter, t: PType, result: var string) =
  ## Appends the textual encoding of the type `t` to `result`. Every symbol
  ## and type that `t` refers to is written as an id and queued on `w`.
  template symRef(tag: char; target: PSym) =
    # Writes `tag` followed by the symbol's id and queues the symbol.
    add(result, tag)
    encodeVInt(target.id, result)
    pushSym(w, target)

  if t == nil:
    # nil nodes have to be stored too:
    result.add("[]")
    return
  if t.kind == tyForward: internalError("encodeType: tyForward")
  # The type is in a line of its own, so no closing bracket is written; the
  # preceding '[' lets the rodfile viewer disambiguate the data section.
  add(result, '[')
  encodeVInt(ord(t.kind), result)
  add(result, '+')
  encodeVInt(t.id, result)
  if t.n != nil:
    encodeNode(w, w.module.info, t.n, result)
  if t.flags != {}:
    add(result, '$')
    encodeVInt(cast[int32](t.flags), result)
  if t.callConv != low(t.callConv):
    add(result, '?')
    encodeVInt(ord(t.callConv), result)
  if t.owner != nil: symRef('*', t.owner)
  if t.sym != nil: symRef('&', t.sym)
  if t.size != -1:
    add(result, '/')
    encodeVBiggestInt(t.size, result)
  if t.align != 2:
    add(result, '=')
    encodeVInt(t.align, result)
  if t.lockLevel.ord != UnspecifiedLockLevel.ord:
    add(result, '\14')
    encodeVInt(t.lockLevel.int16, result)
  # Type-bound operations:
  if t.destructor != nil and t.destructor.id != 0:
    symRef('\15', t.destructor)
  if t.deepCopy != nil: symRef('\16', t.deepCopy)
  if t.assignment != nil: symRef('\17', t.assignment)
  if t.sink != nil: symRef('\18', t.sink)
  for i, s in items(t.methods):
    add(result, '\19')
    encodeVInt(i, result)
    symRef('\20', s)
  encodeLoc(w, t.loc, result)
  for i in 0 ..< sonsLen(t):
    let son = t.sons[i]
    if son == nil:
      add(result, "^()")
    else:
      add(result, '^')
      encodeVInt(son.id, result)
      pushType(w, son)
proc encodeLib(w: PRodWriter, lib: PLib, info: TLineInfo, result: var string) =
  ## Appends `lib` to `result` as three `|`-prefixed parts: its kind, its
  ## name and its path node (encoded relative to `info`).
  result.add('|')
  encodeVInt(ord(lib.kind), result)
  result.add('|')
  encodeStr($lib.name, result)
  result.add('|')
  encodeNode(w, info, lib.path, result)
proc encodeInstantiations(w: PRodWriter; s: seq[PInstantiation];
                          result: var string) =
  ## Appends every instantiation in `s` to `result`: the id of the
  ## instantiated symbol, the ids of its concrete types and its `compilesId`.
  ## The referenced symbols and types are queued on `w`.
  for inst in s:
    result.add('\15')
    encodeVInt(inst.sym.id, result)
    pushSym(w, inst.sym)
    for concrete in inst.concreteTypes:
      result.add('\17')
      encodeVInt(concrete.id, result)
      pushType(w, concrete)
    result.add('\20')
    encodeVInt(inst.compilesId, result)
proc encodeSym(w: PRodWriter, s: PSym, result: var string) =
  ## Appends the textual encoding of the symbol `s` to `result`. Symbols and
  ## types that `s` refers to are written as ids and queued on `w`. The AST
  ## of the symbol, if any, is always the last part of the encoding.
  template tagged(tag: char; value: untyped) =
    # Writes `tag` followed by the encoded integer `value`.
    result.add(tag)
    encodeVInt(value, result)

  template symRef(tag: char; target: PSym) =
    tagged(tag, target.id)
    pushSym(w, target)

  template typeRef(tag: char; target: PType) =
    tagged(tag, target.id)
    pushType(w, target)

  if s == nil:
    # nil nodes have to be stored too:
    result.add("{}")
    return
  # we need no surrounding {} here because the symbol is in a line of its own
  encodeVInt(ord(s.kind), result)
  tagged('+', s.id)
  result.add('&')
  encodeStr(s.name.s, result)
  if s.typ != nil: typeRef('^', s.typ)
  # Line information: column and line are left empty when they are -1, the
  # separators and the file index are always written.
  result.add('?')
  if s.info.col != -1'i16: encodeVInt(s.info.col, result)
  result.add(',')
  if s.info.line != -1'i16: encodeVInt(s.info.line, result)
  result.add(',')
  encodeVInt(s.info.fileIndex, result)
  if s.owner != nil: symRef('*', s.owner)
  if s.flags != {}: tagged('$', cast[int32](s.flags))
  if s.magic != mNone: tagged('@', ord(s.magic))
  tagged('!', cast[int32](s.options))
  if s.position != 0: tagged('%', s.position)
  if s.offset != -1: tagged('`', s.offset)
  encodeLoc(w, s.loc, result)
  if s.annex != nil: encodeLib(w, s.annex, s.info, result)
  if s.constraint != nil:
    result.add('#')
    encodeNode(w, unknownLineInfo(), s.constraint, result)
  # Kind specific fields:
  case s.kind
  of skType, skGenericParam:
    for cached in s.typeInstCache:
      typeRef('\14', cached)
  of routineKinds:
    encodeInstantiations(w, s.procInstCache, result)
    if s.gcUnsafetyReason != nil: symRef('\16', s.gcUnsafetyReason)
  of skModule, skPackage:
    encodeInstantiations(w, s.usedGenerics, result)
    # we don't serialize:
    #tab*: TStrTable         # interface table for modules
  of skLet, skVar, skField, skForVar:
    if s.guard != nil: symRef('\18', s.guard)
    if s.bitsize != 0: tagged('\19', s.bitsize)
  else:
    discard
  # lazy loading will soon reload the ast lazily, so the ast needs to be
  # the last entry of a symbol:
  if s.ast != nil:
    # A full AST is stored in every case; Nim's heavy compile-time evaluation
    # features make storing only a dummy AST unfeasible nowadays.
    encodeNode(w, s.info, s.ast, result)
proc storeSym(w: PRodWriter; s: PSym) =
  ## Encodes `s` and inserts it into the `syms` table under the absolute
  ## value of the current module's id.
  var encoded = newStringOfCap(160)
  encodeSym(w, s, encoded)
  # XXX only store the name for exported symbols in order to speed up lookup
  # times once we enable the skStub logic.
  let insertStmt =
    sql"insert into syms(nimid, module, name, data) values (?, ?, ?, ?)"
  db.exec(insertStmt, s.id, abs(w.module.id), s.name.s, encoded)
proc storeType(w: PRodWriter; t: PType) =
  ## Encodes `t` and inserts it into the `types` table under the absolute
  ## value of the current module's id.
  var encoded = newStringOfCap(160)
  encodeType(w, t, encoded)
  let insertStmt = sql"insert into types(nimid, module, data) values (?, ?, ?)"
  db.exec(insertStmt, t.id, abs(w.module.id), encoded)
# Writer state shared by all `storeNode` calls. `storeNode` re-points its
# `module` field on every call; the stacks and mark sets are kept between calls.
var w = initRodWriter(nil)
proc storeNode*(module: PSym; n: PNode) =
  ## Stores the top level statement `n` of `module` in the `toplevelstmts`
  ## table and then stores every symbol and type that became pending while
  ## encoding it. Does nothing unless `gSymbolFiles` is `v2Sf`.
  if gSymbolFiles != v2Sf: return
  w.module = module
  var encoded = newStringOfCap(160)
  encodeNode(w, module.info, n, encoded)
  db.exec(sql"insert into toplevelstmts(module, data) values (?, ?)",
    abs(module.id), encoded)
  # Drain the work stacks, symbols first. Storing an entity may queue further
  # symbols and types, hence the loop. The counter is a safety net: once more
  # than 10_000 entities have been processed the program is terminated.
  var processed = 0
  while true:
    if processed > 10_000:
      quit "loop never ends!"
    if w.sstack.len > 0:
      let pendingSym = w.sstack.pop()
      storeSym(w, pendingSym)
    elif w.tstack.len > 0:
      let pendingType = w.tstack.pop()
      storeType(w, pendingType)
    else:
      break
    inc processed
proc createDb() =
  ## Creates the schema of the cache database on the connection `db`:
  ## the tables `controlblock`, `modules`, `types`, `syms`, `toplevelstmts`
  ## and `statics`, their indexes, and the initial `controlblock` row.
  db.exec(sql"""
    create table if not exists controlblock(
      idgen integer not null
    );
  """)

  db.exec(sql"""
    create table if not exists modules(
      id integer primary key,
      fullpath varchar(8000) not null,
      interfHash varchar(256) not null,
      fullHash varchar(256) not null,

      created timestamp not null default (DATETIME('now'))
    );""")
  # Index names are shared by the whole database, so this index must not
  # reuse the name of the `syms(name)` index created further down; otherwise
  # that later `create index if not exists` would silently do nothing.
  db.exec(sql"""create unique index if not exists ModuleFullpathIx on modules(fullpath);""")

  db.exec(sql"""
    create table if not exists types(
      id integer primary key,
      nimid integer not null,
      module integer not null,
      data blob not null,
      foreign key (module) references modules(id)
    );
  """)
  db.exec sql"create index if not exists TypeByModuleIdx on types(module);"
  db.exec sql"create index if not exists TypeByNimIdIdx on types(nimid);"

  db.exec(sql"""
    create table if not exists syms(
      id integer primary key,
      nimid integer not null,
      module integer not null,
      name varchar(256) not null,
      data blob not null,
      foreign key (module) references modules(id)
    );
  """)
  db.exec sql"create index if not exists SymNameIx on syms(name);"
  db.exec sql"create index if not exists SymByNameAndModuleIdx on syms(name, module);"
  db.exec sql"create index if not exists SymByModuleIdx on syms(module);"
  db.exec sql"create index if not exists SymByNimIdIdx on syms(nimid);"

  db.exec(sql"""
    create table if not exists toplevelstmts(
      id integer primary key,
      module integer not null,
      data blob not null,
      foreign key (module) references modules(id)
    );
  """)
  db.exec sql"create index if not exists TopLevelStmtByModuleIdx on toplevelstmts(module);"

  db.exec(sql"""
    create table if not exists statics(
      id integer primary key,
      module integer not null,
      data blob not null,
      foreign key (module) references modules(id)
    );
  """)
  # The statics index belongs on the `statics` table, not on `toplevelstmts`.
  db.exec sql"create index if not exists StaticsByModuleIdx on statics(module);"
  # Seed row read back by `setupModuleCache` to initialise the id generator.
  db.exec sql"insert into controlblock(idgen) values (0)"
proc setupModuleCache* =
  ## Opens `rodfiles.db` in the nimcache directory, creating the schema when
  ## the file did not exist before, applies the connection pragmas and
  ## initialises the id generator from the `controlblock` table.
  ## Does nothing unless `gSymbolFiles` is `v2Sf`.
  if gSymbolFiles != v2Sf: return
  let dbfile = getNimcacheDir() / "rodfiles.db"
  # The existence check has to happen before `open`.
  let needsSchema = not fileExists(dbfile)
  db = open(connection=dbfile, user="nim", password="",
            database="nim")
  if needsSchema:
    createDb()
  for pragma in [sql"pragma journal_mode=off",
                 sql"pragma SYNCHRONOUS=off",
                 sql"pragma LOCKING_MODE=exclusive"]:
    db.exec(pragma)
  let maxId = db.getValue(sql"select max(idgen) from controlblock")
  idgen.setId(parseInt(maxId))

View File

@@ -813,6 +813,7 @@ proc rrGetSym(r: PRodReader, id: int, info: TLineInfo): PSym =
encodeVInt(id, x)
internalError(info, "missing from both indexes: +" & x)
var rd = getReader(moduleID)
doAssert rd != nil
d = iiTableGet(rd.index.tab, id)
if d != InvalidKey:
result = decodeSymSafePos(rd, d, info)
@@ -911,7 +912,7 @@ proc checkDep(fileIdx: int32; cache: IdentCache): TReasonForRecompile =
proc handleSymbolFile*(module: PSym; cache: IdentCache): PRodReader =
let fileIdx = module.fileIdx
if gSymbolFiles in {disabledSf, writeOnlySf}:
if gSymbolFiles in {disabledSf, writeOnlySf, v2Sf}:
module.id = getID()
return nil
idgen.loadMaxIds(options.gProjectPath / options.gProjectName)
@@ -1236,4 +1237,4 @@ proc viewFile(rodfile: string) =
outf.close
when isMainModule:
viewFile(paramStr(1).addFileExt(rodExt))
viewFile(paramStr(1).addFileExt(RodExt))

View File

@@ -67,6 +67,8 @@ proc decodeStr*(s: cstring, pos: var int): string =
const
chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
{.push overflowChecks: off.}
# since negative numbers require a leading '-' they use up 1 byte. Thus we
# subtract/add `vintDelta` here to save space for small negative numbers
# which are common in ROD files:
@@ -131,6 +133,8 @@ proc decodeVInt*(s: cstring, pos: var int): int =
proc decodeVBiggestInt*(s: cstring, pos: var int): BiggestInt =
decodeIntImpl()
{.pop.}
iterator decodeVIntArray*(s: cstring): int =
var i = 0
while s[i] != '\0':

View File

@@ -16,7 +16,7 @@ import
procfind, lookups, rodread, pragmas, passes, semdata, semtypinst, sigmatch,
intsets, transf, vmdef, vm, idgen, aliases, cgmeth, lambdalifting,
evaltempl, patterns, parampatterns, sempass2, nimfix.pretty, semmacrosanity,
semparallel, lowerings, pluginsupport, plugins.active
semparallel, lowerings, pluginsupport, plugins.active, rod
from modulegraphs import ModuleGraph
@@ -589,6 +589,7 @@ proc myProcess(context: PPassContext, n: PNode): PNode =
else:
result = ast.emptyNode
#if gCmd == cmdIdeTools: findSuggest(c, n)
rod.storeNode(c.module, result)
proc testExamples(c: PContext) =
let inp = toFullPath(c.module.info)

View File

@@ -567,7 +567,7 @@ proc typeToString(typ: PType, prefer: TPreferedDesc = preferName): string =
add(result, typeToString(t.sons[i]))
if i < sonsLen(t) - 1: add(result, ", ")
add(result, ')')
if t.sons[0] != nil: add(result, ": " & typeToString(t.sons[0]))
if t.len > 0 and t.sons[0] != nil: add(result, ": " & typeToString(t.sons[0]))
var prag = if t.callConv == ccDefault: "" else: CallingConvToStr[t.callConv]
if tfNoSideEffect in t.flags:
addSep(prag)