NIR: An intermediate representation for Nim. WIP

This commit is contained in:
araq
2023-09-30 21:02:01 +02:00
parent 8f5b90f886
commit 9510e1a55c
6 changed files with 636 additions and 0 deletions

View File

@@ -13,6 +13,8 @@ type
vals: seq[T] # indexed by LitId
keys: seq[LitId] # indexed by hash(val)
proc initBiTable*[T](): BiTable[T] =
  ## Creates a `BiTable` whose `vals` and `keys` sequences are both empty.
  result = BiTable[T](vals: @[], keys: @[])
proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  ## Returns `h + 1` masked with `maxHash` (presumably the next probe slot).
  let following = h + 1
  result = following and maxHash

78
compiler/nir/cir.nim Normal file
View File

@@ -0,0 +1,78 @@
#
#
# The Nim Compiler
# (c) Copyright 2023 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
# We produce C code as a list of tokens.
import std / assertions
import .. / ic / bitabs
type
  Token = LitId # indexing into the tokens BiTable[string]

  # Fixed set of C tokens. The string value of each member is the text that
  # `fillTokenTable` registers for it; that proc asserts the id it gets back
  # equals the member's ordinal.
  # NOTE(review): `EmptyToken` and `DeclPrefix` both have the text "". If
  # `getOrIncl` returns the already assigned id for a string it has seen
  # before, the assertion in `fillTokenTable` cannot hold for `DeclPrefix`
  # -- confirm against the BiTable implementation.
  PredefinedToken = enum
    IgnoreMe = "<unused>"
    EmptyToken = ""
    DeclPrefix = "" # the next token is the name of a definition
    CurlyLe = "{"
    CurlyRi = "}"
    ParLe = "("
    ParRi = ")"
    BracketLe = "["
    BracketRi = "]"
    NewLine = "\n"
    Semicolon = ";"
    Comma = ", "
    Space = " "
    Colon = ":"
    Dot = "."
    Arrow = "->"
    Star = "*"
    Amp = "&"
    AsgnOpr = " = "
    ScopeOpr = "::"
    ConstKeyword = "const "
    StaticKeyword = "static "
    NimString = "NimString"
    StrLitPrefix = "(NimChar*)"
    StrLitNamePrefix = "Qstr"
    LoopKeyword = "while (true) "
    WhileKeyword = "while ("
    IfKeyword = "if ("
    ElseKeyword = "else "
    SwitchKeyword = "switch ("
    CaseKeyword = "case "
    DefaultKeyword = "default:"
    BreakKeyword = "break"
    NullPtr = "nullptr"
    IfNot = "if (!("
    ReturnKeyword = "return "
const
  # Token whose id is one past the ordinal of `ReturnKeyword`, the last
  # member of `PredefinedToken`.
  ModulePrefix = Token(int(ReturnKeyword)+1)
proc fillTokenTable(tab: var BiTable[string]) =
  ## Registers the text of every predefined token from `EmptyToken` up to the
  ## last enum member in `tab` and asserts that the id handed back for each
  ## one equals the token's ordinal.
  for tok in EmptyToken .. PredefinedToken.high:
    let assigned = getOrIncl(tab, $tok)
    assert assigned == LitId(tok)
type
  # C code kept as a list of token ids (see the `add` procs below).
  GeneratedCode* = object
    code: seq[LitId]        # emitted tokens, in order
    tokens: BiTable[string] # table the ids in `code` refer to
proc initGeneratedCode*(): GeneratedCode =
  ## Creates an empty code buffer whose token table has been run through
  ## `fillTokenTable`.
  var table = initBiTable[string]()
  fillTokenTable(table)
  result = GeneratedCode(code: @[], tokens: table)
proc add*(g: var GeneratedCode; t: PredefinedToken) {.inline.} =
  ## Appends the predefined token `t`, converted to a `Token`, to the code.
  let tok = Token(t)
  add(g.code, tok)
proc add*(g: var GeneratedCode; s: string) {.inline.} =
  ## Appends the id that `g.tokens.getOrIncl` yields for the text `s`.
  let tok = getOrIncl(g.tokens, s)
  add(g.code, tok)

22
compiler/nir/nir.nim Normal file
View File

@@ -0,0 +1,22 @@
#
#
# The Nim Compiler
# (c) Copyright 2023 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Nim Intermediate Representation, designed to capture all of Nim's semantics without losing too much
## precious information. Can easily be translated into C. And to JavaScript, hopefully.
import nirtypes, nirinsts
type
  # A NIR module: one type graph plus three lists of instruction trees.
  # NOTE(review): the roles of `data`, `init` and `procs` are only implied by
  # their names here -- confirm once the producers of these fields exist.
  Module* = object
    types: TypeGraph
    data: seq[Tree]
    init: seq[Tree]
    procs: seq[Tree]

148
compiler/nir/nirinsts.nim Normal file
View File

@@ -0,0 +1,148 @@
#
#
# The Nim Compiler
# (c) Copyright 2023 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## NIR instructions. Somewhat inspired by LLVM's instructions.
import std / assertions
import .. / ic / bitabs
import nirlineinfos
type
  SymId* = distinct int

  # Kinds of NIR instructions. Every kind up to and including `NilVal` is an
  # "atom" (see `LastAtomicValue`); the tree procs treat all later kinds as
  # compound nodes whose operand stores a span.
  InstKind* = enum
    Nop,
    ImmediateVal,
    IntVal,
    StrVal,
    SymDef,
    SymUse,
    ModuleId, # module ID
    ModuleSymUse, # `module.x`
    Label,
    Goto,
    GotoBack,
    Typed, # with type ID
    NilVal, # last atom
    ArrayConstr,
    ObjConstr,
    Ret,
    Yld,
    Select,
    SummonGlobal,
    SummonThreadLocal,
    Summon, # x = Summon Typed <Type ID>; x begins to live
    Kill, # `Kill x`: scope end for `x`
    Load,
    Store,
    ArrayAt, # addr(a[i])
    FieldAt, # addr(obj.field)
    Call,
    IndirectCall,
    CheckedCall, # call that can raise
    CheckedIndirectCall, # call that can raise
    CheckedAdd, # with overflow checking etc.
    CheckedSub,
    CheckedMul,
    CheckedDiv,
    CheckedMod,
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    BitShl,
    BitShr,
    BitAnd,
    BitOr,
    BitXor,
    BitNot,
    Eq,
    Le,
    Lt,
    Emit,
    ProcDecl
const
  LastAtomicValue = NilVal # kinds <= this one are atoms
  InstKindBits = 8'u32 # number of low bits of `Instr.x` that hold the kind
  InstKindMask = (1'u32 shl InstKindBits) - 1'u32 # mask selecting those bits
type
  Instr* = object # 8 bytes
    x: uint32 # kind in the low `InstKindBits` bits, operand in the rest
    info: PackedLineInfo
template kind*(n: Instr): InstKind =
  ## The instruction kind stored in the low `InstKindBits` bits of `n.x`.
  InstKind(InstKindMask and n.x)
template operand(n: Instr): uint32 =
  ## The operand stored in the bits of `n.x` above the kind.
  n.x shr InstKindBits
template toX(k: InstKind; operand: uint32): uint32 =
  ## Packs `k` into the low bits and `operand` into the bits above them.
  uint32(ord(k)) or (operand shl InstKindBits)
template toX(k: InstKind; operand: LitId): uint32 =
  ## Packs `k` into the low bits and the literal id `operand` above them.
  uint32(ord(k)) or (uint32(operand) shl InstKindBits)
type
  # Flat sequence of instructions; compound nodes store how many nodes they
  # span so that children can be skipped (see `nextChild` and `sons`).
  Tree* = object
    nodes: seq[Instr]

  # Literal tables for numbers and strings.
  Values* = object
    numbers: BiTable[int64]
    strings: BiTable[string]
type
  PatchPos* = distinct int # index returned by `prepare`, consumed by `patch`
  NodePos* = distinct int  # index of a node inside `Tree.nodes`
const
  InvalidPatchPos* = PatchPos(-1) # sentinel that `isValid` rejects
proc isValid(p: PatchPos): bool {.inline.} =
  ## True unless `p` holds the sentinel value -1.
  result = int(p) != -1
proc prepare(tree: var Tree; kind: InstKind): PatchPos =
  ## Appends a node of `kind` with the provisional operand 1 and returns its
  ## index, which `patch` later uses to overwrite the operand.
  let provisional = Instr(x: toX(kind, 1'u32))
  result = PatchPos(len(tree.nodes))
  add(tree.nodes, provisional)
proc isAtom(tree: Tree; pos: int): bool {.inline.} =
  ## True if the node at index `pos` has a kind of at most `LastAtomicValue`.
  result = not (tree.nodes[pos].kind > LastAtomicValue)
proc isAtom(tree: Tree; pos: NodePos): bool {.inline.} =
  ## True if the node at `pos` has a kind of at most `LastAtomicValue`.
  let idx = int(pos)
  result = not (tree.nodes[idx].kind > LastAtomicValue)
proc patch(tree: var Tree; pos: PatchPos) =
  ## Finalizes the compound node opened at `pos`: its operand is overwritten
  ## with the number of nodes from `pos` up to the current end of the tree.
  ##
  ## Asserts that the node is not an atom, that the span is positive and that
  ## the span fits into the operand bits of `Instr.x`.
  const maxSpan = int((1'u32 shl (32'u32 - InstKindBits)) - 1'u32)
  let pos = pos.int
  let k = tree.nodes[pos].kind
  assert k > LastAtomicValue
  let distance = int32(tree.nodes.len - pos)
  assert distance > 0
  # `toX` shifts the operand left by `InstKindBits` with wrapping uint32
  # arithmetic, so a larger span would silently lose its high bits and
  # corrupt tree traversal.
  assert int(distance) <= maxSpan, "node span does not fit into the operand"
  tree.nodes[pos].x = toX(k, cast[uint32](distance))
proc len*(tree: Tree): int {.inline.} =
  ## Number of nodes stored in the tree.
  result = len(tree.nodes)
template rawSpan(n: Instr): int =
  ## The operand of `n` converted to `int`, without checking the node kind.
  int(n.operand)
proc nextChild(tree: Tree; pos: var int) {.inline.} =
  ## Moves `pos` past the node it points at: by one for an atom, by the
  ## stored span for a compound node.
  let node = tree.nodes[pos]
  if node.kind <= LastAtomicValue:
    inc pos
  else:
    assert node.operand > 0'u32
    inc pos, node.rawSpan
iterator sons*(tree: Tree; n: NodePos): NodePos =
  ## Yields the position of every direct child of the compound node `n`.
  let start = int(n)
  assert tree.nodes[start].kind > LastAtomicValue
  let stop = start + tree.nodes[start].rawSpan
  var child = start + 1
  while child < stop:
    yield NodePos(child)
    nextChild(tree, child)
template `[]`*(t: Tree; n: NodePos): Instr =
  ## The instruction stored at position `n`.
  t.nodes[int(n)]

View File

@@ -0,0 +1,78 @@
#
#
# The Nim Compiler
# (c) Copyright 2023 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
# For the line information we use 32 bits. They are used as follows:
# Bit 0 (AsideBit): Clear if the line information is stored inline. If it is set, the
# remaining 31 bits are used as an index into a seq[(LitId, int32, int32)].
#
# We use 10 bits for the "file ID", this means a program can consist of as many
# as 1024 different files. (If it uses more files than that, the AsideBit
# would be set.)
# This means we have 21 bits left to encode the (line, col) pair. We use 7 bits for the column
# so 128 is the limit and 14 bits for the line number.
# The packed representation supports files with up to 16384 lines.
# Keep in mind that whenever any limit is reached the AsideBit is set and the real line
# information is kept in a side channel.
import std / assertions
const
  AsideBit = 1   # bit 0 of a packed value; set when the info lives in `aside`
  FileBits = 10  # bits used for the file id
  LineBits = 14  # bits used for the line number
  ColBits = 7    # bits used for the column
  FileMax = (1 shl FileBits) - 1 # largest file id that fits inline
  LineMax = (1 shl LineBits) - 1 # largest line that fits inline
  ColMax = (1 shl ColBits) - 1   # largest column that fits inline
static:
  # The four fields must fill the 32 bit representation exactly.
  assert AsideBit + FileBits + LineBits + ColBits == 32
import .. / ic / bitabs # for LitId
type
  PackedLineInfo* = distinct uint32 # inline (file, line, col) or `aside` index

  # Holds the entries that `pack` could not store inline.
  LineInfoManager* = object
    aside*: seq[(LitId, int32, int32)] # (file, line, col) triples
proc pack*(m: var LineInfoManager; file: LitId; line, col: int32): PackedLineInfo =
  ## Encodes `(file, line, col)` in 32 bits. Values within `FileMax`,
  ## `LineMax` and `ColMax` are stored inline (negative `line`/`col` become
  ## 0); anything else is appended to `m.aside` and referenced by index with
  ## the aside bit set.
  let fitsInline = file.uint32 <= FileMax.uint32 and line <= LineMax and
      col <= ColMax
  if not fitsInline:
    # side channel: index shifted past the flag bit, flag bit set
    result = PackedLineInfo((m.aside.len shl 1) or AsideBit)
    m.aside.add (file, line, col)
  else:
    const
      lineShift = uint32(AsideBit + FileBits)
      colShift = uint32(AsideBit + FileBits + LineBits)
    let colBits = max(col, 0'i32).uint32
    let lineBits = max(line, 0'i32).uint32
    # inline representation: flag bit stays 0
    result = PackedLineInfo((file.uint32 shl 1'u32) or
        (lineBits shl lineShift) or (colBits shl colShift))
proc unpack*(m: LineInfoManager; i: PackedLineInfo): (LitId, int32, int32) =
  ## Decodes a value produced by `pack` back into `(file, line, col)`,
  ## reading from `m.aside` when bit 0 is set.
  let raw = i.uint32
  let payload = raw shr 1'u32
  if (raw and 1'u32) != 0'u32:
    # side channel: the payload is the index into `aside`
    result = m.aside[int(payload)]
  else:
    # inline representation
    let fileBits = payload and FileMax.uint32
    let lineBits = (raw shr uint32(AsideBit + FileBits)) and LineMax.uint32
    let colBits = (raw shr uint32(AsideBit + FileBits + LineBits)) and
        ColMax.uint32
    result = (LitId(fileBits), int32(lineBits), int32(colBits))
proc getFileId*(m: LineInfoManager; i: PackedLineInfo): LitId =
  ## Returns only the file component of the unpacked line information.
  let (fileId, _, _) = unpack(m, i)
  result = fileId
when isMainModule:
  # Round-trip check over lines 0..16387 and columns 0..99 with file id 1023;
  # finally prints how many entries ended up in the side channel.
  var manager = LineInfoManager(aside: @[])
  let fileId = LitId(1023)
  for line in 0'i32 ..< 16388'i32:
    for col in 0'i32 ..< 100'i32:
      let packed = pack(manager, fileId, line, col)
      let (gotFile, gotLine, gotCol) = unpack(manager, packed)
      assert gotFile == LitId(1023)
      assert gotLine == line
      assert gotCol == col
  echo manager.aside.len

308
compiler/nir/nirtypes.nim Normal file
View File

@@ -0,0 +1,308 @@
#
#
# The Nim Compiler
# (c) Copyright 2023 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Type system for NIR. Close to C's type system but without its quirks.
import std / assertions
import .. / ic / bitabs
type
  # Kinds of nodes in a `TypeGraph`. Kinds up to and including `VarargsTy`
  # are atoms: a single node whose operand is a plain value (bit width or
  # literal id). All later kinds are compound nodes whose operand is the
  # number of nodes they span.
  NirTypeKind* = enum
    VoidTy, IntTy, UIntTy, FloatTy, BoolTy, CharTy, NameVal, IntVal,
    # `ObjectTy` and `UnionTy` nodes store the LitId of the type name as
    # their operand (see `nominalType`), so they must be atoms; otherwise
    # `span` and `nextChild` would read that id as a node count.
    ObjectTy,
    UnionTy,
    VarargsTy, # the `...` in a C prototype; also the last "atom"
    APtrTy, # pointer to aliasable memory
    UPtrTy, # pointer to unique/unaliasable memory
    AArrayPtrTy, # pointer to array of aliasable memory
    UArrayPtrTy, # pointer to array of unique/unaliasable memory
    ArrayTy,
    LastArrayTy, # array of unspecified size as a last field inside an object
    ProcTy,
    ObjectDecl,
    UnionDecl,
    FieldDecl
const
  TypeKindBits = 8'u32 # number of low bits of `TypeNode.x` holding the kind
  TypeKindMask = (1'u32 shl TypeKindBits) - 1'u32 # mask selecting those bits
type
  TypeNode* = object # 4 bytes
    x: uint32 # kind in the low `TypeKindBits` bits, operand in the rest
template kind*(n: TypeNode): NirTypeKind =
  ## The type kind stored in the low `TypeKindBits` bits of `n.x`.
  NirTypeKind(TypeKindMask and n.x)
template operand(n: TypeNode): uint32 =
  ## The operand stored in the bits of `n.x` above the kind.
  n.x shr TypeKindBits
template toX(k: NirTypeKind; operand: uint32): uint32 =
  ## Packs `k` into the low bits and `operand` into the bits above them.
  uint32(ord(k)) or (operand shl TypeKindBits)
template toX(k: NirTypeKind; operand: LitId): uint32 =
  ## Packs `k` into the low bits and the literal id `operand` above them.
  uint32(ord(k)) or (uint32(operand) shl TypeKindBits)
type
  TypeId* = distinct int # index of a node inside `TypeGraph.nodes`

  # All types of a module stored as one flat node sequence, plus the literal
  # tables that `NameVal`/`IntVal` operands index into.
  TypeGraph* = object
    nodes: seq[TypeNode]
    names: BiTable[string]
    numbers: BiTable[uint64]
const
  # Ids of the builtin types; they equal the node indices that
  # `initTypeGraph` creates, in the same order.
  VoidId* = TypeId 0
  Bool8Id* = TypeId 1
  Char8Id* = TypeId 2
  Int8Id* = TypeId 3
  Int16Id* = TypeId 4
  Int32Id* = TypeId 5
  Int64Id* = TypeId 6
  UInt8Id* = TypeId 7
  UInt16Id* = TypeId 8
  UInt32Id* = TypeId 9
  UInt64Id* = TypeId 10
  Float32Id* = TypeId 11
  Float64Id* = TypeId 12
  LastBuiltinId* = 12 # index of the last builtin node
proc initTypeGraph*(): TypeGraph =
  ## Creates a graph that already contains one node per builtin type, in the
  ## order of the `VoidId` .. `Float64Id` constants. Each node's operand is
  ## the bit width (0 for void).
  const builtins = [
    (VoidTy, 0'u32),
    (BoolTy, 8'u32),
    (CharTy, 8'u32),
    (IntTy, 8'u32), (IntTy, 16'u32), (IntTy, 32'u32), (IntTy, 64'u32),
    (UIntTy, 8'u32), (UIntTy, 16'u32), (UIntTy, 32'u32), (UIntTy, 64'u32),
    (FloatTy, 32'u32), (FloatTy, 64'u32)
  ]
  result = TypeGraph(nodes: @[])
  for entry in builtins:
    result.nodes.add TypeNode(x: toX(entry[0], entry[1]))
  assert result.nodes.len == LastBuiltinId+1
type
  TypePatchPos* = distinct int # index returned by `prepare`/`openType`
const
  InvalidTypePatchPos* = TypePatchPos(-1) # sentinel that `isValid` rejects
  LastAtomicValue = VarargsTy # kinds <= this one are atoms
proc isValid(p: TypePatchPos): bool {.inline.} =
  ## True unless `p` holds the sentinel value -1.
  result = int(p) != -1
proc prepare(tree: var TypeGraph; kind: NirTypeKind): TypePatchPos =
  ## Appends a node of `kind` with the provisional operand 1 and returns its
  ## index, which `patch` later uses to overwrite the operand.
  let provisional = TypeNode(x: toX(kind, 1'u32))
  result = TypePatchPos(len(tree.nodes))
  add(tree.nodes, provisional)
proc isAtom(tree: TypeGraph; pos: int): bool {.inline.} =
  ## True if the node at index `pos` has a kind of at most `LastAtomicValue`.
  result = not (tree.nodes[pos].kind > LastAtomicValue)
proc isAtom(tree: TypeGraph; pos: TypeId): bool {.inline.} =
  ## True if the node at `pos` has a kind of at most `LastAtomicValue`.
  let idx = int(pos)
  result = not (tree.nodes[idx].kind > LastAtomicValue)
proc patch(tree: var TypeGraph; pos: TypePatchPos) =
  ## Finalizes the compound node opened at `pos`: its operand is overwritten
  ## with the number of nodes from `pos` up to the current end of the graph.
  ##
  ## Asserts that the node is not an atom, that the span is positive and that
  ## the span fits into the operand bits of `TypeNode.x`.
  const maxSpan = int((1'u32 shl (32'u32 - TypeKindBits)) - 1'u32)
  let pos = pos.int
  let k = tree.nodes[pos].kind
  assert k > LastAtomicValue
  let distance = int32(tree.nodes.len - pos)
  assert distance > 0
  # `toX` shifts the operand left by `TypeKindBits` with wrapping uint32
  # arithmetic, so a larger span would silently lose its high bits and
  # corrupt graph traversal.
  assert int(distance) <= maxSpan, "node span does not fit into the operand"
  tree.nodes[pos].x = toX(k, cast[uint32](distance))
proc len*(tree: TypeGraph): int {.inline.} =
  ## Number of nodes stored in the graph.
  result = len(tree.nodes)
template rawSpan(n: TypeNode): int =
  ## The operand of `n` converted to `int`, without checking the node kind.
  int(n.operand)
proc nextChild(tree: TypeGraph; pos: var int) {.inline.} =
  ## Moves `pos` past the node it points at: by one for an atom, by the
  ## stored span for a compound node.
  let node = tree.nodes[pos]
  if node.kind <= LastAtomicValue:
    inc pos
  else:
    assert node.operand > 0'u32
    inc pos, node.rawSpan
iterator sons*(tree: TypeGraph; n: TypeId): TypeId =
  ## Yields the id of every direct child of the compound node `n`.
  let start = int(n)
  assert tree.nodes[start].kind > LastAtomicValue
  let stop = start + tree.nodes[start].rawSpan
  var child = start + 1
  while child < stop:
    yield TypeId(child)
    nextChild(tree, child)
template `[]`*(t: TypeGraph; n: TypeId): TypeNode =
  ## The type node stored at position `n`.
  t.nodes[int(n)]
proc elementType*(tree: TypeGraph; n: TypeId): TypeId {.inline.} =
  ## Returns the id of the node directly after `n`, which holds the element
  ## type of a pointer or array node.
  const withElement = {APtrTy, UPtrTy, AArrayPtrTy, UArrayPtrTy, ArrayTy,
                       LastArrayTy}
  assert tree[n].kind in withElement
  result = TypeId(int(n) + 1)
proc kind*(tree: TypeGraph; n: TypeId): NirTypeKind {.inline.} =
  ## Kind of the node at `n`.
  result = tree.nodes[int(n)].kind
proc span(tree: TypeGraph; pos: int): int {.inline.} =
  ## Number of nodes occupied by the node at `pos`: 1 for an atom, the
  ## stored operand for a compound node.
  let node = tree.nodes[pos]
  result =
    if node.kind > LastAtomicValue: int(node.operand)
    else: 1
proc sons2(tree: TypeGraph; n: TypeId): (TypeId, TypeId) =
  ## Returns the ids of the first two children of the compound node `n`.
  assert(not isAtom(tree, n.int))
  let first = int(n) + 1
  let second = first + span(tree, first)
  result = (TypeId(first), TypeId(second))
proc sons3(tree: TypeGraph; n: TypeId): (TypeId, TypeId, TypeId) =
  ## Returns the ids of the first three children of the compound node `n`.
  assert(not isAtom(tree, n.int))
  let first = int(n) + 1
  let second = first + span(tree, first)
  let third = second + span(tree, second)
  result = (TypeId(first), TypeId(second), TypeId(third))
proc arrayLen*(tree: TypeGraph; n: TypeId): BiggestUInt =
  ## Returns the number of elements of the array type `n`.
  ##
  ## `n` must be an `ArrayTy` node (asserted).
  assert tree[n].kind == ArrayTy
  # An `ArrayTy` node is a compound node: its own operand is the span written
  # by `patch`, not a literal id. The length is the second child, the
  # `IntVal` node appended by `addArrayLen` (same layout `toString` reads).
  let (_, lenNode) = sons2(tree, n)
  result = tree.numbers[LitId tree[lenNode].operand]
proc openType*(tree: var TypeGraph; kind: NirTypeKind): TypePatchPos =
  ## Starts a compound type node of `kind`; children are appended afterwards
  ## and the node is completed with `sealType`.
  const compoundKinds = {APtrTy, UPtrTy, AArrayPtrTy, UArrayPtrTy,
                         ArrayTy, LastArrayTy, ProcTy, ObjectDecl, UnionDecl,
                         FieldDecl}
  assert kind in compoundKinds
  result = prepare(tree, kind)
proc sealType*(tree: var TypeGraph; p: TypePatchPos): TypeId =
  ## Completes the node opened at `p` by patching in its span and returns the
  ## position as a `TypeId`.
  # TODO: Search for an existing instance of this type in
  # order to reduce memory consumption.
  patch(tree, p)
  result = TypeId(p)
proc nominalType*(tree: var TypeGraph; kind: NirTypeKind; name: string): TypeId =
  ## Appends an `ObjectTy` or `UnionTy` node whose operand is the id of
  ## `name` in the graph's name table, and returns the node's id.
  assert kind in {ObjectTy, UnionTy}
  result = TypeId(len(tree.nodes))
  let nameId = getOrIncl(tree.names, name)
  add(tree.nodes, TypeNode(x: toX(kind, nameId)))
proc addBuiltinType*(g: var TypeGraph; id: TypeId) =
  ## Appends a copy of the single node stored at `id`.
  let node = g.nodes[int(id)]
  add(g.nodes, node)
proc addType*(g: var TypeGraph; t: TypeId) =
  ## Appends a copy of the whole node range that makes up the type `t`.
  let src = int(t)
  let count = span(g, src)
  let dst = len(g.nodes)
  setLen(g.nodes, dst + count)
  assert count > 0
  for offset in 0 ..< count:
    g.nodes[dst + offset] = g.nodes[src + offset]
proc addArrayLen*(g: var TypeGraph; len: uint64) =
  ## Appends an `IntVal` node whose operand is the id of `len` in the
  ## graph's number table.
  let lenId = getOrIncl(g.numbers, len)
  add(g.nodes, TypeNode(x: toX(IntVal, lenId)))
proc addName*(g: var TypeGraph; name: string) =
  ## Appends a `NameVal` node whose operand is the id of `name` in the
  ## graph's name table.
  let nameId = getOrIncl(g.names, name)
  add(g.nodes, TypeNode(x: toX(NameVal, nameId)))
proc addField*(g: var TypeGraph; name: string; typ: TypeId) =
  ## Appends a `FieldDecl` node with two children: a copy of `typ` followed
  ## by the field name.
  let field = openType(g, FieldDecl)
  addType(g, typ)
  addName(g, name)
  discard sealType(g, field)
proc toString*(dest: var string; g: TypeGraph; i: TypeId) =
  ## Appends a textual rendering of the type node `i` (and, recursively, of
  ## its children) to `dest`.

  template sized(prefix: string) =
    # scalar type: prefix followed by the node's operand
    dest.add prefix
    dest.addInt g[i].operand

  template wrapped(prefix: string) =
    # pointer/array-like type: prefix, element type, closing bracket
    dest.add prefix
    toString(dest, g, g.elementType(i))
    dest.add "]"

  template nominal(prefix: string) =
    # named type: prefix followed by the name the operand refers to
    dest.add prefix
    dest.add g.names[LitId g[i].operand]

  template declaration(prefix: string) =
    # object/union declaration: one child per line
    dest.add prefix
    for member in sons(g, i):
      toString(dest, g, member)
      dest.add '\n'
    dest.add "]"

  case g[i].kind
  of VoidTy: dest.add "void"
  of IntTy: sized "i"
  of UIntTy: sized "u"
  of FloatTy: sized "f"
  of BoolTy: sized "b"
  of CharTy: sized "c"
  of NameVal:
    dest.add g.names[LitId g[i].operand]
  of IntVal:
    dest.add $g.numbers[LitId g[i].operand]
  of VarargsTy:
    dest.add "..."
  of APtrTy: wrapped "aptr["
  of UPtrTy: wrapped "uptr["
  of AArrayPtrTy: wrapped "aArrayPtr["
  of UArrayPtrTy: wrapped "uArrayPtr["
  of ArrayTy:
    let (elemType, length) = g.sons2(i)
    dest.add "Array["
    toString(dest, g, elemType)
    dest.add ", "
    toString(dest, g, length)
    dest.add "]"
  of LastArrayTy:
    # array of unspecified size as a last field inside an object
    wrapped "LastArrayTy["
  of ObjectTy: nominal "object "
  of UnionTy: nominal "union "
  of ProcTy:
    dest.add "proc["
    for part in sons(g, i):
      toString(dest, g, part)
    dest.add "]"
  of ObjectDecl: declaration "object["
  of UnionDecl: declaration "union["
  of FieldDecl:
    let (fieldType, fieldName) = g.sons2(i)
    toString(dest, g, fieldType)
    dest.add ' '
    toString(dest, g, fieldName)
proc toString*(dest: var string; g: TypeGraph) =
  ## Appends every top-level type of `g` to `dest`, one per line.
  let total = g.len
  var pos = 0
  while pos < total:
    toString(dest, g, TypeId(pos))
    dest.add '\n'
    nextChild(g, pos)
proc `$`(g: TypeGraph): string =
  ## Renders the whole graph with `toString`.
  var text = ""
  toString(text, g)
  result = text
when isMainModule:
  # Builds an array type of five 8 bit ints and an object declaration with a
  # single field `p` of that type, then prints the graph.
  var graph = initTypeGraph()

  let arr = openType(graph, ArrayTy)
  addBuiltinType(graph, Int8Id)
  addArrayLen(graph, 5'u64)
  let arrType = sealType(graph, arr)

  let decl = openType(graph, ObjectDecl)
  graph.nodes.add TypeNode(x: toX(NameVal, getOrIncl(graph.names, "MyType")))
  addField(graph, "p", arrType)
  discard sealType(graph, decl)

  echo graph