move PNode.comment to a side channel, reducing memory usage during compilation by a factor 1.25x (#18760)

* move PNode.comment so a side channel, reducing memory usage

* fix a bug

* fixup

* use sfHasComment to speedup comment lookups

* fix for IC

* Update compiler/parser.nim

Co-authored-by: Andreas Rumpf <rumpf_a@web.de>
This commit is contained in:
Timothee Cour
2021-08-29 06:54:07 -07:00
committed by GitHub
parent 0ddd9519c0
commit fa7c1aa864
3 changed files with 67 additions and 27 deletions

View File

@@ -10,7 +10,7 @@
# abstract syntax tree + symbol table
import
lineinfos, hashes, options, ropes, idents, int128
lineinfos, hashes, options, ropes, idents, int128, tables
from strutils import toLowerAscii
export int128
@@ -497,6 +497,7 @@ type
nfExecuteOnReload # A top-level statement that will be executed during reloads
nfLastRead # this node is a last read
nfFirstWrite# this node is a first write
nfHasComment # node has a comment
TNodeFlags* = set[TNodeFlag]
TTypeFlag* = enum # keep below 32 for efficiency reasons (now: 43)
@@ -774,7 +775,6 @@ type
ident*: PIdent
else:
sons*: TNodeSeq
comment*: string
TStrTable* = object # a table[PIdent] of PSym
counter*: int
@@ -991,6 +991,38 @@ type
TImplication* = enum
impUnknown, impNo, impYes
template nodeId(n: PNode): int = cast[int](n)
type Gconfig = object
# we put comments in a side channel to avoid increasing `sizeof(TNode)`, which
# reduces memory usage given that `PNode` is the most allocated type by far.
comments: Table[int, string] # nodeId => comment
useIc*: bool
var gconfig {.threadvar.}: Gconfig
proc setUseIc*(useIc: bool) = gconfig.useIc = useIc
proc comment*(n: PNode): string =
if nfHasComment in n.flags and not gconfig.useIc:
# IC doesn't track comments, see `packed_ast`, so this could fail
result = gconfig.comments[n.nodeId]
proc `comment=`*(n: PNode, a: string) =
let id = n.nodeId
if a.len > 0:
# if needed, we could periodically cleanup gconfig.comments when its size increases,
# to ensure only live nodes (and with nfHasComment) have an entry in gconfig.comments;
# for compiling compiler, the waste is very small:
# num calls to newNodeImpl: 14984160 (num of PNode allocations)
# size of gconfig.comments: 33585
# num of nodes with comments that were deleted and hence wasted: 3081
n.flags.incl nfHasComment
gconfig.comments[id] = a
elif nfHasComment in n.flags:
n.flags.excl nfHasComment
gconfig.comments.del(id)
# BUGFIX: a module is overloadable so that a proc can have the
# same name as an imported module. This is necessary because of
# the poor naming choices in the standard library.
@@ -1207,9 +1239,17 @@ when defined(useNodeIds):
const nodeIdToDebug* = -1 # 2322968
var gNodeId: int
proc newNode*(kind: TNodeKind): PNode =
## new node with unknown line info, no type, and no children
result = PNode(kind: kind, info: unknownLineInfo)
template newNodeImpl(info2) =
result = PNode(kind: kind, info: info2)
when false:
# this would add overhead, so we skip it; it results in a small amount of leaked entries
# for old PNode that gets re-allocated at the same address as a PNode that
# has `nfHasComment` set (and an entry in that table). Only `nfHasComment`
# should be used to test whether a PNode has a comment; gconfig.comments
# can contain extra entries for deleted PNode's with comments.
gconfig.comments.del(cast[int](result))
template setIdMaybe() =
when defined(useNodeIds):
result.id = gNodeId
if result.id == nodeIdToDebug:
@@ -1217,27 +1257,22 @@ proc newNode*(kind: TNodeKind): PNode =
writeStackTrace()
inc gNodeId
proc newNode*(kind: TNodeKind): PNode =
## new node with unknown line info, no type, and no children
newNodeImpl(unknownLineInfo)
setIdMaybe()
proc newNodeI*(kind: TNodeKind, info: TLineInfo): PNode =
## new node with line info, no type, and no children
result = PNode(kind: kind, info: info)
when defined(useNodeIds):
result.id = gNodeId
if result.id == nodeIdToDebug:
echo "KIND ", result.kind
writeStackTrace()
inc gNodeId
newNodeImpl(info)
setIdMaybe()
proc newNodeI*(kind: TNodeKind, info: TLineInfo, children: int): PNode =
## new node with line info, type, and children
result = PNode(kind: kind, info: info)
newNodeImpl(info)
if children > 0:
newSeq(result.sons, children)
when defined(useNodeIds):
result.id = gNodeId
if result.id == nodeIdToDebug:
echo "KIND ", result.kind
writeStackTrace()
inc gNodeId
setIdMaybe()
proc newNodeIT*(kind: TNodeKind, info: TLineInfo, typ: PType): PNode =
## new node with line info, type, and no children
@@ -1644,8 +1679,8 @@ proc copyNode*(src: PNode): PNode =
template transitionNodeKindCommon(k: TNodeKind) =
let obj {.inject.} = n[]
n[] = TNode(kind: k, typ: obj.typ, info: obj.info, flags: obj.flags,
comment: obj.comment)
n[] = TNode(kind: k, typ: obj.typ, info: obj.info, flags: obj.flags)
# n.comment = obj.comment # shouldn't be needed, the address doesnt' change
when defined(useNodeIds):
n.id = obj.id

View File

@@ -8,7 +8,7 @@
#
# This module handles the parsing of command line arguments.
from ast import setUseIc
# We do this here before the 'import' statement so 'defined' does not get
# confused with 'TGCMode.gcMarkAndSweep' etc.
@@ -862,6 +862,7 @@ proc processSwitch*(switch, arg: string, pass: TCmdLinePass, info: TLineInfo;
of "v2": conf.symbolFiles = v2Sf
of "stress": conf.symbolFiles = stressTest
else: localError(conf, info, "invalid option for --incremental: " & arg)
setUseIc(conf.symbolFiles != disabledSf)
of "skipcfg":
processOnOffSwitchG(conf, {optSkipSystemConfigFile}, arg, pass, info)
of "skipprojcfg":

View File

@@ -170,13 +170,15 @@ proc validInd(p: var Parser): bool {.inline.} =
proc rawSkipComment(p: var Parser, node: PNode) =
if p.tok.tokType == tkComment:
if node != nil:
var rhs = node.comment
when defined(nimpretty):
if p.tok.commentOffsetB > p.tok.commentOffsetA:
node.comment.add fileSection(p.lex.config, p.lex.fileIdx, p.tok.commentOffsetA, p.tok.commentOffsetB)
rhs.add fileSection(p.lex.config, p.lex.fileIdx, p.tok.commentOffsetA, p.tok.commentOffsetB)
else:
node.comment.add p.tok.literal
rhs.add p.tok.literal
else:
node.comment.add p.tok.literal
rhs.add p.tok.literal
node.comment = move rhs
else:
parMessage(p, errInternal, "skipComment")
getTok(p)
@@ -1824,8 +1826,10 @@ proc parseRoutine(p: var Parser, kind: TNodeKind): PNode =
if result.comment.len == 0:
# proc fn*(a: int): int = a ## foo
# => moves comment `foo` to `fn`
swap(result.comment, body[0].comment)
else: discard # xxx either `assert false` or issue a warning (otherwise we'll never know of this edge case)
result.comment = body[0].comment
body[0].comment = ""
else:
assert false, p.lex.config$body.info # avoids hard to track bugs, fail early.
proc newCommentStmt(p: var Parser): PNode =
#| commentStmt = COMMENT