mirror of
https://github.com/nim-lang/Nim.git
synced 2026-02-13 14:53:46 +00:00
move PNode.comment to a side channel, reducing memory usage during compilation by a factor 1.25x (#18760)
* move PNode.comment so a side channel, reducing memory usage * fix a bug * fixup * use sfHasComment to speedup comment lookups * fix for IC * Update compiler/parser.nim Co-authored-by: Andreas Rumpf <rumpf_a@web.de>
This commit is contained in:
@@ -10,7 +10,7 @@
|
||||
# abstract syntax tree + symbol table
|
||||
|
||||
import
|
||||
lineinfos, hashes, options, ropes, idents, int128
|
||||
lineinfos, hashes, options, ropes, idents, int128, tables
|
||||
from strutils import toLowerAscii
|
||||
|
||||
export int128
|
||||
@@ -497,6 +497,7 @@ type
|
||||
nfExecuteOnReload # A top-level statement that will be executed during reloads
|
||||
nfLastRead # this node is a last read
|
||||
nfFirstWrite# this node is a first write
|
||||
nfHasComment # node has a comment
|
||||
|
||||
TNodeFlags* = set[TNodeFlag]
|
||||
TTypeFlag* = enum # keep below 32 for efficiency reasons (now: 43)
|
||||
@@ -774,7 +775,6 @@ type
|
||||
ident*: PIdent
|
||||
else:
|
||||
sons*: TNodeSeq
|
||||
comment*: string
|
||||
|
||||
TStrTable* = object # a table[PIdent] of PSym
|
||||
counter*: int
|
||||
@@ -991,6 +991,38 @@ type
|
||||
TImplication* = enum
|
||||
impUnknown, impNo, impYes
|
||||
|
||||
template nodeId(n: PNode): int = cast[int](n)
|
||||
|
||||
type Gconfig = object
|
||||
# we put comments in a side channel to avoid increasing `sizeof(TNode)`, which
|
||||
# reduces memory usage given that `PNode` is the most allocated type by far.
|
||||
comments: Table[int, string] # nodeId => comment
|
||||
useIc*: bool
|
||||
|
||||
var gconfig {.threadvar.}: Gconfig
|
||||
|
||||
proc setUseIc*(useIc: bool) = gconfig.useIc = useIc
|
||||
|
||||
proc comment*(n: PNode): string =
|
||||
if nfHasComment in n.flags and not gconfig.useIc:
|
||||
# IC doesn't track comments, see `packed_ast`, so this could fail
|
||||
result = gconfig.comments[n.nodeId]
|
||||
|
||||
proc `comment=`*(n: PNode, a: string) =
|
||||
let id = n.nodeId
|
||||
if a.len > 0:
|
||||
# if needed, we could periodically cleanup gconfig.comments when its size increases,
|
||||
# to ensure only live nodes (and with nfHasComment) have an entry in gconfig.comments;
|
||||
# for compiling compiler, the waste is very small:
|
||||
# num calls to newNodeImpl: 14984160 (num of PNode allocations)
|
||||
# size of gconfig.comments: 33585
|
||||
# num of nodes with comments that were deleted and hence wasted: 3081
|
||||
n.flags.incl nfHasComment
|
||||
gconfig.comments[id] = a
|
||||
elif nfHasComment in n.flags:
|
||||
n.flags.excl nfHasComment
|
||||
gconfig.comments.del(id)
|
||||
|
||||
# BUGFIX: a module is overloadable so that a proc can have the
|
||||
# same name as an imported module. This is necessary because of
|
||||
# the poor naming choices in the standard library.
|
||||
@@ -1207,9 +1239,17 @@ when defined(useNodeIds):
|
||||
const nodeIdToDebug* = -1 # 2322968
|
||||
var gNodeId: int
|
||||
|
||||
proc newNode*(kind: TNodeKind): PNode =
|
||||
## new node with unknown line info, no type, and no children
|
||||
result = PNode(kind: kind, info: unknownLineInfo)
|
||||
template newNodeImpl(info2) =
|
||||
result = PNode(kind: kind, info: info2)
|
||||
when false:
|
||||
# this would add overhead, so we skip it; it results in a small amount of leaked entries
|
||||
# for old PNode that gets re-allocated at the same address as a PNode that
|
||||
# has `nfHasComment` set (and an entry in that table). Only `nfHasComment`
|
||||
# should be used to test whether a PNode has a comment; gconfig.comments
|
||||
# can contain extra entries for deleted PNode's with comments.
|
||||
gconfig.comments.del(cast[int](result))
|
||||
|
||||
template setIdMaybe() =
|
||||
when defined(useNodeIds):
|
||||
result.id = gNodeId
|
||||
if result.id == nodeIdToDebug:
|
||||
@@ -1217,27 +1257,22 @@ proc newNode*(kind: TNodeKind): PNode =
|
||||
writeStackTrace()
|
||||
inc gNodeId
|
||||
|
||||
proc newNode*(kind: TNodeKind): PNode =
|
||||
## new node with unknown line info, no type, and no children
|
||||
newNodeImpl(unknownLineInfo)
|
||||
setIdMaybe()
|
||||
|
||||
proc newNodeI*(kind: TNodeKind, info: TLineInfo): PNode =
|
||||
## new node with line info, no type, and no children
|
||||
result = PNode(kind: kind, info: info)
|
||||
when defined(useNodeIds):
|
||||
result.id = gNodeId
|
||||
if result.id == nodeIdToDebug:
|
||||
echo "KIND ", result.kind
|
||||
writeStackTrace()
|
||||
inc gNodeId
|
||||
newNodeImpl(info)
|
||||
setIdMaybe()
|
||||
|
||||
proc newNodeI*(kind: TNodeKind, info: TLineInfo, children: int): PNode =
|
||||
## new node with line info, type, and children
|
||||
result = PNode(kind: kind, info: info)
|
||||
newNodeImpl(info)
|
||||
if children > 0:
|
||||
newSeq(result.sons, children)
|
||||
when defined(useNodeIds):
|
||||
result.id = gNodeId
|
||||
if result.id == nodeIdToDebug:
|
||||
echo "KIND ", result.kind
|
||||
writeStackTrace()
|
||||
inc gNodeId
|
||||
setIdMaybe()
|
||||
|
||||
proc newNodeIT*(kind: TNodeKind, info: TLineInfo, typ: PType): PNode =
|
||||
## new node with line info, type, and no children
|
||||
@@ -1644,8 +1679,8 @@ proc copyNode*(src: PNode): PNode =
|
||||
|
||||
template transitionNodeKindCommon(k: TNodeKind) =
|
||||
let obj {.inject.} = n[]
|
||||
n[] = TNode(kind: k, typ: obj.typ, info: obj.info, flags: obj.flags,
|
||||
comment: obj.comment)
|
||||
n[] = TNode(kind: k, typ: obj.typ, info: obj.info, flags: obj.flags)
|
||||
# n.comment = obj.comment # shouldn't be needed, the address doesnt' change
|
||||
when defined(useNodeIds):
|
||||
n.id = obj.id
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
#
|
||||
|
||||
# This module handles the parsing of command line arguments.
|
||||
|
||||
from ast import setUseIc
|
||||
|
||||
# We do this here before the 'import' statement so 'defined' does not get
|
||||
# confused with 'TGCMode.gcMarkAndSweep' etc.
|
||||
@@ -862,6 +862,7 @@ proc processSwitch*(switch, arg: string, pass: TCmdLinePass, info: TLineInfo;
|
||||
of "v2": conf.symbolFiles = v2Sf
|
||||
of "stress": conf.symbolFiles = stressTest
|
||||
else: localError(conf, info, "invalid option for --incremental: " & arg)
|
||||
setUseIc(conf.symbolFiles != disabledSf)
|
||||
of "skipcfg":
|
||||
processOnOffSwitchG(conf, {optSkipSystemConfigFile}, arg, pass, info)
|
||||
of "skipprojcfg":
|
||||
|
||||
@@ -170,13 +170,15 @@ proc validInd(p: var Parser): bool {.inline.} =
|
||||
proc rawSkipComment(p: var Parser, node: PNode) =
|
||||
if p.tok.tokType == tkComment:
|
||||
if node != nil:
|
||||
var rhs = node.comment
|
||||
when defined(nimpretty):
|
||||
if p.tok.commentOffsetB > p.tok.commentOffsetA:
|
||||
node.comment.add fileSection(p.lex.config, p.lex.fileIdx, p.tok.commentOffsetA, p.tok.commentOffsetB)
|
||||
rhs.add fileSection(p.lex.config, p.lex.fileIdx, p.tok.commentOffsetA, p.tok.commentOffsetB)
|
||||
else:
|
||||
node.comment.add p.tok.literal
|
||||
rhs.add p.tok.literal
|
||||
else:
|
||||
node.comment.add p.tok.literal
|
||||
rhs.add p.tok.literal
|
||||
node.comment = move rhs
|
||||
else:
|
||||
parMessage(p, errInternal, "skipComment")
|
||||
getTok(p)
|
||||
@@ -1824,8 +1826,10 @@ proc parseRoutine(p: var Parser, kind: TNodeKind): PNode =
|
||||
if result.comment.len == 0:
|
||||
# proc fn*(a: int): int = a ## foo
|
||||
# => moves comment `foo` to `fn`
|
||||
swap(result.comment, body[0].comment)
|
||||
else: discard # xxx either `assert false` or issue a warning (otherwise we'll never know of this edge case)
|
||||
result.comment = body[0].comment
|
||||
body[0].comment = ""
|
||||
else:
|
||||
assert false, p.lex.config$body.info # avoids hard to track bugs, fail early.
|
||||
|
||||
proc newCommentStmt(p: var Parser): PNode =
|
||||
#| commentStmt = COMMENT
|
||||
|
||||
Reference in New Issue
Block a user