mirror of https://github.com/nim-lang/Nim.git
synced 2026-02-17 08:34:20 +00:00
ORC: prepare for another patent-pending optimization (#15996)
* ORC: prepare for another patent-pending optimization
* bugfix
* '=copy' for refs can take a cyclic parameter for more ORC optimizations
* ORC: exploit the common 'it = it.next' pattern
* can't hurt to check for nil
* use an algorithm that is not obviously broken
* restore the test case
* final cleanups for --gc:orc
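The bullet "exploit the common 'it = it.next' pattern" refers to plain data-structure traversal. As a hedged illustration (ordinary user code, not part of this commit), the sketch below shows the pattern the optimization targets: walking a list only reassigns a local variable, so under --gc:orc no assignment in the loop has to be treated as a potential cycle creator.

type
  Node = ref object
    value: int
    next: Node

proc sum(head: Node): int =
  # The common traversal pattern named in the commit message:
  # `it = it.next` assigns to a bare local variable only.
  var it = head
  while it != nil:
    result += it.value
    it = it.next

when isMainModule:
  var head: Node
  for i in 1 .. 5:
    # Assigning a constructor to a local also cannot create a cycle,
    # matching the reasoning in isCriticalLink below.
    head = Node(value: i, next: head)
  echo sum(head)  # e.g. `nim c --gc:orc -r` prints 15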
@@ -38,6 +38,10 @@
 - Added `asyncdispatch.activeDescriptors` that returns the number of currently
   active async event handles/file descriptors
 
+- ``--gc:orc`` is now 10% faster than previously for common workloads. If
+  you have trouble with its changed behavior, compile with ``-d:nimOldOrc``.
+
+
 - `os.FileInfo` (returned by `getFileInfo`) now contains `blockSize`,
   determining preferred I/O block size for this file object.
 
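A brief, hedged usage sketch for the two library additions mentioned in the changelog hunk above (illustrative only, not part of the diff):

import std/[asyncdispatch, os]

# Number of currently active async event handles / file descriptors.
echo "active descriptors: ", activeDescriptors()

# FileInfo now exposes the preferred I/O block size for this file object.
echo "block size: ", getFileInfo(getAppFilename()).blockSize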
@@ -311,6 +311,43 @@ proc genSink(c: var Con; dest, ri: PNode, isDecl = false): PNode =
       # and copyMem(dest, source). This is efficient.
       result = newTree(nkStmtList, c.genDestroy(dest), newTree(nkFastAsgn, dest, ri))
 
+proc isCriticalLink(dest: PNode): bool {.inline.} =
+  #[
+  Lins's idea that only "critical" links can introduce a cycle. This is
+  critical for the performance guarantees that we strive for: If you
+  traverse a data structure, no tracing will be performed at all.
+  ORC is about this promise: The GC only touches the memory that the
+  mutator touches too.
+
+  These constructs cannot possibly create cycles::
+
+    local = ...
+
+    new(x)
+    dest = ObjectConstructor(field: noalias(dest))
+
+  But since 'ObjectConstructor' is already moved into 'dest' all we really have
+  to look for is assignments to local variables.
+  ]#
+  result = dest.kind != nkSym
+
+proc finishCopy(c: var Con; result, dest: PNode; isFromSink: bool) =
+  if c.graph.config.selectedGC == gcOrc:
+    let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink, tyDistinct})
+    if cyclicType(t):
+      result.add boolLit(c.graph, result.info, isFromSink or isCriticalLink(dest))
+
+proc genMarkCyclic(c: var Con; result, dest: PNode) =
+  if c.graph.config.selectedGC == gcOrc:
+    let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink, tyDistinct})
+    if cyclicType(t):
+      if t.kind == tyRef:
+        result.add callCodegenProc(c.graph, "nimMarkCyclic", dest.info, dest)
+      else:
+        let xenv = genBuiltin(c.graph, mAccessEnv, "accessEnv", dest)
+        xenv.typ = getSysType(c.graph, dest.info, tyPointer)
+        result.add callCodegenProc(c.graph, "nimMarkCyclic", dest.info, xenv)
+
 proc genCopyNoCheck(c: var Con; dest, ri: PNode): PNode =
   let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink})
   result = c.genOp(t, attachedAsgn, dest, ri)
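A hedged, user-level illustration of the rule `result = dest.kind != nkSym` above (ordinary Nim, not compiler code): only assignments whose destination is something other than a bare local or global symbol, such as a ref field, can close a cycle, so only those need to be flagged as cyclic.

type
  Node = ref object
    next: Node

var a = Node()
var b = Node()

var it = a      # destination is a bare symbol (nkSym): not a critical link
it = b          # still a plain local assignment: cannot close a cycle
a.next = b      # destination is a field access: a critical link
b.next = a      # critical as well; this one actually creates a cycle for ORC to reclaim
echo a.next == b, " ", b.next == a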
@@ -390,7 +427,9 @@ proc destructiveMoveVar(n: PNode; c: var Con; s: var Scope): PNode =
     v.add(vpart)
 
     result.add v
-    let wasMovedCall = c.genWasMoved(skipConv(n))
+    let nn = skipConv(n)
+    c.genMarkCyclic(result, nn)
+    let wasMovedCall = c.genWasMoved(nn)
     result.add wasMovedCall
     result.add tempAsNode
 
@@ -405,6 +444,7 @@ proc passCopyToSink(n: PNode; c: var Con; s: var Scope): PNode =
     result.add c.genWasMoved(tmp)
     var m = c.genCopy(tmp, n)
     m.add p(n, c, s, normal)
+    c.finishCopy(m, n, isFromSink = true)
     result.add m
     if isLValue(n) and not isCapturedVar(n) and n.typ.skipTypes(abstractInst).kind != tyRef and c.inSpawn == 0:
       message(c.graph.config, n.info, hintPerformance,
@@ -667,6 +707,7 @@ proc pRaiseStmt(n: PNode, c: var Con; s: var Scope): PNode =
       let tmp = c.getTemp(s, n[0].typ, n.info)
       var m = c.genCopyNoCheck(tmp, n[0])
       m.add p(n[0], c, s, normal)
+      c.finishCopy(m, n[0], isFromSink = false)
       result = newTree(nkStmtList, c.genWasMoved(tmp), m)
       var toDisarm = n[0]
       if toDisarm.kind == nkStmtListExpr: toDisarm = toDisarm.lastSon
@@ -941,16 +982,18 @@ proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope, isDecl = false): PNod
     else:
       result = c.genCopy(dest, ri)
       result.add p(ri, c, s, consumed)
+      c.finishCopy(result, dest, isFromSink = false)
   of nkBracket:
     # array constructor
     if ri.len > 0 and isDangerousSeq(ri.typ):
       result = c.genCopy(dest, ri)
       result.add p(ri, c, s, consumed)
+      c.finishCopy(result, dest, isFromSink = false)
     else:
       result = c.genSink(dest, p(ri, c, s, consumed), isDecl)
   of nkObjConstr, nkTupleConstr, nkClosure, nkCharLit..nkNilLit:
     result = c.genSink(dest, p(ri, c, s, consumed), isDecl)
   of nkSym:
     if dest.kind == nkSym and dest.sym == ri.sym:
       # rule (self-assignment-removal):
       result = newNodeI(nkEmpty, dest.info)
@@ -966,6 +1009,7 @@ proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope, isDecl = false): PNod
     else:
       result = c.genCopy(dest, ri)
       result.add p(ri, c, s, consumed)
+      c.finishCopy(result, dest, isFromSink = false)
   of nkHiddenSubConv, nkHiddenStdConv, nkConv, nkObjDownConv, nkObjUpConv:
     result = c.genSink(dest, p(ri, c, s, sinkArg), isDecl)
   of nkStmtListExpr, nkBlockExpr, nkIfExpr, nkCaseStmt, nkTryStmt:
@@ -983,6 +1027,7 @@ proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope, isDecl = false): PNod
   else:
     result = c.genCopy(dest, ri)
     result.add p(ri, c, s, consumed)
+    c.finishCopy(result, dest, isFromSink = false)
 
 proc computeUninit(c: var Con) =
   if not c.uninitComputed:
@@ -65,7 +65,7 @@ proc newAsgnStmt(le, ri: PNode): PNode =
   result[0] = le
   result[1] = ri
 
-proc genBuiltin(g: ModuleGraph; magic: TMagic; name: string; i: PNode): PNode =
+proc genBuiltin*(g: ModuleGraph; magic: TMagic; name: string; i: PNode): PNode =
   result = newNodeI(nkCall, i.info)
   result.add createMagic(g, name, magic).newSymNode
   result.add i
@@ -248,6 +248,19 @@ proc fillBodyObjT(c: var TLiftCtx; t: PType, body, x, y: PNode) =
   else:
     fillBodyObjTImpl(c, t, body, x, y)
 
+proc boolLit*(g: ModuleGraph; info: TLineInfo; value: bool): PNode =
+  result = newIntLit(g, info, ord value)
+  result.typ = getSysType(g, info, tyBool)
+
+proc getCycleParam(c: TLiftCtx): PNode =
+  assert c.kind == attachedAsgn
+  if c.fn.typ.len == 4:
+    result = c.fn.typ.n.lastSon
+    assert result.kind == nkSym
+    assert result.sym.name.s == "cyclic"
+  else:
+    result = boolLit(c.g, c.info, true)
+
 proc newHookCall(c: var TLiftCtx; op: PSym; x, y: PNode): PNode =
   #if sfError in op.flags:
   #  localError(c.config, x.info, "usage of '$1' is a user-defined error" % op.name.s)
@@ -261,6 +274,13 @@ proc newHookCall(c: var TLiftCtx; op: PSym; x, y: PNode): PNode =
     result.add x
   if y != nil:
     result.add y
+  if op.typ.len == 4:
+    assert y != nil
+    if c.fn.typ.len == 4:
+      result.add getCycleParam(c)
+    else:
+      # assume the worst: A cycle is created:
+      result.add boolLit(c.g, y.info, true)
 
 proc newOpCall(c: var TLiftCtx; op: PSym; x: PNode): PNode =
   result = newNodeIT(nkCall, x.info, op.typ[0])
@@ -526,6 +546,12 @@ proc fillStrOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
   of attachedTrace:
     discard "strings are atomic and have no inner elements that are to trace"
 
+proc cyclicType*(t: PType): bool =
+  case t.kind
+  of tyRef: result = types.canFormAcycle(t.lastSon)
+  of tyProc: result = t.callConv == ccClosure
+  else: result = false
+
 proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
   #[ bug #15753 is really subtle. Usually the classical write barrier for reference
   counting looks like this::
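A hedged, user-level illustration of what `cyclicType` distinguishes (the example types are invented, not from the diff): only ref types whose element can form a cycle and closure procs are treated as cyclic; acyclic refs never have to reach the cycle collector.

type
  Leaf = ref object
    value: int                     # no field can lead back to a Leaf: acyclic
  Node = ref object
    next: Node                     # self-referencing field: can form a cycle
  Callback = proc () {.closure.}   # closures carry an environment, treated as potentially cyclic

var leaf = Leaf(value: 1)          # plain reference counting is enough here
var ring = Node()
ring.next = ring                   # a one-element cycle; only the cycle collector can free it

var counter = 0
var cb: Callback = proc () = inc counter
cb()
echo leaf.value, " ", ring.next == ring, " ", counter   # 1 true 1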
@@ -588,12 +614,13 @@ proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
     body.add genIf(c, cond, actions)
     body.add newAsgnStmt(x, y)
   of attachedAsgn:
-    body.add genIf(c, y, callCodegenProc(c.g,
-      if isCyclic: "nimIncRefCyclic" else: "nimIncRef", c.info, y))
     if isCyclic:
+      body.add genIf(c, y, callCodegenProc(c.g,
+        "nimIncRefCyclic", c.info, y, getCycleParam(c)))
       body.add newAsgnStmt(x, y)
       body.add genIf(c, cond, actions)
     else:
+      body.add genIf(c, y, callCodegenProc(c.g, "nimIncRef", c.info, y))
       body.add genIf(c, cond, actions)
       body.add newAsgnStmt(x, y)
   of attachedDestructor:
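To make the lowered assignment concrete, here is a hedged, self-contained model of the write barrier generated above; `Header` and `incRefCyclic` are invented stand-ins, not the runtime's real cell header or `nimIncRefCyclic`.

type
  Header = object        # stand-in for the per-cell header
    rc: int
    maybeCycle: bool

proc incRefCyclic(h: var Header; cyclic: bool) =
  inc h.rc
  if cyclic:
    # only assignments flagged as potentially cycle-forming taint the cell;
    # plain traversals (`it = it.next`) pass cyclic = false and stay cheap
    h.maybeCycle = true

var cell: Header
incRefCyclic(cell, cyclic = false)   # e.g. copying into a local during traversal
incRefCyclic(cell, cyclic = true)    # e.g. `node.next = head`, a critical link
echo cell.rc, " ", cell.maybeCycle   # 2 true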
@@ -649,14 +676,13 @@ proc atomicClosureOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
   of attachedAsgn:
     let yenv = genBuiltin(c.g, mAccessEnv, "accessEnv", y)
     yenv.typ = getSysType(c.g, c.info, tyPointer)
-    let incRefProc =
-      if c.g.config.selectedGC == gcOrc: "nimIncRefCyclic"
-      else: "nimIncRef"
-    body.add genIf(c, yenv, callCodegenProc(c.g, incRefProc, c.info, yenv))
     if isCyclic:
+      body.add genIf(c, yenv, callCodegenProc(c.g, "nimIncRefCyclic", c.info, yenv, getCycleParam(c)))
       body.add newAsgnStmt(x, y)
       body.add genIf(c, cond, actions)
     else:
+      body.add genIf(c, yenv, callCodegenProc(c.g, "nimIncRef", c.info, yenv))
+
       body.add genIf(c, cond, actions)
       body.add newAsgnStmt(x, y)
   of attachedDestructor:
@@ -888,6 +914,13 @@ proc symPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttachedOp
   if kind notin {attachedDestructor, attachedDispose}:
     result.typ.addParam src
 
+  if kind == attachedAsgn and g.config.selectedGC == gcOrc and
+      cyclicType(typ.skipTypes(abstractInst)):
+    let cycleParam = newSym(skParam, getIdent(g.cache, "cyclic"),
+                            nextId(idgen), result, info)
+    cycleParam.typ = getSysType(g, info, tyBool)
+    result.typ.addParam cycleParam
+
   var n = newNodeI(nkProcDef, info, bodyPos+1)
   for i in 0..<n.len: n[i] = newNodeI(nkEmpty, info)
   n[namePos] = newSymNode(result)
@@ -907,8 +940,8 @@ proc produceSym(g: ModuleGraph; c: PContext; typ: PType; kind: TTypeAttachedOp;
   if result == nil:
     result = symPrototype(g, typ, typ.owner, kind, info, idgen)
 
-  var a = TLiftCtx(info: info, g: g, kind: kind, c: c, asgnForType:typ, idgen: idgen)
-  a.fn = result
+  var a = TLiftCtx(info: info, g: g, kind: kind, c: c, asgnForType: typ, idgen: idgen,
+                   fn: result)
 
   let dest = result.typ.n[1].sym
   let d = newDeref(newSymNode(dest))
@@ -943,8 +976,8 @@ proc produceDestructorForDiscriminator*(g: ModuleGraph; typ: PType; field: PSym,
                                         info: TLineInfo; idgen: IdGenerator): PSym =
   assert(typ.skipTypes({tyAlias, tyGenericInst}).kind == tyObject)
   result = symPrototype(g, field.typ, typ.owner, attachedDestructor, info, idgen)
-  var a = TLiftCtx(info: info, g: g, kind: attachedDestructor, asgnForType: typ, idgen: idgen)
-  a.fn = result
+  var a = TLiftCtx(info: info, g: g, kind: attachedDestructor, asgnForType: typ, idgen: idgen,
+                   fn: result)
   a.asgnForType = typ
   a.filterDiscriminator = field
   a.addMemReset = true
@@ -75,6 +75,8 @@ when defined(nimArcDebug):
 elif defined(nimArcIds):
   var gRefId: int
 
+  const traceId = -1
+
 proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
   let hdrSize = align(sizeof(RefHeader), alignment)
   let s = size + hdrSize
@@ -126,13 +128,14 @@ proc nimIncRef(p: pointer) {.compilerRtl, inl.} =
   when traceCollector:
     cprintf("[INCREF] %p\n", head(p))
 
-proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
-  # This is only used by the old RTTI mechanism and we know
-  # that 'dest[]' is nil and needs no destruction. Which is really handy
-  # as we cannot destroy the object reliably if it's an object of unknown
-  # compile-time type.
-  dest[] = src
-  if src != nil: nimIncRef src
+when not defined(gcOrc) or defined(nimThinout):
+  proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
+    # This is only used by the old RTTI mechanism and we know
+    # that 'dest[]' is nil and needs no destruction. Which is really handy
+    # as we cannot destroy the object reliably if it's an object of unknown
+    # compile-time type.
+    dest[] = src
+    if src != nil: nimIncRef src
 
 when not defined(nimscript) and defined(nimArcDebug):
   proc deallocatedRefId*(p: pointer): int =
@@ -70,10 +70,12 @@ template color(c): untyped = c.rc and colorMask
 template setColor(c, col) =
   c.rc = c.rc and not colorMask or col
 
-proc nimIncRefCyclic(p: pointer) {.compilerRtl, inl.} =
+proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
   let h = head(p)
   inc h.rc, rcIncrement
 
+proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} = discard
+
 type
   GcEnv = object
     traceStack: CellSeq
@@ -21,8 +21,7 @@ const
   colBlack = 0b000
   colGray = 0b001
   colWhite = 0b010
-  colPurple = 0b011
-  isCycleCandidate = 0b100 # cell is marked as a cycle candidate
+  maybeCycle = 0b100 # possibly part of a cycle; this has to be a "sticky" bit
   jumpStackFlag = 0b1000
   colorMask = 0b011
 
@@ -39,11 +38,29 @@ template setColor(c, col) =
   else:
     c.rc = c.rc and not colorMask or col
 
-proc nimIncRefCyclic(p: pointer) {.compilerRtl, inl.} =
+const
+  optimizedOrc = not defined(nimOldOrc)
+
+proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
   let h = head(p)
   inc h.rc, rcIncrement
-  #h.setColor colPurple # mark as potential cycle!
-  h.setColor colBlack
+  when optimizedOrc:
+    if cyclic:
+      h.rc = h.rc or maybeCycle
+
+proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} =
+  when optimizedOrc:
+    if p != nil:
+      let h = head(p)
+      h.rc = h.rc or maybeCycle
+
+proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
+  # This is only used by the old RTTI mechanism and we know
+  # that 'dest[]' is nil and needs no destruction. Which is really handy
+  # as we cannot destroy the object reliably if it's an object of unknown
+  # compile-time type.
+  dest[] = src
+  if src != nil: nimIncRefCyclic(src, true)
 
 const
   useJumpStack = false # for thavlak the jump stack doesn't improve the performance at all
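A hedged model of the flag arithmetic introduced above (the color and maybeCycle constants mirror the hunks; the rcIncrement value is illustrative, not the runtime's exact layout):

const
  colorMask  = 0b011
  colBlack   = 0b000
  maybeCycle = 0b100        # "sticky": setColor never clears it
  rcIncrement = 0b10000     # illustrative; the real counter lives above the flag bits

var rc = 0
rc = rc and not colorMask or colBlack   # setColor colBlack keeps maybeCycle intact
rc = rc or maybeCycle                   # nimMarkCyclic / nimIncRefCyclic(p, true)
inc rc, rcIncrement                     # the reference count itself
echo (rc and maybeCycle) != 0           # markedAsCyclic: true, so the cell may be registered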
@@ -113,14 +130,15 @@ var
 
 proc unregisterCycle(s: Cell) =
   # swap with the last element. O(1)
-  let idx = s.rootIdx
+  let idx = s.rootIdx-1
   when false:
     if idx >= roots.len or idx < 0:
       cprintf("[Bug!] %ld\n", idx)
       quit 1
   roots.d[idx] = roots.d[roots.len-1]
-  roots.d[idx][0].rootIdx = idx
+  roots.d[idx][0].rootIdx = idx+1
   dec roots.len
+  s.rootIdx = 0
 
 proc scanBlack(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
   #[
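A hedged sketch of the indexing convention this hunk switches to: rootIdx is kept 1-based so that 0 can mean "not registered", which lets rootIdx replace the old isCycleCandidate bit. The bare seq and proc bodies below are invented stand-ins for the runtime's CellSeq machinery.

type
  Cell = ref object
    rootIdx: int            # 0 = not in the roots list; otherwise 1-based position

var roots: seq[Cell]

proc registerCycle(c: Cell) =
  roots.add c
  c.rootIdx = roots.len     # equivalent to `s.rootIdx = roots.len+1` before the add

proc unregisterCycle(c: Cell) =
  let idx = c.rootIdx - 1   # back to 0-based for the seq
  roots[idx] = roots[^1]    # swap with the last element: O(1)
  roots[idx].rootIdx = idx + 1
  roots.setLen(roots.len - 1)
  c.rootIdx = 0

var a = Cell()
var b = Cell()
registerCycle a
registerCycle b
unregisterCycle a
echo a.rootIdx, " ", b.rootIdx   # 0 1: `a` is out, `b` now sits in slot 1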
@@ -245,7 +263,7 @@ proc collectWhite(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
         collectWhite(t)
       free(s) # watch out, a bug here!
   ]#
-  if s.color == colWhite and (s.rc and isCycleCandidate) == 0:
+  if s.color == colWhite and s.rootIdx == 0:
     orcAssert(j.traceStack.len == 0, "collectWhite: trace stack not empty")
 
     s.setColor(colBlack)
@@ -253,7 +271,7 @@ proc collectWhite(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
     trace(s, desc, j)
     while j.traceStack.len > 0:
       let (t, desc) = j.traceStack.pop()
-      if t.color == colWhite and (t.rc and isCycleCandidate) == 0:
+      if t.color == colWhite and t.rootIdx == 0:
         j.toFree.add(t, desc)
         t.setColor(colBlack)
         trace(t, desc, j)
@@ -284,7 +302,7 @@ proc collectCyclesBacon(j: var GcEnv) =
   init j.toFree
   for i in 0 ..< roots.len:
     let s = roots.d[i][0]
-    s.rc = s.rc and not isCycleCandidate
+    s.rootIdx = 0
     collectWhite(s, roots.d[i][1], j)
 
   for i in 0 ..< j.toFree.len:
@@ -341,13 +359,14 @@ proc collectCycles() =
       getOccupiedMem())
 
 proc registerCycle(s: Cell; desc: PNimTypeV2) =
-  s.rootIdx = roots.len
+  s.rootIdx = roots.len+1
   if roots.d == nil: init(roots)
   add(roots, s, desc)
 
   if roots.len >= rootsThreshold:
     collectCycles()
-  #writeCell("[added root]", s)
+  when logOrc:
+    writeCell("[added root]", s, desc)
 
 proc GC_runOrc* =
   ## Forces a cycle collection pass.
@@ -379,17 +398,19 @@ proc GC_disableMarkAndSweep*() =
   ## For ``--gc:orc`` an alias for ``GC_disableOrc``.
   GC_disableOrc()
 
+when optimizedOrc:
+  template markedAsCyclic(s: Cell): bool = (s.rc and maybeCycle) != 0
+else:
+  template markedAsCyclic(s: Cell): bool = true
+
 proc rememberCycle(isDestroyAction: bool; s: Cell; desc: PNimTypeV2) {.noinline.} =
   if isDestroyAction:
-    if (s.rc and isCycleCandidate) != 0:
-      s.rc = s.rc and not isCycleCandidate
+    if s.rootIdx > 0:
       unregisterCycle(s)
   else:
     # do not call 'rememberCycle' again unless this cell
     # got an 'incRef' event:
-    #s.setColor colGreen # XXX This is wrong!
-    if (s.rc and isCycleCandidate) == 0:
-      s.rc = s.rc or isCycleCandidate
+    if s.rootIdx == 0 and markedAsCyclic(s):
       s.setColor colBlack
       registerCycle(s, desc)
 
@@ -1,5 +1,5 @@
 discard """
-  outputsub: "(allocCount: 4016, deallocCount: 4014)"
+  outputsub: "(allocCount: 4014, deallocCount: 4012)"
   cmd: "nim c --gc:orc -d:nimAllocStats $file"
 """