ORC: prepare for another patent-pending optimization (#15996)

* ORC: prepare for another patent-pending optimization

* bugfix

* '=copy' for refs can take a cyclic parameter for more ORC optimizations

* ORC: exploit the common 'it = it.next' pattern

* can't hurt to check for nil

* use an algorithm that is not obviously broken

* restore the test case

* final cleanups for --gc:orc
This commit is contained in:
Andreas Rumpf
2020-11-18 22:31:06 +01:00
committed by GitHub
parent baaa19b927
commit b9eb91cbb5
7 changed files with 147 additions and 39 deletions

View File

@@ -38,6 +38,10 @@
- Added `asyncdispatch.activeDescriptors` that returns the number of currently
active async event handles/file descriptors
- ``--gc:orc`` is now 10% faster than previously for common workloads. If
you have trouble with its changed behavior, compile with ``-d:nimOldOrc``.
- `os.FileInfo` (returned by `getFileInfo`) now contains `blockSize`,
determining preferred I/O block size for this file object.

View File

@@ -311,6 +311,43 @@ proc genSink(c: var Con; dest, ri: PNode, isDecl = false): PNode =
# and copyMem(dest, source). This is efficient.
result = newTree(nkStmtList, c.genDestroy(dest), newTree(nkFastAsgn, dest, ri))
proc isCriticalLink(dest: PNode): bool {.inline.} =
#[
Lins's idea that only "critical" links can introduce a cycle. This is
critical for the performance guarantees that we strive for: If you
traverse a data structure, no tracing will be performed at all.
ORC is about this promise: The GC only touches the memory that the
mutator touches too.
These constructs cannot possibly create cycles::
local = ...
new(x)
dest = ObjectConstructor(field: noalias(dest))
But since 'ObjectConstructor' is already moved into 'dest' all we really have
to look for is assignments to local variables.
]#
result = dest.kind != nkSym
proc finishCopy(c: var Con; result, dest: PNode; isFromSink: bool) =
if c.graph.config.selectedGC == gcOrc:
let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink, tyDistinct})
if cyclicType(t):
result.add boolLit(c.graph, result.info, isFromSink or isCriticalLink(dest))
proc genMarkCyclic(c: var Con; result, dest: PNode) =
if c.graph.config.selectedGC == gcOrc:
let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink, tyDistinct})
if cyclicType(t):
if t.kind == tyRef:
result.add callCodegenProc(c.graph, "nimMarkCyclic", dest.info, dest)
else:
let xenv = genBuiltin(c.graph, mAccessEnv, "accessEnv", dest)
xenv.typ = getSysType(c.graph, dest.info, tyPointer)
result.add callCodegenProc(c.graph, "nimMarkCyclic", dest.info, xenv)
proc genCopyNoCheck(c: var Con; dest, ri: PNode): PNode =
let t = dest.typ.skipTypes({tyGenericInst, tyAlias, tySink})
result = c.genOp(t, attachedAsgn, dest, ri)
@@ -390,7 +427,9 @@ proc destructiveMoveVar(n: PNode; c: var Con; s: var Scope): PNode =
v.add(vpart)
result.add v
let wasMovedCall = c.genWasMoved(skipConv(n))
let nn = skipConv(n)
c.genMarkCyclic(result, nn)
let wasMovedCall = c.genWasMoved(nn)
result.add wasMovedCall
result.add tempAsNode
@@ -405,6 +444,7 @@ proc passCopyToSink(n: PNode; c: var Con; s: var Scope): PNode =
result.add c.genWasMoved(tmp)
var m = c.genCopy(tmp, n)
m.add p(n, c, s, normal)
c.finishCopy(m, n, isFromSink = true)
result.add m
if isLValue(n) and not isCapturedVar(n) and n.typ.skipTypes(abstractInst).kind != tyRef and c.inSpawn == 0:
message(c.graph.config, n.info, hintPerformance,
@@ -667,6 +707,7 @@ proc pRaiseStmt(n: PNode, c: var Con; s: var Scope): PNode =
let tmp = c.getTemp(s, n[0].typ, n.info)
var m = c.genCopyNoCheck(tmp, n[0])
m.add p(n[0], c, s, normal)
c.finishCopy(m, n[0], isFromSink = false)
result = newTree(nkStmtList, c.genWasMoved(tmp), m)
var toDisarm = n[0]
if toDisarm.kind == nkStmtListExpr: toDisarm = toDisarm.lastSon
@@ -941,16 +982,18 @@ proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope, isDecl = false): PNod
else:
result = c.genCopy(dest, ri)
result.add p(ri, c, s, consumed)
c.finishCopy(result, dest, isFromSink = false)
of nkBracket:
# array constructor
if ri.len > 0 and isDangerousSeq(ri.typ):
result = c.genCopy(dest, ri)
result.add p(ri, c, s, consumed)
c.finishCopy(result, dest, isFromSink = false)
else:
result = c.genSink(dest, p(ri, c, s, consumed), isDecl)
of nkObjConstr, nkTupleConstr, nkClosure, nkCharLit..nkNilLit:
result = c.genSink(dest, p(ri, c, s, consumed), isDecl)
of nkSym:
of nkSym:
if dest.kind == nkSym and dest.sym == ri.sym:
# rule (self-assignment-removal):
result = newNodeI(nkEmpty, dest.info)
@@ -966,6 +1009,7 @@ proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope, isDecl = false): PNod
else:
result = c.genCopy(dest, ri)
result.add p(ri, c, s, consumed)
c.finishCopy(result, dest, isFromSink = false)
of nkHiddenSubConv, nkHiddenStdConv, nkConv, nkObjDownConv, nkObjUpConv:
result = c.genSink(dest, p(ri, c, s, sinkArg), isDecl)
of nkStmtListExpr, nkBlockExpr, nkIfExpr, nkCaseStmt, nkTryStmt:
@@ -983,6 +1027,7 @@ proc moveOrCopy(dest, ri: PNode; c: var Con; s: var Scope, isDecl = false): PNod
else:
result = c.genCopy(dest, ri)
result.add p(ri, c, s, consumed)
c.finishCopy(result, dest, isFromSink = false)
proc computeUninit(c: var Con) =
if not c.uninitComputed:

View File

@@ -65,7 +65,7 @@ proc newAsgnStmt(le, ri: PNode): PNode =
result[0] = le
result[1] = ri
proc genBuiltin(g: ModuleGraph; magic: TMagic; name: string; i: PNode): PNode =
proc genBuiltin*(g: ModuleGraph; magic: TMagic; name: string; i: PNode): PNode =
result = newNodeI(nkCall, i.info)
result.add createMagic(g, name, magic).newSymNode
result.add i
@@ -248,6 +248,19 @@ proc fillBodyObjT(c: var TLiftCtx; t: PType, body, x, y: PNode) =
else:
fillBodyObjTImpl(c, t, body, x, y)
proc boolLit*(g: ModuleGraph; info: TLineInfo; value: bool): PNode =
result = newIntLit(g, info, ord value)
result.typ = getSysType(g, info, tyBool)
proc getCycleParam(c: TLiftCtx): PNode =
assert c.kind == attachedAsgn
if c.fn.typ.len == 4:
result = c.fn.typ.n.lastSon
assert result.kind == nkSym
assert result.sym.name.s == "cyclic"
else:
result = boolLit(c.g, c.info, true)
proc newHookCall(c: var TLiftCtx; op: PSym; x, y: PNode): PNode =
#if sfError in op.flags:
# localError(c.config, x.info, "usage of '$1' is a user-defined error" % op.name.s)
@@ -261,6 +274,13 @@ proc newHookCall(c: var TLiftCtx; op: PSym; x, y: PNode): PNode =
result.add x
if y != nil:
result.add y
if op.typ.len == 4:
assert y != nil
if c.fn.typ.len == 4:
result.add getCycleParam(c)
else:
# assume the worst: A cycle is created:
result.add boolLit(c.g, y.info, true)
proc newOpCall(c: var TLiftCtx; op: PSym; x: PNode): PNode =
result = newNodeIT(nkCall, x.info, op.typ[0])
@@ -526,6 +546,12 @@ proc fillStrOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
of attachedTrace:
discard "strings are atomic and have no inner elements that are to trace"
proc cyclicType*(t: PType): bool =
case t.kind
of tyRef: result = types.canFormAcycle(t.lastSon)
of tyProc: result = t.callConv == ccClosure
else: result = false
proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
#[ bug #15753 is really subtle. Usually the classical write barrier for reference
counting looks like this::
@@ -588,12 +614,13 @@ proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
body.add genIf(c, cond, actions)
body.add newAsgnStmt(x, y)
of attachedAsgn:
body.add genIf(c, y, callCodegenProc(c.g,
if isCyclic: "nimIncRefCyclic" else: "nimIncRef", c.info, y))
if isCyclic:
body.add genIf(c, y, callCodegenProc(c.g,
"nimIncRefCyclic", c.info, y, getCycleParam(c)))
body.add newAsgnStmt(x, y)
body.add genIf(c, cond, actions)
else:
body.add genIf(c, y, callCodegenProc(c.g, "nimIncRef", c.info, y))
body.add genIf(c, cond, actions)
body.add newAsgnStmt(x, y)
of attachedDestructor:
@@ -649,14 +676,13 @@ proc atomicClosureOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
of attachedAsgn:
let yenv = genBuiltin(c.g, mAccessEnv, "accessEnv", y)
yenv.typ = getSysType(c.g, c.info, tyPointer)
let incRefProc =
if c.g.config.selectedGC == gcOrc: "nimIncRefCyclic"
else: "nimIncRef"
body.add genIf(c, yenv, callCodegenProc(c.g, incRefProc, c.info, yenv))
if isCyclic:
body.add genIf(c, yenv, callCodegenProc(c.g, "nimIncRefCyclic", c.info, yenv, getCycleParam(c)))
body.add newAsgnStmt(x, y)
body.add genIf(c, cond, actions)
else:
body.add genIf(c, yenv, callCodegenProc(c.g, "nimIncRef", c.info, yenv))
body.add genIf(c, cond, actions)
body.add newAsgnStmt(x, y)
of attachedDestructor:
@@ -888,6 +914,13 @@ proc symPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttachedOp
if kind notin {attachedDestructor, attachedDispose}:
result.typ.addParam src
if kind == attachedAsgn and g.config.selectedGC == gcOrc and
cyclicType(typ.skipTypes(abstractInst)):
let cycleParam = newSym(skParam, getIdent(g.cache, "cyclic"),
nextId(idgen), result, info)
cycleParam.typ = getSysType(g, info, tyBool)
result.typ.addParam cycleParam
var n = newNodeI(nkProcDef, info, bodyPos+1)
for i in 0..<n.len: n[i] = newNodeI(nkEmpty, info)
n[namePos] = newSymNode(result)
@@ -907,8 +940,8 @@ proc produceSym(g: ModuleGraph; c: PContext; typ: PType; kind: TTypeAttachedOp;
if result == nil:
result = symPrototype(g, typ, typ.owner, kind, info, idgen)
var a = TLiftCtx(info: info, g: g, kind: kind, c: c, asgnForType:typ, idgen: idgen)
a.fn = result
var a = TLiftCtx(info: info, g: g, kind: kind, c: c, asgnForType: typ, idgen: idgen,
fn: result)
let dest = result.typ.n[1].sym
let d = newDeref(newSymNode(dest))
@@ -943,8 +976,8 @@ proc produceDestructorForDiscriminator*(g: ModuleGraph; typ: PType; field: PSym,
info: TLineInfo; idgen: IdGenerator): PSym =
assert(typ.skipTypes({tyAlias, tyGenericInst}).kind == tyObject)
result = symPrototype(g, field.typ, typ.owner, attachedDestructor, info, idgen)
var a = TLiftCtx(info: info, g: g, kind: attachedDestructor, asgnForType: typ, idgen: idgen)
a.fn = result
var a = TLiftCtx(info: info, g: g, kind: attachedDestructor, asgnForType: typ, idgen: idgen,
fn: result)
a.asgnForType = typ
a.filterDiscriminator = field
a.addMemReset = true

View File

@@ -75,6 +75,8 @@ when defined(nimArcDebug):
elif defined(nimArcIds):
var gRefId: int
const traceId = -1
proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
let hdrSize = align(sizeof(RefHeader), alignment)
let s = size + hdrSize
@@ -126,13 +128,14 @@ proc nimIncRef(p: pointer) {.compilerRtl, inl.} =
when traceCollector:
cprintf("[INCREF] %p\n", head(p))
proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
# This is only used by the old RTTI mechanism and we know
# that 'dest[]' is nil and needs no destruction. Which is really handy
# as we cannot destroy the object reliably if it's an object of unknown
# compile-time type.
dest[] = src
if src != nil: nimIncRef src
when not defined(gcOrc) or defined(nimThinout):
proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
# This is only used by the old RTTI mechanism and we know
# that 'dest[]' is nil and needs no destruction. Which is really handy
# as we cannot destroy the object reliably if it's an object of unknown
# compile-time type.
dest[] = src
if src != nil: nimIncRef src
when not defined(nimscript) and defined(nimArcDebug):
proc deallocatedRefId*(p: pointer): int =

View File

@@ -70,10 +70,12 @@ template color(c): untyped = c.rc and colorMask
template setColor(c, col) =
c.rc = c.rc and not colorMask or col
proc nimIncRefCyclic(p: pointer) {.compilerRtl, inl.} =
proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
let h = head(p)
inc h.rc, rcIncrement
proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} = discard
type
GcEnv = object
traceStack: CellSeq

View File

@@ -21,8 +21,7 @@ const
colBlack = 0b000
colGray = 0b001
colWhite = 0b010
colPurple = 0b011
isCycleCandidate = 0b100 # cell is marked as a cycle candidate
maybeCycle = 0b100 # possibly part of a cycle; this has to be a "sticky" bit
jumpStackFlag = 0b1000
colorMask = 0b011
@@ -39,11 +38,29 @@ template setColor(c, col) =
else:
c.rc = c.rc and not colorMask or col
proc nimIncRefCyclic(p: pointer) {.compilerRtl, inl.} =
const
optimizedOrc = not defined(nimOldOrc)
proc nimIncRefCyclic(p: pointer; cyclic: bool) {.compilerRtl, inl.} =
let h = head(p)
inc h.rc, rcIncrement
#h.setColor colPurple # mark as potential cycle!
h.setColor colBlack
when optimizedOrc:
if cyclic:
h.rc = h.rc or maybeCycle
proc nimMarkCyclic(p: pointer) {.compilerRtl, inl.} =
when optimizedOrc:
if p != nil:
let h = head(p)
h.rc = h.rc or maybeCycle
proc unsureAsgnRef(dest: ptr pointer, src: pointer) {.inline.} =
# This is only used by the old RTTI mechanism and we know
# that 'dest[]' is nil and needs no destruction. Which is really handy
# as we cannot destroy the object reliably if it's an object of unknown
# compile-time type.
dest[] = src
if src != nil: nimIncRefCyclic(src, true)
const
useJumpStack = false # for thavlak the jump stack doesn't improve the performance at all
@@ -113,14 +130,15 @@ var
proc unregisterCycle(s: Cell) =
# swap with the last element. O(1)
let idx = s.rootIdx
let idx = s.rootIdx-1
when false:
if idx >= roots.len or idx < 0:
cprintf("[Bug!] %ld\n", idx)
quit 1
roots.d[idx] = roots.d[roots.len-1]
roots.d[idx][0].rootIdx = idx
roots.d[idx][0].rootIdx = idx+1
dec roots.len
s.rootIdx = 0
proc scanBlack(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
#[
@@ -245,7 +263,7 @@ proc collectWhite(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
collectWhite(t)
free(s) # watch out, a bug here!
]#
if s.color == colWhite and (s.rc and isCycleCandidate) == 0:
if s.color == colWhite and s.rootIdx == 0:
orcAssert(j.traceStack.len == 0, "collectWhite: trace stack not empty")
s.setColor(colBlack)
@@ -253,7 +271,7 @@ proc collectWhite(s: Cell; desc: PNimTypeV2; j: var GcEnv) =
trace(s, desc, j)
while j.traceStack.len > 0:
let (t, desc) = j.traceStack.pop()
if t.color == colWhite and (t.rc and isCycleCandidate) == 0:
if t.color == colWhite and t.rootIdx == 0:
j.toFree.add(t, desc)
t.setColor(colBlack)
trace(t, desc, j)
@@ -284,7 +302,7 @@ proc collectCyclesBacon(j: var GcEnv) =
init j.toFree
for i in 0 ..< roots.len:
let s = roots.d[i][0]
s.rc = s.rc and not isCycleCandidate
s.rootIdx = 0
collectWhite(s, roots.d[i][1], j)
for i in 0 ..< j.toFree.len:
@@ -341,13 +359,14 @@ proc collectCycles() =
getOccupiedMem())
proc registerCycle(s: Cell; desc: PNimTypeV2) =
s.rootIdx = roots.len
s.rootIdx = roots.len+1
if roots.d == nil: init(roots)
add(roots, s, desc)
if roots.len >= rootsThreshold:
collectCycles()
#writeCell("[added root]", s)
when logOrc:
writeCell("[added root]", s, desc)
proc GC_runOrc* =
## Forces a cycle collection pass.
@@ -379,17 +398,19 @@ proc GC_disableMarkAndSweep*() =
## For ``--gc:orc`` an alias for ``GC_disableOrc``.
GC_disableOrc()
when optimizedOrc:
template markedAsCyclic(s: Cell): bool = (s.rc and maybeCycle) != 0
else:
template markedAsCyclic(s: Cell): bool = true
proc rememberCycle(isDestroyAction: bool; s: Cell; desc: PNimTypeV2) {.noinline.} =
if isDestroyAction:
if (s.rc and isCycleCandidate) != 0:
s.rc = s.rc and not isCycleCandidate
if s.rootIdx > 0:
unregisterCycle(s)
else:
# do not call 'rememberCycle' again unless this cell
# got an 'incRef' event:
#s.setColor colGreen # XXX This is wrong!
if (s.rc and isCycleCandidate) == 0:
s.rc = s.rc or isCycleCandidate
if s.rootIdx == 0 and markedAsCyclic(s):
s.setColor colBlack
registerCycle(s, desc)

View File

@@ -1,5 +1,5 @@
discard """
outputsub: "(allocCount: 4016, deallocCount: 4014)"
outputsub: "(allocCount: 4014, deallocCount: 4012)"
cmd: "nim c --gc:orc -d:nimAllocStats $file"
"""