Reworked gc support for coroutines. Nim now bootstraps with -d:nimCoroutines

Added gc test to coro.nim
Lots of misc improvements and comments in coro.nim
This commit is contained in:
Rokas Kupstys
2017-02-13 19:42:47 +02:00
parent ce4e9846f0
commit c3d1b732d6
3 changed files with 231 additions and 164 deletions

View File

@@ -26,9 +26,9 @@ include system/timers
const defaultStackSize = 512 * 1024
proc GC_addStack(starts: pointer) {.cdecl, importc.}
proc GC_removeStack(starts: pointer) {.cdecl, importc.}
proc GC_setCurrentStack(starts, pos: pointer) {.cdecl, importc.}
proc GC_addStack(bottom: pointer) {.cdecl, importc.}
proc GC_removeStack(bottom: pointer) {.cdecl, importc.}
proc GC_setActiveStack(bottom: pointer) {.cdecl, importc.}
const
CORO_BACKEND_UCONTEXT = 0
@@ -50,11 +50,6 @@ when coroBackend == CORO_BACKEND_FIBERS:
import windows.winlean
type
Context = pointer
Fiber {.final, pure.} = object
parameter: pointer
pad1: pointer
stackStart: pointer
stackEnd: pointer
elif coroBackend == CORO_BACKEND_UCONTEXT:
type
@@ -123,8 +118,8 @@ const
type
Stack = object
start: pointer
ends: pointer
top: pointer # Top of the stack. Pointer used for deallocating stack if we own it.
bottom: pointer # Very bottom of the stack, acts as unique stack identifier.
size: int
Coroutine = ref object
@@ -164,44 +159,59 @@ proc runCurrentTask()
proc switchTo(current, to: Coroutine) =
## Switches execution from `current` into `to` context.
to.lastRun = getTicks()
# Execution will switch to another fiber now.
when coroBackend == CORO_BACKEND_FIBERS:
SwitchToFiber(to.execContext)
elif coroBackend == CORO_BACKEND_UCONTEXT:
discard swapcontext(current.execContext, to.execContext)
elif coroBackend == CORO_BACKEND_SETJMP:
var res = setjmp(current.execContext)
if res == 0:
if to.state == CORO_EXECUTING:
# Coroutine is resumed.
longjmp(to.execContext, 1)
elif to.state == CORO_CREATED:
# Coroutine is started.
coroExecWithStack(runCurrentTask, to.stack.ends)
doAssert false
else:
{.error: "Invalid coroutine backend set.".}
# Execution was just resumed. Set active stack to current one.
GC_setCurrentStack(current.stack.start)
# Update position of current stack so gc invoked from another stack knows how much to scan.
GC_setActiveStack(current.stack.bottom)
var frame = getFrameState()
block:
# Execution will switch to another fiber now. We do not need to update current stack
when coroBackend == CORO_BACKEND_FIBERS:
SwitchToFiber(to.execContext)
elif coroBackend == CORO_BACKEND_UCONTEXT:
discard swapcontext(current.execContext, to.execContext)
elif coroBackend == CORO_BACKEND_SETJMP:
var res = setjmp(current.execContext)
if res == 0:
if to.state == CORO_EXECUTING:
# Coroutine is resumed.
longjmp(to.execContext, 1)
elif to.state == CORO_CREATED:
# Coroutine is started.
coroExecWithStack(runCurrentTask, to.stack.bottom)
doAssert false
else:
{.error: "Invalid coroutine backend set.".}
# Execution was just resumed. Restore frame information and set active stack.
setFrameState(frame)
GC_setActiveStack(current.stack.bottom)
proc suspend*(sleepTime: float=0) =
## Stops coroutine execution and resumes no sooner than after ``sleeptime`` seconds.
## Until then other coroutines are executed.
var current = getCurrent()
current.sleepTime = sleepTime
var frame = getFrameState()
switchTo(current, ctx.loop)
setFrameState(frame)
proc runCurrentTask() =
## Starts execution of the current coroutine and updates its state throughout the coroutine's life.
var current = getCurrent()
# Execution of new fiber just started. Since it was entered not through `switchTo` we
# have to set active stack here as well.
GC_setCurrentStack(current.stack.start)
current.state = CORO_EXECUTING
current.fn() # Start coroutine execution
current.state = CORO_FINISHED
var sp {.volatile.}: pointer
sp = addr(sp)
block:
var current = getCurrent()
current.stack.bottom = sp
# Execution of new fiber just started. Since it was entered not through `switchTo` we
# have to set active stack here as well. GC_removeStack() has to be called in main loop
# because we still need stack available in final suspend(0) call from which we will not
# return.
GC_addStack(sp)
# Activate current stack because we are executing in a new coroutine.
GC_setActiveStack(sp)
current.state = CORO_EXECUTING
try:
current.fn() # Start coroutine execution
except:
echo "Unhandled exception in coroutine."
writeStackTrace()
current.state = CORO_FINISHED
suspend(0) # Exit coroutine without returning from coroExecWithStack()
doAssert false
@@ -215,25 +225,20 @@ proc start*(c: proc(), stacksize: int=defaultStackSize) =
when coroBackend == CORO_BACKEND_FIBERS:
coro.execContext = CreateFiberEx(stacksize, stacksize,
FIBER_FLAG_FLOAT_SWITCH, (proc(p: pointer): void {.stdcall.} = runCurrentTask()), nil)
var fiber = cast[ptr Fiber](coro.execContext)
coro.stack.start = fiber.stackStart
coro.stack.ends = fiber.stackEnd
coro.stack.size = stacksize
else:
var stack: pointer
while stack == nil:
stack = alloc0(stacksize)
coro.stack.start = stack
coro.stack.ends = cast[pointer](cast[ByteAddress](stack) + stacksize)
coro.stack.top = stack
when coroBackend == CORO_BACKEND_UCONTEXT:
discard getcontext(coro.execContext)
coro.execContext.uc_stack.ss_sp = coro.stack.ends
coro.execContext.uc_stack.ss_sp = cast[pointer](cast[ByteAddress](stack) + stacksize)
coro.execContext.uc_stack.ss_size = coro.stack.size
coro.execContext.uc_link = addr ctx.loop.execContext
makecontext(coro.execContext, runCurrentTask, 0)
coro.stack.size = stacksize
coro.state = CORO_CREATED
GC_addStack(coro.stack.ends)
ctx.coroutines.append(coro)
proc run*() =
@@ -248,9 +253,7 @@ proc run*() =
var remaining = current.sleepTime - (float(getTicks() - current.lastRun) / 1_000_000_000)
if remaining <= 0:
# Save main loop context and switch to the coroutine. When the coroutine suspends, execution resumes after this statement.
var frame = getFrameState()
switchTo(ctx.loop, current)
setFrameState(frame)
else:
if minDelay > 0 and remaining > 0:
minDelay = min(remaining, minDelay)
@@ -258,19 +261,19 @@ proc run*() =
minDelay = remaining
if current.state == CORO_FINISHED:
GC_removeStack(current.stack.start)
var next = ctx.current.prev
if next == nil:
# If first coroutine ends then `prev` is nil even if more coroutines
# are to be scheduled.
next = ctx.current.next
ctx.coroutines.remove(ctx.current)
GC_removeStack(current.stack.bottom)
when coroBackend == CORO_BACKEND_FIBERS:
DeleteFiber(coro.execContext)
DeleteFiber(current.execContext)
else:
dealloc(current.stack.start)
current.stack.start = nil
current.stack.ends = nil
dealloc(current.stack.top)
current.stack.top = nil
current.stack.bottom = nil
ctx.current = next
elif ctx.current == nil or ctx.current.next == nil:
ctx.current = ctx.coroutines.head
@@ -358,3 +361,25 @@ when isMainModule:
run()
doAssert order == @[0, 0, 1, 2, 1, 3, 4, 2, 3, 4]
doAssert stackCheckValue == 1100220033
type Foo = ref object
  number: int

# Baseline: memory occupied before the test allocates anything.
GC_fullCollect()
var occupiedMemory = getOccupiedMem()

i = 0
var objects = newSeq[Foo](100)

proc terstGc(id: int, sleep: float) =
  ## Stores 50 freshly allocated `Foo` objects into consecutive slots of
  ## `objects`, continuing from the shared index `i`.
  for n in 0..<50:
    objects[i] = Foo(number: n)
    i += 1

# Run the allocating proc in two coroutines: 2 * 50 objects fill all 100
# slots of `objects`. (Previously `terstIterators` was started here, so
# `terstGc` never ran.)
start(proc() = terstGc(1, 0.01))
start(proc() = terstGc(2, 0.021))
run()

# The objects are still referenced from `objects`, so memory use must have grown.
doAssert occupiedMemory < getOccupiedMem()
# Dropping the only references must let a full collection reclaim them.
objects = nil
GC_fullCollect()
doAssert occupiedMemory >= getOccupiedMem()

View File

@@ -63,18 +63,18 @@ type
cycleTableSize: int # max entries in cycle table
maxPause: int64 # max measured GC pause in nanoseconds
GcStack {.final.} = object
GcStack {.final, pure.} = object
prev: ptr GcStack
next: ptr GcStack
starts: pointer
bottom: pointer
pos: pointer
maxStackSize: int
GcHeap {.final, pure.} = object # this contains the zero count and
# non-zero count table
when defined(nimCoroutines):
stack: ptr GcStack
stackActive: ptr GcStack
stack: GcStack
activeStack: ptr GcStack
else:
stackBottom: pointer
cycleThreshold: int
@@ -120,6 +120,53 @@ template gcAssert(cond: bool, msg: string) =
#echo x[]
quit 1
when defined(nimCoroutines):
iterator items(first: var GcStack): ptr GcStack =
  ## Walks the ring of stacks once: yields the address of `first`, then keeps
  ## following `next` links until the walk arrives back at `first`.
  let head = addr(first)
  var current = head
  while true:
    yield current
    current = current.next
    if current == head:
      break
proc append(first: var GcStack, stack: ptr GcStack) =
  ## Links `stack` into the ring as its last element, i.e. between the
  ## current tail (`first.prev`) and `first`.
  let tail = first.prev
  stack.prev = tail
  stack.next = addr(first)
  tail.next = stack
  first.prev = stack
proc append(first: var GcStack): ptr GcStack =
  ## Allocates a zero-initialised `GcStack`, appends it to the ring headed by
  ## `first` and returns a pointer to it.
  let fresh = cast[ptr GcStack](alloc0(sizeof(GcStack)))
  append(first, fresh)
  result = fresh
proc remove(first: var GcStack, stack: ptr GcStack) =
  ## Unlinks `stack` from the ring headed by `first` and frees it.
  ## The head itself is never removed and a nil `stack` is ignored.
  gcAssert(addr(first) != stack, "Main application stack can not be removed")
  if stack != nil and stack != addr(first):
    let
      before = stack.prev
      after = stack.next
    before.next = after
    after.prev = before
    dealloc(stack)
proc remove(stack: ptr GcStack) =
  ## Removes `stack` from the ring of stacks kept in `gch.stack`.
  remove(gch.stack, stack)
proc find(first: var GcStack, bottom: pointer): ptr GcStack =
  ## Finds the stack entry whose `bottom` equals the given pointer.
  ##
  ## If `bottom` is nil, the head of the ring (`first`, the main thread
  ## stack) is returned. Returns nil when no entry in the ring matches.
  if bottom == nil:
    # Answer with the ring that was passed in instead of reaching for the
    # global heap, so the result always belongs to the ring being searched.
    return addr(first)
  result = nil
  for stack in first.items():
    if stack.bottom == bottom:
      return stack
proc len(stack: var GcStack): int =
  ## Counts the entries of the ring that `stack` heads.
  for _ in stack.items():
    inc(result)
proc addZCT(s: var CellSeq, c: PCell) {.noinline.} =
if (c.refcount and ZctFlag) == 0:
c.refcount = c.refcount or ZctFlag
@@ -823,7 +870,10 @@ proc collectCTBody(gch: var GcHeap) =
let t0 = getticks()
sysAssert(allocInv(gch.region), "collectCT: begin")
when not defined(nimCoroutines):
when defined(nimCoroutines):
for stack in gch.stack.items():
gch.stat.maxStackSize = max(gch.stat.maxStackSize, stack.stackSize())
else:
gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
sysAssert(gch.decStack.len == 0, "collectCT")
prepareForInteriorPointerChecking(gch.region)
@@ -849,19 +899,11 @@ proc collectCTBody(gch: var GcHeap) =
if gch.maxPause > 0 and duration > gch.maxPause:
c_fprintf(stdout, "[GC] missed deadline: %ld\n", duration)
when defined(nimCoroutines):
proc currentStackSizes(): int =
for stack in items(gch.stack):
result = result + stackSize(stack.starts, stack.pos)
proc collectCT(gch: var GcHeap) =
# stackMarkCosts prevents some pathological behaviour: Stack marking
# becomes more expensive with large stacks and large stacks mean that
# cells with RC=0 are more likely to be kept alive by the stack.
when defined(nimCoroutines):
let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
else:
let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
if (gch.zct.len >= stackMarkCosts or (cycleGC and
getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
gch.recGcLock == 0:
@@ -946,7 +988,7 @@ when not defined(useNimRtl):
when defined(nimCoroutines):
result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
for stack in items(gch.stack):
result = result & "[GC] stack " & stack.starts.repr & "[GC] max stack size " & $stack.maxStackSize & "\n"
result = result & "[GC] stack " & stack.bottom.repr & "[GC] max stack size " & cast[pointer](stack.maxStackSize).repr & "\n"
else:
result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
GC_enable()

View File

@@ -68,58 +68,25 @@ proc len(stack: ptr GcStack): int =
s = s.next
when defined(nimCoroutines):
proc stackSize(stackBottom: pointer, pos: pointer=nil): int {.noinline.} =
var sp: pointer
if pos == nil:
var stackTop {.volatile.}: pointer
sp = addr(stackTop)
else:
sp = pos
result = abs(cast[int](sp) - cast[int](stackBottom))
proc GC_addStack*(starts: pointer) {.cdecl, exportc.} =
var sp {.volatile.}: pointer
var stack = cast[ptr GcStack](alloc0(sizeof(GcStack)))
stack.starts = starts
stack.pos = addr sp
if gch.stack == nil:
gch.stack = stack
else:
stack.next = gch.stack
gch.stack.prev = stack
gch.stack = stack
# c_fprintf(stdout, "[GC] added stack 0x%016X\n", starts)
proc GC_removeStack*(starts: pointer) {.cdecl, exportc.} =
var stack = gch.stack
while stack != nil:
if stack.starts == starts:
if stack.prev == nil:
if stack.next != nil:
stack.next.prev = nil
gch.stack = stack.next
else:
stack.prev.next = stack.next
if stack.next != nil:
stack.next.prev = stack.prev
dealloc(stack)
# echo "[GC] removed stack ", starts.repr
break
proc stackSize(stack: ptr GcStack): int {.noinline.} =
if stack.pos != nil:
when defined(stackIncreases):
result = cast[ByteAddress](stack.pos) -% cast[ByteAddress](stack.bottom)
else:
stack = stack.next
result = cast[ByteAddress](stack.bottom) -% cast[ByteAddress](stack.pos)
else:
result = 0
proc GC_setCurrentStack*(starts: pointer) {.cdecl, exportc.} =
var pos {.volatile.}: pointer
pos = addr(pos)
var stack = gch.stack
while stack != nil:
if stack.starts == starts:
stack.pos = pos
stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts, pos))
gch.stackActive = stack
return
stack = stack.next
gcAssert(false, "Current stack position does not belong to registered stack")
proc setPosition(stack: ptr GcStack, position: pointer) =
  ## Records `position` as the stack's current position and raises the
  ## recorded maximum stack size if the stack is now larger than before.
  stack.pos = position
  let used = stack.stackSize()
  if used > stack.maxStackSize:
    stack.maxStackSize = used
proc setPosition(stack: var GcStack, position: pointer) =
  ## Convenience overload: forwards to the `ptr GcStack` version using the
  ## address of `stack`.
  addr(stack).setPosition(position)
proc stackSize(): int {.noinline.} =
  ## Sum of the sizes of all stacks in the `gch.stack` ring.
  for entry in gch.stack.items():
    inc(result, entry.stackSize())
else:
proc stackSize(): int {.noinline.} =
var stackTop {.volatile.}: pointer
@@ -180,18 +147,50 @@ elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
else:
const stackIncreases = false
{.push stack_trace: off.}
when defined(nimCoroutines):
proc GC_addStack(bottom: pointer) {.cdecl, exportc.} =
  ## Registers a new stack identified by `bottom`: a fresh entry is appended
  ## to the ring of stacks and its position starts out at `bottom`.
  # c_fprintf(stdout, "GC_addStack: %p;\n", bottom)
  let entry = append(gch.stack)
  entry.bottom = bottom
  setPosition(entry, bottom)
proc GC_removeStack(bottom: pointer) {.cdecl, exportc.} =
  ## Looks up the stack registered under `bottom` and removes it from the
  ## ring of stacks.
  # c_fprintf(stdout, "GC_removeStack: %p;\n", bottom)
  let entry = find(gch.stack, bottom)
  remove(entry)
proc GC_setActiveStack(bottom: pointer) {.cdecl, exportc.} =
  ## Sets active stack and updates current stack position.
  ##
  ## `bottom` identifies the stack to activate; nil selects the head of the
  ## ring (see `find`). The address of a local variable of this proc is
  ## recorded as the stack's current position.
  # c_fprintf(stdout, "GC_setActiveStack: %p;\n", bottom)
  var sp {.volatile.}: pointer
  let found = gch.stack.find(bottom)
  # `find` yields nil for a `bottom` that was never registered; report that
  # explicitly (in gcAssert-enabled builds) before the entry is dereferenced.
  gcAssert(found != nil, "Current stack position does not belong to registered stack")
  gch.activeStack = found
  gch.activeStack.setPosition(addr(sp))
when not defined(useNimRtl):
{.push stack_trace: off.}
proc setStackBottom(theStackBottom: pointer) =
#c_fprintf(stdout, "stack bottom: %p;\n", theStackBottom)
# the first init must be the one that defines the stack bottom:
when defined(nimCoroutines):
if gch.stack == nil:
# `setStackBottom()` gets called multiple times from main thread.
# Add it only once.
GC_addStack(theStackBottom)
GC_setCurrentStack(theStackBottom)
else:
when defined(nimCoroutines):
proc setStackBottom(theStackBottom: pointer) =
# Initializes main stack of the thread.
if gch.stack.next == nil:
# Main stack was not initialized yet
gch.stack.next = addr(gch.stack)
gch.stack.prev = addr(gch.stack)
gch.stack.bottom = theStackBottom
gch.stack.maxStackSize = 0
gch.activeStack = addr(gch.stack)
else:
var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
var b = cast[ByteAddress](gch.stack.bottom)
#c_fprintf(stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
when stackIncreases:
gch.stack.bottom = cast[pointer](min(a, b))
else:
gch.stack.bottom = cast[pointer](max(a, b))
gch.stack.setPosition(theStackBottom)
else:
proc setStackBottom(theStackBottom: pointer) =
#c_fprintf(stdout, "stack bottom: %p;\n", theStackBottom)
# the first init must be the one that defines the stack bottom:
if gch.stackBottom == nil: gch.stackBottom = theStackBottom
else:
var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
@@ -201,7 +200,7 @@ when not defined(useNimRtl):
gch.stackBottom = cast[pointer](min(a, b))
else:
gch.stackBottom = cast[pointer](max(a, b))
{.pop.}
{.pop.}
when defined(sparc): # For SPARC architecture.
when defined(nimCoroutines):
@@ -272,12 +271,10 @@ else:
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
for stack in items(gch.stack):
var b = cast[ByteAddress](stack.starts)
var a = cast[ByteAddress](stack.starts) - stack.maxStackSize
var x = cast[ByteAddress](p)
if a <=% x and x <=% b:
return true
var b = cast[ByteAddress](gch.activeStack.bottom)
var a = cast[ByteAddress](stackTop)
var x = cast[ByteAddress](p)
result = a <=% x and x <=% b
template forEachStackSlot(gch, gcMark: untyped) {.dirty.} =
# We use a jmp_buf buffer that is in the C stack.
@@ -285,35 +282,38 @@ else:
# that 'setjmp' will save registers in the C stack.
type PStackSlice = ptr array[0..7, pointer]
var registers {.noinit.}: C_JmpBuf
discard c_setjmp(registers)
gch.stackActive.pos = addr(registers)
for stack in items(gch.stack):
stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts))
var max = cast[ByteAddress](stack.starts)
var sp = cast[ByteAddress](stack.pos)
when defined(amd64):
if stack == gch.stackActive:
# words within the jmp_buf structure may not be properly aligned.
let regEnd = sp +% sizeof(registers)
while sp <% regEnd:
gcMark(gch, cast[PPointer](sp)[])
gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
sp = sp +% sizeof(pointer)
# loop unrolled:
while sp <% max - 8*sizeof(pointer):
gcMark(gch, cast[PStackSlice](sp)[0])
gcMark(gch, cast[PStackSlice](sp)[1])
gcMark(gch, cast[PStackSlice](sp)[2])
gcMark(gch, cast[PStackSlice](sp)[3])
gcMark(gch, cast[PStackSlice](sp)[4])
gcMark(gch, cast[PStackSlice](sp)[5])
gcMark(gch, cast[PStackSlice](sp)[6])
gcMark(gch, cast[PStackSlice](sp)[7])
sp = sp +% sizeof(pointer)*8
# last few entries:
while sp <=% max:
gcMark(gch, cast[PPointer](sp)[])
sp = sp +% sizeof(pointer)
# Update position of stack gc is executing in.
gch.activeStack.setPosition(addr(registers))
if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
for stack in gch.stack.items():
var max = cast[ByteAddress](stack.bottom)
var sp = cast[ByteAddress](addr(registers))
when defined(amd64):
if stack == gch.activeStack:
# words within the jmp_buf structure may not be properly aligned.
let regEnd = sp +% sizeof(registers)
while sp <% regEnd:
gcMark(gch, cast[PPointer](sp)[])
gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
sp = sp +% sizeof(pointer)
# Make sure sp is word-aligned
sp = sp and not (sizeof(pointer) - 1)
# loop unrolled:
while sp <% max - 8*sizeof(pointer):
gcMark(gch, cast[PStackSlice](sp)[0])
gcMark(gch, cast[PStackSlice](sp)[1])
gcMark(gch, cast[PStackSlice](sp)[2])
gcMark(gch, cast[PStackSlice](sp)[3])
gcMark(gch, cast[PStackSlice](sp)[4])
gcMark(gch, cast[PStackSlice](sp)[5])
gcMark(gch, cast[PStackSlice](sp)[6])
gcMark(gch, cast[PStackSlice](sp)[7])
sp = sp +% sizeof(pointer)*8
# last few entries:
while sp <=% max:
gcMark(gch, cast[PPointer](sp)[])
sp = sp +% sizeof(pointer)
else:
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer