From c3d1b732d63b0d5e03cf194cf2c7475d344c8bc6 Mon Sep 17 00:00:00 2001 From: Rokas Kupstys Date: Mon, 13 Feb 2017 19:42:47 +0200 Subject: [PATCH] Reworked gc support for coroutines. Nim now bootstraps with -d:nimCoroutines Added gc test to coro.nim Lots of misc improvements and comments in coro.nim --- lib/pure/coro.nim | 129 +++++++++++++++----------- lib/system/gc.nim | 72 ++++++++++++--- lib/system/gc_common.nim | 194 +++++++++++++++++++-------------------- 3 files changed, 231 insertions(+), 164 deletions(-) diff --git a/lib/pure/coro.nim b/lib/pure/coro.nim index c06dcfb049..494d6c6dcd 100644 --- a/lib/pure/coro.nim +++ b/lib/pure/coro.nim @@ -26,9 +26,9 @@ include system/timers const defaultStackSize = 512 * 1024 -proc GC_addStack(starts: pointer) {.cdecl, importc.} -proc GC_removeStack(starts: pointer) {.cdecl, importc.} -proc GC_setCurrentStack(starts, pos: pointer) {.cdecl, importc.} +proc GC_addStack(bottom: pointer) {.cdecl, importc.} +proc GC_removeStack(bottom: pointer) {.cdecl, importc.} +proc GC_setActiveStack(bottom: pointer) {.cdecl, importc.} const CORO_BACKEND_UCONTEXT = 0 @@ -50,11 +50,6 @@ when coroBackend == CORO_BACKEND_FIBERS: import windows.winlean type Context = pointer - Fiber {.final, pure.} = object - parameter: pointer - pad1: pointer - stackStart: pointer - stackEnd: pointer elif coroBackend == CORO_BACKEND_UCONTEXT: type @@ -123,8 +118,8 @@ const type Stack = object - start: pointer - ends: pointer + top: pointer # Top of the stack. Pointer used for deallocating stack if we own it. + bottom: pointer # Very bottom of the stack, acts as unique stack identifier. size: int Coroutine = ref object @@ -164,44 +159,59 @@ proc runCurrentTask() proc switchTo(current, to: Coroutine) = ## Switches execution from `current` into `to` context. to.lastRun = getTicks() - # Execution will switch to another fiber now. 
- when coroBackend == CORO_BACKEND_FIBERS: - SwitchToFiber(to.execContext) - elif coroBackend == CORO_BACKEND_UCONTEXT: - discard swapcontext(current.execContext, to.execContext) - elif coroBackend == CORO_BACKEND_SETJMP: - var res = setjmp(current.execContext) - if res == 0: - if to.state == CORO_EXECUTING: - # Coroutine is resumed. - longjmp(to.execContext, 1) - elif to.state == CORO_CREATED: - # Coroutine is started. - coroExecWithStack(runCurrentTask, to.stack.ends) - doAssert false - else: - {.error: "Invalid coroutine backend set.".} - # Execution was just resumed. Set active stack to current one. - GC_setCurrentStack(current.stack.start) + # Update position of current stack so gc invoked from another stack knows how much to scan. + GC_setActiveStack(current.stack.bottom) + var frame = getFrameState() + block: + # Execution will switch to another fiber now. We do not need to update current stack + when coroBackend == CORO_BACKEND_FIBERS: + SwitchToFiber(to.execContext) + elif coroBackend == CORO_BACKEND_UCONTEXT: + discard swapcontext(current.execContext, to.execContext) + elif coroBackend == CORO_BACKEND_SETJMP: + var res = setjmp(current.execContext) + if res == 0: + if to.state == CORO_EXECUTING: + # Coroutine is resumed. + longjmp(to.execContext, 1) + elif to.state == CORO_CREATED: + # Coroutine is started. + coroExecWithStack(runCurrentTask, to.stack.bottom) + doAssert false + else: + {.error: "Invalid coroutine backend set.".} + # Execution was just resumed. Restore frame information and set active stack. + setFrameState(frame) + GC_setActiveStack(current.stack.bottom) proc suspend*(sleepTime: float=0) = ## Stops coroutine execution and resumes no sooner than after ``sleeptime`` seconds. ## Until then other coroutines are executed. 
var current = getCurrent() current.sleepTime = sleepTime - var frame = getFrameState() switchTo(current, ctx.loop) - setFrameState(frame) proc runCurrentTask() = ## Starts execution of current coroutine and updates it's state through coroutine's life. - var current = getCurrent() - # Execution of new fiber just started. Since it was entered not through `switchTo` we - # have to set active stack here as well. - GC_setCurrentStack(current.stack.start) - current.state = CORO_EXECUTING - current.fn() # Start coroutine execution - current.state = CORO_FINISHED + var sp {.volatile.}: pointer + sp = addr(sp) + block: + var current = getCurrent() + current.stack.bottom = sp + # Execution of new fiber just started. Since it was entered not through `switchTo` we + # have to set active stack here as well. GC_removeStack() has to be called in main loop + # because we still need stack available in final suspend(0) call from which we will not + # return. + GC_addStack(sp) + # Activate current stack because we are executing in a new coroutine. + GC_setActiveStack(sp) + current.state = CORO_EXECUTING + try: + current.fn() # Start coroutine execution + except: + echo "Unhandled exception in coroutine." 
+ writeStackTrace() + current.state = CORO_FINISHED suspend(0) # Exit coroutine without returning from coroExecWithStack() doAssert false @@ -215,25 +225,20 @@ proc start*(c: proc(), stacksize: int=defaultStackSize) = when coroBackend == CORO_BACKEND_FIBERS: coro.execContext = CreateFiberEx(stacksize, stacksize, FIBER_FLAG_FLOAT_SWITCH, (proc(p: pointer): void {.stdcall.} = runCurrentTask()), nil) - var fiber = cast[ptr Fiber](coro.execContext) - coro.stack.start = fiber.stackStart - coro.stack.ends = fiber.stackEnd coro.stack.size = stacksize else: var stack: pointer while stack == nil: stack = alloc0(stacksize) - coro.stack.start = stack - coro.stack.ends = cast[pointer](cast[ByteAddress](stack) + stacksize) + coro.stack.top = stack when coroBackend == CORO_BACKEND_UCONTEXT: discard getcontext(coro.execContext) - coro.execContext.uc_stack.ss_sp = coro.stack.ends + coro.execContext.uc_stack.ss_sp = cast[pointer](cast[ByteAddress](stack) + stacksize) coro.execContext.uc_stack.ss_size = coro.stack.size coro.execContext.uc_link = addr ctx.loop.execContext makecontext(coro.execContext, runCurrentTask, 0) coro.stack.size = stacksize coro.state = CORO_CREATED - GC_addStack(coro.stack.ends) ctx.coroutines.append(coro) proc run*() = @@ -248,9 +253,7 @@ proc run*() = var remaining = current.sleepTime - (float(getTicks() - current.lastRun) / 1_000_000_000) if remaining <= 0: # Save main loop context. Suspending coroutine will resume after this statement with - var frame = getFrameState() switchTo(ctx.loop, current) - setFrameState(frame) else: if minDelay > 0 and remaining > 0: minDelay = min(remaining, minDelay) @@ -258,19 +261,19 @@ proc run*() = minDelay = remaining if current.state == CORO_FINISHED: - GC_removeStack(current.stack.start) var next = ctx.current.prev if next == nil: # If first coroutine ends then `prev` is nil even if more coroutines # are to be scheduled. 
next = ctx.current.next ctx.coroutines.remove(ctx.current) + GC_removeStack(current.stack.bottom) when coroBackend == CORO_BACKEND_FIBERS: - DeleteFiber(coro.execContext) + DeleteFiber(current.execContext) else: - dealloc(current.stack.start) - current.stack.start = nil - current.stack.ends = nil + dealloc(current.stack.top) + current.stack.top = nil + current.stack.bottom = nil ctx.current = next elif ctx.current == nil or ctx.current.next == nil: ctx.current = ctx.coroutines.head @@ -358,3 +361,25 @@ when isMainModule: run() doAssert order == @[0, 0, 1, 2, 1, 3, 4, 2, 3, 4] doAssert stackCheckValue == 1100220033 + + type Foo = ref object + number: int + + GC_fullCollect() + var occupiedMemory = getOccupiedMem() + + i = 0 + var objects = newSeq[Foo](100) + proc terstGc(id: int, sleep: float) = + for n in 0..<50: + objects[i] = Foo(number: n) + i += 1 + + start(proc() = terstIterators(1, 0.01)) + start(proc() = terstIterators(2, 0.021)) + run() + + doAssert occupiedMemory < getOccupiedMem() + objects = nil + GC_fullCollect() + doAssert occupiedMemory >= getOccupiedMem() diff --git a/lib/system/gc.nim b/lib/system/gc.nim index eaf68c0c4e..e989ec111f 100644 --- a/lib/system/gc.nim +++ b/lib/system/gc.nim @@ -63,18 +63,18 @@ type cycleTableSize: int # max entries in cycle table maxPause: int64 # max measured GC pause in nanoseconds - GcStack {.final.} = object + GcStack {.final, pure.} = object prev: ptr GcStack next: ptr GcStack - starts: pointer + bottom: pointer pos: pointer maxStackSize: int GcHeap {.final, pure.} = object # this contains the zero count and # non-zero count table when defined(nimCoroutines): - stack: ptr GcStack - stackActive: ptr GcStack + stack: GcStack + activeStack: ptr GcStack else: stackBottom: pointer cycleThreshold: int @@ -120,6 +120,53 @@ template gcAssert(cond: bool, msg: string) = #echo x[] quit 1 +when defined(nimCoroutines): + iterator items(first: var GcStack): ptr GcStack = + var item = addr(first) + while true: + yield item + item 
= item.next + if item == addr(first): + break + + proc append(first: var GcStack, stack: ptr GcStack) = + ## Append stack to the ring of stacks. + first.prev.next = stack + stack.prev = first.prev + first.prev = stack + stack.next = addr(first) + + proc append(first: var GcStack): ptr GcStack = + ## Allocate new GcStack object, append it to the ring of stacks and return it. + result = cast[ptr GcStack](alloc0(sizeof(GcStack))) + first.append(result) + + proc remove(first: var GcStack, stack: ptr GcStack) = + ## Remove stack from ring of stacks. + gcAssert(addr(first) != stack, "Main application stack can not be removed") + if addr(first) == stack or stack == nil: + return + stack.prev.next = stack.next + stack.next.prev = stack.prev + dealloc(stack) + + proc remove(stack: ptr GcStack) = + gch.stack.remove(stack) + + proc find(first: var GcStack, bottom: pointer): ptr GcStack = + ## Find stack struct based on bottom pointer. If `bottom` is nil then main + ## thread stack is returned. + if bottom == nil: + return addr(gch.stack) + + for stack in first.items(): + if stack.bottom == bottom: + return stack + + proc len(stack: var GcStack): int = + for _ in stack.items(): + result = result + 1 + proc addZCT(s: var CellSeq, c: PCell) {.noinline.} = if (c.refcount and ZctFlag) == 0: c.refcount = c.refcount or ZctFlag @@ -823,7 +870,10 @@ proc collectCTBody(gch: var GcHeap) = let t0 = getticks() sysAssert(allocInv(gch.region), "collectCT: begin") - when not defined(nimCoroutines): + when defined(nimCoroutines): + for stack in gch.stack.items(): + gch.stat.maxStackSize = max(gch.stat.maxStackSize, stack.stackSize()) + else: gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize()) sysAssert(gch.decStack.len == 0, "collectCT") prepareForInteriorPointerChecking(gch.region) @@ -849,19 +899,11 @@ proc collectCTBody(gch: var GcHeap) = if gch.maxPause > 0 and duration > gch.maxPause: c_fprintf(stdout, "[GC] missed deadline: %ld\n", duration) -when defined(nimCoroutines):
- proc currentStackSizes(): int = - for stack in items(gch.stack): - result = result + stackSize(stack.starts, stack.pos) - proc collectCT(gch: var GcHeap) = # stackMarkCosts prevents some pathological behaviour: Stack marking # becomes more expensive with large stacks and large stacks mean that # cells with RC=0 are more likely to be kept alive by the stack. - when defined(nimCoroutines): - let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold) - else: - let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold) + let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold) if (gch.zct.len >= stackMarkCosts or (cycleGC and getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and gch.recGcLock == 0: @@ -946,7 +988,7 @@ when not defined(useNimRtl): when defined(nimCoroutines): result = result & "[GC] number of stacks: " & $gch.stack.len & "\n" for stack in items(gch.stack): - result = result & "[GC] stack " & stack.starts.repr & "[GC] max stack size " & $stack.maxStackSize & "\n" + result = result & "[GC] stack " & stack.bottom.repr & "[GC] max stack size " & cast[pointer](stack.maxStackSize).repr & "\n" else: result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n" GC_enable() diff --git a/lib/system/gc_common.nim b/lib/system/gc_common.nim index feea454e7a..c5714eef64 100644 --- a/lib/system/gc_common.nim +++ b/lib/system/gc_common.nim @@ -68,58 +68,25 @@ proc len(stack: ptr GcStack): int = s = s.next when defined(nimCoroutines): - proc stackSize(stackBottom: pointer, pos: pointer=nil): int {.noinline.} = - var sp: pointer - if pos == nil: - var stackTop {.volatile.}: pointer - sp = addr(stackTop) - else: - sp = pos - result = abs(cast[int](sp) - cast[int](stackBottom)) - - proc GC_addStack*(starts: pointer) {.cdecl, exportc.} = - var sp {.volatile.}: pointer - var stack = cast[ptr GcStack](alloc0(sizeof(GcStack))) - stack.starts = starts - stack.pos = addr sp - if gch.stack == nil: - 
gch.stack = stack - else: - stack.next = gch.stack - gch.stack.prev = stack - gch.stack = stack - # c_fprintf(stdout, "[GC] added stack 0x%016X\n", starts) - - proc GC_removeStack*(starts: pointer) {.cdecl, exportc.} = - var stack = gch.stack - while stack != nil: - if stack.starts == starts: - if stack.prev == nil: - if stack.next != nil: - stack.next.prev = nil - gch.stack = stack.next - else: - stack.prev.next = stack.next - if stack.next != nil: - stack.next.prev = stack.prev - dealloc(stack) - # echo "[GC] removed stack ", starts.repr - break + proc stackSize(stack: ptr GcStack): int {.noinline.} = + if stack.pos != nil: + when defined(stackIncreases): + result = cast[ByteAddress](stack.pos) -% cast[ByteAddress](stack.bottom) else: - stack = stack.next + result = cast[ByteAddress](stack.bottom) -% cast[ByteAddress](stack.pos) + else: + result = 0 - proc GC_setCurrentStack*(starts: pointer) {.cdecl, exportc.} = - var pos {.volatile.}: pointer - pos = addr(pos) - var stack = gch.stack - while stack != nil: - if stack.starts == starts: - stack.pos = pos - stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts, pos)) - gch.stackActive = stack - return - stack = stack.next - gcAssert(false, "Current stack position does not belong to registered stack") + proc setPosition(stack: ptr GcStack, position: pointer) = + stack.pos = position + stack.maxStackSize = max(stack.maxStackSize, stack.stackSize()) + + proc setPosition(stack: var GcStack, position: pointer) = + setPosition(addr(stack), position) + + proc stackSize(): int {.noinline.} = + for stack in gch.stack.items(): + result = result + stack.stackSize() else: proc stackSize(): int {.noinline.} = var stackTop {.volatile.}: pointer @@ -180,18 +147,50 @@ elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or else: const stackIncreases = false +{.push stack_trace: off.} +when defined(nimCoroutines): + proc GC_addStack(bottom: pointer) {.cdecl, exportc.} = + # c_fprintf(stdout, "GC_addStack: 
%p;\n", bottom) + var stack = gch.stack.append() + stack.bottom = bottom + stack.setPosition(bottom) + + proc GC_removeStack(bottom: pointer) {.cdecl, exportc.} = + # c_fprintf(stdout, "GC_removeStack: %p;\n", bottom) + gch.stack.find(bottom).remove() + + proc GC_setActiveStack(bottom: pointer) {.cdecl, exportc.} = + ## Sets active stack and updates current stack position. + # c_fprintf(stdout, "GC_setActiveStack: %p;\n", bottom) + var sp {.volatile.}: pointer + gch.activeStack = gch.stack.find(bottom) + gch.activeStack.setPosition(addr(sp)) + when not defined(useNimRtl): - {.push stack_trace: off.} - proc setStackBottom(theStackBottom: pointer) = - #c_fprintf(stdout, "stack bottom: %p;\n", theStackBottom) - # the first init must be the one that defines the stack bottom: - when defined(nimCoroutines): - if gch.stack == nil: - # `setStackBottom()` gets called multiple times from main thread. - # Add it only once. - GC_addStack(theStackBottom) - GC_setCurrentStack(theStackBottom) - else: + when defined(nimCoroutines): + proc setStackBottom(theStackBottom: pointer) = + # Initializes main stack of the thread. 
+ if gch.stack.next == nil: + # Main stack was not initialized yet + gch.stack.next = addr(gch.stack) + gch.stack.prev = addr(gch.stack) + gch.stack.bottom = theStackBottom + gch.stack.maxStackSize = 0 + gch.activeStack = addr(gch.stack) + else: + var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2 + var b = cast[ByteAddress](gch.stack.bottom) + #c_fprintf(stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom) + when stackIncreases: + gch.stack.bottom = cast[pointer](min(a, b)) + else: + gch.stack.bottom = cast[pointer](max(a, b)) + gch.stack.setPosition(theStackBottom) + + else: + proc setStackBottom(theStackBottom: pointer) = + #c_fprintf(stdout, "stack bottom: %p;\n", theStackBottom) + # the first init must be the one that defines the stack bottom: if gch.stackBottom == nil: gch.stackBottom = theStackBottom else: var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2 @@ -201,7 +200,7 @@ when not defined(useNimRtl): gch.stackBottom = cast[pointer](min(a, b)) else: gch.stackBottom = cast[pointer](max(a, b)) - {.pop.} +{.pop.} when defined(sparc): # For SPARC architecture. when defined(nimCoroutines): @@ -272,12 +271,10 @@ else: proc isOnStack(p: pointer): bool = var stackTop {.volatile.}: pointer stackTop = addr(stackTop) - for stack in items(gch.stack): - var b = cast[ByteAddress](stack.starts) - var a = cast[ByteAddress](stack.starts) - stack.maxStackSize - var x = cast[ByteAddress](p) - if a <=% x and x <=% b: - return true + var b = cast[ByteAddress](gch.activeStack.bottom) + var a = cast[ByteAddress](stackTop) + var x = cast[ByteAddress](p) + result = a <=% x and x <=% b template forEachStackSlot(gch, gcMark: untyped) {.dirty.} = # We use a jmp_buf buffer that is in the C stack. @@ -285,35 +282,38 @@ else: # that 'setjmp' will save registers in the C stack. 
type PStackSlice = ptr array[0..7, pointer] var registers {.noinit.}: C_JmpBuf - discard c_setjmp(registers) - gch.stackActive.pos = addr(registers) - for stack in items(gch.stack): - stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts)) - var max = cast[ByteAddress](stack.starts) - var sp = cast[ByteAddress](stack.pos) - when defined(amd64): - if stack == gch.stackActive: - # words within the jmp_buf structure may not be properly aligned. - let regEnd = sp +% sizeof(registers) - while sp <% regEnd: - gcMark(gch, cast[PPointer](sp)[]) - gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[]) - sp = sp +% sizeof(pointer) - # loop unrolled: - while sp <% max - 8*sizeof(pointer): - gcMark(gch, cast[PStackSlice](sp)[0]) - gcMark(gch, cast[PStackSlice](sp)[1]) - gcMark(gch, cast[PStackSlice](sp)[2]) - gcMark(gch, cast[PStackSlice](sp)[3]) - gcMark(gch, cast[PStackSlice](sp)[4]) - gcMark(gch, cast[PStackSlice](sp)[5]) - gcMark(gch, cast[PStackSlice](sp)[6]) - gcMark(gch, cast[PStackSlice](sp)[7]) - sp = sp +% sizeof(pointer)*8 - # last few entries: - while sp <=% max: - gcMark(gch, cast[PPointer](sp)[]) - sp = sp +% sizeof(pointer) + # Update position of stack gc is executing in. + gch.activeStack.setPosition(addr(registers)) + if c_setjmp(registers) == 0'i32: # To fill the C stack with registers. + for stack in gch.stack.items(): + var max = cast[ByteAddress](stack.bottom) + var sp = cast[ByteAddress](addr(registers)) + when defined(amd64): + if stack == gch.activeStack: + # words within the jmp_buf structure may not be properly aligned. 
+ let regEnd = sp +% sizeof(registers) + while sp <% regEnd: + gcMark(gch, cast[PPointer](sp)[]) + gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[]) + sp = sp +% sizeof(pointer) + # Make sure sp is word-aligned + sp = sp and not (sizeof(pointer) - 1) + # loop unrolled: + while sp <% max - 8*sizeof(pointer): + gcMark(gch, cast[PStackSlice](sp)[0]) + gcMark(gch, cast[PStackSlice](sp)[1]) + gcMark(gch, cast[PStackSlice](sp)[2]) + gcMark(gch, cast[PStackSlice](sp)[3]) + gcMark(gch, cast[PStackSlice](sp)[4]) + gcMark(gch, cast[PStackSlice](sp)[5]) + gcMark(gch, cast[PStackSlice](sp)[6]) + gcMark(gch, cast[PStackSlice](sp)[7]) + sp = sp +% sizeof(pointer)*8 + # last few entries: + while sp <=% max: + gcMark(gch, cast[PPointer](sp)[]) + sp = sp +% sizeof(pointer) + else: proc isOnStack(p: pointer): bool = var stackTop {.volatile.}: pointer