Coroutine support for the i386/amd64 architectures, unix/windows OSes, and the markAndSweep/refCounting GCs.

rku
2015-07-31 17:18:16 +03:00
parent df0e1a515b
commit 43bfda057b
13 changed files with 843 additions and 320 deletions


@@ -48,6 +48,7 @@ path="$lib/windows"
path="$lib/posix"
path="$lib/js"
path="$lib/pure/unidecode"
path="$lib/arch"
@if nimbabel:
babelpath="$home/.babel/pkgs/"

lib/arch/arch.nim Normal file

@@ -0,0 +1,59 @@
#
#
# Nim's Runtime Library
# (c) Copyright 2015 Rokas Kupstys
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
when defined(windows):
const
ABI* = "ms"
elif defined(unix):
const
ABI* = "unix"
else:
{.error: "Unsupported ABI".}
when defined(amd64):
when defined(unix):
# unix (sysv) ABI
type
JmpBufReg* {.pure.} = enum
BX, BP, R12, R13, R14, R15, SP, IP, TOTAL
elif defined(windows):
# ms ABI
type
JmpBufReg* {.pure.} = enum
BX, BP, R12, R13, R14, R15, SP, IP, SI, DI, TOTAL
type
Reg* {.pure.} = enum
AX, BX, CX, DX, SI, DI, BP, SP, IP, R8, R9, R10, R11, R12, R13, R14, R15, TOTAL
elif defined(i386):
# the fastcall calling convention is identical across x86 OSes
type
JmpBufReg* {.pure.} = enum
BX, SI, DI, BP, SP, IP, TOTAL
Reg* {.pure.} = enum
AX, BX, CX, BP, SP, DI, SI, TOTAL
else:
{.error: "Unsupported architecture".}
{.compile: "./" & ABI & "_" & hostCPU & ".asm"}
type
JmpBuf* = array[JmpBufReg.TOTAL, pointer]
Registers* = array[Reg.TOTAL, pointer]
proc getRegisters*(ctx: var Registers) {.importc: "narch_$1", fastcall.}
proc setjmp*(ctx: var JmpBuf): int {.importc: "narch_$1", fastcall.}
proc longjmp*(ctx: JmpBuf, ret=1) {.importc: "narch_$1", fastcall.}
proc coroSwitchStack*(sp: pointer) {.importc: "narch_$1", fastcall.}
proc coroRestoreStack*() {.importc: "narch_$1", fastcall.}
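For orientation (an editorial sketch, not part of the commit): these procs behave like C's setjmp/longjmp, so a save/jump round trip through this module looks roughly as follows.

import arch

var ctx: JmpBuf
if setjmp(ctx) == 0:
  # first pass: execution state has been saved in ctx
  longjmp(ctx, 1)     # does not return; control re-enters setjmp above
else:
  echo "resumed"      # second pass: setjmp returned the value given to longjmp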

lib/arch/i386.asm Normal file

@@ -0,0 +1,79 @@
;
;
; Nim's Runtime Library
; (c) Copyright 2015 Rokas Kupstys
;
; See the file "copying.txt", included in this
; distribution, for details about the copyright.
;
section ".text" executable
public narch_getRegisters
public @narch_getRegisters@4
public narch_setjmp
public @narch_setjmp@4
public narch_longjmp
public @narch_longjmp@8
public narch_coroSwitchStack
public @narch_coroSwitchStack@4
public narch_coroRestoreStack
public @narch_coroRestoreStack@0
@narch_getRegisters@4:
narch_getRegisters:
mov [ecx], eax
mov [ecx+4], ebx
mov [ecx+8], ecx
mov [ecx+0Ch], ebp
mov [ecx+10h], esp
mov [ecx+14h], edi
mov [ecx+18h], esi
ret
@narch_setjmp@4:
narch_setjmp:
; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
mov [ecx], ebx
mov [ecx+4], esi
mov [ecx+8], edi
mov [ecx+0Ch], ebp
lea eax, [esp+4]
mov [ecx+10h], eax
mov eax, [esp]
mov [ecx+14h], eax
xor eax, eax
ret
@narch_longjmp@8:
narch_longjmp:
; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
mov eax, edx
test eax, eax
jnz @F
inc eax
@@:
mov ebx, [ecx]
mov esi, [ecx+4]
mov edi, [ecx+8]
mov ebp, [ecx+0Ch]
mov esp, [ecx+10h]
mov edx, [ecx+14h]
jmp edx
@narch_coroSwitchStack@4:
narch_coroSwitchStack:
pop eax ; return address
mov edx, esp ; old esp for saving
mov esp, ecx ; swap stack with one passed to func
push edx ; store old stack pointer on newly switched stack
jmp eax ; return
@narch_coroRestoreStack@0:
narch_coroRestoreStack:
pop eax ; return address
pop esp ; restore old stack pointer
jmp eax ; return
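For context, coroSwitchStack and coroRestoreStack are meant to bracket a call that must run on a private stack; a minimal sketch of the call pattern (mirroring lib/pure/coro.nim below, with coro standing for the scheduler's own state):

# sketch only; assumes coro.stack/coro.stacksize describe an allocated stack
var stackEnd = cast[pointer](cast[ByteAddress](coro.stack) + coro.stacksize)
coroSwitchStack(stackEnd)   # saves the old stack pointer on the new stack
coro.fn()                   # runs on the coroutine's private stack
coroRestoreStack()          # pops it back; execution resumes on the old stack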

lib/arch/ms_amd64.asm Normal file

@@ -0,0 +1,90 @@
;
;
; Nim's Runtime Library
; (c) Copyright 2015 Rokas Kupstys
;
; See the file "copying.txt", included in this
; distribution, for details about the copyright.
;
format MS64 COFF
section ".text" executable align 16
public narch_getRegisters
public narch_setjmp
public narch_longjmp
public narch_coroSwitchStack
public narch_coroRestoreStack
narch_getRegisters:
mov [rcx], rax
mov [rcx+8], rbx
mov [rcx+10h], rcx
mov [rcx+18h], rdx
mov [rcx+20h], rsi
mov [rcx+28h], rdi
mov [rcx+30h], rbp
mov [rcx+38h], rsp
mov rax, [rsp]
mov [rcx+40h], rax ; rip
mov [rcx+48h], r8
mov [rcx+50h], r9
mov [rcx+58h], r10
mov [rcx+60h], r11
mov [rcx+68h], r12
mov [rcx+70h], r13
mov [rcx+78h], r14
mov [rcx+80h], r15
ret
narch_setjmp:
; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
mov [rcx], rbx ; rcx is jmp_buf, move registers onto it
mov [rcx+8], rbp
mov [rcx+10h], r12
mov [rcx+18h], r13
mov [rcx+20h], r14
mov [rcx+28h], r15
lea rdx, [rsp+8] ; this is our rsp WITHOUT current ret addr
mov [rcx+30h], rdx
mov rdx, [rsp] ; save return addr ptr for new rip
mov [rcx+38h], rdx
mov [rcx+40h], rsi
mov [rcx+48h], rdi
xor rax, rax ; always return 0
ret
narch_longjmp:
; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
mov rax, rdx ; val will be longjmp return
test rax, rax
jnz @F
inc rax ; if val==0, val=1 per longjmp semantics
@@:
mov rbx, [rcx] ; rcx is the jmp_buf, restore regs from it
mov rbp, [rcx+8]
mov r12, [rcx+10h]
mov r13, [rcx+18h]
mov r14, [rcx+20h]
mov r15, [rcx+28h]
mov rsp, [rcx+30h] ; this ends up being the stack pointer
mov rdx, [rcx+38h] ; this is the instruction pointer
jmp rdx ; goto saved address without altering rsp
narch_coroSwitchStack:
pop rax ; return address
mov rdx, rsp ; old rsp for saving
mov rsp, rcx ; swap stack with one passed to func
push rdx ; store old stack pointer on newly switched stack
sub rsp, 28h ; 32-byte shadow space + 8 for 16-byte alignment (MS x64 ABI)
jmp rax ; return
narch_coroRestoreStack:
pop rax ; return address
add rsp, 28h ; undo shadow space + alignment adjustment
pop rsp ; restore old stack pointer
jmp rax ; return

lib/arch/ms_i386.asm Normal file

@@ -0,0 +1,12 @@
;
;
; Nim's Runtime Library
; (c) Copyright 2015 Rokas Kupstys
;
; See the file "copying.txt", included in this
; distribution, for details about the copyright.
;
format MS COFF
include 'i386.asm'

lib/arch/unix_amd64.asm Normal file

@@ -0,0 +1,89 @@
;
;
; Nim's Runtime Library
; (c) Copyright 2015 Rokas Kupstys
;
; See the file "copying.txt", included in this
; distribution, for details about the copyright.
;
format ELF64
section ".text" executable align 16
public narch_getRegisters
public narch_setjmp
public narch_longjmp
public narch_coroSwitchStack
public narch_coroRestoreStack
narch_getRegisters:
mov [rdi], rax
mov [rdi+8], rbx
mov [rdi+10h], rcx
mov [rdi+18h], rdx
mov [rdi+20h], rsi
mov [rdi+28h], rdi
mov [rdi+30h], rbp
mov [rdi+38h], rsp
mov rax, [rsp]
mov [rdi+40h], rax ; rip
mov [rdi+48h], r8
mov [rdi+50h], r9
mov [rdi+58h], r10
mov [rdi+60h], r11
mov [rdi+68h], r12
mov [rdi+70h], r13
mov [rdi+78h], r14
mov [rdi+80h], r15
ret
narch_setjmp:
; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
mov [rdi], rbx ; rdi is jmp_buf, move registers onto it
mov [rdi+8], rbp
mov [rdi+10h], r12
mov [rdi+18h], r13
mov [rdi+20h], r14
mov [rdi+28h], r15
lea rdx, [rsp+8] ; this is our rsp WITHOUT current ret addr
mov [rdi+30h], rdx
mov rdx, [rsp] ; save return addr ptr for new rip
mov [rdi+38h], rdx
xor rax, rax ; always return 0
ret
narch_longjmp:
; Based on code from musl libc Copyright © 2005-2014 Rich Felker, et al.
mov rax, rsi ; val will be longjmp return
test rax, rax
jnz @F
inc rax ; if val==0, val=1 per longjmp semantics
@@:
mov rbx, [rdi] ; rdi is the jmp_buf, restore regs from it
mov rbp, [rdi+8]
mov r12, [rdi+10h]
mov r13, [rdi+18h]
mov r14, [rdi+20h]
mov r15, [rdi+28h]
mov rsp, [rdi+30h] ; this ends up being the stack pointer
mov rdx, [rdi+38h] ; this is the instruction pointer
jmp rdx ; goto saved address without altering rsp
narch_coroSwitchStack:
pop rsi ; return address
mov rdx, rsp ; old rsp for saving
mov rsp, rdi ; swap stack with one passed to func
push rdx ; store old stack pointer on newly switched stack
sub rsp, 8h ; stack alignment
jmp rsi ; return
narch_coroRestoreStack:
pop rsi ; return address
add rsp, 8h ; stack alignment
pop rsp ; restore old stack pointer
jmp rsi ; return

lib/arch/unix_i386.asm Normal file

@@ -0,0 +1,12 @@
;
;
; Nim's Runtime Library
; (c) Copyright 2015 Rokas Kupstys
;
; See the file "copying.txt", included in this
; distribution, for details about the copyright.
;
format ELF
include 'i386.asm'


@@ -110,18 +110,31 @@ __clang__
# endif
# define N_LIB_IMPORT extern __declspec(dllimport)
#else
# define N_CDECL(rettype, name) rettype name
# define N_STDCALL(rettype, name) rettype name
# define N_SYSCALL(rettype, name) rettype name
# define N_FASTCALL(rettype, name) rettype name
# define N_SAFECALL(rettype, name) rettype name
/* function pointers with calling convention: */
# define N_CDECL_PTR(rettype, name) rettype (*name)
# define N_STDCALL_PTR(rettype, name) rettype (*name)
# define N_SYSCALL_PTR(rettype, name) rettype (*name)
# define N_FASTCALL_PTR(rettype, name) rettype (*name)
# define N_SAFECALL_PTR(rettype, name) rettype (*name)
# if defined(__GNUC__)
# define N_CDECL(rettype, name) rettype name
# define N_STDCALL(rettype, name) rettype name
# define N_SYSCALL(rettype, name) rettype name
# define N_FASTCALL(rettype, name) __attribute__((fastcall)) rettype name
# define N_SAFECALL(rettype, name) rettype name
/* function pointers with calling convention: */
# define N_CDECL_PTR(rettype, name) rettype (*name)
# define N_STDCALL_PTR(rettype, name) rettype (*name)
# define N_SYSCALL_PTR(rettype, name) rettype (*name)
# define N_FASTCALL_PTR(rettype, name) __attribute__((fastcall)) rettype (*name)
# define N_SAFECALL_PTR(rettype, name) rettype (*name)
# else
# define N_CDECL(rettype, name) rettype name
# define N_STDCALL(rettype, name) rettype name
# define N_SYSCALL(rettype, name) rettype name
# define N_FASTCALL(rettype, name) rettype name
# define N_SAFECALL(rettype, name) rettype name
/* function pointers with calling convention: */
# define N_CDECL_PTR(rettype, name) rettype (*name)
# define N_STDCALL_PTR(rettype, name) rettype (*name)
# define N_SYSCALL_PTR(rettype, name) rettype (*name)
# define N_FASTCALL_PTR(rettype, name) rettype (*name)
# define N_SAFECALL_PTR(rettype, name) rettype (*name)
# endif
# ifdef __cplusplus
# define N_LIB_EXPORT extern "C"
# else

lib/pure/coro.nim Normal file

@@ -0,0 +1,145 @@
#
#
# Nim's Runtime Library
# (c) Copyright 2015 Rokas Kupstys
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
when not defined(nimCoroutines):
{.error: "Coroutines require -d:nimCoroutines".}
import os, times
import macros
import arch
import lists
const coroDefaultStackSize = 512 * 1024
type Coroutine = ref object
# prev: ptr Coroutine
# next: ptr Coroutine
ctx: JmpBuf
fn: proc()
started: bool
lastRun: float
sleepTime: float
stack: pointer
stacksize: int
var coroutines = initDoublyLinkedList[Coroutine]()
var current: Coroutine
var mainCtx: JmpBuf
proc GC_addStack(starts: pointer) {.cdecl, importc.}
proc GC_removeStack(starts: pointer) {.cdecl, importc.}
proc GC_setCurrentStack(starts, pos: pointer) {.cdecl, importc.}
proc coroStart*(c: proc(), stacksize: int=coroDefaultStackSize) =
## Adds a coroutine to the event loop. It does not run immediately.
var coro = Coroutine()
coro.fn = c
while coro.stack == nil:
coro.stack = alloc0(stacksize)
coro.stacksize = stacksize
coroutines.append(coro)
{.push stackTrace: off.}
proc coroYield*(sleepTime: float=0) =
## Suspends the current coroutine; it resumes no sooner than ``sleepTime`` seconds
## from now. Until then other coroutines are executed.
var oldFrame = getFrame()
var sp {.volatile.}: pointer
GC_setCurrentStack(current.stack, cast[pointer](addr sp))
current.sleepTime = sleepTime
current.lastRun = epochTime()
if setjmp(current.ctx) == 0:
longjmp(mainCtx, 1)
setFrame(oldFrame)
{.pop.}
proc coroRun*() =
## Starts the main event loop, which exits when all coroutines exit. Calling this
## proc starts execution of the first coroutine.
var node = coroutines.head
var minDelay: float = 0
var frame: PFrame
while node != nil:
var coro = node.value
current = coro
os.sleep(int(minDelay * 1000))
var remaining = coro.sleepTime - (epochTime() - coro.lastRun)
if remaining <= 0:
remaining = 0
let res = setjmp(mainCtx)
if res == 0:
frame = getFrame()
if coro.started: # coroutine resumes
longjmp(coro.ctx, 1)
else:
coro.started = true # coroutine starts
var stackEnd = cast[pointer](cast[ByteAddress](coro.stack) + coro.stacksize)
GC_addStack(coro.stack)
coroSwitchStack(stackEnd)
coro.fn()
coroRestoreStack()
GC_removeStack(coro.stack)
var next = node.prev
coroutines.remove(node)
dealloc(coro.stack)
node = next
setFrame(frame)
else:
setFrame(frame)
elif remaining > 0:
if minDelay > 0 and remaining > 0:
minDelay = min(remaining, minDelay)
else:
minDelay = remaining
if node == nil or node.next == nil:
node = coroutines.head
else:
node = node.next
proc coroAlive*(c: proc()): bool =
## Returns ``true`` if the coroutine has not yet returned, ``false`` otherwise.
for coro in items(coroutines):
if coro.fn == c:
return true
proc coroWait*(c: proc(), interval=0.01) =
## Returns only after coroutine ``c`` has returned. ``interval`` is how often, in seconds, to check whether ``c`` is still alive.
while coroAlive(c):
coroYield interval
when isMainModule:
var stackCheckValue = 1100220033
proc c2()
proc c1() =
for i in 0 .. 3:
echo "c1"
coroYield 0.05
echo "c1 exits"
proc c2() =
for i in 0 .. 3:
echo "c2"
coroYield 0.025
coroWait(c1)
echo "c2 exits"
coroStart(c1)
coroStart(c2)
coroRun()
echo "done ", stackCheckValue


@@ -44,10 +44,12 @@ var
# a global variable for the root of all try blocks
currException {.threadvar.}: ref Exception
proc getFrame*(): PFrame {.compilerRtl, inl.} = framePtr
proc popFrame {.compilerRtl, inl.} =
framePtr = framePtr.prev
proc setFrame(s: PFrame) {.compilerRtl, inl.} =
proc setFrame*(s: PFrame) {.compilerRtl, inl.} =
framePtr = s
proc pushSafePoint(s: PSafePoint) {.compilerRtl, inl.} =


@@ -16,6 +16,9 @@
# Special care has been taken to avoid recursion as far as possible to avoid
# stack overflows when traversing deep datastructures. It is well-suited
# for soft real time applications (like games).
import arch
{.push profiler:off.}
const
@@ -64,8 +67,16 @@ type
cycleTableSize: int # max entries in cycle table
maxPause: int64 # max measured GC pause in nanoseconds
GcStack {.final.} = object
prev: ptr GcStack
next: ptr GcStack
starts: pointer
pos: pointer
maxStackSize: int
GcHeap {.final, pure.} = object # this contains the zero count and
# non-zero count table
stack: ptr GcStack
stackBottom: pointer
cycleThreshold: int
when useCellIds:
@@ -154,7 +165,7 @@ template gcTrace(cell, state: expr): stmt {.immediate.} =
# forward declarations:
proc collectCT(gch: var GcHeap) {.benign.}
proc isOnStack*(p: pointer): bool {.noinline, benign.}
proc isOnStack(p: pointer): bool {.noinline, benign.}
proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
proc doOperation(p: pointer, op: WalkOp) {.benign.}
proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
@@ -293,20 +304,6 @@ proc initGC() =
when useMarkForDebug or useBackupGc:
init(gch.marked)
var
localGcInitialized {.rtlThreadVar.}: bool
proc setupForeignThreadGc*() =
## call this if you registered a callback that will be run from a thread not
## under your control. This has a cheap thread-local guard, so the GC for
## this thread will only be initialized once per thread, no matter how often
## it is called.
if not localGcInitialized:
localGcInitialized = true
var stackTop {.volatile.}: pointer
setStackBottom(addr(stackTop))
initGC()
when useMarkForDebug or useBackupGc:
type
GlobalMarkerProc = proc () {.nimcall, benign.}
@@ -816,138 +813,7 @@ proc markThreadStacks(gch: var GcHeap) =
sp = sp +% sizeof(pointer)
it = it.next
# ----------------- stack management --------------------------------------
# inspired from Smart Eiffel
when defined(sparc):
const stackIncreases = false
elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
const stackIncreases = true
else:
const stackIncreases = false
when not defined(useNimRtl):
{.push stack_trace: off.}
proc setStackBottom(theStackBottom: pointer) =
#c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
# the first init must be the one that defines the stack bottom:
if gch.stackBottom == nil: gch.stackBottom = theStackBottom
else:
var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
var b = cast[ByteAddress](gch.stackBottom)
#c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
when stackIncreases:
gch.stackBottom = cast[pointer](min(a, b))
else:
gch.stackBottom = cast[pointer](max(a, b))
{.pop.}
proc stackSize(): int {.noinline.} =
var stackTop {.volatile.}: pointer
result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
when defined(sparc): # For SPARC architecture.
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
var b = cast[TAddress](gch.stackBottom)
var a = cast[TAddress](stackTop)
var x = cast[TAddress](p)
result = a <=% x and x <=% b
template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
when defined(sparcv9):
asm """"flushw \n" """
else:
asm """"ta 0x3 ! ST_FLUSH_WINDOWS\n" """
var
max = gch.stackBottom
sp: PPointer
stackTop: array[0..1, pointer]
sp = addr(stackTop[0])
# Addresses decrease as the stack grows.
while sp <= max:
gcMark(gch, sp[])
sp = cast[PPointer](cast[TAddress](sp) +% sizeof(pointer))
elif defined(ELATE):
{.error: "stack marking code is to be written for this architecture".}
elif stackIncreases:
# ---------------------------------------------------------------------------
# Generic code for architectures where addresses increase as the stack grows.
# ---------------------------------------------------------------------------
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
var a = cast[TAddress](gch.stackBottom)
var b = cast[TAddress](stackTop)
var x = cast[TAddress](p)
result = a <=% x and x <=% b
var
jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
# a little hack to get the size of a JmpBuf in the generated C code
# in a platform independent way
template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
var registers: C_JmpBuf
if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
var max = cast[TAddress](gch.stackBottom)
var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
# sp will traverse the JMP_BUF as well (jmp_buf size is added,
# otherwise sp would be below the registers structure).
while sp >=% max:
gcMark(gch, cast[ppointer](sp)[])
sp = sp -% sizeof(pointer)
else:
# ---------------------------------------------------------------------------
# Generic code for architectures where addresses decrease as the stack grows.
# ---------------------------------------------------------------------------
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
var b = cast[ByteAddress](gch.stackBottom)
var a = cast[ByteAddress](stackTop)
var x = cast[ByteAddress](p)
result = a <=% x and x <=% b
template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
# We use a jmp_buf buffer that is in the C stack.
# Used to traverse the stack and registers assuming
# that 'setjmp' will save registers in the C stack.
type PStackSlice = ptr array [0..7, pointer]
var registers {.noinit.}: C_JmpBuf
if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
var max = cast[ByteAddress](gch.stackBottom)
var sp = cast[ByteAddress](addr(registers))
when defined(amd64):
# words within the jmp_buf structure may not be properly aligned.
let regEnd = sp +% sizeof(registers)
while sp <% regEnd:
gcMark(gch, cast[PPointer](sp)[])
gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
sp = sp +% sizeof(pointer)
# Make sure sp is word-aligned
sp = sp and not (sizeof(pointer) - 1)
# loop unrolled:
while sp <% max - 8*sizeof(pointer):
gcMark(gch, cast[PStackSlice](sp)[0])
gcMark(gch, cast[PStackSlice](sp)[1])
gcMark(gch, cast[PStackSlice](sp)[2])
gcMark(gch, cast[PStackSlice](sp)[3])
gcMark(gch, cast[PStackSlice](sp)[4])
gcMark(gch, cast[PStackSlice](sp)[5])
gcMark(gch, cast[PStackSlice](sp)[6])
gcMark(gch, cast[PStackSlice](sp)[7])
sp = sp +% sizeof(pointer)*8
# last few entries:
while sp <=% max:
gcMark(gch, cast[PPointer](sp)[])
sp = sp +% sizeof(pointer)
include gc_common
proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
forEachStackSlot(gch, gcMark)
@@ -956,10 +822,6 @@ when useMarkForDebug or useBackupGc:
proc markStackAndRegistersForSweep(gch: var GcHeap) =
forEachStackSlot(gch, stackMarkS)
# ----------------------------------------------------------------------------
# end of non-portable code
# ----------------------------------------------------------------------------
proc collectZCT(gch: var GcHeap): bool =
# Note: Freeing may add child objects to the ZCT! So essentially we do
# deep freeing, which is bad for incremental operation. In order to
@@ -1033,7 +895,8 @@ proc collectCTBody(gch: var GcHeap) =
let t0 = getticks()
sysAssert(allocInv(gch.region), "collectCT: begin")
gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
when not defined(nimCoroutines):
gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
sysAssert(gch.decStack.len == 0, "collectCT")
prepareForInteriorPointerChecking(gch.region)
markStackAndRegisters(gch)
@@ -1064,11 +927,19 @@ when useMarkForDebug or useBackupGc:
markStackAndRegistersForSweep(gch)
markGlobals(gch)
when defined(nimCoroutines):
proc currentStackSizes(): int =
for stack in items(gch.stack):
result = result + stackSize(stack.starts, stack.pos)
proc collectCT(gch: var GcHeap) =
# stackMarkCosts prevents some pathological behaviour: Stack marking
# becomes more expensive with large stacks and large stacks mean that
# cells with RC=0 are more likely to be kept alive by the stack.
let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
when defined(nimCoroutines):
let stackMarkCosts = max(currentStackSizes() div (16*sizeof(int)), ZctThreshold)
else:
let stackMarkCosts = max(stackSize() div (16*sizeof(int)), ZctThreshold)
if (gch.zct.len >= stackMarkCosts or (cycleGC and
getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
gch.recGcLock == 0:
@@ -1137,8 +1008,13 @@ when not defined(useNimRtl):
"[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
"[GC] zct capacity: " & $gch.zct.cap & "\n" &
"[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
"[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
"[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
when defined(nimCoroutines):
result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
for stack in items(gch.stack):
result = result & "[GC] stack " & stack.starts.repr & "\n" & "[GC] max stack size " & $stack.maxStackSize & "\n"
else:
result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
GC_enable()
{.pop.}

lib/system/gc_common.nim Normal file

@@ -0,0 +1,275 @@
#
#
# Nim's Runtime Library
# (c) Copyright 2015 Rokas Kupstys
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
proc len(stack: ptr GcStack): int =
if stack == nil:
return 0
var s = stack
result = 1
while s.next != nil:
inc(result)
s = s.next
when defined(nimCoroutines):
proc stackSize(stackBottom: pointer, pos: pointer=nil): int {.noinline.} =
var sp: pointer
if pos == nil:
var stackTop {.volatile.}: pointer
sp = addr(stackTop)
else:
sp = pos
result = abs(cast[int](sp) - cast[int](stackBottom))
proc GC_addStack*(starts: pointer) {.cdecl, exportc.} =
var sp {.volatile.}: pointer
var stack = cast[ptr GcStack](alloc0(sizeof(GcStack)))
stack.starts = starts
stack.pos = addr sp
if gch.stack == nil:
gch.stack = stack
else:
stack.next = gch.stack
gch.stack.prev = stack
gch.stack = stack
# c_fprintf(c_stdout, "[GC] added stack 0x%016X\n", starts)
proc GC_removeStack*(starts: pointer) {.cdecl, exportc.} =
var stack = gch.stack
while stack != nil:
if stack.starts == starts:
if stack.prev == nil:
if stack.next != nil:
stack.next.prev = nil
gch.stack = stack.next
else:
stack.prev.next = stack.next
if stack.next != nil:
stack.next.prev = stack.prev
dealloc(stack)
# echo "[GC] removed stack ", starts.repr
break
else:
stack = stack.next
proc GC_setCurrentStack*(starts, pos: pointer) {.cdecl, exportc.} =
var stack = gch.stack
while stack != nil:
if stack.starts == starts:
stack.pos = pos
stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts, pos))
return
stack = stack.next
gcAssert(false, "Current stack position does not belong to registered stack")
else:
proc stackSize(): int {.noinline.} =
var stackTop {.volatile.}: pointer
result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
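For orientation (editorial note, not part of the file): lib/pure/coro.nim, shown earlier in this commit, drives these hooks over a coroutine's lifetime roughly as follows.

# before the first switch onto the coroutine's stack:
GC_addStack(coro.stack)
# inside coroYield, so the collector only scans the live part of the stack:
GC_setCurrentStack(current.stack, cast[pointer](addr sp))
# after coro.fn() has returned and the stack is about to be freed:
GC_removeStack(coro.stack)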
iterator items(stack: ptr GcStack): ptr GcStack =
var s = stack
while not isNil(s):
yield s
s = s.next
var
localGcInitialized {.rtlThreadVar.}: bool
proc setupForeignThreadGc*() =
## call this if you registered a callback that will be run from a thread not
## under your control. This has a cheap thread-local guard, so the GC for
## this thread will only be initialized once per thread, no matter how often
## it is called.
if not localGcInitialized:
localGcInitialized = true
var stackTop {.volatile.}: pointer
setStackBottom(addr(stackTop))
initGC()
# ----------------- stack management --------------------------------------
# inspired from Smart Eiffel
when defined(sparc):
const stackIncreases = false
elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
const stackIncreases = true
else:
const stackIncreases = false
when not defined(useNimRtl):
{.push stack_trace: off.}
proc setStackBottom(theStackBottom: pointer) =
#c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
# the first init must be the one that defines the stack bottom:
when defined(nimCoroutines):
GC_addStack(theStackBottom)
else:
if gch.stackBottom == nil: gch.stackBottom = theStackBottom
else:
var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
var b = cast[ByteAddress](gch.stackBottom)
#c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
when stackIncreases:
gch.stackBottom = cast[pointer](min(a, b))
else:
gch.stackBottom = cast[pointer](max(a, b))
{.pop.}
when defined(sparc): # For SPARC architecture.
when defined(nimCoroutines):
{.error: "Nim coroutines are not supported on this platform."}
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
var b = cast[TAddress](gch.stackBottom)
var a = cast[TAddress](stackTop)
var x = cast[TAddress](p)
result = a <=% x and x <=% b
template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
when defined(sparcv9):
asm """"flushw \n" """
else:
asm """"ta 0x3 ! ST_FLUSH_WINDOWS\n" """
var
max = gch.stackBottom
sp: PPointer
stackTop: array[0..1, pointer]
sp = addr(stackTop[0])
# Addresses decrease as the stack grows.
while sp <= max:
gcMark(gch, sp[])
sp = cast[PPointer](cast[TAddress](sp) +% sizeof(pointer))
elif defined(ELATE):
{.error: "stack marking code is to be written for this architecture".}
elif stackIncreases:
# ---------------------------------------------------------------------------
# Generic code for architectures where addresses increase as the stack grows.
# ---------------------------------------------------------------------------
when defined(nimCoroutines):
{.error: "Nim coroutines are not supported on this platform."}
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
var a = cast[TAddress](gch.stackBottom)
var b = cast[TAddress](stackTop)
var x = cast[TAddress](p)
result = a <=% x and x <=% b
var
jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
# a little hack to get the size of a JmpBuf in the generated C code
# in a platform independent way
template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
var registers: C_JmpBuf
if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
var max = cast[TAddress](gch.stackBottom)
var sp = cast[TAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
# sp will traverse the JMP_BUF as well (jmp_buf size is added,
# otherwise sp would be below the registers structure).
while sp >=% max:
gcMark(gch, cast[ppointer](sp)[])
sp = sp -% sizeof(pointer)
else:
# ---------------------------------------------------------------------------
# Generic code for architectures where addresses decrease as the stack grows.
# ---------------------------------------------------------------------------
when defined(nimCoroutines):
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
for stack in items(gch.stack):
var b = cast[ByteAddress](stack.starts)
var a = cast[ByteAddress](stack.starts) - stack.maxStackSize
var x = cast[ByteAddress](p)
if a <=% x and x <=% b:
return true
template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
# We use a jmp_buf buffer that is in the C stack.
# Used to traverse the stack and registers assuming
# that 'setjmp' will save registers in the C stack.
type PStackSlice = ptr array [0..7, pointer]
var registers {.noinit.}: Registers
getRegisters(registers)
for i in registers.low .. registers.high:
gcMark(gch, cast[PPointer](registers[i]))
for stack in items(gch.stack):
stack.maxStackSize = max(stack.maxStackSize, stackSize(stack.starts))
var max = cast[ByteAddress](stack.starts)
var sp = cast[ByteAddress](stack.pos)
# loop unrolled:
while sp <% max - 8*sizeof(pointer):
gcMark(gch, cast[PStackSlice](sp)[0])
gcMark(gch, cast[PStackSlice](sp)[1])
gcMark(gch, cast[PStackSlice](sp)[2])
gcMark(gch, cast[PStackSlice](sp)[3])
gcMark(gch, cast[PStackSlice](sp)[4])
gcMark(gch, cast[PStackSlice](sp)[5])
gcMark(gch, cast[PStackSlice](sp)[6])
gcMark(gch, cast[PStackSlice](sp)[7])
sp = sp +% sizeof(pointer)*8
# last few entries:
while sp <=% max:
gcMark(gch, cast[PPointer](sp)[])
sp = sp +% sizeof(pointer)
else:
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
var b = cast[ByteAddress](gch.stackBottom)
var a = cast[ByteAddress](stackTop)
var x = cast[ByteAddress](p)
result = a <=% x and x <=% b
template forEachStackSlot(gch, gcMark: expr) {.immediate, dirty.} =
# We use a jmp_buf buffer that is in the C stack.
# Used to traverse the stack and registers assuming
# that 'setjmp' will save registers in the C stack.
type PStackSlice = ptr array [0..7, pointer]
var registers {.noinit.}: C_JmpBuf
if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
var max = cast[ByteAddress](gch.stackBottom)
var sp = cast[ByteAddress](addr(registers))
when defined(amd64):
# words within the jmp_buf structure may not be properly aligned.
let regEnd = sp +% sizeof(registers)
while sp <% regEnd:
gcMark(gch, cast[PPointer](sp)[])
gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
sp = sp +% sizeof(pointer)
# Make sure sp is word-aligned
sp = sp and not (sizeof(pointer) - 1)
# loop unrolled:
while sp <% max - 8*sizeof(pointer):
gcMark(gch, cast[PStackSlice](sp)[0])
gcMark(gch, cast[PStackSlice](sp)[1])
gcMark(gch, cast[PStackSlice](sp)[2])
gcMark(gch, cast[PStackSlice](sp)[3])
gcMark(gch, cast[PStackSlice](sp)[4])
gcMark(gch, cast[PStackSlice](sp)[5])
gcMark(gch, cast[PStackSlice](sp)[6])
gcMark(gch, cast[PStackSlice](sp)[7])
sp = sp +% sizeof(pointer)*8
# last few entries:
while sp <=% max:
gcMark(gch, cast[PPointer](sp)[])
sp = sp +% sizeof(pointer)
# ----------------------------------------------------------------------------
# end of non-portable code
# ----------------------------------------------------------------------------


@@ -9,6 +9,9 @@
# A simple mark&sweep garbage collector for Nim. Define the
# symbol ``gcUseBitvectors`` to generate a variant of this GC.
import arch
{.push profiler:off.}
const
@@ -44,8 +47,16 @@ type
maxStackSize: int # max stack size
freedObjects: int # number of objects freed
GcStack {.final.} = object
prev: ptr GcStack
next: ptr GcStack
starts: pointer
pos: pointer
maxStackSize: int
GcHeap = object # this contains the zero count and
# non-zero count table
stack: ptr GcStack
stackBottom: pointer
cycleThreshold: int
when useCellIds:
@@ -118,7 +129,6 @@ when BitsPerPage mod (sizeof(int)*8) != 0:
# forward declarations:
proc collectCT(gch: var GcHeap) {.benign.}
proc isOnStack*(p: pointer): bool {.noinline, benign.}
proc forAllChildren(cell: PCell, op: WalkOp) {.benign.}
proc doOperation(p: pointer, op: WalkOp) {.benign.}
proc forAllChildrenAux(dest: pointer, mt: PNimType, op: WalkOp) {.benign.}
@@ -169,20 +179,6 @@ proc initGC() =
init(gch.allocated)
init(gch.marked)
var
localGcInitialized {.rtlThreadVar.}: bool
proc setupForeignThreadGc*() =
## call this if you registered a callback that will be run from a thread not
## under your control. This has a cheap thread-local guard, so the GC for
## this thread will only be initialized once per thread, no matter how often
## it is called.
if not localGcInitialized:
localGcInitialized = true
var stackTop {.volatile.}: pointer
setStackBottom(addr(stackTop))
initGC()
proc forAllSlotsAux(dest: pointer, n: ptr TNimNode, op: WalkOp) {.benign.} =
var d = cast[ByteAddress](dest)
case n.kind
@@ -407,145 +403,14 @@ proc gcMark(gch: var GcHeap, p: pointer) {.inline.} =
if objStart != nil:
mark(gch, objStart)
# ----------------- stack management --------------------------------------
# inspired from Smart Eiffel
include gc_common
when defined(sparc):
const stackIncreases = false
elif defined(hppa) or defined(hp9000) or defined(hp9000s300) or
defined(hp9000s700) or defined(hp9000s800) or defined(hp9000s820):
const stackIncreases = true
else:
const stackIncreases = false
when not defined(useNimRtl):
{.push stack_trace: off.}
proc setStackBottom(theStackBottom: pointer) =
#c_fprintf(c_stdout, "stack bottom: %p;\n", theStackBottom)
# the first init must be the one that defines the stack bottom:
if gch.stackBottom == nil: gch.stackBottom = theStackBottom
else:
var a = cast[ByteAddress](theStackBottom) # and not PageMask - PageSize*2
var b = cast[ByteAddress](gch.stackBottom)
#c_fprintf(c_stdout, "old: %p new: %p;\n",gch.stackBottom,theStackBottom)
when stackIncreases:
gch.stackBottom = cast[pointer](min(a, b))
else:
gch.stackBottom = cast[pointer](max(a, b))
{.pop.}
proc stackSize(): int {.noinline.} =
var stackTop {.volatile.}: pointer
result = abs(cast[int](addr(stackTop)) - cast[int](gch.stackBottom))
when defined(sparc): # For SPARC architecture.
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
var b = cast[ByteAddress](gch.stackBottom)
var a = cast[ByteAddress](stackTop)
var x = cast[ByteAddress](p)
result = a <=% x and x <=% b
proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
when defined(sparcv9):
asm """"flushw \n" """
else:
asm """"ta 0x3 ! ST_FLUSH_WINDOWS\n" """
var
max = gch.stackBottom
sp: PPointer
stackTop: array[0..1, pointer]
sp = addr(stackTop[0])
# Addresses decrease as the stack grows.
while sp <= max:
gcMark(gch, sp[])
sp = cast[ppointer](cast[ByteAddress](sp) +% sizeof(pointer))
elif defined(ELATE):
{.error: "stack marking code is to be written for this architecture".}
elif stackIncreases:
# ---------------------------------------------------------------------------
# Generic code for architectures where addresses increase as the stack grows.
# ---------------------------------------------------------------------------
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
var a = cast[ByteAddress](gch.stackBottom)
var b = cast[ByteAddress](stackTop)
var x = cast[ByteAddress](p)
result = a <=% x and x <=% b
var
jmpbufSize {.importc: "sizeof(jmp_buf)", nodecl.}: int
# a little hack to get the size of a JmpBuf in the generated C code
# in a platform independent way
proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
var registers: C_JmpBuf
if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
var max = cast[ByteAddress](gch.stackBottom)
var sp = cast[ByteAddress](addr(registers)) +% jmpbufSize -% sizeof(pointer)
# sp will traverse the JMP_BUF as well (jmp_buf size is added,
# otherwise sp would be below the registers structure).
while sp >=% max:
gcMark(gch, cast[ppointer](sp)[])
sp = sp -% sizeof(pointer)
else:
# ---------------------------------------------------------------------------
# Generic code for architectures where addresses decrease as the stack grows.
# ---------------------------------------------------------------------------
proc isOnStack(p: pointer): bool =
var stackTop {.volatile.}: pointer
stackTop = addr(stackTop)
var b = cast[ByteAddress](gch.stackBottom)
var a = cast[ByteAddress](stackTop)
var x = cast[ByteAddress](p)
result = a <=% x and x <=% b
proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
# We use a jmp_buf buffer that is in the C stack.
# Used to traverse the stack and registers assuming
# that 'setjmp' will save registers in the C stack.
type PStackSlice = ptr array [0..7, pointer]
var registers {.noinit.}: C_JmpBuf
if c_setjmp(registers) == 0'i32: # To fill the C stack with registers.
var max = cast[ByteAddress](gch.stackBottom)
var sp = cast[ByteAddress](addr(registers))
when defined(amd64):
# words within the jmp_buf structure may not be properly aligned.
let regEnd = sp +% sizeof(registers)
while sp <% regEnd:
gcMark(gch, cast[PPointer](sp)[])
gcMark(gch, cast[PPointer](sp +% sizeof(pointer) div 2)[])
sp = sp +% sizeof(pointer)
# Make sure sp is word-aligned
sp = sp and not (sizeof(pointer) - 1)
# loop unrolled:
while sp <% max - 8*sizeof(pointer):
gcMark(gch, cast[PStackSlice](sp)[0])
gcMark(gch, cast[PStackSlice](sp)[1])
gcMark(gch, cast[PStackSlice](sp)[2])
gcMark(gch, cast[PStackSlice](sp)[3])
gcMark(gch, cast[PStackSlice](sp)[4])
gcMark(gch, cast[PStackSlice](sp)[5])
gcMark(gch, cast[PStackSlice](sp)[6])
gcMark(gch, cast[PStackSlice](sp)[7])
sp = sp +% sizeof(pointer)*8
# last few entries:
while sp <=% max:
gcMark(gch, cast[PPointer](sp)[])
sp = sp +% sizeof(pointer)
# ----------------------------------------------------------------------------
# end of non-portable code
# ----------------------------------------------------------------------------
proc markStackAndRegisters(gch: var GcHeap) {.noinline, cdecl.} =
forEachStackSlot(gch, gcMark)
proc collectCTBody(gch: var GcHeap) =
gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
when not defined(nimCoroutines):
gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
prepareForInteriorPointerChecking(gch.region)
markStackAndRegisters(gch)
markGlobals(gch)
@@ -599,8 +464,13 @@ when not defined(useNimRtl):
"[GC] occupied memory: " & $getOccupiedMem() & "\n" &
"[GC] collections: " & $gch.stat.collections & "\n" &
"[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
"[GC] freed objects: " & $gch.stat.freedObjects & "\n" &
"[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
"[GC] freed objects: " & $gch.stat.freedObjects & "\n"
when defined(nimCoroutines):
result = result & "[GC] number of stacks: " & $gch.stack.len & "\n"
for stack in items(gch.stack):
result = result & "[GC] stack " & stack.starts.repr & "\n" & "[GC] max stack size " & $stack.maxStackSize & "\n"
else:
result = result & "[GC] max stack size: " & $gch.stat.maxStackSize & "\n"
GC_enable()
{.pop.}