GC with realtime support

This commit is contained in:
Araq
2012-04-21 03:19:43 +02:00
parent 8319e2411d
commit 4aba7421f5
15 changed files with 261 additions and 27 deletions

View File

@@ -103,6 +103,9 @@ proc HandleCmdLine() =
execExternalProgram(ex & ' ' & arguments)
#GC_disableMarkAndSweep()
when defined(GC_setMaxPause):
GC_setMaxPause 2_000
condsyms.InitDefines()
HandleCmdLine()
quit(options.gExitcode)

View File

@@ -22,6 +22,10 @@ The documentation consists of several documents:
- | `Manual <manual.html>`_
| The Nimrod manual is a draft that will evolve into a proper specification.
- | `GC <gc.html>`_
| Additional documentation about Nimrod's GC and how to operate it in a
| realtime setting.
- | `Source code filters <filters.html>`_
| The Nimrod compiler supports source code filters as a simple yet powerful
builtin templating system.

58
doc/gc.txt Normal file
View File

@@ -0,0 +1,58 @@
==========================
Nimrod's Garbage Collector
==========================
Introduction
============
This document describes how the GC works and how to tune it for (soft)
realtime systems.
The basic algorithm is *Deferrent Reference Counting* with cycle detection.
References on the stack are not counted for better performance (and easier C
code generation). The GC **never** scans the whole heap but it may scan the
delta-subgraph of the heap that changed since its last run.
The GC is only triggered in a memory allocation operation. It it not triggered
by some timer or background thread.
Realtime support
================
To enable realtime support, the switch `useRealtimeGC`:idx: needs to be
defined. With this switch the GC supports the following operations:
.. code-block:: nimrod
proc GC_setMaxPause*(MaxPauseInUs: int)
proc GC_step*(us: int, strongAdvice = false)
After calling ``GC_setMaxPause`` any GC run tries to finish within
``MaxPauseInUs`` microseconds. XXX complete documentation
Time measurement
----------------
The GC's way of measing time uses (see ``lib/system/timers.nim`` for the
implementation):
1) ``QueryPerformanceCounter`` and ``QueryPerformanceFrequency`` on Windows.
2) ``mach_absolute_time`` on Mac OS X.
3) ``gettimeofday`` on Posix systems.
As such it supports a resolution of nano seconds internally; however the API
uses microseconds for convenience.
Define the symbol ``reportMissedDeadlines`` to make the GC output whenever it
missed a deadline. The reporting will be enhances and supported by the API in
later versions of the collector.
Tweaking the GC
===============
To be written.

View File

@@ -142,6 +142,9 @@ Define Effect
``useNimRtl`` Compile and link against ``nimrtl.dll``.
``useMalloc`` Makes Nimrod use C's `malloc`:idx: instead of Nimrod's
own memory manager. This only works with ``gc:none``.
``useRealtimeGC`` Enables support of Nimrod's GC for *soft* realtime
systems. See the documentation of the `gc <gc.html>`_
for further information.
================== =========================================================

View File

@@ -15,9 +15,8 @@
# together with Christoper's partial mark-sweep garbage collector.
#
# Special care has been taken to avoid recursion as far as possible to avoid
# stack overflows when traversing deep datastructures. This is comparable to
# an incremental and generational GC. It should be well-suited for soft real
# time applications (like games).
# stack overflows when traversing deep datastructures. It is well-suited
# for soft real time applications (like games).
const
CycleIncrease = 2 # is a multiplicative increase
@@ -25,6 +24,10 @@ const
ZctThreshold = 500 # we collect garbage if the ZCT's size
# reaches this threshold
# this seems to be a good value
withRealTime = defined(useRealtimeGC)
when withRealTime:
include "system/timers"
const
rcIncrement = 0b1000 # so that lowest 3 bits are not touched
@@ -53,6 +56,7 @@ type
maxStackSize: int # max stack size
maxStackCells: int # max stack cells in ``decStack``
cycleTableSize: int # max entries in cycle table
maxPause: int64 # max measured GC pause in nanoseconds
TGcHeap {.final, pure.} = object # this contains the zero count and
# non-zero count table
@@ -63,6 +67,8 @@ type
cycleRoots: TCellSet
tempStack: TCellSeq # temporary stack for recursion elimination
recGcLock: int # prevent recursion via finalizers; no thread lock
when withRealTime:
maxPause: TNanos # max allowed pause in nanoseconds; active if > 0
region: TMemRegion # garbage collected region
stat: TGcStat
@@ -173,8 +179,6 @@ when traceGC:
template gcTrace(cell, state: expr): stmt {.immediate.} =
when traceGC: traceCell(cell, state)
# -----------------------------------------------------------------------------
# forward declarations:
proc collectCT(gch: var TGcHeap)
proc IsOnStack*(p: pointer): bool {.noinline.}
@@ -741,12 +745,18 @@ else:
# end of non-portable code
# ----------------------------------------------------------------------------
proc CollectZCT(gch: var TGcHeap) =
proc CollectZCT(gch: var TGcHeap): bool =
# Note: Freeing may add child objects to the ZCT! So essentially we do
# deep freeing, which is bad for incremental operation. In order to
# avoid a deep stack, we move objects to keep the ZCT small.
# This is performance critical!
const workPackage = 100
var L = addr(gch.zct.len)
when withRealtime:
var steps = workPackage
var t0: TTicks
if gch.maxPause > 0: t0 = getticks()
while L[] > 0:
var c = gch.zct.d[0]
sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
@@ -756,6 +766,7 @@ proc CollectZCT(gch: var TGcHeap) =
c.refcount = c.refcount and not colorMask
gch.zct.d[0] = gch.zct.d[L[] - 1]
dec(L[])
when withRealtime: dec steps
if c.refcount <% rcIncrement:
# It may have a RC > 0, if it is in the hardware stack or
# it has not been removed yet from the ZCT. This is because
@@ -775,6 +786,17 @@ proc CollectZCT(gch: var TGcHeap) =
else:
sysAssert(c.typ != nil, "collectZCT 2")
zeroMem(c, sizeof(TCell))
when withRealtime:
if steps == 0:
steps = workPackage
if gch.maxPause > 0:
let duration = getticks() - t0
# the GC's measuring is not accurate and needs some cleanup actions
# (stack unmarking), so subtract some short amount of time in to
# order to miss deadlines less often:
if duration >= gch.maxPause - 50_000:
return false
result = true
proc unmarkStackAndRegisters(gch: var TGcHeap) =
var d = gch.decStack.d
@@ -788,30 +810,64 @@ proc unmarkStackAndRegisters(gch: var TGcHeap) =
sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
gch.decStack.len = 0
proc collectCT(gch: var TGcHeap) =
if (gch.zct.len >= ZctThreshold or (cycleGC and
getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
gch.recGcLock == 0:
sysAssert(allocInv(gch.region), "collectCT: begin")
gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
sysAssert(gch.decStack.len == 0, "collectCT")
prepareForInteriorPointerChecking(gch.region)
markStackAndRegisters(gch)
markThreadStacks(gch)
gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
inc(gch.stat.stackScans)
collectZCT(gch)
proc collectCTBody(gch: var TGcHeap) =
when withRealtime:
let t0 = getticks()
sysAssert(allocInv(gch.region), "collectCT: begin")
gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
sysAssert(gch.decStack.len == 0, "collectCT")
prepareForInteriorPointerChecking(gch.region)
markStackAndRegisters(gch)
markThreadStacks(gch)
gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
inc(gch.stat.stackScans)
if collectZCT(gch):
when cycleGC:
if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
collectCycles(gch)
collectZCT(gch)
discard collectZCT(gch)
inc(gch.stat.cycleCollections)
gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
cycleIncrease)
gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
unmarkStackAndRegisters(gch)
sysAssert(allocInv(gch.region), "collectCT: end")
unmarkStackAndRegisters(gch)
sysAssert(allocInv(gch.region), "collectCT: end")
when withRealtime:
let duration = getticks() - t0
gch.stat.maxPause = max(gch.stat.maxPause, duration)
when defined(reportMissedDeadlines):
if gch.maxPause > 0 and duration > gch.maxPause:
c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
proc collectCT(gch: var TGcHeap) =
if (gch.zct.len >= ZctThreshold or (cycleGC and
getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
gch.recGcLock == 0:
collectCTBody(gch)
when withRealtime:
proc toNano(x: int): TNanos {.inline.} =
result = x * 1000
proc GC_setMaxPause*(MaxPauseInUs: int) =
gch.maxPause = MaxPauseInUs.toNano
proc GC_step(gch: var TGcHeap, us: int, strongAdvice: bool) =
acquire(gch)
var oldThreshold = gch.cycleThreshold
# disable cycle collection:
gch.cycleThreshold = high(gch.cycleThreshold)-1
gch.maxPause = us.toNano
if strongAdvice:
if gch.recGcLock == 0: collectCTBody(gch)
else:
collectCT(gch)
gch.cycleThreshold = oldThreshold
release(gch)
proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
when not defined(useNimRtl):
proc GC_disable() =
@@ -858,6 +914,7 @@ when not defined(useNimRtl):
"[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
"[GC] zct capacity: " & $gch.zct.cap & "\n" &
"[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
"[GC] max stack size: " & $gch.stat.maxStackSize
"[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
"[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
when traceGC: writeLeakage()
GC_enable()

92
lib/system/timers.nim Normal file
View File

@@ -0,0 +1,92 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2012 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Timer support for the realtime GC. Based on
## `<https://github.com/jckarter/clay/blob/master/compiler/src/hirestimer.cpp>`_
type
TTicks = distinct int64
TNanos = int64
when defined(windows):
proc QueryPerformanceCounter(res: var TTicks) {.
importc: "QueryPerformanceCounter", stdcall, dynlib: "kernel32".}
proc QueryPerformanceFrequency(res: var int64) {.
importc: "QueryPerformanceFrequency", stdcall, dynlib: "kernel32".}
proc getTicks(): TTicks {.inline.} =
QueryPerformanceCounter(result)
proc `-`(a, b: TTicks): TNanos =
var frequency: int64
QueryPerformanceFrequency(frequency)
var performanceCounterRate = 1000000000.0 / toFloat(frequency.int)
result = ((a.int64 - b.int64).int.toFloat * performanceCounterRate).TNanos
elif defined(macosx):
type
TMachTimebaseInfoData {.pure, final,
importc: "mach_timebase_info_data_t",
header: "<mach/mach_time.h>".} = object
numer, denom: int32
proc mach_absolute_time(): int64 {.importc, header: "<mach/mach.h>".}
proc mach_timebase_info(info: var TMachTimebaseInfoData) {.importc,
header: "<mach/mach_time.h>".}
proc getTicks(): TTicks {.inline.} =
result = TTicks(mach_absolute_time())
proc `-`(a, b: TTicks): TNanos =
var timeBaseInfo: TMachTimebaseInfoData
mach_timebase_info(timeBaseInfo)
result = (a.int64 - b.int64) * timeBaseInfo.numer div timeBaseInfo.denom
elif defined(posixRealtime):
type
TClockid {.importc: "clockid_t", header: "<time.h>", final.} = object
TTimeSpec {.importc: "struct timespec", header: "<time.h>",
final, pure.} = object ## struct timespec
tv_sec: int ## Seconds.
tv_nsec: int ## Nanoseconds.
var
CLOCK_REALTIME {.importc: "CLOCK_REALTIME", header: "<time.h>".}: TClockid
proc clock_gettime(clkId: TClockid, tp: var TTimespec) {.
importc: "clock_gettime", header: "<time.h>".}
proc getTicks(): TTicks =
var t: TTimespec
clock_gettime(CLOCK_REALTIME, t)
result = TTicks(int64(t.tv_sec) * 1000000000'i64 + int64(t.tv_nsec))
proc `-`(a, b: TTicks): TNanos {.borrow.}
else:
# fallback Posix implementation:
type
Ttimeval {.importc: "struct timeval", header: "<sys/select.h>",
final, pure.} = object ## struct timeval
tv_sec: int ## Seconds.
tv_usec: int ## Microseconds.
proc posix_gettimeofday(tp: var Ttimeval, unused: pointer = nil) {.
importc: "gettimeofday", header: "<sys/time.h>".}
proc getTicks(): TTicks =
var t: Ttimeval
posix_gettimeofday(t)
result = TTicks(int64(t.tv_sec) * 1000_000_000'i64 +
int64(t.tv_usec) * 1000'i64)
proc `-`(a, b: TTicks): TNanos {.borrow.}

View File

@@ -161,5 +161,8 @@ proc main() =
var elapsed = epochTime() - t
PrintDiagnostics()
echo("Completed in " & $elapsed & "ms. Success!")
when defined(GC_setMaxPause):
GC_setMaxPause 2_000
main()

View File

@@ -2,6 +2,9 @@ discard """
outputsub: "no leak: "
"""
when defined(GC_setMaxPause):
GC_setMaxPause 2_000
type
TTestObj = object of TObject
x: string

View File

@@ -2,6 +2,9 @@ discard """
outputsub: "no leak: "
"""
when defined(GC_setMaxPause):
GC_setMaxPause 2_000
type
TTestObj = object of TObject
x: string

View File

@@ -2,6 +2,9 @@ discard """
outputsub: "no leak: "
"""
when defined(GC_setMaxPause):
GC_setMaxPause 2_000
type
TSomething = object
s: string

View File

@@ -122,6 +122,8 @@ proc runGcTests(r: var TResults, options: string) =
template test(filename: expr): stmt =
runSingleTest(r, "tests/gc" / filename, options)
runSingleTest(r, "tests/gc" / filename, options & " -d:release")
runSingleTest(r, "tests/gc" / filename, options &
" -d:release -d:useRealtimeGC")
test "gcbench"
test "gcleak"

View File

@@ -1,7 +1,8 @@
version 0.9.0
=============
- make GC realtime capable: GC_step(ms: int)
- complete GC's documentation
- make ``cookies`` part of the stdlib's documentation
- make templates hygienic by default
- ``=`` should be overloadable; requires specialization for ``=``
- fix remaining generics bugs

View File

@@ -43,7 +43,7 @@ Nimrod is efficient
* Native code generation (currently via compilation to C), not dependent on a
virtual machine: **Nimrod produces small executables without dependencies
for easy redistribution.**
* A fast **non-tracing** garbage collector that should be well suited for soft
* A fast **non-tracing** garbage collector that supports soft
real-time systems (like games).
* System programming features: Ability to manage your own memory and access the
hardware directly. Pointers to garbage collected memory are distinguished

View File

@@ -35,6 +35,8 @@ Library Additions
- Added a wrapper for ``libsvm``.
- Added a wrapper for ``mongodb``.
- Added ``terminal.isatty``.
- The GC supports (soft) realtime systems via ``GC_setMaxPause``
and ``GC_step`` procs.
Changes affecting backwards compatibility

View File

@@ -23,7 +23,7 @@ file: ticker
[Documentation]
doc: "endb;intern;apis;lib;manual;tut1;tut2;nimrodc;overview;filters"
doc: "tools;c2nim;niminst;nimgrep"
pdf: "manual;lib;tut1;tut2;nimrodc;c2nim;niminst"
pdf: "manual;lib;tut1;tut2;nimrodc;c2nim;niminst;gc"
srcdoc: "core/macros;pure/marshal;core/typeinfo"
srcdoc: "impure/graphics;impure/re;pure/sockets"
srcdoc: "system.nim;system/threads.nim;system/channels.nim"