mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-19 14:00:35 +00:00
GC with realtime support
This commit is contained in:
@@ -103,6 +103,9 @@ proc HandleCmdLine() =
|
||||
execExternalProgram(ex & ' ' & arguments)
|
||||
|
||||
#GC_disableMarkAndSweep()
|
||||
|
||||
when defined(GC_setMaxPause):
|
||||
GC_setMaxPause 2_000
|
||||
condsyms.InitDefines()
|
||||
HandleCmdLine()
|
||||
quit(options.gExitcode)
|
||||
|
||||
@@ -22,6 +22,10 @@ The documentation consists of several documents:
|
||||
- | `Manual <manual.html>`_
|
||||
| The Nimrod manual is a draft that will evolve into a proper specification.
|
||||
|
||||
- | `GC <gc.html>`_
|
||||
| Additional documentation about Nimrod's GC and how to operate it in a
|
||||
| realtime setting.
|
||||
|
||||
- | `Source code filters <filters.html>`_
|
||||
| The Nimrod compiler supports source code filters as a simple yet powerful
|
||||
builtin templating system.
|
||||
|
||||
58
doc/gc.txt
Normal file
58
doc/gc.txt
Normal file
@@ -0,0 +1,58 @@
|
||||
==========================
|
||||
Nimrod's Garbage Collector
|
||||
==========================
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
This document describes how the GC works and how to tune it for (soft)
|
||||
realtime systems.
|
||||
|
||||
The basic algorithm is *Deferred Reference Counting* with cycle detection.
|
||||
References on the stack are not counted for better performance (and easier C
|
||||
code generation). The GC **never** scans the whole heap but it may scan the
|
||||
delta-subgraph of the heap that changed since its last run.
|
||||
|
||||
|
||||
The GC is only triggered in a memory allocation operation. It is not triggered
|
||||
by some timer or background thread.
|
||||
|
||||
|
||||
Realtime support
|
||||
================
|
||||
|
||||
To enable realtime support, the switch `useRealtimeGC`:idx: needs to be
|
||||
defined. With this switch the GC supports the following operations:
|
||||
|
||||
.. code-block:: nimrod
|
||||
proc GC_setMaxPause*(MaxPauseInUs: int)
|
||||
proc GC_step*(us: int, strongAdvice = false)
|
||||
|
||||
After calling ``GC_setMaxPause`` any GC run tries to finish within
|
||||
``MaxPauseInUs`` microseconds. XXX complete documentation
|
||||
|
||||
|
||||
|
||||
Time measurement
|
||||
----------------
|
||||
|
||||
The GC's way of measuring time uses (see ``lib/system/timers.nim`` for the
|
||||
implementation):
|
||||
|
||||
1) ``QueryPerformanceCounter`` and ``QueryPerformanceFrequency`` on Windows.
|
||||
2) ``mach_absolute_time`` on Mac OS X.
|
||||
3) ``gettimeofday`` on Posix systems.
|
||||
|
||||
As such it supports a resolution of nanoseconds internally; however the API
|
||||
uses microseconds for convenience.
|
||||
|
||||
|
||||
Define the symbol ``reportMissedDeadlines`` to make the GC output whenever it
|
||||
misses a deadline. The reporting will be enhanced and supported by the API in
|
||||
later versions of the collector.
|
||||
|
||||
|
||||
Tweaking the GC
|
||||
===============
|
||||
|
||||
To be written.
|
||||
@@ -142,6 +142,9 @@ Define Effect
|
||||
``useNimRtl`` Compile and link against ``nimrtl.dll``.
|
||||
``useMalloc`` Makes Nimrod use C's `malloc`:idx: instead of Nimrod's
|
||||
own memory manager. This only works with ``gc:none``.
|
||||
``useRealtimeGC`` Enables support of Nimrod's GC for *soft* realtime
|
||||
systems. See the documentation of the `gc <gc.html>`_
|
||||
for further information.
|
||||
================== =========================================================
|
||||
|
||||
|
||||
|
||||
@@ -15,9 +15,8 @@
|
||||
# together with Christoper's partial mark-sweep garbage collector.
|
||||
#
|
||||
# Special care has been taken to avoid recursion as far as possible to avoid
|
||||
# stack overflows when traversing deep datastructures. This is comparable to
|
||||
# an incremental and generational GC. It should be well-suited for soft real
|
||||
# time applications (like games).
|
||||
# stack overflows when traversing deep datastructures. It is well-suited
|
||||
# for soft real time applications (like games).
|
||||
|
||||
const
|
||||
CycleIncrease = 2 # is a multiplicative increase
|
||||
@@ -25,6 +24,10 @@ const
|
||||
ZctThreshold = 500 # we collect garbage if the ZCT's size
|
||||
# reaches this threshold
|
||||
# this seems to be a good value
|
||||
withRealTime = defined(useRealtimeGC)
|
||||
|
||||
when withRealTime:
|
||||
include "system/timers"
|
||||
|
||||
const
|
||||
rcIncrement = 0b1000 # so that lowest 3 bits are not touched
|
||||
@@ -53,6 +56,7 @@ type
|
||||
maxStackSize: int # max stack size
|
||||
maxStackCells: int # max stack cells in ``decStack``
|
||||
cycleTableSize: int # max entries in cycle table
|
||||
maxPause: int64 # max measured GC pause in nanoseconds
|
||||
|
||||
TGcHeap {.final, pure.} = object # this contains the zero count and
|
||||
# non-zero count table
|
||||
@@ -63,6 +67,8 @@ type
|
||||
cycleRoots: TCellSet
|
||||
tempStack: TCellSeq # temporary stack for recursion elimination
|
||||
recGcLock: int # prevent recursion via finalizers; no thread lock
|
||||
when withRealTime:
|
||||
maxPause: TNanos # max allowed pause in nanoseconds; active if > 0
|
||||
region: TMemRegion # garbage collected region
|
||||
stat: TGcStat
|
||||
|
||||
@@ -173,8 +179,6 @@ when traceGC:
|
||||
template gcTrace(cell, state: expr): stmt {.immediate.} =
|
||||
when traceGC: traceCell(cell, state)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# forward declarations:
|
||||
proc collectCT(gch: var TGcHeap)
|
||||
proc IsOnStack*(p: pointer): bool {.noinline.}
|
||||
@@ -741,12 +745,18 @@ else:
|
||||
# end of non-portable code
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
proc CollectZCT(gch: var TGcHeap) =
|
||||
proc CollectZCT(gch: var TGcHeap): bool =
|
||||
# Note: Freeing may add child objects to the ZCT! So essentially we do
|
||||
# deep freeing, which is bad for incremental operation. In order to
|
||||
# avoid a deep stack, we move objects to keep the ZCT small.
|
||||
# This is performance critical!
|
||||
const workPackage = 100
|
||||
var L = addr(gch.zct.len)
|
||||
|
||||
when withRealtime:
|
||||
var steps = workPackage
|
||||
var t0: TTicks
|
||||
if gch.maxPause > 0: t0 = getticks()
|
||||
while L[] > 0:
|
||||
var c = gch.zct.d[0]
|
||||
sysAssert(isAllocatedPtr(gch.region, c), "CollectZCT: isAllocatedPtr")
|
||||
@@ -756,6 +766,7 @@ proc CollectZCT(gch: var TGcHeap) =
|
||||
c.refcount = c.refcount and not colorMask
|
||||
gch.zct.d[0] = gch.zct.d[L[] - 1]
|
||||
dec(L[])
|
||||
when withRealtime: dec steps
|
||||
if c.refcount <% rcIncrement:
|
||||
# It may have a RC > 0, if it is in the hardware stack or
|
||||
# it has not been removed yet from the ZCT. This is because
|
||||
@@ -775,6 +786,17 @@ proc CollectZCT(gch: var TGcHeap) =
|
||||
else:
|
||||
sysAssert(c.typ != nil, "collectZCT 2")
|
||||
zeroMem(c, sizeof(TCell))
|
||||
when withRealtime:
|
||||
if steps == 0:
|
||||
steps = workPackage
|
||||
if gch.maxPause > 0:
|
||||
let duration = getticks() - t0
|
||||
# the GC's measuring is not accurate and needs some cleanup actions
|
||||
# (stack unmarking), so subtract some short amount of time in
|
||||
# order to miss deadlines less often:
|
||||
if duration >= gch.maxPause - 50_000:
|
||||
return false
|
||||
result = true
|
||||
|
||||
proc unmarkStackAndRegisters(gch: var TGcHeap) =
|
||||
var d = gch.decStack.d
|
||||
@@ -788,30 +810,64 @@ proc unmarkStackAndRegisters(gch: var TGcHeap) =
|
||||
sysAssert c.typ != nil, "unmarkStackAndRegisters 2"
|
||||
gch.decStack.len = 0
|
||||
|
||||
proc collectCT(gch: var TGcHeap) =
|
||||
if (gch.zct.len >= ZctThreshold or (cycleGC and
|
||||
getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
|
||||
gch.recGcLock == 0:
|
||||
sysAssert(allocInv(gch.region), "collectCT: begin")
|
||||
|
||||
gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
|
||||
sysAssert(gch.decStack.len == 0, "collectCT")
|
||||
prepareForInteriorPointerChecking(gch.region)
|
||||
markStackAndRegisters(gch)
|
||||
markThreadStacks(gch)
|
||||
gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
|
||||
inc(gch.stat.stackScans)
|
||||
collectZCT(gch)
|
||||
proc collectCTBody(gch: var TGcHeap) =
|
||||
when withRealtime:
|
||||
let t0 = getticks()
|
||||
sysAssert(allocInv(gch.region), "collectCT: begin")
|
||||
|
||||
gch.stat.maxStackSize = max(gch.stat.maxStackSize, stackSize())
|
||||
sysAssert(gch.decStack.len == 0, "collectCT")
|
||||
prepareForInteriorPointerChecking(gch.region)
|
||||
markStackAndRegisters(gch)
|
||||
markThreadStacks(gch)
|
||||
gch.stat.maxStackCells = max(gch.stat.maxStackCells, gch.decStack.len)
|
||||
inc(gch.stat.stackScans)
|
||||
if collectZCT(gch):
|
||||
when cycleGC:
|
||||
if getOccupiedMem(gch.region) >= gch.cycleThreshold or alwaysCycleGC:
|
||||
collectCycles(gch)
|
||||
collectZCT(gch)
|
||||
discard collectZCT(gch)
|
||||
inc(gch.stat.cycleCollections)
|
||||
gch.cycleThreshold = max(InitialCycleThreshold, getOccupiedMem() *
|
||||
cycleIncrease)
|
||||
gch.stat.maxThreshold = max(gch.stat.maxThreshold, gch.cycleThreshold)
|
||||
unmarkStackAndRegisters(gch)
|
||||
sysAssert(allocInv(gch.region), "collectCT: end")
|
||||
unmarkStackAndRegisters(gch)
|
||||
sysAssert(allocInv(gch.region), "collectCT: end")
|
||||
|
||||
when withRealtime:
|
||||
let duration = getticks() - t0
|
||||
gch.stat.maxPause = max(gch.stat.maxPause, duration)
|
||||
when defined(reportMissedDeadlines):
|
||||
if gch.maxPause > 0 and duration > gch.maxPause:
|
||||
c_fprintf(c_stdout, "[GC] missed deadline: %ld\n", duration)
|
||||
|
||||
proc collectCT(gch: var TGcHeap) =
|
||||
if (gch.zct.len >= ZctThreshold or (cycleGC and
|
||||
getOccupiedMem(gch.region)>=gch.cycleThreshold) or alwaysGC) and
|
||||
gch.recGcLock == 0:
|
||||
collectCTBody(gch)
|
||||
|
||||
when withRealtime:
|
||||
proc toNano(x: int): TNanos {.inline.} =
|
||||
result = x * 1000
|
||||
|
||||
proc GC_setMaxPause*(MaxPauseInUs: int) =
|
||||
gch.maxPause = MaxPauseInUs.toNano
|
||||
|
||||
proc GC_step(gch: var TGcHeap, us: int, strongAdvice: bool) =
|
||||
acquire(gch)
|
||||
var oldThreshold = gch.cycleThreshold
|
||||
# disable cycle collection:
|
||||
gch.cycleThreshold = high(gch.cycleThreshold)-1
|
||||
gch.maxPause = us.toNano
|
||||
if strongAdvice:
|
||||
if gch.recGcLock == 0: collectCTBody(gch)
|
||||
else:
|
||||
collectCT(gch)
|
||||
gch.cycleThreshold = oldThreshold
|
||||
release(gch)
|
||||
|
||||
proc GC_step*(us: int, strongAdvice = false) = GC_step(gch, us, strongAdvice)
|
||||
|
||||
when not defined(useNimRtl):
|
||||
proc GC_disable() =
|
||||
@@ -858,6 +914,7 @@ when not defined(useNimRtl):
|
||||
"[GC] max threshold: " & $gch.stat.maxThreshold & "\n" &
|
||||
"[GC] zct capacity: " & $gch.zct.cap & "\n" &
|
||||
"[GC] max cycle table size: " & $gch.stat.cycleTableSize & "\n" &
|
||||
"[GC] max stack size: " & $gch.stat.maxStackSize
|
||||
"[GC] max stack size: " & $gch.stat.maxStackSize & "\n" &
|
||||
"[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
|
||||
when traceGC: writeLeakage()
|
||||
GC_enable()
|
||||
|
||||
92
lib/system/timers.nim
Normal file
92
lib/system/timers.nim
Normal file
@@ -0,0 +1,92 @@
|
||||
#
|
||||
#
|
||||
# Nimrod's Runtime Library
|
||||
# (c) Copyright 2012 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## Timer support for the realtime GC. Based on
|
||||
## `<https://github.com/jckarter/clay/blob/master/compiler/src/hirestimer.cpp>`_
|
||||
|
||||
type
|
||||
TTicks = distinct int64
|
||||
TNanos = int64
|
||||
|
||||
when defined(windows):
|
||||
|
||||
proc QueryPerformanceCounter(res: var TTicks) {.
|
||||
importc: "QueryPerformanceCounter", stdcall, dynlib: "kernel32".}
|
||||
proc QueryPerformanceFrequency(res: var int64) {.
|
||||
importc: "QueryPerformanceFrequency", stdcall, dynlib: "kernel32".}
|
||||
|
||||
proc getTicks(): TTicks {.inline.} =
|
||||
QueryPerformanceCounter(result)
|
||||
|
||||
proc `-`(a, b: TTicks): TNanos =
|
||||
var frequency: int64
|
||||
QueryPerformanceFrequency(frequency)
|
||||
var performanceCounterRate = 1000000000.0 / toFloat(frequency.int)
|
||||
|
||||
result = ((a.int64 - b.int64).int.toFloat * performanceCounterRate).TNanos
|
||||
|
||||
elif defined(macosx):
|
||||
type
|
||||
TMachTimebaseInfoData {.pure, final,
|
||||
importc: "mach_timebase_info_data_t",
|
||||
header: "<mach/mach_time.h>".} = object
|
||||
numer, denom: int32
|
||||
|
||||
proc mach_absolute_time(): int64 {.importc, header: "<mach/mach.h>".}
|
||||
proc mach_timebase_info(info: var TMachTimebaseInfoData) {.importc,
|
||||
header: "<mach/mach_time.h>".}
|
||||
|
||||
proc getTicks(): TTicks {.inline.} =
|
||||
result = TTicks(mach_absolute_time())
|
||||
|
||||
proc `-`(a, b: TTicks): TNanos =
|
||||
var timeBaseInfo: TMachTimebaseInfoData
|
||||
mach_timebase_info(timeBaseInfo)
|
||||
result = (a.int64 - b.int64) * timeBaseInfo.numer div timeBaseInfo.denom
|
||||
|
||||
elif defined(posixRealtime):
|
||||
type
|
||||
TClockid {.importc: "clockid_t", header: "<time.h>", final.} = object
|
||||
|
||||
TTimeSpec {.importc: "struct timespec", header: "<time.h>",
|
||||
final, pure.} = object ## struct timespec
|
||||
tv_sec: int ## Seconds.
|
||||
tv_nsec: int ## Nanoseconds.
|
||||
|
||||
var
|
||||
CLOCK_REALTIME {.importc: "CLOCK_REALTIME", header: "<time.h>".}: TClockid
|
||||
|
||||
proc clock_gettime(clkId: TClockid, tp: var TTimespec) {.
|
||||
importc: "clock_gettime", header: "<time.h>".}
|
||||
|
||||
proc getTicks(): TTicks =
|
||||
var t: TTimespec
|
||||
clock_gettime(CLOCK_REALTIME, t)
|
||||
result = TTicks(int64(t.tv_sec) * 1000000000'i64 + int64(t.tv_nsec))
|
||||
|
||||
proc `-`(a, b: TTicks): TNanos {.borrow.}
|
||||
|
||||
else:
|
||||
# fallback Posix implementation:
|
||||
type
|
||||
Ttimeval {.importc: "struct timeval", header: "<sys/select.h>",
|
||||
final, pure.} = object ## struct timeval
|
||||
tv_sec: int ## Seconds.
|
||||
tv_usec: int ## Microseconds.
|
||||
|
||||
proc posix_gettimeofday(tp: var Ttimeval, unused: pointer = nil) {.
|
||||
importc: "gettimeofday", header: "<sys/time.h>".}
|
||||
|
||||
proc getTicks(): TTicks =
|
||||
var t: Ttimeval
|
||||
posix_gettimeofday(t)
|
||||
result = TTicks(int64(t.tv_sec) * 1000_000_000'i64 +
|
||||
int64(t.tv_usec) * 1000'i64)
|
||||
|
||||
proc `-`(a, b: TTicks): TNanos {.borrow.}
|
||||
@@ -161,5 +161,8 @@ proc main() =
|
||||
var elapsed = epochTime() - t
|
||||
PrintDiagnostics()
|
||||
echo("Completed in " & $elapsed & "ms. Success!")
|
||||
|
||||
when defined(GC_setMaxPause):
|
||||
GC_setMaxPause 2_000
|
||||
|
||||
main()
|
||||
|
||||
@@ -2,6 +2,9 @@ discard """
|
||||
outputsub: "no leak: "
|
||||
"""
|
||||
|
||||
when defined(GC_setMaxPause):
|
||||
GC_setMaxPause 2_000
|
||||
|
||||
type
|
||||
TTestObj = object of TObject
|
||||
x: string
|
||||
|
||||
@@ -2,6 +2,9 @@ discard """
|
||||
outputsub: "no leak: "
|
||||
"""
|
||||
|
||||
when defined(GC_setMaxPause):
|
||||
GC_setMaxPause 2_000
|
||||
|
||||
type
|
||||
TTestObj = object of TObject
|
||||
x: string
|
||||
|
||||
@@ -2,6 +2,9 @@ discard """
|
||||
outputsub: "no leak: "
|
||||
"""
|
||||
|
||||
when defined(GC_setMaxPause):
|
||||
GC_setMaxPause 2_000
|
||||
|
||||
type
|
||||
TSomething = object
|
||||
s: string
|
||||
|
||||
@@ -122,6 +122,8 @@ proc runGcTests(r: var TResults, options: string) =
|
||||
template test(filename: expr): stmt =
|
||||
runSingleTest(r, "tests/gc" / filename, options)
|
||||
runSingleTest(r, "tests/gc" / filename, options & " -d:release")
|
||||
runSingleTest(r, "tests/gc" / filename, options &
|
||||
" -d:release -d:useRealtimeGC")
|
||||
|
||||
test "gcbench"
|
||||
test "gcleak"
|
||||
|
||||
3
todo.txt
3
todo.txt
@@ -1,7 +1,8 @@
|
||||
version 0.9.0
|
||||
=============
|
||||
|
||||
- make GC realtime capable: GC_step(ms: int)
|
||||
- complete GC's documentation
|
||||
- make ``cookies`` part of the stdlib's documentation
|
||||
- make templates hygienic by default
|
||||
- ``=`` should be overloadable; requires specialization for ``=``
|
||||
- fix remaining generics bugs
|
||||
|
||||
@@ -43,7 +43,7 @@ Nimrod is efficient
|
||||
* Native code generation (currently via compilation to C), not dependent on a
|
||||
virtual machine: **Nimrod produces small executables without dependencies
|
||||
for easy redistribution.**
|
||||
* A fast **non-tracing** garbage collector that should be well suited for soft
|
||||
* A fast **non-tracing** garbage collector that supports soft
|
||||
real-time systems (like games).
|
||||
* System programming features: Ability to manage your own memory and access the
|
||||
hardware directly. Pointers to garbage collected memory are distinguished
|
||||
|
||||
@@ -35,6 +35,8 @@ Library Additions
|
||||
- Added a wrapper for ``libsvm``.
|
||||
- Added a wrapper for ``mongodb``.
|
||||
- Added ``terminal.isatty``.
|
||||
- The GC supports (soft) realtime systems via ``GC_setMaxPause``
|
||||
and ``GC_step`` procs.
|
||||
|
||||
|
||||
Changes affecting backwards compatibility
|
||||
|
||||
@@ -23,7 +23,7 @@ file: ticker
|
||||
[Documentation]
|
||||
doc: "endb;intern;apis;lib;manual;tut1;tut2;nimrodc;overview;filters"
|
||||
doc: "tools;c2nim;niminst;nimgrep"
|
||||
pdf: "manual;lib;tut1;tut2;nimrodc;c2nim;niminst"
|
||||
pdf: "manual;lib;tut1;tut2;nimrodc;c2nim;niminst;gc"
|
||||
srcdoc: "core/macros;pure/marshal;core/typeinfo"
|
||||
srcdoc: "impure/graphics;impure/re;pure/sockets"
|
||||
srcdoc: "system.nim;system/threads.nim;system/channels.nim"
|
||||
|
||||
Reference in New Issue
Block a user