implemented a stack trace profiler

This commit is contained in:
Araq
2012-09-15 00:21:10 +02:00
parent 4a435a8fb4
commit 214c7a2ebd
11 changed files with 241 additions and 65 deletions

View File

@@ -11,6 +11,7 @@
# TODO:
# - eliminate "used" field
# - make searching for block O(1)
{.push profiler:off.}
# ------------ platform specific chunk allocation code -----------------------
@@ -794,3 +795,4 @@ template InstantiateForRegion(allocator: expr) =
else:
result = realloc(p, newsize)
{.pop.}

View File

@@ -7,7 +7,6 @@
# distribution, for details about the copyright.
#
# Garbage Collector
#
# The basic algorithm is *Deferred Reference Counting* with cycle detection.
@@ -17,6 +16,7 @@
# Special care has been taken to avoid recursion as far as possible to avoid
# stack overflows when traversing deep datastructures. It is well-suited
# for soft real time applications (like games).
{.push profiler:off.}
const
CycleIncrease = 2 # is a multiplicative increase
@@ -26,7 +26,7 @@ const
# this seems to be a good value
withRealTime = defined(useRealtimeGC)
when withRealTime:
when withRealTime and not defined(getTicks):
include "system/timers"
const
@@ -426,6 +426,8 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
result = cellToUsr(res)
sysAssert(allocInv(gch.region), "rawNewObj end")
{.pop.}
proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
result = rawNewObj(typ, size, gch)
zeroMem(result, size)
@@ -514,6 +516,8 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
result = growObj(old, newsize, gch)
{.push profiler:off.}
# ---------------- cycle collector -------------------------------------------
proc doOperation(p: pointer, op: TWalkOp) =
@@ -917,3 +921,5 @@ when not defined(useNimRtl):
"[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
when traceGC: writeLeakage()
GC_enable()
{.pop.}

View File

@@ -8,54 +8,100 @@
#
# This file implements the Nimrod profiler. The profiler needs support by the
# code generator.
# code generator. The idea is to inject the instruction stream
# with 'nimProfile()' calls. These calls are injected at every loop end
# (except perhaps loops that have no side-effects). At every Nth call a
# stack trace is taken. A stack trace is a list of cstrings. We have a count
# table of those.
#
# The nice thing about this profiler is that it's completely time independent!
{.push profiler: off.}
when not defined(getTicks):
include "system/timers"
const
# Number of slots in a TStackTrace.
MaxTraceLen = 20 # tracking the last 20 calls is enough
type
# One record of the per-proc table: a proc name and the total accumulated
# for it (sortProfile orders records by `total`, writeProfile prints both).
TProfileData {.compilerproc, final.} = object
procname: cstring
total: float
# Fixed-size buffer of MaxTraceLen cstring entries holding one captured
# stack trace.
TStackTrace* = array [0..MaxTraceLen-1, cstring]
# Signature of the user-supplied profiler callback; it receives one captured
# stack trace per invocation.
TProfilerHook* = proc (st: TStackTrace) {.nimcall.}
proc captureStackTrace(f: PFrame, st: var TStackTrace) =
  ## Fills `st` with proc names taken from the frame chain that starts at `f`
  ## and is linked through `prev`.
  ##
  ## The leading slots receive the frames nearest to `f`. If the chain is
  ## longer than that leading part, a "..." entry is written and the
  ## remaining slots are filled from frames near the far end of the chain.
  ## Slots that are not written keep whatever `st` held on entry.
  const
    firstCalls = 5
  # Last index the leading part may use; the slots behind it are kept for
  # the "..." marker and the frames from the far end of the chain.
  let headLimit = high(st) - (firstCalls - 1)
  var
    frame = f
    slot = 0
    depth = 0
  # Leading part: copy frames starting at `f` until the chain or the
  # reserved head space runs out.
  while frame != nil and slot <= headLimit:
    st[slot] = frame.procname
    frame = frame.prev
    inc(slot)
    inc(depth)
  # `tail` remembers the first frame that did not fit into the leading part.
  var tail = frame
  # Walk the rest of the chain just to learn its total length.
  while frame != nil:
    frame = frame.prev
    inc(depth)
  # Advance `tail` past the frames that get elided.
  var toSkip = depth - slot - (firstCalls - 1)
  while toSkip > 0 and tail != nil:
    tail = tail.prev
    dec(toSkip)
  # Mark the gap whenever the chain was longer than the leading part.
  if depth != slot:
    st[slot] = "..."
    inc(slot)
  # Trailing part: copy from `tail` onwards while slots remain.
  while tail != nil and slot <= high(st):
    st[slot] = tail.procname
    tail = tail.prev
    inc(slot)
var
# Fixed table of 64*1024 TProfileData records; writeProfile treats the first
# entry whose procname is nil as the end of the used part.
profileData {.compilerproc.}: array [0..64*1024-1, TProfileData]
profilerHook*: TProfilerHook
## set this variable to provide a procedure that implements a profiler in
## user space. See the `nimprof` module for a reference implementation.
# Value that nimProfile reloads into gTicker after the countdown reached zero,
# i.e. the number of nimProfile calls between two clock checks.
SamplingInterval = 50_000
# set this to change the default sampling interval
# Countdown decremented on every nimProfile call.
gTicker = SamplingInterval
# nimProfile runs the hook only when more than this much time has elapsed
# since t0; changed through setProfilingInterval.
interval: TNanos = 5_000_000 # 5ms
proc sortProfile(a: var array[0..64*1024-1, TProfileData], N: int) =
  ## Sorts the first `N` records of `a` in place by `total`, largest first,
  ## using shellsort with the gap sequence 1, 4, 13, 40, ...
  # Find the first gap of the sequence that exceeds N.
  var gap = 4
  while gap <= N:
    gap = 3 * gap + 1
  # Step back down the sequence; the final pass runs with gap == 1.
  while gap > 1:
    gap = gap div 3
    for idx in gap .. N - 1:
      let held = a[idx]
      var slot = idx
      # Shift records whose total is not larger than the held one towards
      # the back, one gap at a time.
      while slot >= gap and a[slot - gap].total <= held.total:
        a[slot] = a[slot - gap]
        dec(slot, gap)
      a[slot] = held
proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
  ## Captures the stack trace that starts at `framePtr` and hands it to
  ## `hook`.
  # Deliberately not inlined: the trace buffer is a local of this proc, so
  # 'nimProfile' does not have to reserve stack space for it in the common
  # case where no trace is taken.
  var trace: TStackTrace
  captureStackTrace(framePtr, trace)
  hook(trace)
proc writeProfile() {.noconv.} =
  ## Writes the contents of `profileData`, sorted by `sortProfile`, to a new
  ## text file named "profile_results<j>.txt".
  ##
  ## `j` starts at 1 and is incremented for as long as a file of that name
  ## can already be opened, so earlier result files are not overwritten.
  ## Nothing is written if the output file cannot be opened for writing.
  const filename = "profile_results"
  var f: TFile
  var j = 1
  # Probe for the first index whose file cannot be opened yet.
  while open(f, filename & $j & ".txt"):
    close(f)
    inc(j)
  if open(f, filename & $j & ".txt", fmWrite):
    var N = 0
    # we have to compute the actual length of the array; the bound check
    # keeps the scan inside the table when every slot is in use and no nil
    # terminator exists:
    while N <= high(profileData) and profileData[N].procname != nil: inc(N)
    sortProfile(profileData, N)
    writeln(f, "total running time of each proc" &
               " (interpret these numbers relatively)")
    # Sorting only permutes the first N records, so exactly those are printed.
    for i in 0 .. N - 1:
      write(f, profileData[i].procname)
      write(f, ": ")
      writeln(f, profileData[i].total)
    close(f)
proc setProfilingInterval*(intervalInUs: int): TNanos =
  ## set this to change the sampling interval. Default value is 5ms.
  ##
  ## `intervalInUs` is scaled by 1000 before it is stored in `interval`
  ## (microseconds to nanoseconds, going by the names involved).
  ##
  ## NOTE(review): `result` is never assigned, so callers always receive the
  ## default value of `TNanos`; confirm whether a return value is intended.
  # Widen to TNanos before multiplying so the product is not computed in a
  # 32-bit `int` on 32-bit targets, where it overflows above about 2.1 s.
  interval = TNanos(intervalInUs) * 1000
addQuitProc(writeProfile)
# Tick stamp of the previous sample; nimProfile compares the current ticks
# against it and refreshes it after the hook branch has run.
var t0: TTicks
# Number of nimProfile clock checks at which more than `interval` had elapsed.
var usefulCall* = 0
# Number of nimProfile clock checks at which `interval` had not yet elapsed.
var uselessCall* = 0
proc nimProfile() =
  ## This is invoked by the compiler in every loop and on every proc entry!
  ##
  ## Each call decrements `gTicker`. Only when it reaches zero is the clock
  ## read; the profiler hook then runs if more than `interval` has elapsed
  ## since `t0`. `usefulCall` / `uselessCall` count how often the clock check
  ## did or did not lead to a sample.
  dec gTicker
  if gTicker == 0:
    # Park the ticker at -1: any nimProfile call made while this branch runs
    # decrements it further below zero and so cannot enter the branch again.
    gTicker = -1
    let t1 = getticks()
    # Compare the reading already taken instead of reading the clock twice.
    if t1 - t0 > interval:
      inc usefulCall
      if not isNil(profilerHook):
        # disable recursive calls: XXX should use try..finally,
        # but that's too expensive!
        let oldHook = profilerHook
        profilerHook = nil
        callProfilerHook(oldHook)
        profilerHook = oldHook
      # Restart the interval after the hook, so the time spent inside the
      # hook does not count towards the next sample.
      t0 = getticks()
    else:
      inc uselessCall
    gTicker = SamplingInterval
proc stopProfiling*() =
  ## Turns profiling off by clearing `profilerHook`; meant to be called by
  ## the client profiler.
  profilerHook = nil
{.pop.}