profiler improvements

This commit is contained in:
Araq
2012-09-17 00:31:41 +02:00
parent ad6ee9e857
commit 75abf72503
8 changed files with 110 additions and 71 deletions

View File

@@ -46,6 +46,7 @@ path="$lib/pure/unidecode"
overflow_checks:off
assertions:off
stacktrace:off
linetrace:off
debugger:off
line_dir:off
dead_code_elim:on

View File

@@ -21,8 +21,7 @@ your own profiler.
The setting ``--profiler:on`` defines the conditional symbol ``profiler``.
After your program has finished the profiler will create a
file ``profile_resultsX.txt``(where ``X`` is a previously unused number)
containing the profiling results.
file ``profile_results.txt`` containing the profiling results.
Since the profiler works by examining stack traces, it's essential that
the option ``--stackTrace:on`` is active! Unfortunately this means that a
@@ -35,7 +34,7 @@ Example results file
The results file lists stack traces ordered by significance.
The following example file has been generated by profiling the Nimrod compiler
itself: It shows that in total 4.1% of the runtime has been spent
itself: It shows that in total 5.4% of the runtime has been spent
in ``crcFromRope`` or its children.
In general the stack traces show you immediately where the problem is because

View File

@@ -12,19 +12,27 @@
## report at program exit.
when not defined(profiler):
{.error: "Profiling support is turned off!".}
{.warning: "Profiling support is turned off!".}
# We don't want to profile the profiling code ...
{.push profiler: off.}
import hashes, algorithm, strutils
import hashes, algorithm, strutils, tables, sets
include "system/timers"
const
withThreads = compileOption("threads")
tickCountCorrection = 50_000
when not defined(system.TStackTrace):
type TStackTrace = array [0..20, cstring]
# We use a simple hash table of bounded size to keep track of the stack traces:
type
TProfileEntry = object
total: int
st: system.TStackTrace
TProfileData = array [0..64*1024-1, ref TProfileEntry]
st: TStackTrace
TProfileData = array [0..64*1024-1, ptr TProfileEntry]
proc `==`(a, b: TStackTrace): bool =
for i in 0 .. high(a):
@@ -32,14 +40,32 @@ proc `==`(a, b: TStackTrace): bool =
result = true
# XXX extract this data structure; it is generally useful ;-)
# However a chain length of over 3000 is suspicious...
var
profileData: TProfileData
emptySlots = profileData.len * 3 div 2
maxChainLen = 0
totalCalls = 0
proc hook(st: system.TStackTrace) {.nimcall.} =
var
interval: TNanos = 1_000_000 - tickCountCorrection # 5ms
proc setSamplingFrequency*(intervalInUs: int) =
## set this to change the sampling frequency. Default value is 5ms.
## Set it to 0 to disable time based profiling; it uses an imprecise
## instruction count measure instead then.
if intervalInUs <= 0: interval = 0
else: interval = intervalInUs * 1000 - tickCountCorrection
when withThreads:
var
profilingLock: TLock
InitLock profilingLock
proc hookAux(st: TStackTrace) =
# this is quite performance sensitive!
when withThreads: Acquire profilingLock
inc totalCalls
var last = high(st)
while last > 0 and isNil(st[last]): dec last
@@ -65,64 +91,93 @@ proc hook(st: system.TStackTrace) {.nimcall.} =
var chain = 0
while true:
if profileData[h] == nil:
GC_disable()
new profileData[h]
GC_enable()
profileData[h] = cast[ptr TProfileEntry](
allocShared0(sizeof(TProfileEntry)))
profileData[h].total = 1
profileData[h].st = st
dec emptySlots
maxChainLen = max(maxChainLen, chain)
return
break
if profileData[h].st == st:
# wow, same entry found:
inc profileData[h].total
maxChainLen = max(maxChainLen, chain)
return
break
h = ((5 * h) + 1) and high(profileData)
inc chain
maxChainLen = max(maxChainLen, chain)
when withThreads: Release profilingLock
proc getTotal(x: ref TProfileEntry): int =
var
t0 {.threadvar.}: TTicks
proc hook(st: TStackTrace) {.nimcall.} =
if interval == 0:
hookAux(st)
elif getticks() - t0 > interval:
hookAux(st)
t0 = getticks()
proc getTotal(x: ptr TProfileEntry): int =
result = if isNil(x): 0 else: x.total
proc cmpEntries(a, b: ref TProfileEntry): int =
proc cmpEntries(a, b: ptr TProfileEntry): int =
result = b.getTotal - a.getTotal
proc `//`(a, b: int): string =
result = format("$1/$2 = $3%", a, b, formatFloat(a / b * 100.0, ffDefault, 2))
proc writeProfile() {.noconv.} =
stopProfiling()
const filename = "profile_results"
when defined(system.TStackTrace):
system.profilerHook = nil
const filename = "profile_results.txt"
echo "writing " & filename & "..."
var f: TFile
var j = 1
while open(f, filename & $j & ".txt"):
close(f)
inc(j)
let filen = filename & $j & ".txt"
echo "writing ", filen, "..."
if open(f, filen, fmWrite):
if open(f, filename, fmWrite):
sort(profileData, cmpEntries)
writeln(f, "total executions of each stack trace:")
var entries = 0
for i in 0..high(profileData):
if profileData[i] != nil: inc entries
var perProc = initCountTable[string]()
for i in 0..entries-1:
var dups = initSet[string]()
for ii in 0..high(TStackTrace):
let procname = profileData[i].st[ii]
if isNil(procname): break
let p = $procname
if not containsOrIncl(dups, p):
perProc.inc(p, profileData[i].total)
var sum = 0
# only write the first 100 entries:
for i in 0..min(100, high(profileData)):
if profileData[i] != nil and profileData[i].total > 1:
for i in 0..min(100, entries-1):
if profileData[i].total > 1:
inc sum, profileData[i].total
writeln(f, "Entry: ", i, "/", entries, " Calls: ",
writeln(f, "Entry: ", i+1, "/", entries, " Calls: ",
profileData[i].total // totalCalls, " [sum: ", sum, "; ",
sum // totalCalls, "]")
for ii in 0..high(TStackTrace):
let procname = profileData[i].st[ii]
if isNil(procname): break
writeln(f, " ", procname)
writeln(f, " ", procname, " ", perProc[$procname] // totalCalls)
close(f)
echo "... done"
echo "... done"
else:
echo "... failed"
system.profilerHook = hook
addQuitProc(writeProfile)
var
disabled: int
proc disableProfiling*() =
atomicDec disabled
system.profilerHook = nil
proc enableProfiling*() =
if atomicInc(disabled) >= 0:
system.profilerHook = hook
when defined(system.TStackTrace):
system.profilerHook = hook
addQuitProc(writeProfile)
{.pop.}

View File

@@ -1629,7 +1629,7 @@ type
len: int # length of slots (when not debugging always zero)
when not defined(ECMAScript):
{.push stack_trace:off.}
{.push stack_trace:off, profiler:off.}
proc add*(x: var string, y: cstring) {.noStackFrame.} =
var i = 0
while y[i] != '\0':
@@ -1674,7 +1674,7 @@ template newException*(exceptn: typeDesc, message: string): expr =
e
when not defined(EcmaScript) and not defined(NimrodVM):
{.push stack_trace: off.}
{.push stack_trace: off, profiler:off.}
proc initGC()
when not defined(boehmgc) and not defined(useMalloc):
@@ -1958,7 +1958,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
## gets the stack trace associated with `e`, which is the stack that
## lead to the ``raise`` statement. This is only works for debug builds.
{.push stack_trace: off.}
{.push stack_trace: off, profiler:off.}
when hostOS == "standalone":
include "system/embedded"
else:
@@ -1997,7 +1997,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
result = n.sons[n.len]
include "system/mmdisp"
{.push stack_trace: off.}
{.push stack_trace: off, profiler:off.}
when hostOS != "standalone": include "system/sysstr"
{.pop.}
@@ -2032,7 +2032,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
var e = getCurrentException()
return if e == nil: "" else: e.msg
{.push stack_trace: off.}
{.push stack_trace: off, profiler:off.}
when defined(endb):
include "system/debugger"

View File

@@ -33,7 +33,7 @@ var
# list of exception handlers
# a global variable for the root of all try blocks
{.push stacktrace: off.}
{.push stacktrace: off, profiler:off.}
proc nimBoolToStr(x: bool): string {.compilerproc.} =
if x: result = "true"
else: result = "false"

View File

@@ -78,11 +78,11 @@ var
when not defined(useNimRtl):
InstantiateForRegion(gch.region)
proc acquire(gch: var TGcHeap) {.inline.} =
template acquire(gch: TGcHeap) =
when hasThreadSupport and hasSharedHeap:
AcquireSys(HeapLock)
proc release(gch: var TGcHeap) {.inline.} =
template release(gch: TGcHeap) =
when hasThreadSupport and hasSharedHeap:
releaseSys(HeapLock)

View File

@@ -18,9 +18,6 @@
{.push profiler: off.}
when not defined(getTicks):
include "system/timers"
const
MaxTraceLen = 20 # tracking the last 20 calls is enough
@@ -36,7 +33,7 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) =
i = 0
total = 0
while it != nil and i <= high(st)-(firstCalls-1):
# the (-1) is for a nil entry that marks where the '...' should occur
# the (-1) is for the "..." entry
st[i] = it.procname
inc(i)
inc(total)
@@ -55,14 +52,14 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) =
inc(i)
b = b.prev
const
SamplingInterval = 50_000
# set this to change the default sampling interval
var
profilerHook*: TProfilerHook
## set this variable to provide a procedure that implements a profiler in
## user space. See the `nimprof` module for a reference implementation.
SamplingInterval = 50_000
# set this to change the default sampling interval
gTicker = SamplingInterval
interval: TNanos = 5_000_000 # 5ms
gTicker {.threadvar.}: int
proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
# 'noinline' so that 'nimProfile' does not perform the stack allocation
@@ -71,31 +68,18 @@ proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
captureStackTrace(framePtr, st)
hook(st)
proc setProfilingInterval*(intervalInUs: int): TNanos =
## set this to change the sampling interval. Default value is 5ms.
interval = intervalInUs * 1000
var t0: TTicks
proc nimProfile() =
## This is invoked by the compiler in every loop and on every proc entry!
dec gTicker
if gTicker == 0:
gTicker = -1
let t1 = getticks()
if getticks() - t0 > interval:
if not isNil(profilerHook):
# disable recursive calls: XXX should use try..finally,
# but that's too expensive!
let oldHook = profilerHook
profilerHook = nil
callProfilerHook(oldHook)
profilerHook = oldHook
t0 = getticks()
if not isNil(profilerHook):
# disable recursive calls: XXX should use try..finally,
# but that's too expensive!
let oldHook = profilerHook
profilerHook = nil
callProfilerHook(oldHook)
profilerHook = oldHook
gTicker = SamplingInterval
proc stopProfiling*() =
## call this to stop profiling; should be called by the client profiler.
profilerHook = nil
dec gTicker
{.pop.}

View File

@@ -224,7 +224,7 @@ proc ReadBytes(f: TFile, a: var openarray[int8], start, len: int): int =
proc ReadChars(f: TFile, a: var openarray[char], start, len: int): int =
result = readBuffer(f, addr(a[start]), len)
{.push stackTrace:off.}
{.push stackTrace:off, profiler:off.}
proc writeBytes(f: TFile, a: openarray[int8], start, len: int): int =
var x = cast[ptr array[0..1000_000_000, int8]](a)
result = writeBuffer(f, addr(x[start]), len)