mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-29 01:14:41 +00:00
profiler improvements
This commit is contained in:
@@ -46,6 +46,7 @@ path="$lib/pure/unidecode"
|
||||
overflow_checks:off
|
||||
assertions:off
|
||||
stacktrace:off
|
||||
linetrace:off
|
||||
debugger:off
|
||||
line_dir:off
|
||||
dead_code_elim:on
|
||||
|
||||
@@ -21,8 +21,7 @@ your own profiler.
|
||||
The setting ``--profiler:on`` defines the conditional symbol ``profiler``.
|
||||
|
||||
After your program has finished the profiler will create a
|
||||
file ``profile_resultsX.txt``(where ``X`` is a previously unused number)
|
||||
containing the profiling results.
|
||||
file ``profile_results.txt`` containing the profiling results.
|
||||
|
||||
Since the profiler works by examining stack traces, it's essential that
|
||||
the option ``--stackTrace:on`` is active! Unfortunately this means that a
|
||||
@@ -35,7 +34,7 @@ Example results file
|
||||
The results file lists stack traces ordered by significance.
|
||||
|
||||
The following example file has been generated by profiling the Nimrod compiler
|
||||
itself: It shows that in total 4.1% of the runtime has been spent
|
||||
itself: It shows that in total 5.4% of the runtime has been spent
|
||||
in ``crcFromRope`` or its children.
|
||||
|
||||
In general the stack traces show you immediately where the problem is because
|
||||
|
||||
@@ -12,19 +12,27 @@
|
||||
## report at program exit.
|
||||
|
||||
when not defined(profiler):
|
||||
{.error: "Profiling support is turned off!".}
|
||||
{.warning: "Profiling support is turned off!".}
|
||||
|
||||
# We don't want to profile the profiling code ...
|
||||
{.push profiler: off.}
|
||||
|
||||
import hashes, algorithm, strutils
|
||||
import hashes, algorithm, strutils, tables, sets
|
||||
include "system/timers"
|
||||
|
||||
const
|
||||
withThreads = compileOption("threads")
|
||||
tickCountCorrection = 50_000
|
||||
|
||||
when not defined(system.TStackTrace):
|
||||
type TStackTrace = array [0..20, cstring]
|
||||
|
||||
# We use a simple hash table of bounded size to keep track of the stack traces:
|
||||
type
|
||||
TProfileEntry = object
|
||||
total: int
|
||||
st: system.TStackTrace
|
||||
TProfileData = array [0..64*1024-1, ref TProfileEntry]
|
||||
st: TStackTrace
|
||||
TProfileData = array [0..64*1024-1, ptr TProfileEntry]
|
||||
|
||||
proc `==`(a, b: TStackTrace): bool =
|
||||
for i in 0 .. high(a):
|
||||
@@ -32,14 +40,32 @@ proc `==`(a, b: TStackTrace): bool =
|
||||
result = true
|
||||
|
||||
# XXX extract this data structure; it is generally useful ;-)
|
||||
# However a chain length of over 3000 is suspicious...
|
||||
var
|
||||
profileData: TProfileData
|
||||
emptySlots = profileData.len * 3 div 2
|
||||
maxChainLen = 0
|
||||
totalCalls = 0
|
||||
|
||||
proc hook(st: system.TStackTrace) {.nimcall.} =
|
||||
var
|
||||
interval: TNanos = 1_000_000 - tickCountCorrection # 5ms
|
||||
|
||||
proc setSamplingFrequency*(intervalInUs: int) =
|
||||
## set this to change the sampling frequency. Default value is 5ms.
|
||||
## Set it to 0 to disable time based profiling; it uses an imprecise
|
||||
## instruction count measure instead then.
|
||||
if intervalInUs <= 0: interval = 0
|
||||
else: interval = intervalInUs * 1000 - tickCountCorrection
|
||||
|
||||
when withThreads:
|
||||
var
|
||||
profilingLock: TLock
|
||||
|
||||
InitLock profilingLock
|
||||
|
||||
proc hookAux(st: TStackTrace) =
|
||||
# this is quite performance sensitive!
|
||||
when withThreads: Acquire profilingLock
|
||||
inc totalCalls
|
||||
var last = high(st)
|
||||
while last > 0 and isNil(st[last]): dec last
|
||||
@@ -65,64 +91,93 @@ proc hook(st: system.TStackTrace) {.nimcall.} =
|
||||
var chain = 0
|
||||
while true:
|
||||
if profileData[h] == nil:
|
||||
GC_disable()
|
||||
new profileData[h]
|
||||
GC_enable()
|
||||
profileData[h] = cast[ptr TProfileEntry](
|
||||
allocShared0(sizeof(TProfileEntry)))
|
||||
profileData[h].total = 1
|
||||
profileData[h].st = st
|
||||
dec emptySlots
|
||||
maxChainLen = max(maxChainLen, chain)
|
||||
return
|
||||
break
|
||||
if profileData[h].st == st:
|
||||
# wow, same entry found:
|
||||
inc profileData[h].total
|
||||
maxChainLen = max(maxChainLen, chain)
|
||||
return
|
||||
break
|
||||
h = ((5 * h) + 1) and high(profileData)
|
||||
inc chain
|
||||
maxChainLen = max(maxChainLen, chain)
|
||||
when withThreads: Release profilingLock
|
||||
|
||||
proc getTotal(x: ref TProfileEntry): int =
|
||||
var
|
||||
t0 {.threadvar.}: TTicks
|
||||
|
||||
proc hook(st: TStackTrace) {.nimcall.} =
|
||||
if interval == 0:
|
||||
hookAux(st)
|
||||
elif getticks() - t0 > interval:
|
||||
hookAux(st)
|
||||
t0 = getticks()
|
||||
|
||||
proc getTotal(x: ptr TProfileEntry): int =
|
||||
result = if isNil(x): 0 else: x.total
|
||||
|
||||
proc cmpEntries(a, b: ref TProfileEntry): int =
|
||||
proc cmpEntries(a, b: ptr TProfileEntry): int =
|
||||
result = b.getTotal - a.getTotal
|
||||
|
||||
proc `//`(a, b: int): string =
|
||||
result = format("$1/$2 = $3%", a, b, formatFloat(a / b * 100.0, ffDefault, 2))
|
||||
|
||||
proc writeProfile() {.noconv.} =
|
||||
stopProfiling()
|
||||
const filename = "profile_results"
|
||||
when defined(system.TStackTrace):
|
||||
system.profilerHook = nil
|
||||
const filename = "profile_results.txt"
|
||||
echo "writing " & filename & "..."
|
||||
var f: TFile
|
||||
var j = 1
|
||||
while open(f, filename & $j & ".txt"):
|
||||
close(f)
|
||||
inc(j)
|
||||
let filen = filename & $j & ".txt"
|
||||
echo "writing ", filen, "..."
|
||||
if open(f, filen, fmWrite):
|
||||
if open(f, filename, fmWrite):
|
||||
sort(profileData, cmpEntries)
|
||||
writeln(f, "total executions of each stack trace:")
|
||||
var entries = 0
|
||||
for i in 0..high(profileData):
|
||||
if profileData[i] != nil: inc entries
|
||||
|
||||
var perProc = initCountTable[string]()
|
||||
for i in 0..entries-1:
|
||||
var dups = initSet[string]()
|
||||
for ii in 0..high(TStackTrace):
|
||||
let procname = profileData[i].st[ii]
|
||||
if isNil(procname): break
|
||||
let p = $procname
|
||||
if not containsOrIncl(dups, p):
|
||||
perProc.inc(p, profileData[i].total)
|
||||
|
||||
var sum = 0
|
||||
# only write the first 100 entries:
|
||||
for i in 0..min(100, high(profileData)):
|
||||
if profileData[i] != nil and profileData[i].total > 1:
|
||||
for i in 0..min(100, entries-1):
|
||||
if profileData[i].total > 1:
|
||||
inc sum, profileData[i].total
|
||||
writeln(f, "Entry: ", i, "/", entries, " Calls: ",
|
||||
writeln(f, "Entry: ", i+1, "/", entries, " Calls: ",
|
||||
profileData[i].total // totalCalls, " [sum: ", sum, "; ",
|
||||
sum // totalCalls, "]")
|
||||
for ii in 0..high(TStackTrace):
|
||||
let procname = profileData[i].st[ii]
|
||||
if isNil(procname): break
|
||||
writeln(f, " ", procname)
|
||||
writeln(f, " ", procname, " ", perProc[$procname] // totalCalls)
|
||||
close(f)
|
||||
echo "... done"
|
||||
echo "... done"
|
||||
else:
|
||||
echo "... failed"
|
||||
|
||||
system.profilerHook = hook
|
||||
addQuitProc(writeProfile)
|
||||
var
|
||||
disabled: int
|
||||
|
||||
proc disableProfiling*() =
|
||||
atomicDec disabled
|
||||
system.profilerHook = nil
|
||||
|
||||
proc enableProfiling*() =
|
||||
if atomicInc(disabled) >= 0:
|
||||
system.profilerHook = hook
|
||||
|
||||
when defined(system.TStackTrace):
|
||||
system.profilerHook = hook
|
||||
addQuitProc(writeProfile)
|
||||
|
||||
{.pop.}
|
||||
|
||||
@@ -1629,7 +1629,7 @@ type
|
||||
len: int # length of slots (when not debugging always zero)
|
||||
|
||||
when not defined(ECMAScript):
|
||||
{.push stack_trace:off.}
|
||||
{.push stack_trace:off, profiler:off.}
|
||||
proc add*(x: var string, y: cstring) {.noStackFrame.} =
|
||||
var i = 0
|
||||
while y[i] != '\0':
|
||||
@@ -1674,7 +1674,7 @@ template newException*(exceptn: typeDesc, message: string): expr =
|
||||
e
|
||||
|
||||
when not defined(EcmaScript) and not defined(NimrodVM):
|
||||
{.push stack_trace: off.}
|
||||
{.push stack_trace: off, profiler:off.}
|
||||
|
||||
proc initGC()
|
||||
when not defined(boehmgc) and not defined(useMalloc):
|
||||
@@ -1958,7 +1958,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
|
||||
## gets the stack trace associated with `e`, which is the stack that
|
||||
## lead to the ``raise`` statement. This is only works for debug builds.
|
||||
|
||||
{.push stack_trace: off.}
|
||||
{.push stack_trace: off, profiler:off.}
|
||||
when hostOS == "standalone":
|
||||
include "system/embedded"
|
||||
else:
|
||||
@@ -1997,7 +1997,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
|
||||
result = n.sons[n.len]
|
||||
|
||||
include "system/mmdisp"
|
||||
{.push stack_trace: off.}
|
||||
{.push stack_trace: off, profiler:off.}
|
||||
when hostOS != "standalone": include "system/sysstr"
|
||||
{.pop.}
|
||||
|
||||
@@ -2032,7 +2032,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
|
||||
var e = getCurrentException()
|
||||
return if e == nil: "" else: e.msg
|
||||
|
||||
{.push stack_trace: off.}
|
||||
{.push stack_trace: off, profiler:off.}
|
||||
when defined(endb):
|
||||
include "system/debugger"
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ var
|
||||
# list of exception handlers
|
||||
# a global variable for the root of all try blocks
|
||||
|
||||
{.push stacktrace: off.}
|
||||
{.push stacktrace: off, profiler:off.}
|
||||
proc nimBoolToStr(x: bool): string {.compilerproc.} =
|
||||
if x: result = "true"
|
||||
else: result = "false"
|
||||
|
||||
@@ -78,11 +78,11 @@ var
|
||||
when not defined(useNimRtl):
|
||||
InstantiateForRegion(gch.region)
|
||||
|
||||
proc acquire(gch: var TGcHeap) {.inline.} =
|
||||
template acquire(gch: TGcHeap) =
|
||||
when hasThreadSupport and hasSharedHeap:
|
||||
AcquireSys(HeapLock)
|
||||
|
||||
proc release(gch: var TGcHeap) {.inline.} =
|
||||
template release(gch: TGcHeap) =
|
||||
when hasThreadSupport and hasSharedHeap:
|
||||
releaseSys(HeapLock)
|
||||
|
||||
|
||||
@@ -18,9 +18,6 @@
|
||||
|
||||
{.push profiler: off.}
|
||||
|
||||
when not defined(getTicks):
|
||||
include "system/timers"
|
||||
|
||||
const
|
||||
MaxTraceLen = 20 # tracking the last 20 calls is enough
|
||||
|
||||
@@ -36,7 +33,7 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) =
|
||||
i = 0
|
||||
total = 0
|
||||
while it != nil and i <= high(st)-(firstCalls-1):
|
||||
# the (-1) is for a nil entry that marks where the '...' should occur
|
||||
# the (-1) is for the "..." entry
|
||||
st[i] = it.procname
|
||||
inc(i)
|
||||
inc(total)
|
||||
@@ -55,14 +52,14 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) =
|
||||
inc(i)
|
||||
b = b.prev
|
||||
|
||||
const
|
||||
SamplingInterval = 50_000
|
||||
# set this to change the default sampling interval
|
||||
var
|
||||
profilerHook*: TProfilerHook
|
||||
## set this variable to provide a procedure that implements a profiler in
|
||||
## user space. See the `nimprof` module for a reference implementation.
|
||||
SamplingInterval = 50_000
|
||||
# set this to change the default sampling interval
|
||||
gTicker = SamplingInterval
|
||||
interval: TNanos = 5_000_000 # 5ms
|
||||
gTicker {.threadvar.}: int
|
||||
|
||||
proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
|
||||
# 'noinline' so that 'nimProfile' does not perform the stack allocation
|
||||
@@ -71,31 +68,18 @@ proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
|
||||
captureStackTrace(framePtr, st)
|
||||
hook(st)
|
||||
|
||||
proc setProfilingInterval*(intervalInUs: int): TNanos =
|
||||
## set this to change the sampling interval. Default value is 5ms.
|
||||
interval = intervalInUs * 1000
|
||||
|
||||
var t0: TTicks
|
||||
|
||||
proc nimProfile() =
|
||||
## This is invoked by the compiler in every loop and on every proc entry!
|
||||
dec gTicker
|
||||
if gTicker == 0:
|
||||
gTicker = -1
|
||||
let t1 = getticks()
|
||||
if getticks() - t0 > interval:
|
||||
if not isNil(profilerHook):
|
||||
# disable recursive calls: XXX should use try..finally,
|
||||
# but that's too expensive!
|
||||
let oldHook = profilerHook
|
||||
profilerHook = nil
|
||||
callProfilerHook(oldHook)
|
||||
profilerHook = oldHook
|
||||
t0 = getticks()
|
||||
if not isNil(profilerHook):
|
||||
# disable recursive calls: XXX should use try..finally,
|
||||
# but that's too expensive!
|
||||
let oldHook = profilerHook
|
||||
profilerHook = nil
|
||||
callProfilerHook(oldHook)
|
||||
profilerHook = oldHook
|
||||
gTicker = SamplingInterval
|
||||
|
||||
proc stopProfiling*() =
|
||||
## call this to stop profiling; should be called by the client profiler.
|
||||
profilerHook = nil
|
||||
dec gTicker
|
||||
|
||||
{.pop.}
|
||||
|
||||
@@ -224,7 +224,7 @@ proc ReadBytes(f: TFile, a: var openarray[int8], start, len: int): int =
|
||||
proc ReadChars(f: TFile, a: var openarray[char], start, len: int): int =
|
||||
result = readBuffer(f, addr(a[start]), len)
|
||||
|
||||
{.push stackTrace:off.}
|
||||
{.push stackTrace:off, profiler:off.}
|
||||
proc writeBytes(f: TFile, a: openarray[int8], start, len: int): int =
|
||||
var x = cast[ptr array[0..1000_000_000, int8]](a)
|
||||
result = writeBuffer(f, addr(x[start]), len)
|
||||
|
||||
Reference in New Issue
Block a user