mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-31 18:32:11 +00:00
implemented a stack trace profiler
This commit is contained in:
@@ -265,6 +265,10 @@ proc genWhileStmt(p: BProc, t: PNode) =
|
||||
let label = assignLabel(p.blocks[p.breakIdx])
|
||||
lineF(p, cpsStmts, "if (!$1) goto $2;$n", [rdLoc(a), label])
|
||||
genStmts(p, t.sons[1])
|
||||
|
||||
if optProfiler in p.options:
|
||||
# invoke at loop body exit:
|
||||
lineCg(p, cpsStmts, "#nimProfile();$n")
|
||||
endBlock(p)
|
||||
|
||||
dec(p.withinLoop)
|
||||
|
||||
@@ -680,27 +680,14 @@ proc genProcAux(m: BModule, prc: PSym) =
|
||||
app(generatedProc, initFrame(p, procname, filename))
|
||||
else:
|
||||
app(generatedProc, p.s(cpsLocals))
|
||||
if (optProfiler in prc.options) and (gCmd != cmdCompileToLLVM):
|
||||
if gProcProfile >= 64 * 1024:
|
||||
InternalError(prc.info, "too many procedures for profiling")
|
||||
discard cgsym(m, "profileData")
|
||||
appf(p.s(cpsLocals), "\tticks NIM_profilingStart;$n")
|
||||
if prc.loc.a < 0:
|
||||
appf(m.s[cfsDebugInit], "\tprofileData[$1].procname = $2;$n", [
|
||||
toRope(gProcProfile),
|
||||
makeCString(prc.name.s)])
|
||||
prc.loc.a = gProcProfile
|
||||
inc(gProcProfile)
|
||||
prepend(p.s(cpsInit), ropef("\tNIM_profilingStart = getticks();$n"))
|
||||
if (optProfiler in prc.options) and (gCmd != cmdCompileToLLVM):
|
||||
# invoke at proc entry for recursion:
|
||||
appcg(p, cpsInit, "\t#nimProfile();$n", [])
|
||||
app(generatedProc, p.s(cpsInit))
|
||||
app(generatedProc, p.s(cpsStmts))
|
||||
if p.beforeRetNeeded: appf(generatedProc, "\tBeforeRet: ;$n")
|
||||
app(generatedProc, deinitGCFrame(p))
|
||||
if optStackTrace in prc.options: app(generatedProc, deinitFrame(p))
|
||||
if (optProfiler in prc.options) and (gCmd != cmdCompileToLLVM):
|
||||
appf(generatedProc,
|
||||
"\tprofileData[$1].total += elapsed(getticks(), NIM_profilingStart);$n",
|
||||
[toRope(prc.loc.a)])
|
||||
app(generatedProc, returnStmt)
|
||||
appf(generatedProc, "}$N")
|
||||
app(m.s[cfsProcs], generatedProc)
|
||||
|
||||
@@ -115,7 +115,6 @@ type
|
||||
var
|
||||
mainModProcs*, mainModInit*, mainDatInit*: PRope # parts of the main module
|
||||
gMapping*: PRope # the generated mapping file (if requested)
|
||||
gProcProfile*: Natural # proc profile counter
|
||||
gPendingModules*: seq[BModule] = @[] # list of modules that are not
|
||||
# finished with code generation
|
||||
gModules*: seq[BModule] = @[] # list of all compiled modules
|
||||
|
||||
@@ -20,6 +20,10 @@ import
|
||||
when hasTinyCBackend:
|
||||
import tccgen
|
||||
|
||||
when defined(profiler):
|
||||
{.hint: "Profiling support is turned on!".}
|
||||
import nimprof
|
||||
|
||||
var
|
||||
arguments: string = "" # the arguments to be passed to the program that
|
||||
# should be run
|
||||
|
||||
@@ -294,6 +294,7 @@ proc crcFromRopeAux(r: PRope, startVal: TCrc32): TCrc32 =
|
||||
result = crcFromRopeAux(r.right, result)
|
||||
|
||||
proc newCrcFromRopeAux(r: PRope, startVal: TCrc32): TCrc32 =
|
||||
# XXX profiling shows this is actually expensive
|
||||
var stack: TRopeSeq = @[r]
|
||||
result = startVal
|
||||
while len(stack) > 0:
|
||||
|
||||
125
lib/pure/nimprof.nim
Normal file
125
lib/pure/nimprof.nim
Normal file
@@ -0,0 +1,125 @@
|
||||
#
|
||||
#
|
||||
# Nimrod's Runtime Library
|
||||
# (c) Copyright 2012 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## Profiling support for Nimrod. This is an embedded profiler that requires
|
||||
## ``--profiler:on``. You only need to import this module to get a profiling
|
||||
## report at program exit.
|
||||
|
||||
when not defined(profiler):
|
||||
{.error: "Profiling support is turned off!".}
|
||||
|
||||
# We don't want to profile the profiling code ...
|
||||
{.push profiler: off.}
|
||||
|
||||
import hashes, algorithm, strutils
|
||||
|
||||
# We use a simple hash table of bounded size to keep track of the stack traces:
|
||||
type
|
||||
TProfileEntry = object
|
||||
total: int
|
||||
st: system.TStackTrace
|
||||
TProfileData = array [0..64*1024-1, ref TProfileEntry]
|
||||
|
||||
proc `==`(a, b: TStackTrace): bool =
|
||||
for i in 0 .. high(a):
|
||||
if a[i] != b[i]: return false
|
||||
result = true
|
||||
|
||||
# XXX extract this data structure; it is generally useful ;-)
|
||||
var
|
||||
profileData: TProfileData
|
||||
emptySlots = profileData.len * 3 div 2
|
||||
maxChainLen = 0
|
||||
totalCalls = 0
|
||||
|
||||
proc hook(st: system.TStackTrace) {.nimcall.} =
|
||||
# this is quite performance sensitive!
|
||||
inc totalCalls
|
||||
var last = high(st)
|
||||
while last > 0 and isNil(st[last]): dec last
|
||||
var h = hash(pointer(st[last])) and high(profileData)
|
||||
|
||||
# we use probing for maxChainLen entries and replace the encountered entry
|
||||
# with the minimal 'total' value:
|
||||
if emptySlots == 0:
|
||||
var minIdx = h
|
||||
var probes = maxChainLen
|
||||
while probes >= 0:
|
||||
if profileData[h].st == st:
|
||||
# wow, same entry found:
|
||||
inc profileData[h].total
|
||||
return
|
||||
if profileData[minIdx].total < profileData[h].total:
|
||||
minIdx = h
|
||||
h = ((5 * h) + 1) and high(profileData)
|
||||
dec probes
|
||||
profileData[minIdx].total = 1
|
||||
profileData[minIdx].st = st
|
||||
else:
|
||||
var chain = 0
|
||||
while true:
|
||||
if profileData[h] == nil:
|
||||
GC_disable()
|
||||
new profileData[h]
|
||||
GC_enable()
|
||||
profileData[h].total = 1
|
||||
profileData[h].st = st
|
||||
dec emptySlots
|
||||
maxChainLen = max(maxChainLen, chain)
|
||||
return
|
||||
if profileData[h].st == st:
|
||||
# wow, same entry found:
|
||||
inc profileData[h].total
|
||||
maxChainLen = max(maxChainLen, chain)
|
||||
return
|
||||
h = ((5 * h) + 1) and high(profileData)
|
||||
inc chain
|
||||
|
||||
proc getTotal(x: ref TProfileEntry): int =
|
||||
result = if isNil(x): 0 else: x.total
|
||||
|
||||
proc cmpEntries(a, b: ref TProfileEntry): int =
|
||||
result = b.getTotal - a.getTotal
|
||||
|
||||
proc writeProfile() {.noconv.} =
|
||||
stopProfiling()
|
||||
const filename = "profile_results"
|
||||
var f: TFile
|
||||
var j = 1
|
||||
while open(f, filename & $j & ".txt"):
|
||||
close(f)
|
||||
inc(j)
|
||||
let filen = filename & $j & ".txt"
|
||||
echo "writing ", filen, "..."
|
||||
if open(f, filen, fmWrite):
|
||||
sort(profileData, cmpEntries)
|
||||
writeln(f, "total executions of each stack trace:")
|
||||
var entries = 0
|
||||
for i in 0..high(profileData):
|
||||
if profileData[i] != nil: inc entries
|
||||
|
||||
var sum = 0
|
||||
# only write the first 100 entries:
|
||||
for i in 0..min(100, high(profileData)):
|
||||
if profileData[i] != nil and profileData[i].total > 1:
|
||||
inc sum, profileData[i].total
|
||||
writeln(f, "Entry: ", i, "/", entries, " Calls: ",
|
||||
profileData[i].total, "/", totalCalls, " [sum: ", sum, "; ",
|
||||
formatFloat(sum / totalCalls * 100.0, ffDefault, 2), "%]")
|
||||
for ii in 0..high(TStackTrace):
|
||||
let procname = profileData[i].st[ii]
|
||||
if isNil(procname): break
|
||||
writeln(f, " ", procname)
|
||||
close(f)
|
||||
echo "... done"
|
||||
|
||||
system.profilerHook = hook
|
||||
addQuitProc(writeProfile)
|
||||
|
||||
{.pop.}
|
||||
@@ -829,7 +829,8 @@ when taintMode:
|
||||
else:
|
||||
type TaintedString* = string
|
||||
|
||||
|
||||
when defined(profiler):
|
||||
proc nimProfile() {.compilerProc, noinline.}
|
||||
when hasThreadSupport:
|
||||
{.pragma: rtlThreadVar, threadvar.}
|
||||
else:
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
# TODO:
|
||||
# - eliminate "used" field
|
||||
# - make searching for block O(1)
|
||||
{.push profiler:off.}
|
||||
|
||||
# ------------ platform specific chunk allocation code -----------------------
|
||||
|
||||
@@ -794,3 +795,4 @@ template InstantiateForRegion(allocator: expr) =
|
||||
else:
|
||||
result = realloc(p, newsize)
|
||||
|
||||
{.pop.}
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
|
||||
# Garbage Collector
|
||||
#
|
||||
# The basic algorithm is *Deferrent Reference Counting* with cycle detection.
|
||||
@@ -17,6 +16,7 @@
|
||||
# Special care has been taken to avoid recursion as far as possible to avoid
|
||||
# stack overflows when traversing deep datastructures. It is well-suited
|
||||
# for soft real time applications (like games).
|
||||
{.push profiler:off.}
|
||||
|
||||
const
|
||||
CycleIncrease = 2 # is a multiplicative increase
|
||||
@@ -26,7 +26,7 @@ const
|
||||
# this seems to be a good value
|
||||
withRealTime = defined(useRealtimeGC)
|
||||
|
||||
when withRealTime:
|
||||
when withRealTime and not defined(getTicks):
|
||||
include "system/timers"
|
||||
|
||||
const
|
||||
@@ -426,6 +426,8 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
|
||||
result = cellToUsr(res)
|
||||
sysAssert(allocInv(gch.region), "rawNewObj end")
|
||||
|
||||
{.pop.}
|
||||
|
||||
proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
|
||||
result = rawNewObj(typ, size, gch)
|
||||
zeroMem(result, size)
|
||||
@@ -514,6 +516,8 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
|
||||
proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
|
||||
result = growObj(old, newsize, gch)
|
||||
|
||||
{.push profiler:off.}
|
||||
|
||||
# ---------------- cycle collector -------------------------------------------
|
||||
|
||||
proc doOperation(p: pointer, op: TWalkOp) =
|
||||
@@ -917,3 +921,5 @@ when not defined(useNimRtl):
|
||||
"[GC] max pause time [ms]: " & $(gch.stat.maxPause div 1000_000)
|
||||
when traceGC: writeLeakage()
|
||||
GC_enable()
|
||||
|
||||
{.pop.}
|
||||
|
||||
@@ -8,54 +8,100 @@
|
||||
#
|
||||
|
||||
# This file implements the Nimrod profiler. The profiler needs support by the
|
||||
# code generator.
|
||||
# code generator. The idea is to inject the instruction stream
|
||||
# with 'nimProfile()' calls. These calls are injected at every loop end
|
||||
# (except perhaps loops that have no side-effects). At every Nth call a
|
||||
# stack trace is taken. A stack tace is a list of cstrings. We have a count
|
||||
# table of those.
|
||||
#
|
||||
# The nice thing about this profiler is that it's completely time independent!
|
||||
|
||||
{.push profiler: off.}
|
||||
|
||||
when not defined(getTicks):
|
||||
include "system/timers"
|
||||
|
||||
const
|
||||
MaxTraceLen = 20 # tracking the last 20 calls is enough
|
||||
|
||||
type
|
||||
TProfileData {.compilerproc, final.} = object
|
||||
procname: cstring
|
||||
total: float
|
||||
TStackTrace* = array [0..MaxTraceLen-1, cstring]
|
||||
TProfilerHook* = proc (st: TStackTrace) {.nimcall.}
|
||||
|
||||
proc captureStackTrace(f: PFrame, st: var TStackTrace) =
|
||||
const
|
||||
firstCalls = 5
|
||||
var
|
||||
it = f
|
||||
i = 0
|
||||
total = 0
|
||||
while it != nil and i <= high(st)-(firstCalls-1):
|
||||
# the (-1) is for a nil entry that marks where the '...' should occur
|
||||
st[i] = it.procname
|
||||
inc(i)
|
||||
inc(total)
|
||||
it = it.prev
|
||||
var b = it
|
||||
while it != nil:
|
||||
inc(total)
|
||||
it = it.prev
|
||||
for j in 1..total-i-(firstCalls-1):
|
||||
if b != nil: b = b.prev
|
||||
if total != i:
|
||||
st[i] = "..."
|
||||
inc(i)
|
||||
while b != nil and i <= high(st):
|
||||
st[i] = b.procname
|
||||
inc(i)
|
||||
b = b.prev
|
||||
|
||||
var
|
||||
profileData {.compilerproc.}: array [0..64*1024-1, TProfileData]
|
||||
profilerHook*: TProfilerHook
|
||||
## set this variable to provide a procedure that implements a profiler in
|
||||
## user space. See the `nimprof` module for a reference implementation.
|
||||
SamplingInterval = 50_000
|
||||
# set this to change the default sampling interval
|
||||
gTicker = SamplingInterval
|
||||
interval: TNanos = 5_000_000 # 5ms
|
||||
|
||||
proc sortProfile(a: var array[0..64*1024-1, TProfileData], N: int) =
|
||||
# we use shellsort here; fast enough and simple
|
||||
var h = 1
|
||||
while true:
|
||||
h = 3 * h + 1
|
||||
if h > N: break
|
||||
while true:
|
||||
h = h div 3
|
||||
for i in countup(h, N - 1):
|
||||
var v = a[i]
|
||||
var j = i
|
||||
while a[j-h].total <= v.total:
|
||||
a[j] = a[j-h]
|
||||
j = j-h
|
||||
if j < h: break
|
||||
a[j] = v
|
||||
if h == 1: break
|
||||
proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
|
||||
# 'noinline' so that 'nimProfile' does not perform the stack allocation
|
||||
# in the common case.
|
||||
var st: TStackTrace
|
||||
captureStackTrace(framePtr, st)
|
||||
hook(st)
|
||||
|
||||
proc writeProfile() {.noconv.} =
|
||||
const filename = "profile_results"
|
||||
var i = 0
|
||||
var f: TFile
|
||||
var j = 1
|
||||
while open(f, filename & $j & ".txt"):
|
||||
close(f)
|
||||
inc(j)
|
||||
if open(f, filename & $j & ".txt", fmWrite):
|
||||
var N = 0
|
||||
# we have to compute the actual length of the array:
|
||||
while profileData[N].procname != nil: inc(N)
|
||||
sortProfile(profileData, N)
|
||||
writeln(f, "total running time of each proc" &
|
||||
" (interpret these numbers relatively)")
|
||||
while profileData[i].procname != nil:
|
||||
write(f, profileData[i].procname)
|
||||
write(f, ": ")
|
||||
writeln(f, profileData[i].total)
|
||||
inc(i)
|
||||
close(f)
|
||||
proc setProfilingInterval*(intervalInUs: int): TNanos =
|
||||
## set this to change the sampling interval. Default value is 5ms.
|
||||
interval = intervalInUs * 1000
|
||||
|
||||
addQuitProc(writeProfile)
|
||||
var t0: TTicks
|
||||
|
||||
var usefulCall* = 0
|
||||
var uselessCall* = 0
|
||||
|
||||
proc nimProfile() =
|
||||
## This is invoked by the compiler in every loop and on every proc entry!
|
||||
dec gTicker
|
||||
if gTicker == 0:
|
||||
gTicker = -1
|
||||
let t1 = getticks()
|
||||
if getticks() - t0 > interval:
|
||||
inc usefulCall
|
||||
if not isNil(profilerHook):
|
||||
# disable recursive calls: XXX should use try..finally,
|
||||
# but that's too expensive!
|
||||
let oldHook = profilerHook
|
||||
profilerHook = nil
|
||||
callProfilerHook(oldHook)
|
||||
profilerHook = oldHook
|
||||
t0 = getticks()
|
||||
else:
|
||||
inc uselessCall
|
||||
gTicker = SamplingInterval
|
||||
|
||||
proc stopProfiling*() =
|
||||
## call this to stop profiling; should be called by the client profiler.
|
||||
profilerHook = nil
|
||||
|
||||
{.pop.}
|
||||
|
||||
3
todo.txt
3
todo.txt
@@ -1,7 +1,6 @@
|
||||
version 0.9.0
|
||||
=============
|
||||
|
||||
- make 'm: stmt' use overloading resolution
|
||||
- make 'bind' default for templates and introduce 'mixin'
|
||||
|
||||
- implicit deref for parameter matching
|
||||
@@ -22,6 +21,7 @@ Bugs
|
||||
- bug: the parser is not strict enough with newlines: 'echo "a" echo "b"'
|
||||
compiles
|
||||
- bug: blocks can "export" an identifier but the CCG generates {} for them ...
|
||||
- bug: what if we pass an iterator to a proc var? iterators use tyProc too ...
|
||||
|
||||
|
||||
version 0.9.XX
|
||||
@@ -38,6 +38,7 @@ version 0.9.XX
|
||||
echo a
|
||||
echo b)
|
||||
|
||||
- make 'm: stmt' use overloading resolution
|
||||
- implement "closure tuple consists of a single 'ref'" optimization
|
||||
- implement for loop transformation for first class iterators
|
||||
- JS gen:
|
||||
|
||||
Reference in New Issue
Block a user