first version of a memory profiler

This commit is contained in:
Araq
2012-10-10 00:41:34 +02:00
parent cf06131dec
commit d43febf81e
8 changed files with 115 additions and 54 deletions

View File

@@ -20,7 +20,7 @@ import
when hasTinyCBackend:
import tccgen
when defined(profiler):
when defined(profiler) or defined(memProfiler):
{.hint: "Profiling support is turned on!".}
import nimprof

View File

@@ -28,6 +28,22 @@ the option ``--stackTrace:on`` is active! Unfortunately this means that a
profiling build is much slower than a release build.
Memory profiler
===============
You can also use ESTP as a memory profiler to see which stack traces allocate
the most memory and thus create the most GC pressure. It may also help to
find memory leaks. To activate the memory profiler you need to:
* compile your program with the ``--profiler:off --stackTrace:on -d:memProfiler``
command line options
* import the ``nimprof`` module
* run your program as usual.
Define the symbol ``ignoreAllocationSize`` so that only the number of
allocations is counted and the sizes of the allocations are ignored.
Example results file
====================

View File

@@ -160,6 +160,7 @@ Define Effect
for further information.
``nodejs`` The EcmaScript target is actually ``node.js``.
``ssl`` Enables OpenSSL support for the sockets module.
``memProfiler`` Enables memory profiling for the native GC.
================== =========================================================

View File

@@ -11,14 +11,16 @@
## ``--profiler:on``. You only need to import this module to get a profiling
## report at program exit.
when not defined(profiler):
when not defined(profiler) and not defined(memProfiler):
{.warning: "Profiling support is turned off!".}
# We don't want to profile the profiling code ...
{.push profiler: off.}
import hashes, algorithm, strutils, tables, sets
include "system/timers"
when not defined(memProfiler):
include "system/timers"
const
withThreads = compileOption("threads")
@@ -47,15 +49,15 @@ var
maxChainLen = 0
totalCalls = 0
var
interval: TNanos = 5_000_000 - tickCountCorrection # 5ms
when not defined(memProfiler):
var interval: TNanos = 5_000_000 - tickCountCorrection # 5ms
proc setSamplingFrequency*(intervalInUs: int) =
## set this to change the sampling frequency. Default value is 5ms.
## Set it to 0 to disable time based profiling; it uses an imprecise
## instruction count measure instead then.
if intervalInUs <= 0: interval = 0
else: interval = intervalInUs * 1000 - tickCountCorrection
proc setSamplingFrequency*(intervalInUs: int) =
## set this to change the sampling frequency. Default value is 5ms.
## Set it to 0 to disable time based profiling; it uses an imprecise
## instruction count measure instead then.
if intervalInUs <= 0: interval = 0
else: interval = intervalInUs * 1000 - tickCountCorrection
when withThreads:
var
@@ -63,7 +65,7 @@ when withThreads:
InitLock profilingLock
proc hookAux(st: TStackTrace) =
proc hookAux(st: TStackTrace, costs: int) =
# this is quite performance sensitive!
when withThreads: Acquire profilingLock
inc totalCalls
@@ -79,13 +81,13 @@ proc hookAux(st: TStackTrace) =
while probes >= 0:
if profileData[h].st == st:
# wow, same entry found:
inc profileData[h].total
inc profileData[h].total, costs
return
if profileData[minIdx].total < profileData[h].total:
minIdx = h
h = ((5 * h) + 1) and high(profileData)
dec probes
profileData[minIdx].total = 1
profileData[minIdx].total = costs
profileData[minIdx].st = st
else:
var chain = 0
@@ -93,28 +95,45 @@ proc hookAux(st: TStackTrace) =
if profileData[h] == nil:
profileData[h] = cast[ptr TProfileEntry](
allocShared0(sizeof(TProfileEntry)))
profileData[h].total = 1
profileData[h].total = costs
profileData[h].st = st
dec emptySlots
break
if profileData[h].st == st:
# wow, same entry found:
inc profileData[h].total
inc profileData[h].total, costs
break
h = ((5 * h) + 1) and high(profileData)
inc chain
maxChainLen = max(maxChainLen, chain)
when withThreads: Release profilingLock
var
t0 {.threadvar.}: TTicks
when defined(memProfiler):
const
SamplingInterval = 50_000
var
gTicker {.threadvar.}: int
proc hook(st: TStackTrace) {.nimcall.} =
if interval == 0:
hookAux(st)
elif getticks() - t0 > interval:
hookAux(st)
t0 = getticks()
proc hook(st: TStackTrace, size: int) {.nimcall.} =
if gTicker == 0:
gTicker = -1
when defined(ignoreAllocationSize):
hookAux(st, 1)
else:
hookAux(st, size)
gTicker = SamplingInterval
dec gTicker
else:
var
t0 {.threadvar.}: TTicks
proc hook(st: TStackTrace) {.nimcall.} =
if interval == 0:
hookAux(st, 1)
elif getticks() - t0 > interval:
hookAux(st, 1)
t0 = getticks()
proc getTotal(x: ptr TProfileEntry): int =
result = if isNil(x): 0 else: x.total

View File

@@ -2045,7 +2045,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
when defined(endb):
include "system/debugger"
when defined(profiler):
when defined(profiler) or defined(memProfiler):
include "system/profiler"
{.pop.} # stacktrace

View File

@@ -28,6 +28,8 @@ const
when withRealTime and not defined(getTicks):
include "system/timers"
when defined(memProfiler):
proc nimProfile(requestedSize: int)
const
rcIncrement = 0b1000 # so that lowest 3 bits are not touched
@@ -431,12 +433,15 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
result = rawNewObj(typ, size, gch)
zeroMem(result, size)
when defined(memProfiler): nimProfile(size)
proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
# `newObj` already uses locks, so no need for them here.
result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
result = newObj(typ, size)
cast[PGenericSeq](result).len = len
cast[PGenericSeq](result).reserved = len
when defined(memProfiler): nimProfile(size)
proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
# generates a new object and sets its reference counter to 1
@@ -463,11 +468,14 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
result = cellToUsr(res)
zeroMem(result, size)
sysAssert(allocInv(gch.region), "newObjRC1 end")
when defined(memProfiler): nimProfile(size)
proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
result = newObjRC1(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
result = newObjRC1(typ, size)
cast[PGenericSeq](result).len = len
cast[PGenericSeq](result).reserved = len
when defined(memProfiler): nimProfile(size)
proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
acquire(gch)
@@ -512,6 +520,7 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
release(gch)
result = cellToUsr(res)
sysAssert(allocInv(gch.region), "growObj end")
when defined(memProfiler): nimProfile(newsize-oldsize)
proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
result = growObj(old, newsize, gch)

View File

@@ -49,34 +49,51 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) =
inc(i)
b = b.prev
const
SamplingInterval = 50_000
# set this to change the default sampling interval
var
profilerHook*: TProfilerHook
## set this variable to provide a procedure that implements a profiler in
## user space. See the `nimprof` module for a reference implementation.
gTicker {.threadvar.}: int
when defined(memProfiler):
type
TMemProfilerHook* = proc (st: TStackTrace, requestedSize: int) {.nimcall.}
var
profilerHook*: TMemProfilerHook
## set this variable to provide a procedure that implements a profiler in
## user space. See the `nimprof` module for a reference implementation.
proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
# 'noinline' so that 'nimProfile' does not perform the stack allocation
# in the common case.
var st: TStackTrace
captureStackTrace(framePtr, st)
hook(st)
proc callProfilerHook(hook: TMemProfilerHook, requestedSize: int) =
var st: TStackTrace
captureStackTrace(framePtr, st)
hook(st, requestedSize)
proc nimProfile() =
## This is invoked by the compiler in every loop and on every proc entry!
if gTicker == 0:
gTicker = -1
proc nimProfile(requestedSize: int) =
if not isNil(profilerHook):
# disable recursive calls: XXX should use try..finally,
# but that's too expensive!
let oldHook = profilerHook
profilerHook = nil
callProfilerHook(oldHook)
profilerHook = oldHook
gTicker = SamplingInterval
dec gTicker
callProfilerHook(profilerHook, requestedSize)
else:
const
SamplingInterval = 50_000
# set this to change the default sampling interval
var
profilerHook*: TProfilerHook
## set this variable to provide a procedure that implements a profiler in
## user space. See the `nimprof` module for a reference implementation.
gTicker {.threadvar.}: int
proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
# 'noinline' so that 'nimProfile' does not perform the stack allocation
# in the common case.
var st: TStackTrace
captureStackTrace(framePtr, st)
hook(st)
proc nimProfile() =
## This is invoked by the compiler in every loop and on every proc entry!
if gTicker == 0:
gTicker = -1
if not isNil(profilerHook):
# disable recursive calls: XXX should use try..finally,
# but that's too expensive!
let oldHook = profilerHook
profilerHook = nil
callProfilerHook(oldHook)
profilerHook = oldHook
gTicker = SamplingInterval
dec gTicker
{.pop.}

View File

@@ -1,7 +1,6 @@
version 0.9.2
=============
- memory profiler
- implement the compiler as a service
- implement for loop transformation for first class iterators
- ``=`` should be overloadable; requires specialization for ``=``