mirror of
https://github.com/nim-lang/Nim.git
synced 2026-06-04 02:44:44 +00:00
first version of a memory profiler
This commit is contained in:
@@ -20,7 +20,7 @@ import
|
||||
when hasTinyCBackend:
|
||||
import tccgen
|
||||
|
||||
when defined(profiler):
|
||||
when defined(profiler) or defined(memProfiler):
|
||||
{.hint: "Profiling support is turned on!".}
|
||||
import nimprof
|
||||
|
||||
|
||||
16
doc/estp.txt
16
doc/estp.txt
@@ -28,6 +28,22 @@ the option ``--stackTrace:on`` is active! Unfortunately this means that a
|
||||
profiling build is much slower than a release build.
|
||||
|
||||
|
||||
Memory profiler
|
||||
===============
|
||||
|
||||
You can also use ESTP as a memory profiler to see which stack traces allocate
|
||||
the most memory and thus create the most GC pressure. It may also help to
|
||||
find memory leaks. To activate the memory profiler you need to:
|
||||
|
||||
* compile your program with the ``--profiler:off --stackTrace:on -d:memProfiler``
|
||||
command line options
|
||||
* import the ``nimprof`` module
|
||||
* run your program as usual.
|
||||
|
||||
Define the symbol ``ignoreAllocationSize`` to count only the number of
|
||||
allocations; the sizes of the individual allocations are then ignored.
|
||||
|
||||
|
||||
Example results file
|
||||
====================
|
||||
|
||||
|
||||
@@ -160,6 +160,7 @@ Define Effect
|
||||
for further information.
|
||||
``nodejs`` The EcmaScript target is actually ``node.js``.
|
||||
``ssl`` Enables OpenSSL support for the sockets module.
|
||||
``memProfiler`` Enables memory profiling for the native GC.
|
||||
================== =========================================================
|
||||
|
||||
|
||||
|
||||
@@ -11,14 +11,16 @@
|
||||
## ``--profiler:on``. You only need to import this module to get a profiling
|
||||
## report at program exit.
|
||||
|
||||
when not defined(profiler):
|
||||
when not defined(profiler) and not defined(memProfiler):
|
||||
{.warning: "Profiling support is turned off!".}
|
||||
|
||||
# We don't want to profile the profiling code ...
|
||||
{.push profiler: off.}
|
||||
|
||||
import hashes, algorithm, strutils, tables, sets
|
||||
include "system/timers"
|
||||
|
||||
when not defined(memProfiler):
|
||||
include "system/timers"
|
||||
|
||||
const
|
||||
withThreads = compileOption("threads")
|
||||
@@ -47,15 +49,15 @@ var
|
||||
maxChainLen = 0
|
||||
totalCalls = 0
|
||||
|
||||
var
|
||||
interval: TNanos = 5_000_000 - tickCountCorrection # 5ms
|
||||
when not defined(memProfiler):
|
||||
var interval: TNanos = 5_000_000 - tickCountCorrection # 5ms
|
||||
|
||||
proc setSamplingFrequency*(intervalInUs: int) =
|
||||
## set this to change the sampling frequency. Default value is 5ms.
|
||||
## Set it to 0 to disable time based profiling; it uses an imprecise
|
||||
## instruction count measure instead then.
|
||||
if intervalInUs <= 0: interval = 0
|
||||
else: interval = intervalInUs * 1000 - tickCountCorrection
|
||||
proc setSamplingFrequency*(intervalInUs: int) =
|
||||
## set this to change the sampling frequency. Default value is 5ms.
|
||||
## Set it to 0 to disable time based profiling; it uses an imprecise
|
||||
## instruction count measure instead then.
|
||||
if intervalInUs <= 0: interval = 0
|
||||
else: interval = intervalInUs * 1000 - tickCountCorrection
|
||||
|
||||
when withThreads:
|
||||
var
|
||||
@@ -63,7 +65,7 @@ when withThreads:
|
||||
|
||||
InitLock profilingLock
|
||||
|
||||
proc hookAux(st: TStackTrace) =
|
||||
proc hookAux(st: TStackTrace, costs: int) =
|
||||
# this is quite performance sensitive!
|
||||
when withThreads: Acquire profilingLock
|
||||
inc totalCalls
|
||||
@@ -79,13 +81,13 @@ proc hookAux(st: TStackTrace) =
|
||||
while probes >= 0:
|
||||
if profileData[h].st == st:
|
||||
# wow, same entry found:
|
||||
inc profileData[h].total
|
||||
inc profileData[h].total, costs
|
||||
return
|
||||
if profileData[minIdx].total < profileData[h].total:
|
||||
minIdx = h
|
||||
h = ((5 * h) + 1) and high(profileData)
|
||||
dec probes
|
||||
profileData[minIdx].total = 1
|
||||
profileData[minIdx].total = costs
|
||||
profileData[minIdx].st = st
|
||||
else:
|
||||
var chain = 0
|
||||
@@ -93,28 +95,45 @@ proc hookAux(st: TStackTrace) =
|
||||
if profileData[h] == nil:
|
||||
profileData[h] = cast[ptr TProfileEntry](
|
||||
allocShared0(sizeof(TProfileEntry)))
|
||||
profileData[h].total = 1
|
||||
profileData[h].total = costs
|
||||
profileData[h].st = st
|
||||
dec emptySlots
|
||||
break
|
||||
if profileData[h].st == st:
|
||||
# wow, same entry found:
|
||||
inc profileData[h].total
|
||||
inc profileData[h].total, costs
|
||||
break
|
||||
h = ((5 * h) + 1) and high(profileData)
|
||||
inc chain
|
||||
maxChainLen = max(maxChainLen, chain)
|
||||
when withThreads: Release profilingLock
|
||||
|
||||
var
|
||||
t0 {.threadvar.}: TTicks
|
||||
when defined(memProfiler):
|
||||
const
|
||||
SamplingInterval = 50_000
|
||||
var
|
||||
gTicker {.threadvar.}: int
|
||||
|
||||
proc hook(st: TStackTrace) {.nimcall.} =
|
||||
if interval == 0:
|
||||
hookAux(st)
|
||||
elif getticks() - t0 > interval:
|
||||
hookAux(st)
|
||||
t0 = getticks()
|
||||
proc hook(st: TStackTrace, size: int) {.nimcall.} =
|
||||
if gTicker == 0:
|
||||
gTicker = -1
|
||||
when defined(ignoreAllocationSize):
|
||||
hookAux(st, 1)
|
||||
else:
|
||||
hookAux(st, size)
|
||||
gTicker = SamplingInterval
|
||||
dec gTicker
|
||||
|
||||
else:
|
||||
var
|
||||
t0 {.threadvar.}: TTicks
|
||||
|
||||
proc hook(st: TStackTrace) {.nimcall.} =
|
||||
if interval == 0:
|
||||
hookAux(st, 1)
|
||||
elif getticks() - t0 > interval:
|
||||
hookAux(st, 1)
|
||||
t0 = getticks()
|
||||
|
||||
proc getTotal(x: ptr TProfileEntry): int =
|
||||
result = if isNil(x): 0 else: x.total
|
||||
|
||||
@@ -2045,7 +2045,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
|
||||
when defined(endb):
|
||||
include "system/debugger"
|
||||
|
||||
when defined(profiler):
|
||||
when defined(profiler) or defined(memProfiler):
|
||||
include "system/profiler"
|
||||
{.pop.} # stacktrace
|
||||
|
||||
|
||||
@@ -28,6 +28,8 @@ const
|
||||
|
||||
when withRealTime and not defined(getTicks):
|
||||
include "system/timers"
|
||||
when defined(memProfiler):
|
||||
proc nimProfile(requestedSize: int)
|
||||
|
||||
const
|
||||
rcIncrement = 0b1000 # so that lowest 3 bits are not touched
|
||||
@@ -431,12 +433,15 @@ proc rawNewObj(typ: PNimType, size: int, gch: var TGcHeap): pointer =
|
||||
proc newObj(typ: PNimType, size: int): pointer {.compilerRtl.} =
|
||||
result = rawNewObj(typ, size, gch)
|
||||
zeroMem(result, size)
|
||||
when defined(memProfiler): nimProfile(size)
|
||||
|
||||
proc newSeq(typ: PNimType, len: int): pointer {.compilerRtl.} =
|
||||
# `newObj` already uses locks, so no need for them here.
|
||||
result = newObj(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
|
||||
let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
|
||||
result = newObj(typ, size)
|
||||
cast[PGenericSeq](result).len = len
|
||||
cast[PGenericSeq](result).reserved = len
|
||||
when defined(memProfiler): nimProfile(size)
|
||||
|
||||
proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
|
||||
# generates a new object and sets its reference counter to 1
|
||||
@@ -463,11 +468,14 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl.} =
|
||||
result = cellToUsr(res)
|
||||
zeroMem(result, size)
|
||||
sysAssert(allocInv(gch.region), "newObjRC1 end")
|
||||
when defined(memProfiler): nimProfile(size)
|
||||
|
||||
proc newSeqRC1(typ: PNimType, len: int): pointer {.compilerRtl.} =
|
||||
result = newObjRC1(typ, addInt(mulInt(len, typ.base.size), GenericSeqSize))
|
||||
let size = addInt(mulInt(len, typ.base.size), GenericSeqSize)
|
||||
result = newObjRC1(typ, size)
|
||||
cast[PGenericSeq](result).len = len
|
||||
cast[PGenericSeq](result).reserved = len
|
||||
when defined(memProfiler): nimProfile(size)
|
||||
|
||||
proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
|
||||
acquire(gch)
|
||||
@@ -512,6 +520,7 @@ proc growObj(old: pointer, newsize: int, gch: var TGcHeap): pointer =
|
||||
release(gch)
|
||||
result = cellToUsr(res)
|
||||
sysAssert(allocInv(gch.region), "growObj end")
|
||||
when defined(memProfiler): nimProfile(newsize-oldsize)
|
||||
|
||||
proc growObj(old: pointer, newsize: int): pointer {.rtl.} =
|
||||
result = growObj(old, newsize, gch)
|
||||
|
||||
@@ -49,34 +49,51 @@ proc captureStackTrace(f: PFrame, st: var TStackTrace) =
|
||||
inc(i)
|
||||
b = b.prev
|
||||
|
||||
const
|
||||
SamplingInterval = 50_000
|
||||
# set this to change the default sampling interval
|
||||
var
|
||||
profilerHook*: TProfilerHook
|
||||
## set this variable to provide a procedure that implements a profiler in
|
||||
## user space. See the `nimprof` module for a reference implementation.
|
||||
gTicker {.threadvar.}: int
|
||||
when defined(memProfiler):
|
||||
type
|
||||
TMemProfilerHook* = proc (st: TStackTrace, requestedSize: int) {.nimcall.}
|
||||
var
|
||||
profilerHook*: TMemProfilerHook
|
||||
## set this variable to provide a procedure that implements a profiler in
|
||||
## user space. See the `nimprof` module for a reference implementation.
|
||||
|
||||
proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
|
||||
# 'noinline' so that 'nimProfile' does not perform the stack allocation
|
||||
# in the common case.
|
||||
var st: TStackTrace
|
||||
captureStackTrace(framePtr, st)
|
||||
hook(st)
|
||||
proc callProfilerHook(hook: TMemProfilerHook, requestedSize: int) =
|
||||
var st: TStackTrace
|
||||
captureStackTrace(framePtr, st)
|
||||
hook(st, requestedSize)
|
||||
|
||||
proc nimProfile() =
|
||||
## This is invoked by the compiler in every loop and on every proc entry!
|
||||
if gTicker == 0:
|
||||
gTicker = -1
|
||||
proc nimProfile(requestedSize: int) =
|
||||
if not isNil(profilerHook):
|
||||
# disable recursive calls: XXX should use try..finally,
|
||||
# but that's too expensive!
|
||||
let oldHook = profilerHook
|
||||
profilerHook = nil
|
||||
callProfilerHook(oldHook)
|
||||
profilerHook = oldHook
|
||||
gTicker = SamplingInterval
|
||||
dec gTicker
|
||||
callProfilerHook(profilerHook, requestedSize)
|
||||
else:
|
||||
const
|
||||
SamplingInterval = 50_000
|
||||
# set this to change the default sampling interval
|
||||
var
|
||||
profilerHook*: TProfilerHook
|
||||
## set this variable to provide a procedure that implements a profiler in
|
||||
## user space. See the `nimprof` module for a reference implementation.
|
||||
gTicker {.threadvar.}: int
|
||||
|
||||
proc callProfilerHook(hook: TProfilerHook) {.noinline.} =
|
||||
# 'noinline' so that 'nimProfile' does not perform the stack allocation
|
||||
# in the common case.
|
||||
var st: TStackTrace
|
||||
captureStackTrace(framePtr, st)
|
||||
hook(st)
|
||||
|
||||
proc nimProfile() =
|
||||
## This is invoked by the compiler in every loop and on every proc entry!
|
||||
if gTicker == 0:
|
||||
gTicker = -1
|
||||
if not isNil(profilerHook):
|
||||
# disable recursive calls: XXX should use try..finally,
|
||||
# but that's too expensive!
|
||||
let oldHook = profilerHook
|
||||
profilerHook = nil
|
||||
callProfilerHook(oldHook)
|
||||
profilerHook = oldHook
|
||||
gTicker = SamplingInterval
|
||||
dec gTicker
|
||||
|
||||
{.pop.}
|
||||
|
||||
Reference in New Issue
Block a user