mirror of
https://github.com/nim-lang/Nim.git
synced 2026-06-08 12:54:22 +00:00
Allocator: Always place free cells into the active chunk and add documentation (#23871)
Lets single threaded applications benefit from tracking foreign cells as
well.
After this, I think `SmallChunk` technically no longer needs to act as a
linked list, though that needs more investigation.
The likelihood of overflowing `chunk.free` also rises, so to work around
that it might make sense to check `foreignCells` instead of adjusting
free space or replace free with a counter for the local capacity.
For a Nim compile I can observe a ~10 MB reduction, and smaller ones for
other projects.
(cherry picked from commit 881fbb8f81)
This commit is contained in:
@@ -20,6 +20,37 @@ template track(op, address, size) =
|
||||
|
||||
# We manage *chunks* of memory. Each chunk is a multiple of the page size.
|
||||
# Each chunk starts at an address that is divisible by the page size.
|
||||
# Small chunks may be divided into smaller cells of reusable pointers to reduce the number of page allocations.
|
||||
|
||||
# An allocation of a small pointer looks approximately like this
|
||||
#[
|
||||
|
||||
alloc -> rawAlloc -> No free chunk available -> Request a new page from tlsf -> result = chunk.data ------------+
|
||||
| |
|
||||
v |
|
||||
Free chunk available |
|
||||
| |
|
||||
v v
|
||||
Fetch shared cells -> No free cells available -> Advance acc -> result = chunk.data + chunk.acc -------> return
|
||||
(may not add new cells) ^
|
||||
| |
|
||||
v |
|
||||
Free cells available -> result = chunk.freeList -> Advance chunk.freeList -----------------------------------+
|
||||
]#
|
||||
# so it is split into 3 paths, where the last path is preferred to prevent unnecessary allocations.
|
||||
#
|
||||
#
|
||||
# A deallocation of a small pointer then looks like this
|
||||
#[
|
||||
dealloc -> rawDealloc -> chunk.owner == addr(a) --------------> This thread owns the chunk ------> The current chunk is active -> Chunk is completely unused -----> Chunk references no foreign cells
|
||||
| | (Add cell into the current chunk) | Return the current chunk back to tlsf
|
||||
| | | |
|
||||
v v v v
|
||||
A different thread owns this chunk. The current chunk is not active. chunk.free was < size Chunk references foreign cells, noop
|
||||
Add the cell to a.sharedFreeLists Add the cell into the active chunk Activate the chunk (end)
|
||||
(end) (end) (end)
|
||||
]#
|
||||
# So "true" deallocation is delayed for as long as possible in favor of reusing cells.
|
||||
|
||||
const
|
||||
nimMinHeapPages {.intdefine.} = 128 # 0.5 MB
|
||||
@@ -71,6 +102,8 @@ const
|
||||
|
||||
type
|
||||
FreeCell {.final, pure.} = object
|
||||
# A free cell is a pointer that has been freed, meaning it became available for reuse.
|
||||
# It may become foreign if it is lent to a chunk that did not create it; doing so reduces the number of pages needed.
|
||||
next: ptr FreeCell # next free cell in chunk (overlaid with refcount)
|
||||
when not defined(gcDestructors):
|
||||
zeroField: int # 0 means cell is not used (overlaid with typ field)
|
||||
@@ -90,11 +123,18 @@ type
|
||||
|
||||
SmallChunk = object of BaseChunk
|
||||
next, prev: PSmallChunk # chunks of the same size
|
||||
freeList: ptr FreeCell
|
||||
free: int32 # how many bytes remain
|
||||
acc: uint32 # accumulator for small object allocation
|
||||
foreignCells: int # Number of deferred free cells from other threads this chunk stole from sharedFreeLists.
|
||||
# Freeing the chunk before this is zero means the stolen cells become inaccessible permanently.
|
||||
freeList: ptr FreeCell # Singly linked list of cells. They may be from foreign chunks or from the current chunk.
|
||||
# Should be `nil` when the chunk isn't active in `a.freeSmallChunks`.
|
||||
free: int32 # Bytes this chunk is able to provide using both the accumulator and free cells.
|
||||
# When a cell is considered foreign, its source chunk's free field is NOT adjusted until it
|
||||
# reaches dealloc while the source chunk is active.
|
||||
# Instead, the receiving chunk gains the capacity and thus reserves space in the foreign chunk.
|
||||
acc: uint32 # Offset from data, used when there are no free cells available but the chunk is considered free.
|
||||
foreignCells: int # When a free cell is given to a chunk that is not its origin,
|
||||
# both the cell and the source chunk are considered foreign.
|
||||
# Receiving a foreign cell can happen either when deallocating from another thread or when
|
||||
# the active chunk in `a.freeSmallChunks` is not the current chunk.
|
||||
# Freeing a chunk while `foreignCells > 0` leaks memory as all references to it become lost.
|
||||
data {.align: MemAlign.}: UncheckedArray[byte] # start of usable memory
|
||||
|
||||
BigChunk = object of BaseChunk # not necessarily > PageSize!
|
||||
@@ -110,8 +150,11 @@ type
|
||||
when not defined(gcDestructors):
|
||||
minLargeObj, maxLargeObj: int
|
||||
freeSmallChunks: array[0..max(1, SmallChunkSize div MemAlign-1), PSmallChunk]
|
||||
# List of available chunks per size class. Only one is expected to be active per class.
|
||||
when defined(gcDestructors):
|
||||
sharedFreeLists: array[0..max(1, SmallChunkSize div MemAlign-1), ptr FreeCell]
|
||||
# When a thread frees a pointer it did not create, it must not adjust the counters.
|
||||
# Instead, the cell is placed here and deferred until the next allocation.
|
||||
flBitmap: uint32
|
||||
slBitmap: array[RealFli, uint32]
|
||||
matrix: array[RealFli, array[MaxSli, PBigChunk]]
|
||||
@@ -794,6 +837,8 @@ when defined(gcDestructors):
|
||||
inc total, size
|
||||
let chunk = cast[PSmallChunk](pageAddr(it))
|
||||
if c != chunk:
|
||||
# The cell is foreign, potentially even from a foreign thread.
|
||||
# It must block the current chunk from being freed, as doing so would leak memory.
|
||||
inc c.foreignCells
|
||||
it = it.next
|
||||
# By not adjusting the foreign chunk we reserve space in it to prevent deallocation
|
||||
@@ -828,6 +873,7 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
|
||||
|
||||
if size <= SmallChunkSize-smallChunkOverhead():
|
||||
template fetchSharedCells(tc: PSmallChunk) =
|
||||
# Consumes cells from (potentially) foreign threads from `a.sharedFreeLists[s]`
|
||||
when defined(gcDestructors):
|
||||
if tc.freeList == nil:
|
||||
when hasThreadSupport:
|
||||
@@ -836,13 +882,17 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
|
||||
else:
|
||||
tc.freeList = a.sharedFreeLists[s]
|
||||
a.sharedFreeLists[s] = nil
|
||||
# if `tc.freeList` isn't nil, `tc` will gain capacity.
|
||||
# We must calculate how much it gained and how many foreign cells are included.
|
||||
compensateCounters(a, tc, size)
|
||||
|
||||
# allocate a small block: for small chunks, we use only its next pointer
|
||||
let s = size div MemAlign
|
||||
var c = a.freeSmallChunks[s]
|
||||
if c == nil:
|
||||
# There is no free chunk of the requested size available, we need a new one.
|
||||
c = getSmallChunk(a)
|
||||
# init all fields in case memory didn't get zeroed
|
||||
c.freeList = nil
|
||||
c.foreignCells = 0
|
||||
sysAssert c.size == PageSize, "rawAlloc 3"
|
||||
@@ -852,12 +902,17 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
|
||||
sysAssert c.owner == addr(a), "rawAlloc: No owner set!"
|
||||
c.next = nil
|
||||
c.prev = nil
|
||||
# Shared cells are fetched here in case `c.size * 2 >= SmallChunkSize - smallChunkOverhead()`.
|
||||
# For those single cell chunks, we would otherwise have to allocate a new one almost every time.
|
||||
fetchSharedCells(c)
|
||||
if c.free >= size:
|
||||
# Because removals from `a.freeSmallChunks[s]` only happen in the other alloc branch and during dealloc,
|
||||
# we must not add it to the list if it cannot be used the next time a pointer of `size` bytes is needed.
|
||||
listAdd(a.freeSmallChunks[s], c)
|
||||
result = addr(c.data)
|
||||
sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 4")
|
||||
else:
|
||||
# There is a free chunk of the requested size available, use it.
|
||||
sysAssert(allocInv(a), "rawAlloc: begin c != nil")
|
||||
sysAssert c.next != c, "rawAlloc 5"
|
||||
#if c.size != size:
|
||||
@@ -869,24 +924,30 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
|
||||
result = cast[pointer](cast[int](addr(c.data)) +% c.acc.int)
|
||||
inc(c.acc, size)
|
||||
else:
|
||||
# There are free cells available, prefer them over the accumulator
|
||||
result = c.freeList
|
||||
when not defined(gcDestructors):
|
||||
sysAssert(c.freeList.zeroField == 0, "rawAlloc 8")
|
||||
c.freeList = c.freeList.next
|
||||
if cast[PSmallChunk](pageAddr(result)) != c:
|
||||
# This cell isn't a blocker for the current chunk anymore
|
||||
# This cell isn't a blocker for the current chunk's deallocation anymore
|
||||
dec(c.foreignCells)
|
||||
else:
|
||||
sysAssert(c == cast[PSmallChunk](pageAddr(result)), "Bad cell")
|
||||
sysAssert(c == cast[PSmallChunk](pageAddr(result)), "rawAlloc: Bad cell")
|
||||
# Even if the cell we return is foreign, the local chunk's capacity decreases.
|
||||
# The capacity was previously reserved in the source chunk (when it first got allocated),
|
||||
# then added into the current chunk during dealloc,
|
||||
# so the source chunk will not be freed or leak memory because of this.
|
||||
dec(c.free, size)
|
||||
sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 9")
|
||||
sysAssert(allocInv(a), "rawAlloc: end c != nil")
|
||||
# We fetch deferred cells *after* advancing c.freeList/acc to adjust c.free.
|
||||
# We fetch deferred cells *after* advancing `c.freeList`/`acc` to adjust `c.free`.
|
||||
# If after the adjustment it turns out there are free cells available,
|
||||
# the chunk stays in a.freeSmallChunks[s] and the need for a new chunk is delayed.
|
||||
# the chunk stays in `a.freeSmallChunks[s]` and the need for a new chunk is delayed.
|
||||
fetchSharedCells(c)
|
||||
sysAssert(allocInv(a), "rawAlloc: before c.free < size")
|
||||
if c.free < size:
|
||||
# Even after fetching shared cells the chunk has no usable memory left. It is no longer the active chunk
|
||||
sysAssert(allocInv(a), "rawAlloc: before listRemove test")
|
||||
listRemove(a.freeSmallChunks[s], c)
|
||||
sysAssert(allocInv(a), "rawAlloc: end listRemove test")
|
||||
@@ -952,23 +1013,37 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
|
||||
#echo("setting to nil: ", $cast[int](addr(f.zeroField)))
|
||||
sysAssert(f.zeroField != 0, "rawDealloc 1")
|
||||
f.zeroField = 0
|
||||
f.next = c.freeList
|
||||
c.freeList = f
|
||||
when overwriteFree:
|
||||
# set to 0xff to check for usage after free bugs:
|
||||
nimSetMem(cast[pointer](cast[int](p) +% sizeof(FreeCell)), -1'i32,
|
||||
s -% sizeof(FreeCell))
|
||||
# check if it is not in the freeSmallChunks[s] list:
|
||||
if c.free < s:
|
||||
# add it to the freeSmallChunks[s] array:
|
||||
listAdd(a.freeSmallChunks[s div MemAlign], c)
|
||||
inc(c.free, s)
|
||||
let activeChunk = a.freeSmallChunks[s div MemAlign]
|
||||
if activeChunk != nil and c != activeChunk:
|
||||
# This pointer is not part of the active chunk, lend it out
|
||||
# and do not adjust the current chunk (same logic as compensateCounters.)
|
||||
# Put the cell into the active chunk; doing so
|
||||
# may prevent a queue of available chunks from forming in a.freeSmallChunks[s div MemAlign].
|
||||
# This queue would otherwise waste memory in the form of free cells until we return to those chunks.
|
||||
f.next = activeChunk.freeList
|
||||
activeChunk.freeList = f # lend the cell
|
||||
inc(activeChunk.free, s) # By not adjusting the current chunk's capacity it is prevented from being freed
|
||||
inc(activeChunk.foreignCells) # The cell is now considered foreign from the perspective of the active chunk
|
||||
else:
|
||||
inc(c.free, s)
|
||||
if c.free == SmallChunkSize-smallChunkOverhead() and c.foreignCells == 0:
|
||||
listRemove(a.freeSmallChunks[s div MemAlign], c)
|
||||
c.size = SmallChunkSize
|
||||
freeBigChunk(a, cast[PBigChunk](c))
|
||||
f.next = c.freeList
|
||||
c.freeList = f
|
||||
if c.free < s:
|
||||
# The chunk could not have been active as it didn't have enough space to give
|
||||
listAdd(a.freeSmallChunks[s div MemAlign], c)
|
||||
inc(c.free, s)
|
||||
else:
|
||||
inc(c.free, s)
|
||||
# Free only if the entire chunk is unused and there are no borrowed cells.
|
||||
# If the chunk were to be freed while it references foreign cells,
|
||||
# the foreign chunks will leak memory and can never be freed.
|
||||
if c.free == SmallChunkSize-smallChunkOverhead() and c.foreignCells == 0:
|
||||
listRemove(a.freeSmallChunks[s div MemAlign], c)
|
||||
c.size = SmallChunkSize
|
||||
freeBigChunk(a, cast[PBigChunk](c))
|
||||
else:
|
||||
when logAlloc: cprintf("dealloc(pointer_%p) # SMALL FROM %p CALLER %p\n", p, c.owner, addr(a))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user