This commit is contained in:
Andreas Rumpf
2026-04-07 18:07:34 +02:00
committed by GitHub
parent 184d423779
commit 6621d64398
3 changed files with 101 additions and 27 deletions

View File

@@ -127,11 +127,12 @@ type
# reaches dealloc while the source chunk is active.
# Instead, the receiving chunk gains the capacity and thus reserves space in the foreign chunk.
acc: uint32 # Offset from data, used when there are no free cells available but the chunk is considered free.
foreignCells: int # When a free cell is given to a chunk that is not its origin,
foreignCells: int32 # When a free cell is given to a chunk that is not its origin,
# both the cell and the source chunk are considered foreign.
# Receiving a foreign cell can happen both when deallocating from another thread or when
# the active chunk in `a.freeSmallChunks` is not the current chunk.
# Freeing a chunk while `foreignCells > 0` leaks memory as all references to it become lost.
chunkAlignOff: int32 # Byte offset from `data` where cells begin. Non-zero for alignment > MemAlign.
data {.align: MemAlign.}: UncheckedArray[byte] # start of usable memory
BigChunk = object of BaseChunk # not necessarily > PageSize!
@@ -472,8 +473,8 @@ iterator allObjects(m: var MemRegion): pointer {.inline.} =
var c = cast[PSmallChunk](c)
let size = c.size
var a = cast[int](addr(c.data))
let limit = a + c.acc.int
var a = cast[int](addr(c.data)) + c.chunkAlignOff.int
let limit = cast[int](addr(c.data)) + c.acc.int
while a <% limit:
yield cast[pointer](a)
a = a +% size
@@ -851,6 +852,15 @@ when defined(heaptrack):
proc heaptrack_malloc(a: pointer, size: int) {.cdecl, importc, dynlib: heaptrackLib.}
proc heaptrack_free(a: pointer) {.cdecl, importc, dynlib: heaptrackLib.}
proc smallChunkAlignOffset(alignment: int): int {.inline.} =
  ## Initial byte offset into a small chunk's `data` area such that
  ## `data + result + sizeof(FreeCell)` is `alignment`-aligned inside a
  ## page-aligned small chunk. Zero whenever the default `MemAlign`
  ## already satisfies the requested alignment.
  if alignment <= MemAlign:
    0
  else:
    align(smallChunkOverhead() + sizeof(FreeCell), alignment) -
      smallChunkOverhead() - sizeof(FreeCell)
proc bigChunkAlignOffset(alignment: int): int {.inline.} =
## Compute the alignment offset for big chunk data.
if alignment == 0:
@@ -863,14 +873,13 @@ proc rawAlloc(a: var MemRegion, requestedSize: int, alignment: int = 0): pointer
inc(a.allocCounter)
sysAssert(allocInv(a), "rawAlloc: begin")
sysAssert(roundup(65, 8) == 72, "rawAlloc: roundup broken")
var size = roundup(requestedSize, MemAlign)
var size = roundup(requestedSize, max(MemAlign, alignment))
let alignOff = smallChunkAlignOffset(alignment)
sysAssert(size >= sizeof(FreeCell), "rawAlloc: requested size too small")
sysAssert(size >= requestedSize, "insufficient allocated size!")
#c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
# For custom alignments > MemAlign, force big chunk allocation
# Small chunks cannot handle arbitrary alignments due to fixed cell boundaries
if size <= SmallChunkSize-smallChunkOverhead() and alignment == 0:
if size + alignOff <= SmallChunkSize-smallChunkOverhead():
template fetchSharedCells(tc: PSmallChunk) =
# Consumes cells from (potentially) foreign threads from `a.sharedFreeLists[s]`
when defined(gcDestructors):
@@ -888,16 +897,19 @@ proc rawAlloc(a: var MemRegion, requestedSize: int, alignment: int = 0): pointer
# allocate a small block: for small chunks, we use only its next pointer
let s = size div MemAlign
var c = a.freeSmallChunks[s]
if c != nil and c.chunkAlignOff != alignOff.int32:
c = nil
if c == nil:
# There is no free chunk of the requested size available, we need a new one.
c = getSmallChunk(a)
# init all fields in case memory didn't get zeroed
c.freeList = nil
c.foreignCells = 0
c.chunkAlignOff = alignOff.int32
sysAssert c.size == PageSize, "rawAlloc 3"
c.size = size
c.acc = size.uint32
c.free = SmallChunkSize - smallChunkOverhead() - size.int32
c.acc = (alignOff + size).uint32
c.free = SmallChunkSize - smallChunkOverhead() - alignOff.int32 - size.int32
sysAssert c.owner == addr(a), "rawAlloc: No owner set!"
c.next = nil
c.prev = nil
@@ -908,7 +920,7 @@ proc rawAlloc(a: var MemRegion, requestedSize: int, alignment: int = 0): pointer
# Because removals from `a.freeSmallChunks[s]` only happen in the other alloc branch and during dealloc,
# we must not add it to the list if it cannot be used the next time a pointer of `size` bytes is needed.
listAdd(a.freeSmallChunks[s], c)
result = addr(c.data)
result = addr(c.data) +! alignOff
sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 4")
else:
# There is a free chunk of the requested size available, use it.
@@ -950,7 +962,7 @@ proc rawAlloc(a: var MemRegion, requestedSize: int, alignment: int = 0): pointer
sysAssert(allocInv(a), "rawAlloc: before listRemove test")
listRemove(a.freeSmallChunks[s], c)
sysAssert(allocInv(a), "rawAlloc: end listRemove test")
sysAssert(((cast[int](result) and PageMask) - smallChunkOverhead()) %%
sysAssert(((cast[int](result) and PageMask) - smallChunkOverhead() - c.chunkAlignOff) %%
size == 0, "rawAlloc 21")
sysAssert(allocInv(a), "rawAlloc: end small size")
inc a.occ, size
@@ -1018,7 +1030,7 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
dec a.occ, s
untrackSize(s)
sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case A)"
sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead() - c.chunkAlignOff) %%
s == 0, "rawDealloc 3")
when not defined(gcDestructors):
#echo("setting to nil: ", $cast[int](addr(f.zeroField)))
@@ -1029,7 +1041,8 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
nimSetMem(cast[pointer](cast[int](p) +% sizeof(FreeCell)), -1'i32,
s -% sizeof(FreeCell))
let activeChunk = a.freeSmallChunks[s div MemAlign]
if activeChunk != nil and c != activeChunk:
if activeChunk != nil and c != activeChunk and
activeChunk.chunkAlignOff == c.chunkAlignOff:
# This pointer is not part of the active chunk, lend it out
# and do not adjust the current chunk (same logic as compensateCounters.)
# Put the cell into the active chunk,
@@ -1076,7 +1089,7 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
when defined(gcDestructors):
addToSharedFreeList(c, f, s div MemAlign)
sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead() - c.chunkAlignOff) %%
s == 0, "rawDealloc 2")
else:
# set to 0xff to check for usage after free bugs:
@@ -1102,8 +1115,11 @@ when not defined(gcDestructors):
var c = cast[PSmallChunk](c)
var offset = (cast[int](p) and (PageSize-1)) -%
smallChunkOverhead()
result = (c.acc.int >% offset) and (offset %% c.size == 0) and
(cast[ptr FreeCell](p).zeroField >% 1)
if c.acc.int >% offset:
let ao = c.chunkAlignOff.int
result = (offset >= ao) and
((offset -% ao) %% c.size == 0) and
(cast[ptr FreeCell](p).zeroField >% 1)
else:
var c = cast[PBigChunk](c)
# prev stores the aligned data pointer set during rawAlloc
@@ -1122,11 +1138,12 @@ when not defined(gcDestructors):
var c = cast[PSmallChunk](c)
var offset = (cast[int](p) and (PageSize-1)) -%
smallChunkOverhead()
if c.acc.int >% offset:
let ao = c.chunkAlignOff.int
if c.acc.int >% offset and offset >= ao:
sysAssert(cast[int](addr(c.data)) +% offset ==
cast[int](p), "offset is not what you think it is")
var d = cast[ptr FreeCell](cast[int](addr(c.data)) +%
offset -% (offset %% c.size))
ao +% ((offset -% ao) -% ((offset -% ao) %% c.size)))
if d.zeroField >% 1:
result = d
sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
@@ -1257,6 +1274,20 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
proc alloc0Impl(size: Natural): pointer =
result = alloc0(allocator, size)
when defined(gcOrc) or defined(gcYrc):
proc nimAlignedAlloc0(size: Natural, alignment: int): pointer =
  ## Zero-initialized aligned allocation backed by the native allocator.
  ## Same contract as `rawAlloc`: `result + sizeof(FreeCell)` is
  ## `alignment`-aligned.
  incStat(allocCount)
  let p = rawAlloc(allocator, size, alignment)
  zeroMem(p, size)
  result = p

proc nimAlignedAlloc(size: Natural, alignment: int): pointer =
  ## Aligned allocation without zeroing; same contract as `nimAlignedAlloc0`.
  incStat(allocCount)
  result = rawAlloc(allocator, size, alignment)

proc nimAlignedDealloc(p: pointer) =
  ## Release memory obtained via `nimAlignedAlloc`/`nimAlignedAlloc0`.
  incStat(deallocCount)
  rawDealloc(allocator, p)
proc deallocImpl(p: pointer) =
dealloc(allocator, p)

View File

@@ -92,12 +92,27 @@ else:
when not defined(nimHasQuirky):
{.pragma: quirky.}
# Forward declarations for native allocator alignment (implemented in alloc.nim).
# rawAlloc's contract: result + sizeof(FreeCell) is alignment-aligned.
# For ORC/YRC, sizeof(FreeCell) == sizeof(RefHeader).
const useNativeAlignedAlloc = (defined(gcOrc) or defined(gcYrc)) and
not defined(useMalloc) and not defined(nimscript) and
not defined(nimdoc) and not defined(useNimRtl)
when useNativeAlignedAlloc:
proc nimAlignedAlloc0(size: Natural, alignment: int): pointer {.gcsafe, raises: [].}
proc nimAlignedAlloc(size: Natural, alignment: int): pointer {.gcsafe, raises: [].}
proc nimAlignedDealloc(p: pointer) {.gcsafe, raises: [].}
proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
let hdrSize = align(sizeof(RefHeader), alignment)
let s = size +% hdrSize
when defined(nimscript):
when defined(nimscript) or defined(nimdoc):
discard
elif useNativeAlignedAlloc:
let s = size +% sizeof(RefHeader)
result = nimAlignedAlloc0(s, alignment) +! sizeof(RefHeader)
else:
let hdrSize = align(sizeof(RefHeader), alignment)
let s = size +% hdrSize
result = alignedAlloc0(s, alignment) +! hdrSize
when defined(nimArcDebug) or defined(nimArcIds):
head(result).refId = gRefId
@@ -111,12 +126,14 @@ proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
proc nimNewObjUninit(size, alignment: int): pointer {.compilerRtl.} =
# Same as 'nimNewObj' but do not initialize the memory to zero.
# The codegen proved for us that this is not necessary.
let hdrSize = align(sizeof(RefHeader), alignment)
let s = size + hdrSize
when defined(nimscript):
when defined(nimscript) or defined(nimdoc):
discard
elif useNativeAlignedAlloc:
let s = size + sizeof(RefHeader)
result = cast[ptr RefHeader](nimAlignedAlloc(s, alignment) +! sizeof(RefHeader))
else:
let hdrSize = align(sizeof(RefHeader), alignment)
let s = size + hdrSize
result = cast[ptr RefHeader](alignedAlloc(s, alignment) +! hdrSize)
head(result).rc = 0
when defined(gcOrc) or defined(gcYrc):
@@ -189,8 +206,11 @@ proc nimRawDispose(p: pointer, alignment: int) {.compilerRtl.} =
if freedCells.data == nil: init(freedCells)
freedCells.incl head(p)
else:
let hdrSize = align(sizeof(RefHeader), alignment)
alignedDealloc(p -! hdrSize, alignment)
when useNativeAlignedAlloc:
nimAlignedDealloc(p -! sizeof(RefHeader))
else:
let hdrSize = align(sizeof(RefHeader), alignment)
alignedDealloc(p -! hdrSize, alignment)
template `=dispose`*[T](x: owned(ref T)) = nimRawDispose(cast[pointer](x), T.alignOf)
#proc dispose*(x: pointer) = nimRawDispose(x)

View File

@@ -0,0 +1,23 @@
discard """
matrix: "--mm:refc; --mm:orc; --mm:arc"
targets: "c cpp"
output: "ok"
"""
# Test that heap-allocated objects with .align use small chunks,
# not a big chunk per object (regression test for #25577).
type U = object
d {.align: 32.}: int8
var e: seq[ref U]
for _ in 0 ..< 10000: e.add(new U)
# Without small-chunk alignment, each object gets its own page (~46 MB).
# With the fix, 10000 objects fit in ~1-3 MB depending on the GC.
doAssert getTotalMem() < 8 * 1024 * 1024, "align:32 heap objects use too much memory"
# Verify alignment is actually correct
for i in 0 ..< e.len:
doAssert (cast[int](addr e[i].d) and 31) == 0, "field not 32-byte aligned"
echo "ok"