mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-14 11:35:52 +00:00
@@ -127,11 +127,12 @@ type
|
||||
# reaches dealloc while the source chunk is active.
|
||||
# Instead, the receiving chunk gains the capacity and thus reserves space in the foreign chunk.
|
||||
acc: uint32 # Offset from data, used when there are no free cells available but the chunk is considered free.
|
||||
foreignCells: int # When a free cell is given to a chunk that is not its origin,
|
||||
foreignCells: int32 # When a free cell is given to a chunk that is not its origin,
|
||||
# both the cell and the source chunk are considered foreign.
|
||||
# Receiving a foreign cell can happen both when deallocating from another thread or when
|
||||
# the active chunk in `a.freeSmallChunks` is not the current chunk.
|
||||
# Freeing a chunk while `foreignCells > 0` leaks memory as all references to it become lost.
|
||||
chunkAlignOff: int32 # Byte offset from `data` where cells begin. Non-zero for alignment > MemAlign.
|
||||
data {.align: MemAlign.}: UncheckedArray[byte] # start of usable memory
|
||||
|
||||
BigChunk = object of BaseChunk # not necessarily > PageSize!
|
||||
@@ -472,8 +473,8 @@ iterator allObjects(m: var MemRegion): pointer {.inline.} =
|
||||
var c = cast[PSmallChunk](c)
|
||||
|
||||
let size = c.size
|
||||
var a = cast[int](addr(c.data))
|
||||
let limit = a + c.acc.int
|
||||
var a = cast[int](addr(c.data)) + c.chunkAlignOff.int
|
||||
let limit = cast[int](addr(c.data)) + c.acc.int
|
||||
while a <% limit:
|
||||
yield cast[pointer](a)
|
||||
a = a +% size
|
||||
@@ -851,6 +852,15 @@ when defined(heaptrack):
|
||||
proc heaptrack_malloc(a: pointer, size: int) {.cdecl, importc, dynlib: heaptrackLib.}
|
||||
proc heaptrack_free(a: pointer) {.cdecl, importc, dynlib: heaptrackLib.}
|
||||
|
||||
proc smallChunkAlignOffset(alignment: int): int {.inline.} =
|
||||
## Compute the initial data offset so that data + result + sizeof(FreeCell)
|
||||
## is alignment-aligned within a page-aligned small chunk.
|
||||
if alignment <= MemAlign:
|
||||
result = 0
|
||||
else:
|
||||
result = align(smallChunkOverhead() + sizeof(FreeCell), alignment) -
|
||||
smallChunkOverhead() - sizeof(FreeCell)
|
||||
|
||||
proc bigChunkAlignOffset(alignment: int): int {.inline.} =
|
||||
## Compute the alignment offset for big chunk data.
|
||||
if alignment == 0:
|
||||
@@ -863,14 +873,13 @@ proc rawAlloc(a: var MemRegion, requestedSize: int, alignment: int = 0): pointer
|
||||
inc(a.allocCounter)
|
||||
sysAssert(allocInv(a), "rawAlloc: begin")
|
||||
sysAssert(roundup(65, 8) == 72, "rawAlloc: roundup broken")
|
||||
var size = roundup(requestedSize, MemAlign)
|
||||
var size = roundup(requestedSize, max(MemAlign, alignment))
|
||||
let alignOff = smallChunkAlignOffset(alignment)
|
||||
sysAssert(size >= sizeof(FreeCell), "rawAlloc: requested size too small")
|
||||
sysAssert(size >= requestedSize, "insufficient allocated size!")
|
||||
#c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
|
||||
|
||||
# For custom alignments > MemAlign, force big chunk allocation
|
||||
# Small chunks cannot handle arbitrary alignments due to fixed cell boundaries
|
||||
if size <= SmallChunkSize-smallChunkOverhead() and alignment == 0:
|
||||
if size + alignOff <= SmallChunkSize-smallChunkOverhead():
|
||||
template fetchSharedCells(tc: PSmallChunk) =
|
||||
# Consumes cells from (potentially) foreign threads from `a.sharedFreeLists[s]`
|
||||
when defined(gcDestructors):
|
||||
@@ -888,16 +897,19 @@ proc rawAlloc(a: var MemRegion, requestedSize: int, alignment: int = 0): pointer
|
||||
# allocate a small block: for small chunks, we use only its next pointer
|
||||
let s = size div MemAlign
|
||||
var c = a.freeSmallChunks[s]
|
||||
if c != nil and c.chunkAlignOff != alignOff.int32:
|
||||
c = nil
|
||||
if c == nil:
|
||||
# There is no free chunk of the requested size available, we need a new one.
|
||||
c = getSmallChunk(a)
|
||||
# init all fields in case memory didn't get zeroed
|
||||
c.freeList = nil
|
||||
c.foreignCells = 0
|
||||
c.chunkAlignOff = alignOff.int32
|
||||
sysAssert c.size == PageSize, "rawAlloc 3"
|
||||
c.size = size
|
||||
c.acc = size.uint32
|
||||
c.free = SmallChunkSize - smallChunkOverhead() - size.int32
|
||||
c.acc = (alignOff + size).uint32
|
||||
c.free = SmallChunkSize - smallChunkOverhead() - alignOff.int32 - size.int32
|
||||
sysAssert c.owner == addr(a), "rawAlloc: No owner set!"
|
||||
c.next = nil
|
||||
c.prev = nil
|
||||
@@ -908,7 +920,7 @@ proc rawAlloc(a: var MemRegion, requestedSize: int, alignment: int = 0): pointer
|
||||
# Because removals from `a.freeSmallChunks[s]` only happen in the other alloc branch and during dealloc,
|
||||
# we must not add it to the list if it cannot be used the next time a pointer of `size` bytes is needed.
|
||||
listAdd(a.freeSmallChunks[s], c)
|
||||
result = addr(c.data)
|
||||
result = addr(c.data) +! alignOff
|
||||
sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 4")
|
||||
else:
|
||||
# There is a free chunk of the requested size available, use it.
|
||||
@@ -950,7 +962,7 @@ proc rawAlloc(a: var MemRegion, requestedSize: int, alignment: int = 0): pointer
|
||||
sysAssert(allocInv(a), "rawAlloc: before listRemove test")
|
||||
listRemove(a.freeSmallChunks[s], c)
|
||||
sysAssert(allocInv(a), "rawAlloc: end listRemove test")
|
||||
sysAssert(((cast[int](result) and PageMask) - smallChunkOverhead()) %%
|
||||
sysAssert(((cast[int](result) and PageMask) - smallChunkOverhead() - c.chunkAlignOff) %%
|
||||
size == 0, "rawAlloc 21")
|
||||
sysAssert(allocInv(a), "rawAlloc: end small size")
|
||||
inc a.occ, size
|
||||
@@ -1018,7 +1030,7 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
|
||||
dec a.occ, s
|
||||
untrackSize(s)
|
||||
sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case A)"
|
||||
sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
|
||||
sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead() - c.chunkAlignOff) %%
|
||||
s == 0, "rawDealloc 3")
|
||||
when not defined(gcDestructors):
|
||||
#echo("setting to nil: ", $cast[int](addr(f.zeroField)))
|
||||
@@ -1029,7 +1041,8 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
|
||||
nimSetMem(cast[pointer](cast[int](p) +% sizeof(FreeCell)), -1'i32,
|
||||
s -% sizeof(FreeCell))
|
||||
let activeChunk = a.freeSmallChunks[s div MemAlign]
|
||||
if activeChunk != nil and c != activeChunk:
|
||||
if activeChunk != nil and c != activeChunk and
|
||||
activeChunk.chunkAlignOff == c.chunkAlignOff:
|
||||
# This pointer is not part of the active chunk, lend it out
|
||||
# and do not adjust the current chunk (same logic as compensateCounters.)
|
||||
# Put the cell into the active chunk,
|
||||
@@ -1076,7 +1089,7 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
|
||||
|
||||
when defined(gcDestructors):
|
||||
addToSharedFreeList(c, f, s div MemAlign)
|
||||
sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
|
||||
sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead() - c.chunkAlignOff) %%
|
||||
s == 0, "rawDealloc 2")
|
||||
else:
|
||||
# set to 0xff to check for usage after free bugs:
|
||||
@@ -1102,8 +1115,11 @@ when not defined(gcDestructors):
|
||||
var c = cast[PSmallChunk](c)
|
||||
var offset = (cast[int](p) and (PageSize-1)) -%
|
||||
smallChunkOverhead()
|
||||
result = (c.acc.int >% offset) and (offset %% c.size == 0) and
|
||||
(cast[ptr FreeCell](p).zeroField >% 1)
|
||||
if c.acc.int >% offset:
|
||||
let ao = c.chunkAlignOff.int
|
||||
result = (offset >= ao) and
|
||||
((offset -% ao) %% c.size == 0) and
|
||||
(cast[ptr FreeCell](p).zeroField >% 1)
|
||||
else:
|
||||
var c = cast[PBigChunk](c)
|
||||
# prev stores the aligned data pointer set during rawAlloc
|
||||
@@ -1122,11 +1138,12 @@ when not defined(gcDestructors):
|
||||
var c = cast[PSmallChunk](c)
|
||||
var offset = (cast[int](p) and (PageSize-1)) -%
|
||||
smallChunkOverhead()
|
||||
if c.acc.int >% offset:
|
||||
let ao = c.chunkAlignOff.int
|
||||
if c.acc.int >% offset and offset >= ao:
|
||||
sysAssert(cast[int](addr(c.data)) +% offset ==
|
||||
cast[int](p), "offset is not what you think it is")
|
||||
var d = cast[ptr FreeCell](cast[int](addr(c.data)) +%
|
||||
offset -% (offset %% c.size))
|
||||
ao +% ((offset -% ao) -% ((offset -% ao) %% c.size)))
|
||||
if d.zeroField >% 1:
|
||||
result = d
|
||||
sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
|
||||
@@ -1257,6 +1274,20 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
|
||||
proc alloc0Impl(size: Natural): pointer =
|
||||
result = alloc0(allocator, size)
|
||||
|
||||
when defined(gcOrc) or defined(gcYrc):
|
||||
proc nimAlignedAlloc0(size: Natural, alignment: int): pointer =
|
||||
incStat(allocCount)
|
||||
result = rawAlloc(allocator, size, alignment)
|
||||
zeroMem(result, size)
|
||||
|
||||
proc nimAlignedAlloc(size: Natural, alignment: int): pointer =
|
||||
incStat(allocCount)
|
||||
result = rawAlloc(allocator, size, alignment)
|
||||
|
||||
proc nimAlignedDealloc(p: pointer) =
|
||||
incStat(deallocCount)
|
||||
rawDealloc(allocator, p)
|
||||
|
||||
proc deallocImpl(p: pointer) =
|
||||
dealloc(allocator, p)
|
||||
|
||||
|
||||
@@ -92,12 +92,27 @@ else:
|
||||
when not defined(nimHasQuirky):
|
||||
{.pragma: quirky.}
|
||||
|
||||
# Forward declarations for native allocator alignment (implemented in alloc.nim).
|
||||
# rawAlloc's contract: result + sizeof(FreeCell) is alignment-aligned.
|
||||
# For ORC/YRC, sizeof(FreeCell) == sizeof(RefHeader).
|
||||
const useNativeAlignedAlloc = (defined(gcOrc) or defined(gcYrc)) and
|
||||
not defined(useMalloc) and not defined(nimscript) and
|
||||
not defined(nimdoc) and not defined(useNimRtl)
|
||||
|
||||
when useNativeAlignedAlloc:
|
||||
proc nimAlignedAlloc0(size: Natural, alignment: int): pointer {.gcsafe, raises: [].}
|
||||
proc nimAlignedAlloc(size: Natural, alignment: int): pointer {.gcsafe, raises: [].}
|
||||
proc nimAlignedDealloc(p: pointer) {.gcsafe, raises: [].}
|
||||
|
||||
proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
|
||||
let hdrSize = align(sizeof(RefHeader), alignment)
|
||||
let s = size +% hdrSize
|
||||
when defined(nimscript):
|
||||
when defined(nimscript) or defined(nimdoc):
|
||||
discard
|
||||
elif useNativeAlignedAlloc:
|
||||
let s = size +% sizeof(RefHeader)
|
||||
result = nimAlignedAlloc0(s, alignment) +! sizeof(RefHeader)
|
||||
else:
|
||||
let hdrSize = align(sizeof(RefHeader), alignment)
|
||||
let s = size +% hdrSize
|
||||
result = alignedAlloc0(s, alignment) +! hdrSize
|
||||
when defined(nimArcDebug) or defined(nimArcIds):
|
||||
head(result).refId = gRefId
|
||||
@@ -111,12 +126,14 @@ proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
|
||||
|
||||
proc nimNewObjUninit(size, alignment: int): pointer {.compilerRtl.} =
|
||||
# Same as 'newNewObj' but do not initialize the memory to zero.
|
||||
# The codegen proved for us that this is not necessary.
|
||||
let hdrSize = align(sizeof(RefHeader), alignment)
|
||||
let s = size + hdrSize
|
||||
when defined(nimscript):
|
||||
when defined(nimscript) or defined(nimdoc):
|
||||
discard
|
||||
elif useNativeAlignedAlloc:
|
||||
let s = size + sizeof(RefHeader)
|
||||
result = cast[ptr RefHeader](nimAlignedAlloc(s, alignment) +! sizeof(RefHeader))
|
||||
else:
|
||||
let hdrSize = align(sizeof(RefHeader), alignment)
|
||||
let s = size + hdrSize
|
||||
result = cast[ptr RefHeader](alignedAlloc(s, alignment) +! hdrSize)
|
||||
head(result).rc = 0
|
||||
when defined(gcOrc) or defined(gcYrc):
|
||||
@@ -189,8 +206,11 @@ proc nimRawDispose(p: pointer, alignment: int) {.compilerRtl.} =
|
||||
if freedCells.data == nil: init(freedCells)
|
||||
freedCells.incl head(p)
|
||||
else:
|
||||
let hdrSize = align(sizeof(RefHeader), alignment)
|
||||
alignedDealloc(p -! hdrSize, alignment)
|
||||
when useNativeAlignedAlloc:
|
||||
nimAlignedDealloc(p -! sizeof(RefHeader))
|
||||
else:
|
||||
let hdrSize = align(sizeof(RefHeader), alignment)
|
||||
alignedDealloc(p -! hdrSize, alignment)
|
||||
|
||||
template `=dispose`*[T](x: owned(ref T)) = nimRawDispose(cast[pointer](x), T.alignOf)
|
||||
#proc dispose*(x: pointer) = nimRawDispose(x)
|
||||
|
||||
23
tests/align/talign_heap.nim
Normal file
23
tests/align/talign_heap.nim
Normal file
@@ -0,0 +1,23 @@
|
||||
discard """
|
||||
matrix: "--mm:refc; --mm:orc; --mm:arc"
|
||||
targets: "c cpp"
|
||||
output: "ok"
|
||||
"""
|
||||
|
||||
# Test that heap-allocated objects with .align use small chunks,
|
||||
# not a big chunk per object (regression test for #25577).
|
||||
type U = object
|
||||
d {.align: 32.}: int8
|
||||
|
||||
var e: seq[ref U]
|
||||
for _ in 0 ..< 10000: e.add(new U)
|
||||
|
||||
# Without small-chunk alignment, each object gets its own page (~46 MB).
|
||||
# With the fix, 10000 objects fit in ~1-3 MB depending on the GC.
|
||||
doAssert getTotalMem() < 8 * 1024 * 1024, "align:32 heap objects use too much memory"
|
||||
|
||||
# Verify alignment is actually correct
|
||||
for i in 0 ..< e.len:
|
||||
doAssert (cast[int](addr e[i].d) and 31) == 0, "field not 32-byte aligned"
|
||||
|
||||
echo "ok"
|
||||
Reference in New Issue
Block a user