fixes #25457; make rawAlloc support alignment (#25476)

fixes https://github.com/nim-lang/Nim/issues/25457

Small chunks allocate memory in fixed-size cells. Each cell is
positioned at exact multiples of the cell size from the chunk's data
start, which makes it much harder to support alignment.

```nim
sysAssert c.size == size, "rawAlloc 6"
if c.freeList == nil:
  sysAssert(c.acc.int + smallChunkOverhead() + size <= SmallChunkSize,
            "rawAlloc 7")
  result = cast[pointer](cast[int](addr(c.data)) +% c.acc.int)
  inc(c.acc, size)
```

See also https://github.com/nim-lang/Nim/pull/12926 

When using big chunks, each allocation gets its own chunk.
This commit is contained in:
ringabout
2026-02-11 18:33:31 +08:00
committed by GitHub
parent c346a2b228
commit 94008531c1
5 changed files with 161 additions and 27 deletions

View File

@@ -477,7 +477,8 @@ iterator allObjects(m: var MemRegion): pointer {.inline.} =
a = a +% size
else:
let c = cast[PBigChunk](c)
yield addr(c.data)
# prev stores the aligned data pointer set during rawAlloc
yield cast[pointer](c.prev)
m.locked = false
proc iterToProc*(iter: typed, envType: typedesc; procName: untyped) {.
@@ -777,7 +778,10 @@ proc deallocBigChunk(a: var MemRegion, c: PBigChunk) =
sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case B)"
when not defined(gcDestructors):
a.deleted = getBottom(a)
del(a, a.root, cast[int](addr(c.data)))
# prev stores the aligned data pointer that was added to the AVL tree during allocation
del(a, a.root, cast[int](c.prev))
# Reset prev before freeing (required by listAdd assertions in freeBigChunk)
c.prev = nil
if c.size >= HugeChunkSize: freeHugeChunk(a, c)
else: freeBigChunk(a, c)
@@ -845,7 +849,14 @@ when defined(heaptrack):
proc heaptrack_malloc(a: pointer, size: int) {.cdecl, importc, dynlib: heaptrackLib.}
proc heaptrack_free(a: pointer) {.cdecl, importc, dynlib: heaptrackLib.}
proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
proc bigChunkAlignOffset(alignment: int): int {.inline.} =
  ## Number of padding bytes to insert so that big-chunk payload data
  ## (which lives after the BigChunk header and the Cell header) lands
  ## on an `alignment` boundary. Returns 0 when the allocator's default
  ## alignment (MemAlign) already suffices.
  if alignment <= MemAlign:
    0
  else:
    # Chunks are page-aligned, so the required padding is fully
    # determined by the fixed header size and the requested alignment.
    let headerSize = sizeof(BigChunk) + sizeof(Cell)
    align(headerSize, alignment) - headerSize
proc rawAlloc(a: var MemRegion, requestedSize: int, alignment: int = MemAlign): pointer =
when defined(nimTypeNames):
inc(a.allocCounter)
sysAssert(allocInv(a), "rawAlloc: begin")
@@ -855,7 +866,9 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
sysAssert(size >= requestedSize, "insufficient allocated size!")
#c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
if size <= SmallChunkSize-smallChunkOverhead():
# For custom alignments > MemAlign, force big chunk allocation
# Small chunks cannot handle arbitrary alignments due to fixed cell boundaries
if size <= SmallChunkSize-smallChunkOverhead() and alignment <= MemAlign:
template fetchSharedCells(tc: PSmallChunk) =
# Consumes cells from (potentially) foreign threads from `a.sharedFreeLists[s]`
when defined(gcDestructors):
@@ -950,13 +963,21 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
if deferredFrees != nil:
freeDeferredObjects(a, deferredFrees)
size = requestedSize + bigChunkOverhead() # roundup(requestedSize+bigChunkOverhead(), PageSize)
# For big chunks with custom alignment, allocate extra space.
# Since chunks are page-aligned, the needed padding is a compile-time
# deterministic value rather than a worst-case estimate.
let alignPad = bigChunkAlignOffset(alignment)
size = requestedSize + bigChunkOverhead() + alignPad
# allocate a large block
var c = if size >= HugeChunkSize: getHugeChunk(a, size)
else: getBigChunk(a, size)
sysAssert c.prev == nil, "rawAlloc 10"
sysAssert c.next == nil, "rawAlloc 11"
result = addr(c.data)
result = addr(c.data) +! alignPad
# Store the aligned data pointer in prev for deallocation and GC traversal.
# prev is unused while the chunk is allocated (next/prev are free-list links).
c.prev = cast[PBigChunk](result)
sysAssert((cast[int](c) and (MemAlign-1)) == 0, "rawAlloc 13")
sysAssert((cast[int](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
when not defined(gcDestructors):
@@ -1067,7 +1088,9 @@ when not defined(gcDestructors):
(cast[ptr FreeCell](p).zeroField >% 1)
else:
var c = cast[PBigChunk](c)
result = p == addr(c.data) and cast[ptr FreeCell](p).zeroField >% 1
# prev stores the aligned data pointer set during rawAlloc
let cellPtr = cast[pointer](c.prev)
result = p == cellPtr and cast[ptr FreeCell](p).zeroField >% 1
proc prepareForInteriorPointerChecking(a: var MemRegion) {.inline.} =
a.minLargeObj = lowGauge(a.root)
@@ -1091,7 +1114,8 @@ when not defined(gcDestructors):
sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
else:
var c = cast[PBigChunk](c)
var d = addr(c.data)
# prev stores the aligned data pointer set during rawAlloc
var d = cast[pointer](c.prev)
if p >= d and cast[ptr FreeCell](d).zeroField >% 1:
result = d
sysAssert isAllocatedPtr(a, result), " result wrong pointer!"
@@ -1104,7 +1128,8 @@ when not defined(gcDestructors):
if avlNode != nil:
var k = cast[pointer](avlNode.key)
var c = cast[PBigChunk](pageAddr(k))
sysAssert(addr(c.data) == k, " k is not the same as addr(c.data)!")
# prev stores the aligned data pointer (the AVL tree key)
sysAssert(cast[pointer](c.prev) == k, " k is not the aligned address!")
if cast[ptr FreeCell](k).zeroField >% 1:
result = k
sysAssert isAllocatedPtr(a, result), " result wrong pointer!"

View File

@@ -49,20 +49,6 @@ when defined(gcOrc) or defined(gcArc) or defined(gcAtomicArc) or defined(gcYrc):
when not declaredInScope(PageShift):
include bitmasks
else:
type
RefCount = int
Cell {.pure.} = object
refcount: RefCount # the refcount and some flags
typ: PNimType
when trackAllocationSource:
filename: cstring
line: int
when useCellIds:
id: int
PCell = ptr Cell
type
PPageDesc = ptr PageDesc

View File

@@ -458,9 +458,12 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer =
sysAssert(allocInv(gch.region), "rawNewObj begin")
gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1")
collectCT(gch)
var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
# Use alignment from typ.base if available, otherwise use MemAlign
let alignment = if typ.kind == tyRef and typ.base != nil: max(typ.base.align, MemAlign) else: MemAlign
var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell), alignment))
#gcAssert typ.kind in {tyString, tySequence} or size >= typ.base.size, "size too small"
gcAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
# Check that the user data (after the Cell header) is properly aligned
gcAssert((cast[int](cellToUsr(res)) and (alignment-1)) == 0, "newObj: 2")
# now it is buffered in the ZCT
res.typ = typ
setFrameInfo(res)
@@ -508,9 +511,12 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl, noinline, raise
collectCT(gch)
sysAssert(allocInv(gch.region), "newObjRC1 after collectCT")
var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell)))
# Use alignment from typ.base if available, otherwise use MemAlign
let alignment = if typ.base != nil: max(typ.base.align, MemAlign) else: MemAlign
var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell), alignment))
sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc")
sysAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2")
# Check that the user data (after the Cell header) is properly aligned
sysAssert((cast[int](cellToUsr(res)) and (alignment-1)) == 0, "newObj: 2")
# now it is buffered in the ZCT
res.typ = typ
setFrameInfo(res)

View File

@@ -38,6 +38,21 @@ type
PByte = ptr ByteArray
PString = ptr string
when not defined(nimV2):
type
RefCount = int
Cell {.pure.} = object
refcount: RefCount # the refcount and some flags
typ: PNimType
when trackAllocationSource:
filename: cstring
line: int
when useCellIds:
id: int
PCell = ptr Cell
when declared(IntsPerTrunk):
discard
else:

View File

@@ -1,5 +1,6 @@
discard """
ccodeCheck: "\\i @'NIM_ALIGN(128) NI mylocal1' .*"
matrix: "--mm:refc -d:useGcAssert -d:useSysAssert; --mm:orc"
targets: "c cpp"
output: "align ok"
"""
@@ -67,3 +68,104 @@ block: # bug #22419
f()()
# Regression tests for allocator alignment support (bug #25457):
# `new` must honor custom field alignments (refc GC included).
# NOTE(review): leading indentation was lost in this paste; Nim is
# indentation-sensitive, so the structure below is reconstructed from
# the statement syntax.

type Xxx = object
  v {.align: 128.}: byte

type Yyy = object
  v: byte
  v2: Xxx

for i in 0..<3:
  let x = new Yyy
  # echo "addr v2.v:", cast[uint](addr x.v2.v)
  doAssert cast[uint](addr x.v2.v) mod 128 == 0

let m = new Yyy
m.v2.v = 42
doAssert m.v2.v == 42
m.v = 7
doAssert m.v == 7

type
  MyType16 = object
    a {.align(16).}: int

var x: array[10, ref MyType16]
for q in 0..500:
  for i in 0..<x.len:
    new x[i]
    x[i].a = q
    doAssert(cast[int](x[i]) mod alignof(MyType16) == 0)

type
  MyType32 = object
    a{.align(32).}: int

var y: array[10, ref MyType32]
for q in 0..500:
  for i in 0..<y.len:
    new y[i]
    y[i].a = q
    doAssert(cast[int](y[i]) mod alignof(MyType32) == 0)

# Additional tests: allocate custom aligned objects using `new`
type
  MyType64 = object
    a{.align(64).}: int

var z: array[10, ref MyType64]
for q in 0..500:
  for i in 0..<z.len:
    new z[i]
    z[i].a = q
    doAssert(cast[int](z[i]) mod alignof(MyType64) == 0)

type
  MyType128 = object
    a{.align(128).}: int

var w: array[10, ref MyType128]
for q in 0..500:
  for i in 0..<w.len:
    new w[i]
    w[i].a = q
    doAssert(cast[int](w[i]) mod alignof(MyType128) == 0)

# Nested aligned-object tests
type
  Inner128 = object
    v {.align(128).}: byte
  OuterWithInner = object
    prefix: int
    inner: Inner128

var outerArr: array[8, ref OuterWithInner]
for q in 0..200:
  for i in 0..<outerArr.len:
    new outerArr[i]
    # write to inner to ensure it's allocated
    outerArr[i].inner.v = cast[byte](q and 0xFF)
    doAssert(cast[uint](addr outerArr[i].inner) mod uint(alignof(Inner128)) == 0)

# Nested two-level alignment
type
  DeepInner = object
    b {.align(128).}: int
  Mid = object
    di: DeepInner
  Top = object
    m: Mid

var topArr: array[4, ref Top]
for q in 0..100:
  for i in 0..<topArr.len:
    new topArr[i]
    topArr[i].m.di.b = q
    doAssert(cast[uint](addr topArr[i].m.di) mod uint(alignof(DeepInner)) == 0)