From 94008531c11eabc04751fa1f24583ad4e6282825 Mon Sep 17 00:00:00 2001 From: ringabout <43030857+ringabout@users.noreply.github.com> Date: Wed, 11 Feb 2026 18:33:31 +0800 Subject: [PATCH] fixes #25457; make rawAlloc support alignment (#25476) fixes https://github.com/nim-lang/Nim/issues/25457 Small chunks allocate memory in fixed-size cells. Each cell is positioned at exact multiples of the cell size from the chunk's data start, which makes it much harder to support alignment. ```nim sysAssert c.size == size, "rawAlloc 6" if c.freeList == nil: sysAssert(c.acc.int + smallChunkOverhead() + size <= SmallChunkSize, "rawAlloc 7") result = cast[pointer](cast[int](addr(c.data)) +% c.acc.int) inc(c.acc, size) ``` See also https://github.com/nim-lang/Nim/pull/12926 When using big chunks, each allocation gets its own chunk --- lib/system/alloc.nim | 43 +++++++++++++---- lib/system/cellsets.nim | 14 ------ lib/system/gc.nim | 14 ++++-- lib/system/mmdisp.nim | 15 ++++++ tests/align/talign.nim | 102 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 161 insertions(+), 27 deletions(-) diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim index 8a29b3bf30..4130ad8cca 100644 --- a/lib/system/alloc.nim +++ b/lib/system/alloc.nim @@ -477,7 +477,8 @@ iterator allObjects(m: var MemRegion): pointer {.inline.} = a = a +% size else: let c = cast[PBigChunk](c) - yield addr(c.data) + # prev stores the aligned data pointer set during rawAlloc + yield cast[pointer](c.prev) m.locked = false proc iterToProc*(iter: typed, envType: typedesc; procName: untyped) {. 
@@ -777,7 +778,10 @@ proc deallocBigChunk(a: var MemRegion, c: PBigChunk) = sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case B)" when not defined(gcDestructors): a.deleted = getBottom(a) - del(a, a.root, cast[int](addr(c.data))) + # prev stores the aligned data pointer that was added to the AVL tree during allocation + del(a, a.root, cast[int](c.prev)) + # Reset prev before freeing (required by listAdd assertions in freeBigChunk) + c.prev = nil if c.size >= HugeChunkSize: freeHugeChunk(a, c) else: freeBigChunk(a, c) @@ -845,7 +849,14 @@ when defined(heaptrack): proc heaptrack_malloc(a: pointer, size: int) {.cdecl, importc, dynlib: heaptrackLib.} proc heaptrack_free(a: pointer) {.cdecl, importc, dynlib: heaptrackLib.} -proc rawAlloc(a: var MemRegion, requestedSize: int): pointer = +proc bigChunkAlignOffset(alignment: int): int {.inline.} = + ## Compute the alignment offset for big chunk data. + if alignment <= MemAlign: + result = 0 + else: + result = align(sizeof(BigChunk) + sizeof(Cell), alignment) - sizeof(BigChunk) - sizeof(Cell) + +proc rawAlloc(a: var MemRegion, requestedSize: int, alignment: int = MemAlign): pointer = when defined(nimTypeNames): inc(a.allocCounter) sysAssert(allocInv(a), "rawAlloc: begin") @@ -855,7 +866,9 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer = sysAssert(size >= requestedSize, "insufficient allocated size!") #c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size) - if size <= SmallChunkSize-smallChunkOverhead(): + # For custom alignments > MemAlign, force big chunk allocation + # Small chunks cannot handle arbitrary alignments due to fixed cell boundaries + if size <= SmallChunkSize-smallChunkOverhead() and alignment <= MemAlign: template fetchSharedCells(tc: PSmallChunk) = # Consumes cells from (potentially) foreign threads from `a.sharedFreeLists[s]` when defined(gcDestructors): @@ -950,13 +963,21 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer = if deferredFrees != nil: 
freeDeferredObjects(a, deferredFrees) - size = requestedSize + bigChunkOverhead() # roundup(requestedSize+bigChunkOverhead(), PageSize) + # For big chunks with custom alignment, allocate extra space. + # Since chunks are page-aligned, the needed padding is a compile-time + # deterministic value rather than a worst-case estimate. + let alignPad = bigChunkAlignOffset(alignment) + size = requestedSize + bigChunkOverhead() + alignPad # allocate a large block var c = if size >= HugeChunkSize: getHugeChunk(a, size) else: getBigChunk(a, size) sysAssert c.prev == nil, "rawAlloc 10" sysAssert c.next == nil, "rawAlloc 11" - result = addr(c.data) + result = addr(c.data) +! alignPad + # Store the aligned data pointer in prev for deallocation and GC traversal. + # prev is unused while the chunk is allocated (next/prev are free-list links). + c.prev = cast[PBigChunk](result) + sysAssert((cast[int](c) and (MemAlign-1)) == 0, "rawAlloc 13") sysAssert((cast[int](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary") when not defined(gcDestructors): @@ -1067,7 +1088,9 @@ when not defined(gcDestructors): (cast[ptr FreeCell](p).zeroField >% 1) else: var c = cast[PBigChunk](c) - result = p == addr(c.data) and cast[ptr FreeCell](p).zeroField >% 1 + # prev stores the aligned data pointer set during rawAlloc + let cellPtr = cast[pointer](c.prev) + result = p == cellPtr and cast[ptr FreeCell](p).zeroField >% 1 proc prepareForInteriorPointerChecking(a: var MemRegion) {.inline.} = a.minLargeObj = lowGauge(a.root) @@ -1091,7 +1114,8 @@ when not defined(gcDestructors): sysAssert isAllocatedPtr(a, result), " result wrong pointer!" else: var c = cast[PBigChunk](c) - var d = addr(c.data) + # prev stores the aligned data pointer set during rawAlloc + var d = cast[pointer](c.prev) if p >= d and cast[ptr FreeCell](d).zeroField >% 1: result = d sysAssert isAllocatedPtr(a, result), " result wrong pointer!" 
@@ -1104,7 +1128,8 @@ when not defined(gcDestructors): if avlNode != nil: var k = cast[pointer](avlNode.key) var c = cast[PBigChunk](pageAddr(k)) - sysAssert(addr(c.data) == k, " k is not the same as addr(c.data)!") + # prev stores the aligned data pointer (the AVL tree key) + sysAssert(cast[pointer](c.prev) == k, " k is not the aligned address!") if cast[ptr FreeCell](k).zeroField >% 1: result = k sysAssert isAllocatedPtr(a, result), " result wrong pointer!" diff --git a/lib/system/cellsets.nim b/lib/system/cellsets.nim index f8b757b460..80f0367019 100644 --- a/lib/system/cellsets.nim +++ b/lib/system/cellsets.nim @@ -49,20 +49,6 @@ when defined(gcOrc) or defined(gcArc) or defined(gcAtomicArc) or defined(gcYrc): when not declaredInScope(PageShift): include bitmasks -else: - type - RefCount = int - - Cell {.pure.} = object - refcount: RefCount # the refcount and some flags - typ: PNimType - when trackAllocationSource: - filename: cstring - line: int - when useCellIds: - id: int - - PCell = ptr Cell type PPageDesc = ptr PageDesc diff --git a/lib/system/gc.nim b/lib/system/gc.nim index 3942e5eb7f..4b02b2f257 100644 --- a/lib/system/gc.nim +++ b/lib/system/gc.nim @@ -458,9 +458,12 @@ proc rawNewObj(typ: PNimType, size: int, gch: var GcHeap): pointer = sysAssert(allocInv(gch.region), "rawNewObj begin") gcAssert(typ.kind in {tyRef, tyString, tySequence}, "newObj: 1") collectCT(gch) - var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell))) + # Use alignment from typ.base if available, otherwise use MemAlign + let alignment = if typ.kind == tyRef and typ.base != nil: max(typ.base.align, MemAlign) else: MemAlign + var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell), alignment)) #gcAssert typ.kind in {tyString, tySequence} or size >= typ.base.size, "size too small" - gcAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2") + # Check that the user data (after the Cell header) is properly aligned + gcAssert((cast[int](cellToUsr(res)) and 
(alignment-1)) == 0, "newObj: 2") # now it is buffered in the ZCT res.typ = typ setFrameInfo(res) @@ -508,9 +511,12 @@ proc newObjRC1(typ: PNimType, size: int): pointer {.compilerRtl, noinline, raise collectCT(gch) sysAssert(allocInv(gch.region), "newObjRC1 after collectCT") - var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell))) + # Use alignment from typ.base if available, otherwise use MemAlign + let alignment = if typ.base != nil: max(typ.base.align, MemAlign) else: MemAlign + var res = cast[PCell](rawAlloc(gch.region, size + sizeof(Cell), alignment)) sysAssert(allocInv(gch.region), "newObjRC1 after rawAlloc") - sysAssert((cast[int](res) and (MemAlign-1)) == 0, "newObj: 2") + # Check that the user data (after the Cell header) is properly aligned + sysAssert((cast[int](cellToUsr(res)) and (alignment-1)) == 0, "newObj: 2") # now it is buffered in the ZCT res.typ = typ setFrameInfo(res) diff --git a/lib/system/mmdisp.nim b/lib/system/mmdisp.nim index 7fd61e0dc3..ce935ff8af 100644 --- a/lib/system/mmdisp.nim +++ b/lib/system/mmdisp.nim @@ -38,6 +38,21 @@ type PByte = ptr ByteArray PString = ptr string +when not defined(nimV2): + type + RefCount = int + + Cell {.pure.} = object + refcount: RefCount # the refcount and some flags + typ: PNimType + when trackAllocationSource: + filename: cstring + line: int + when useCellIds: + id: int + + PCell = ptr Cell + when declared(IntsPerTrunk): discard else: diff --git a/tests/align/talign.nim b/tests/align/talign.nim index 08373ee497..6397e31214 100644 --- a/tests/align/talign.nim +++ b/tests/align/talign.nim @@ -1,5 +1,6 @@ discard """ ccodeCheck: "\\i @'NIM_ALIGN(128) NI mylocal1' .*" +matrix: "--mm:refc -d:useGcAssert -d:useSysAssert; --mm:orc" targets: "c cpp" output: "align ok" """ @@ -67,3 +68,104 @@ block: # bug #22419 f()() + +type Xxx = object + v {.align: 128.}: byte + +type Yyy = object + v: byte + v2: Xxx + +for i in 0..<3: + let x = new Yyy + # echo "addr v2.v:", cast[uint](addr x.v2.v) + doAssert 
cast[uint](addr x.v2.v) mod 128 == 0 + +let m = new Yyy +m.v2.v = 42 +doAssert m.v2.v == 42 +m.v = 7 +doAssert m.v == 7 + + +type + MyType16 = object + a {.align(16).}: int + + +var x: array[10, ref MyType16] +for q in 0..500: + for i in 0..