threaded alloc (#20492)
* allocator: catch up with multi-threading techniques
* removed the global thread lock
* more atomics for fun and profit
* added important sysAssert
* stats remain thread local and don't have to be atomic
* undo split chunk optimizations in the hope it makes the CI happy
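The gist of the scheme, as a hedged sketch rather than the allocator's actual code: every chunk now records its owning MemRegion; a free performed by the owning thread takes the ordinary non-atomic path, while a free from any other thread only pushes the cell onto a lock-free shared list that the owner drains on a later allocation. The snippet below illustrates that idea with std/atomics and made-up names (Cell, Region, freeLocal, freeForeign, drain); it is not taken from the diff.

# Hypothetical sketch only -- names are illustrative, not the allocator's identifiers.
import std/atomics

type
  Cell = object
    next: ptr Cell                     # intrusive singly linked free list
  Region = object
    freeList: ptr Cell                 # touched only by the owning thread
    sharedFreeList: Atomic[ptr Cell]   # foreign threads push here

proc freeLocal(r: var Region; c: ptr Cell) =
  # owning thread: plain, non-atomic push
  c.next = r.freeList
  r.freeList = c

proc freeForeign(r: var Region; c: ptr Cell) =
  # foreign thread: lock-free push onto the shared list (Treiber-stack style)
  var old = r.sharedFreeList.load(moRelaxed)
  while true:
    c.next = old
    if r.sharedFreeList.compareExchangeWeak(old, c, moRelease, moRelaxed):
      break

proc drain(r: var Region) =
  # owning thread: detach the whole shared list at once, then merge it
  var it = r.sharedFreeList.exchange(nil, moAcquire)
  while it != nil:
    let nxt = it.next
    freeLocal(r, it)
    it = nxt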
@@ -44,44 +44,6 @@ type
   IntSet = object
     data: TrunkBuckets

-type
-  FreeCell {.final, pure.} = object
-    next: ptr FreeCell  # next free cell in chunk (overlaid with refcount)
-    when not defined(gcDestructors):
-      zeroField: int # 0 means cell is not used (overlaid with typ field)
-                     # 1 means cell is manually managed pointer
-                     # otherwise a PNimType is stored in there
-    else:
-      alignment: int
-
-  PChunk = ptr BaseChunk
-  PBigChunk = ptr BigChunk
-  PSmallChunk = ptr SmallChunk
-  BaseChunk {.pure, inheritable.} = object
-    prevSize: int      # size of previous chunk; for coalescing
-                       # 0th bit == 1 if 'used'
-    size: int          # if < PageSize it is a small chunk
-
-  SmallChunk = object of BaseChunk
-    next, prev: PSmallChunk  # chunks of the same size
-    freeList: ptr FreeCell
-    free: int            # how many bytes remain
-    acc: int             # accumulator for small object allocation
-    when defined(nimAlignPragma):
-      data {.align: MemAlign.}: UncheckedArray[byte]  # start of usable memory
-    else:
-      data: UncheckedArray[byte]
-
-  BigChunk = object of BaseChunk  # not necessarily > PageSize!
-    next, prev: PBigChunk    # chunks of the same (or bigger) size
-    when defined(nimAlignPragma):
-      data {.align: MemAlign.}: UncheckedArray[byte]  # start of usable memory
-    else:
-      data: UncheckedArray[byte]
-
-template smallChunkOverhead(): untyped = sizeof(SmallChunk)
-template bigChunkOverhead(): untyped = sizeof(BigChunk)
-
 # ------------- chunk table ---------------------------------------------------
 # We use a PtrSet of chunk starts and a table[Page, chunksize] for chunk
 # endings of big chunks. This is needed by the merging operation. The only
@@ -102,6 +64,47 @@ type
     key, upperBound: int
     level: int

+const
+  RegionHasLock = false # hasThreadSupport and defined(gcDestructors)
+
+type
+  FreeCell {.final, pure.} = object
+    next: ptr FreeCell  # next free cell in chunk (overlaid with refcount)
+    when not defined(gcDestructors):
+      zeroField: int # 0 means cell is not used (overlaid with typ field)
+                     # 1 means cell is manually managed pointer
+                     # otherwise a PNimType is stored in there
+    else:
+      alignment: int
+
+  PChunk = ptr BaseChunk
+  PBigChunk = ptr BigChunk
+  PSmallChunk = ptr SmallChunk
+  BaseChunk {.pure, inheritable.} = object
+    prevSize: int      # size of previous chunk; for coalescing
+                       # 0th bit == 1 if 'used'
+    size: int          # if < PageSize it is a small chunk
+    owner: ptr MemRegion
+
+  SmallChunk = object of BaseChunk
+    next, prev: PSmallChunk  # chunks of the same size
+    freeList: ptr FreeCell
+    free: int            # how many bytes remain
+    acc: int             # accumulator for small object allocation
+    when defined(gcDestructors):
+      sharedFreeList: ptr FreeCell # make no attempt at avoiding false sharing for now for this object field
+    when defined(nimAlignPragma):
+      data {.align: MemAlign.}: UncheckedArray[byte]  # start of usable memory
+    else:
+      data: UncheckedArray[byte]
+
+  BigChunk = object of BaseChunk  # not necessarily > PageSize!
+    next, prev: PBigChunk    # chunks of the same (or bigger) size
+    when defined(nimAlignPragma):
+      data {.align: MemAlign.}: UncheckedArray[byte]  # start of usable memory
+    else:
+      data: UncheckedArray[byte]
+
   HeapLinks = object
     len: int
     chunks: array[30, (PBigChunk, int)]
@@ -117,10 +120,15 @@ type
     llmem: PLLChunk
     currMem, maxMem, freeMem, occ: int # memory sizes (allocated from OS)
     lastSize: int # needed for the case that OS gives us pages linearly
+    when RegionHasLock:
+      lock: SysLock
+    when defined(gcDestructors):
+      sharedFreeListBigChunks: PBigChunk # make no attempt at avoiding false sharing for now for this object field
+
     chunkStarts: IntSet
     when not defined(gcDestructors):
       root, deleted, last, freeAvlNodes: PAvlNode
-    locked, blockChunkSizeIncrease: bool # if locked, we cannot free pages.
+    lockActive, locked, blockChunkSizeIncrease: bool # if locked, we cannot free pages.
     nextChunkSize: int
     when not defined(gcDestructors):
       bottomData: AvlNode
@@ -128,6 +136,24 @@ type
     when defined(nimTypeNames):
       allocCounter, deallocCounter: int

+template smallChunkOverhead(): untyped = sizeof(SmallChunk)
+template bigChunkOverhead(): untyped = sizeof(BigChunk)
+
+when hasThreadSupport:
+  template loada(x: untyped): untyped = atomicLoadN(unsafeAddr x, ATOMIC_RELAXED)
+  template storea(x, y: untyped) = atomicStoreN(unsafeAddr x, y, ATOMIC_RELAXED)
+
+  when false:
+    # not yet required
+    template atomicStatDec(x, diff: untyped) = discard atomicSubFetch(unsafeAddr x, diff, ATOMIC_RELAXED)
+    template atomicStatInc(x, diff: untyped) = discard atomicAddFetch(unsafeAddr x, diff, ATOMIC_RELAXED)
+else:
+  template loada(x: untyped): untyped = x
+  template storea(x, y: untyped) = x = y
+
+template atomicStatDec(x, diff: untyped) = dec x, diff
+template atomicStatInc(x, diff: untyped) = inc x, diff
+
 const
   fsLookupTable: array[byte, int8] = [
     -1'i8, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
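For reference, the loada/storea templates above wrap GCC's __atomic_load_n/__atomic_store_n with relaxed ordering and fall back to plain reads and writes in single-threaded builds; the statistics counters keep using ordinary inc/dec because, as the commit message notes, they stay thread-local. Roughly the same relaxed load/store idea expressed with std/atomics, purely for illustration and not part of the diff:

# Illustrative only -- the diff calls the GCC builtins directly.
import std/atomics

template loada(x: untyped): untyped = x.load(moRelaxed)
template storea(x, y: untyped) = x.store(y, moRelaxed)

var head: Atomic[int]
storea(head, 42)
echo loada(head)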
@@ -234,11 +260,11 @@ proc addChunkToMatrix(a: var MemRegion; b: PBigChunk) =
   setBit(fl, a.flBitmap)

 proc incCurrMem(a: var MemRegion, bytes: int) {.inline.} =
-  inc(a.currMem, bytes)
+  atomicStatInc(a.currMem, bytes)

 proc decCurrMem(a: var MemRegion, bytes: int) {.inline.} =
   a.maxMem = max(a.maxMem, a.currMem)
-  dec(a.currMem, bytes)
+  atomicStatDec(a.currMem, bytes)

 proc getMaxMem(a: var MemRegion): int =
   # Since we update maxPagesCount only when freeing pages,
@@ -536,12 +562,13 @@ proc splitChunk2(a: var MemRegion, c: PBigChunk, size: int): PBigChunk =
   result = cast[PBigChunk](cast[ByteAddress](c) +% size)
   result.size = c.size - size
   track("result.size", addr result.size, sizeof(int))
-  # XXX check if these two nil assignments are dead code given
-  # addChunkToMatrix's implementation:
-  result.next = nil
-  result.prev = nil
+  when not defined(nimOptimizedSplitChunk):
+    # still active because of weird codegen issue on some of our CIs:
+    result.next = nil
+    result.prev = nil
   # size and not used:
   result.prevSize = size
+  result.owner = addr a
   sysAssert((size and 1) == 0, "splitChunk 2")
   sysAssert((size and PageMask) == 0,
       "splitChunk: size is not a multiple of the PageSize")
@@ -572,6 +599,9 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
       c = cast[PBigChunk](le)
       if c.size > MaxBigChunkSize:
         let rest = splitChunk2(a, c, MaxBigChunkSize)
+        when defined(nimOptimizedSplitChunk):
+          rest.next = nil
+          rest.prev = nil
         addChunkToMatrix(a, c)
         c = rest
   when coalescRight:
@@ -596,6 +626,13 @@ proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
   mappingSearch(size, fl, sl)
   sysAssert((size and PageMask) == 0, "getBigChunk: unaligned chunk")
   result = findSuitableBlock(a, fl, sl)
+
+  when RegionHasLock:
+    if not a.lockActive:
+      a.lockActive = true
+      initSysLock(a.lock)
+    acquireSys a.lock
+
   if result == nil:
     if size < nimMinHeapPages * PageSize:
       result = requestOsChunks(a, nimMinHeapPages * PageSize)
@@ -605,6 +642,7 @@ proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
     # if we over allocated split the chunk:
     if result.size > size:
       splitChunk(a, result, size)
+    result.owner = addr a
   else:
     removeChunkFromMatrix2(a, result, fl, sl)
     if result.size >= size + PageSize:
@@ -612,12 +650,20 @@ proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
   # set 'used' to true:
   result.prevSize = 1
   track("setUsedToFalse", addr result.size, sizeof(int))
+  sysAssert result.owner == addr a, "getBigChunk: No owner set!"

   incl(a, a.chunkStarts, pageIndex(result))
   dec(a.freeMem, size)
+  when RegionHasLock:
+    releaseSys a.lock

 proc getHugeChunk(a: var MemRegion; size: int): PBigChunk =
   result = cast[PBigChunk](osAllocPages(size))
+  when RegionHasLock:
+    if not a.lockActive:
+      a.lockActive = true
+      initSysLock(a.lock)
+    acquireSys a.lock
   incCurrMem(a, size)
   # XXX add this to the heap links. But also remove it from it later.
   when false: a.addHeapLink(result, size)
@@ -627,7 +673,10 @@ proc getHugeChunk(a: var MemRegion; size: int): PBigChunk =
   result.size = size
   # set 'used' to true:
   result.prevSize = 1
+  result.owner = addr a
   incl(a, a.chunkStarts, pageIndex(result))
+  when RegionHasLock:
+    releaseSys a.lock

 proc freeHugeChunk(a: var MemRegion; c: PBigChunk) =
   let size = c.size
@@ -691,6 +740,69 @@ else:
   template trackSize(x) = discard
   template untrackSize(x) = discard

+proc deallocBigChunk(a: var MemRegion, c: PBigChunk) =
+  when RegionHasLock:
+    acquireSys a.lock
+  dec a.occ, c.size
+  untrackSize(c.size)
+  sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case B)"
+  when not defined(gcDestructors):
+    a.deleted = getBottom(a)
+    del(a, a.root, cast[int](addr(c.data)))
+  if c.size >= HugeChunkSize: freeHugeChunk(a, c)
+  else: freeBigChunk(a, c)
+  when RegionHasLock:
+    releaseSys a.lock
+
+when defined(gcDestructors):
+  template atomicPrepend(head, elem: untyped) =
+    # see also https://en.cppreference.com/w/cpp/atomic/atomic_compare_exchange
+    while true:
+      elem.next.storea head.loada
+      if atomicCompareExchangeN(addr head, addr elem.next, elem, weak = true, ATOMIC_RELEASE, ATOMIC_RELAXED):
+        break
+
+  proc addToSharedFreeListBigChunks(a: var MemRegion; c: PBigChunk) {.inline.} =
+    sysAssert c.next == nil, "c.next pointer must be nil"
+    atomicPrepend a.sharedFreeListBigChunks, c
+
+  proc addToSharedFreeList(c: PSmallChunk; f: ptr FreeCell) {.inline.} =
+    atomicPrepend c.sharedFreeList, f
+
+  proc compensateCounters(a: var MemRegion; c: PSmallChunk; size: int) =
+    # rawDealloc did NOT do the usual:
+    # `inc(c.free, size); dec(a.occ, size)` because it wasn't the owner of these
+    # memory locations. We have to compensate here for these for the entire list.
+    # Well, not for the entire list, but for `max` elements of the list because
+    # we split the list in order to achieve bounded response times.
+    var it = c.freeList
+    var x = 0
+    var maxIters = 20 # make it time-bounded
+    while it != nil:
+      if maxIters == 0:
+        let rest = it.next.loada
+        it.next.storea nil
+        addToSharedFreeList(c, rest)
+        break
+      inc x, size
+      it = it.next.loada
+      dec maxIters
+    inc(c.free, x)
+    dec(a.occ, x)
+
+  proc freeDeferredObjects(a: var MemRegion; root: PBigChunk) =
+    var it = root
+    var maxIters = 20 # make it time-bounded
+    while true:
+      if maxIters == 0:
+        let rest = it.next.loada
+        it.next.storea nil
+        addToSharedFreeListBigChunks(a, rest)
+        break
+      it = it.next.loada
+      dec maxIters
+      if it == nil: break
+
 proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
   when defined(nimTypeNames):
     inc(a.allocCounter)
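The maxIters pattern in compensateCounters and freeDeferredObjects above keeps each drain time-bounded: only a fixed prefix of the deferred list is handled per call and the remainder is pushed back onto the shared list. A simplified, stand-alone sketch of that pattern follows; the names (Node, shared, drainBounded) are hypothetical and the diff's own bookkeeping differs in detail.

# Hypothetical sketch of a time-bounded drain of a lock-free deferred list.
import std/atomics

type Node = object
  next: ptr Node

var shared: Atomic[ptr Node]   # stand-in for a region's shared free list

proc drainBounded(maxIters: int) =
  # detach the whole list, recycle at most maxIters cells, splice the rest back
  var it = shared.exchange(nil, moAcquire)
  var budget = maxIters
  while it != nil and budget > 0:
    let nxt = it.next
    # ... recycle `it` here (return it to the local free list, fix counters) ...
    it = nxt
    dec budget
  if it != nil:
    # `it` heads the unprocessed tail; push the whole chain back with one CAS
    var tail = it
    while tail.next != nil: tail = tail.next
    var old = shared.load(moRelaxed)
    while true:
      tail.next = old
      if shared.compareExchangeWeak(old, it, moRelease, moRelaxed):
        break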
@@ -703,7 +815,7 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
   #c_fprintf(stdout, "alloc; size: %ld; %ld\n", requestedSize, size)
   if size <= SmallChunkSize-smallChunkOverhead():
     # allocate a small block: for small chunks, we use only its next pointer
-    var s = size div MemAlign
+    let s = size div MemAlign
     var c = a.freeSmallChunks[s]
     if c == nil:
       c = getSmallChunk(a)
@@ -711,7 +823,10 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
       sysAssert c.size == PageSize, "rawAlloc 3"
       c.size = size
       c.acc = size
+      when defined(gcDestructors):
+        c.sharedFreeList = nil
       c.free = SmallChunkSize - smallChunkOverhead() - size
+      sysAssert c.owner == addr(a), "rawAlloc: No owner set!"
       c.next = nil
       c.prev = nil
       listAdd(a.freeSmallChunks[s], c)
@@ -723,6 +838,10 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
       #if c.size != size:
       #  c_fprintf(stdout, "csize: %lld; size %lld\n", c.size, size)
       sysAssert c.size == size, "rawAlloc 6"
+      when defined(gcDestructors):
+        if c.freeList == nil:
+          c.freeList = atomicExchangeN(addr c.sharedFreeList, nil, ATOMIC_RELAXED)
+          compensateCounters(a, c, size)
       if c.freeList == nil:
         sysAssert(c.acc + smallChunkOverhead() + size <= SmallChunkSize,
                   "rawAlloc 7")
@@ -747,6 +866,11 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
     inc a.occ, size
     trackSize(c.size)
   else:
+    when defined(gcDestructors):
+      let deferredFrees = atomicExchangeN(addr a.sharedFreeListBigChunks, nil, ATOMIC_RELAXED)
+      if deferredFrees != nil:
+        freeDeferredObjects(a, deferredFrees)
+
     size = requestedSize + bigChunkOverhead() #  roundup(requestedSize+bigChunkOverhead(), PageSize)
     # allocate a large block
     var c = if size >= HugeChunkSize: getHugeChunk(a, size)
@@ -779,48 +903,52 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
     # `p` is within a small chunk:
     var c = cast[PSmallChunk](c)
     var s = c.size
-    dec a.occ, s
-    untrackSize(s)
-    sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case A)"
-    sysAssert(((cast[ByteAddress](p) and PageMask) - smallChunkOverhead()) %%
-               s == 0, "rawDealloc 3")
+    #       ^ We might access thread foreign storage here.
+    # The other thread cannot possibly free this block as it's still alive.
     var f = cast[ptr FreeCell](p)
-    when not defined(gcDestructors):
-      #echo("setting to nil: ", $cast[ByteAddress](addr(f.zeroField)))
-      sysAssert(f.zeroField != 0, "rawDealloc 1")
-      f.zeroField = 0
-    f.next = c.freeList
-    c.freeList = f
-    when overwriteFree:
-      # set to 0xff to check for usage after free bugs:
-      nimSetMem(cast[pointer](cast[int](p) +% sizeof(FreeCell)), -1'i32,
-                s -% sizeof(FreeCell))
-    # check if it is not in the freeSmallChunks[s] list:
-    if c.free < s:
-      # add it to the freeSmallChunks[s] array:
-      listAdd(a.freeSmallChunks[s div MemAlign], c)
-      inc(c.free, s)
+    if c.owner == addr(a):
+      # We own the block, there is no foreign thread involved.
+      dec a.occ, s
+      untrackSize(s)
+      sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case A)"
+      sysAssert(((cast[ByteAddress](p) and PageMask) - smallChunkOverhead()) %%
+                 s == 0, "rawDealloc 3")
+      when not defined(gcDestructors):
+        #echo("setting to nil: ", $cast[ByteAddress](addr(f.zeroField)))
+        sysAssert(f.zeroField != 0, "rawDealloc 1")
+        f.zeroField = 0
+      f.next = c.freeList
+      c.freeList = f
+      when overwriteFree:
+        # set to 0xff to check for usage after free bugs:
+        nimSetMem(cast[pointer](cast[int](p) +% sizeof(FreeCell)), -1'i32,
+                  s -% sizeof(FreeCell))
+      # check if it is not in the freeSmallChunks[s] list:
+      if c.free < s:
+        # add it to the freeSmallChunks[s] array:
+        listAdd(a.freeSmallChunks[s div MemAlign], c)
+        inc(c.free, s)
+      else:
+        inc(c.free, s)
+        if c.free == SmallChunkSize-smallChunkOverhead():
+          listRemove(a.freeSmallChunks[s div MemAlign], c)
+          c.size = SmallChunkSize
+          freeBigChunk(a, cast[PBigChunk](c))
     else:
-      inc(c.free, s)
-      if c.free == SmallChunkSize-smallChunkOverhead():
-        listRemove(a.freeSmallChunks[s div MemAlign], c)
-        c.size = SmallChunkSize
-        freeBigChunk(a, cast[PBigChunk](c))
+      when defined(gcDestructors):
+        addToSharedFreeList(c, f)
     sysAssert(((cast[ByteAddress](p) and PageMask) - smallChunkOverhead()) %%
                s == 0, "rawDealloc 2")
   else:
     # set to 0xff to check for usage after free bugs:
     when overwriteFree: nimSetMem(p, -1'i32, c.size -% bigChunkOverhead())
     # free big chunk
     var c = cast[PBigChunk](c)
-    dec a.occ, c.size
-    untrackSize(c.size)
-    sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case B)"
-    when not defined(gcDestructors):
-      a.deleted = getBottom(a)
-      del(a, a.root, cast[int](addr(c.data)))
-    if c.size >= HugeChunkSize: freeHugeChunk(a, c)
-    else: freeBigChunk(a, c)
+    when defined(gcDestructors):
+      if c.owner == addr(a):
+        deallocBigChunk(a, cast[PBigChunk](c))
+      else:
+        addToSharedFreeListBigChunks(c.owner[], cast[PBigChunk](c))
+    else:
+      deallocBigChunk(a, cast[PBigChunk](c))
   sysAssert(allocInv(a), "rawDealloc: end")
   when logAlloc: cprintf("dealloc(pointer_%p)\n", p)

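What the owner check above buys in practice: a block may now be freed from a thread other than the one that allocated it, with the foreign free merely deferred onto the owner's shared list. A hedged usage sketch, assuming --mm:arc or --mm:orc with threads enabled (which is what gcDestructors corresponds to); it is not taken from the commit.

# Hedged usage sketch; relies on the cross-thread dealloc behaviour described above.
var p: pointer

proc worker() {.thread.} =
  # foreign thread: this free is pushed onto the owning region's shared free list
  dealloc(p)

proc main() =
  p = alloc(64)            # owned by the main thread's MemRegion
  var t: Thread[void]
  createThread(t, worker)
  joinThread(t)
  let q = alloc(8)         # a later allocation lets the owner drain deferred frees
  dealloc(q)

main()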
@@ -1002,7 +1130,7 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
       inc(result, it.size)
       it = it.next

-  when hasThreadSupport:
+  when hasThreadSupport and not defined(gcDestructors):
     proc addSysExitProc(quitProc: proc() {.noconv.}) {.importc: "atexit", header: "<stdlib.h>".}

     var sharedHeap: MemRegion
@@ -1012,36 +1140,16 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =

   proc getFreeMem(): int =
     #sysAssert(result == countFreeMem())
-    when hasThreadSupport and defined(gcDestructors):
-      acquireSys(heapLock)
-      result = sharedHeap.freeMem
-      releaseSys(heapLock)
-    else:
-      result = allocator.freeMem
+    result = allocator.freeMem

   proc getTotalMem(): int =
-    when hasThreadSupport and defined(gcDestructors):
-      acquireSys(heapLock)
-      result = sharedHeap.currMem
-      releaseSys(heapLock)
-    else:
-      result = allocator.currMem
+    result = allocator.currMem

   proc getOccupiedMem(): int =
-    when hasThreadSupport and defined(gcDestructors):
-      acquireSys(heapLock)
-      result = sharedHeap.occ
-      releaseSys(heapLock)
-    else:
-      result = allocator.occ #getTotalMem() - getFreeMem()
+    result = allocator.occ #getTotalMem() - getFreeMem()

   proc getMaxMem*(): int =
-    when hasThreadSupport and defined(gcDestructors):
-      acquireSys(heapLock)
-      result = getMaxMem(sharedHeap)
-      releaseSys(heapLock)
-    else:
-      result = getMaxMem(allocator)
+    result = getMaxMem(allocator)

   when defined(nimTypeNames):
     proc getMemCounters*(): (int, int) = getMemCounters(allocator)
@@ -1049,7 +1157,7 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
   # -------------------- shared heap region ----------------------------------

   proc allocSharedImpl(size: Natural): pointer =
-    when hasThreadSupport:
+    when hasThreadSupport and not defined(gcDestructors):
       acquireSys(heapLock)
       result = alloc(sharedHeap, size)
       releaseSys(heapLock)
@@ -1061,7 +1169,7 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
     zeroMem(result, size)

   proc deallocSharedImpl(p: pointer) =
-    when hasThreadSupport:
+    when hasThreadSupport and not defined(gcDestructors):
       acquireSys(heapLock)
       dealloc(sharedHeap, p)
       releaseSys(heapLock)
@@ -1069,7 +1177,7 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
       deallocImpl(p)

   proc reallocSharedImpl(p: pointer, newSize: Natural): pointer =
-    when hasThreadSupport:
+    when hasThreadSupport and not defined(gcDestructors):
       acquireSys(heapLock)
       result = realloc(sharedHeap, p, newSize)
       releaseSys(heapLock)
@@ -1077,7 +1185,7 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
       result = reallocImpl(p, newSize)

   proc reallocShared0Impl(p: pointer, oldSize, newSize: Natural): pointer =
-    when hasThreadSupport:
+    when hasThreadSupport and not defined(gcDestructors):
       acquireSys(heapLock)
       result = realloc0(sharedHeap, p, oldSize, newSize)
       releaseSys(heapLock)
@@ -1085,20 +1193,31 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
       result = realloc0Impl(p, oldSize, newSize)

   when hasThreadSupport:
-    template sharedMemStatsShared(v: int) =
-      acquireSys(heapLock)
-      result = v
-      releaseSys(heapLock)
+    when defined(gcDestructors):
+      proc getFreeSharedMem(): int =
+        allocator.freeMem

-    proc getFreeSharedMem(): int =
-      sharedMemStatsShared(sharedHeap.freeMem)
+      proc getTotalSharedMem(): int =
+        allocator.currMem

-    proc getTotalSharedMem(): int =
-      sharedMemStatsShared(sharedHeap.currMem)
+      proc getOccupiedSharedMem(): int =
+        allocator.occ

-    proc getOccupiedSharedMem(): int =
-      sharedMemStatsShared(sharedHeap.occ)
-      #sharedMemStatsShared(sharedHeap.currMem - sharedHeap.freeMem)
+    else:
+      template sharedMemStatsShared(v: int) =
+        acquireSys(heapLock)
+        result = v
+        releaseSys(heapLock)
+
+      proc getFreeSharedMem(): int =
+        sharedMemStatsShared(sharedHeap.freeMem)
+
+      proc getTotalSharedMem(): int =
+        sharedMemStatsShared(sharedHeap.currMem)
+
+      proc getOccupiedSharedMem(): int =
+        sharedMemStatsShared(sharedHeap.occ)
+        #sharedMemStatsShared(sharedHeap.currMem - sharedHeap.freeMem)
   {.pop.}

 {.pop.}
@@ -17,7 +17,7 @@ type
   AtomType* = SomeNumber|pointer|ptr|char|bool
     ## Type Class representing valid types for use with atomic procs

-when someGcc and hasThreadSupport:
+when someGcc:
   type AtomMemModel* = distinct cint

   var ATOMIC_RELAXED* {.importc: "__ATOMIC_RELAXED", nodecl.}: AtomMemModel
@@ -164,7 +164,7 @@ when someGcc and hasThreadSupport:
     ## ignore this parameter.

   template fence*() = atomicThreadFence(ATOMIC_SEQ_CST)
-elif someVcc and hasThreadSupport:
+elif someVcc:
   type AtomMemModel* = distinct cint

   const

@@ -53,13 +53,23 @@ when defined(nimPreviewSlimSystem):
 const
   hasAllocStack = defined(zephyr) # maybe freertos too?

+when defined(gcDestructors):
+  proc allocThreadStorage(size: int): pointer =
+    result = c_malloc(csize_t size)
+    zeroMem(result, size)
+
+  proc deallocThreadStorage(p: pointer) = c_free(p)
+else:
+  template allocThreadStorage(size: untyped): untyped = allocShared0(size)
+  template deallocThreadStorage(p: pointer) = deallocShared(p)
+
 when hasAllocStack or defined(zephyr) or defined(freertos):
   const
     nimThreadStackSize {.intdefine.} = 8192
     nimThreadStackGuard {.intdefine.} = 128

     StackGuardSize = nimThreadStackGuard
     ThreadStackSize = nimThreadStackSize - nimThreadStackGuard
 else:
   const
     StackGuardSize = 4096
@@ -176,7 +186,7 @@ else:
     finally:
       afterThreadRuns()
     when hasAllocStack:
-      deallocShared(thrd.rawStack)
+      deallocThreadStorage(thrd.rawStack)

 proc threadProcWrapStackFrame[TArg](thrd: ptr Thread[TArg]) {.raises: [].} =
   when defined(boehmgc):
@@ -207,7 +217,7 @@ template threadProcWrapperBody(closure: untyped): untyped =
   # mark as not running anymore:
   thrd.core = nil
   thrd.dataFn = nil
-  deallocShared(cast[pointer](core))
+  deallocThreadStorage(cast[pointer](core))

 {.push stack_trace:off.}
 when defined(windows):
@@ -278,7 +288,7 @@ when false:
     t.dataFn = nil
     ## if thread `t` already exited, `t.core` will be `null`.
     if not isNil(t.core):
-      deallocShared(t.core)
+      deallocThreadStorage(t.core)
       t.core = nil

 when hostOS == "windows":
@@ -290,7 +300,7 @@ when hostOS == "windows":
     ## Entry point is the proc `tp`.
     ## `param` is passed to `tp`. `TArg` can be `void` if you
     ## don't need to pass any data to the thread.
-    t.core = cast[PGcThread](allocShared0(sizeof(GcThread)))
+    t.core = cast[PGcThread](allocThreadStorage(sizeof(GcThread)))

     when TArg isnot void: t.data = param
     t.dataFn = tp
@@ -315,7 +325,7 @@ elif defined(genode):
   proc createThread*[TArg](t: var Thread[TArg],
                            tp: proc (arg: TArg) {.thread, nimcall.},
                            param: TArg) =
-    t.core = cast[PGcThread](allocShared0(sizeof(GcThread)))
+    t.core = cast[PGcThread](allocThreadStorage(sizeof(GcThread)))

     when TArg isnot void: t.data = param
     t.dataFn = tp
@@ -339,7 +349,7 @@ else:
     ## Entry point is the proc `tp`. `param` is passed to `tp`.
     ## `TArg` can be `void` if you
     ## don't need to pass any data to the thread.
-    t.core = cast[PGcThread](allocShared0(sizeof(GcThread)))
+    t.core = cast[PGcThread](allocThreadStorage(sizeof(GcThread)))

     when TArg isnot void: t.data = param
     t.dataFn = tp
@@ -348,7 +358,7 @@ else:
     doAssert pthread_attr_init(a) == 0
     when hasAllocStack:
       var
-        rawstk = allocShared0(ThreadStackSize + StackGuardSize)
+        rawstk = allocThreadStorage(ThreadStackSize + StackGuardSize)
         stk = cast[pointer](cast[uint](rawstk) + StackGuardSize)
       let setstacksizeResult = pthread_attr_setstack(addr a, stk, ThreadStackSize)
       t.rawStack = rawstk