threads clean up their heap

This commit is contained in:
Araq
2011-06-04 23:55:10 +02:00
parent 5008b44467
commit 24ed9d560f
9 changed files with 93 additions and 46 deletions

View File

@@ -765,11 +765,15 @@ proc genIfExpr(p: BProc, n: PNode, d: var TLoc) =
genAssignment(p, d, tmp, {}) # no need for deep copying
proc genEcho(p: BProc, n: PNode) =
# this unusual way of implementing it ensures that e.g. ``echo("hallo", 45)``
# is threadsafe.
var args: PRope = nil
var a: TLoc
for i in countup(1, sonsLen(n) - 1):
for i in countup(1, n.len-1):
initLocExpr(p, n.sons[i], a)
appcg(p, cpsStmts, "#rawEcho($1);$n", [rdLoc(a)])
appcg(p, cpsStmts, "#rawEchoNL();$n")
appf(args, ", ($1)->data", [rdLoc(a)])
appcg(p, cpsStmts, "printf(\"" & repeatStr(n.len-1, "%s") &
"\\n\"$1);$n", [args])
proc genCall(p: BProc, t: PNode, d: var TLoc) =
var op, a: TLoc

View File

@@ -527,6 +527,8 @@ proc LookUpForDefined(c: PContext, n: PNode, onlyCurrentScope: bool): PSym =
GlobalError(n.sons[1].info, errIdentifierExpected, "")
of nkAccQuoted:
result = lookupForDefined(c, considerAcc(n), onlyCurrentScope)
of nkSym:
result = n.sym
else:
GlobalError(n.info, errIdentifierExpected, renderTree(n))
result = nil

View File

@@ -778,7 +778,7 @@ proc compileOption*(option, arg: string): bool {.
const
hasThreadSupport = compileOption("threads")
hasSharedHeap = false # don't share heaps, so every thread has its own heap
hasSharedHeap = defined(boehmgc) # don't share heaps; every thread has its own
when hasThreadSupport and not hasSharedHeap:
{.pragma: rtlThreadVar, threadvar.}
@@ -1040,8 +1040,6 @@ proc getRefcount*[T](x: ref T): int {.importc: "getRefcount", noSideEffect.}
## retrieves the reference count of a heap-allocated object. The
## value is implementation-dependent.
#proc writeStackTrace() {.export: "writeStackTrace".}
# new constants:
const
inf* {.magic: "Inf".} = 1.0 / 0.0
@@ -1386,6 +1384,12 @@ var
## set this variable to provide a procedure that should be called before
## each executed instruction. This should only be used by debuggers!
## Only code compiled with the ``debugger:on`` switch calls this hook.
raiseHook*: proc (e: ref E_Base): bool
## with this hook you can influence exception handling on a global level.
## If not nil, every 'raise' statement ends up calling this hook. Ordinary
## application code should never set this hook! You better know what you
## do when setting this. If ``raiseHook`` returns false, the exception
## is caught and does not propagate further through the call stack.
type
PFrame = ptr TFrame
@@ -1420,8 +1424,13 @@ else:
"""
proc echo*[Ty](x: openarray[Ty]) {.magic: "Echo".}
## equivalent to ``writeln(stdout, x); flush(stdout)``. BUT: This is
## available for the ECMAScript target too!
## special built-in that takes a variable number of arguments. Each argument
## is converted to a string via ``$``, so it works for user-defined
## types that have an overloaded ``$`` operator.
## It is roughly equivalent to ``writeln(stdout, x); flush(stdout)``, but
## available for the ECMAScript target too.
## Unlike other IO operations this is guaranteed to be thread-safe as
## ``echo`` is very often used for debugging convenience.
template newException*(exceptn, message: expr): expr =
## creates an exception object of type ``exceptn`` and sets its ``msg`` field

View File

@@ -91,7 +91,7 @@ type
key: int # start address at bit 0
bits: array[0..IntsPerTrunk-1, int] # a bit vector
TTrunkBuckets = array[0..1023, PTrunk]
TTrunkBuckets = array[0..255, PTrunk]
TIntSet {.final.} = object
data: TTrunkBuckets
@@ -119,8 +119,7 @@ type
data: TAlignType # start of usable memory
TBigChunk = object of TBaseChunk # not necessarily > PageSize!
next: PBigChunk # chunks of the same (or bigger) size
prev: PBigChunk
next, prev: PBigChunk # chunks of the same (or bigger) size
align: int
data: TAlignType # start of usable memory
@@ -148,6 +147,7 @@ type
TLLChunk {.pure.} = object ## *low-level* chunk
size: int # remaining size
acc: int # accumulator
next: PLLChunk # next low-level chunk; only needed for dealloc
TAllocator {.final, pure.} = object
llmem: PLLChunk
@@ -172,18 +172,31 @@ proc getMaxMem(a: var TAllocator): int =
proc llAlloc(a: var TAllocator, size: int): pointer =
# *low-level* alloc for the memory managers data structures. Deallocation
# is never done.
# is done at the end of the allocator's lifetime.
if a.llmem == nil or size > a.llmem.size:
var request = roundup(size+sizeof(TLLChunk), PageSize)
a.llmem = cast[PLLChunk](osAllocPages(request))
incCurrMem(a, request)
a.llmem.size = request - sizeof(TLLChunk)
# the requested size is ``roundup(size+sizeof(TLLChunk), PageSize)``, but
# since we know ``size`` is a (small) constant, we know the requested size
# is one page:
assert roundup(size+sizeof(TLLChunk), PageSize) == PageSize
var old = a.llmem # can be nil and is correct with nil
a.llmem = cast[PLLChunk](osAllocPages(PageSize))
incCurrMem(a, PageSize)
a.llmem.size = PageSize - sizeof(TLLChunk)
a.llmem.acc = sizeof(TLLChunk)
a.llmem.next = old
result = cast[pointer](cast[TAddress](a.llmem) + a.llmem.acc)
dec(a.llmem.size, size)
inc(a.llmem.acc, size)
zeroMem(result, size)
proc llDeallocAll(a: var TAllocator) =
  # Gives every low-level chunk reachable from ``a.llmem`` back to the OS and
  # leaves the allocator without any low-level chunk.
  var it = a.llmem
  while it != nil:
    # we know each block in the list has the size of 1 page; ``next`` has to
    # be read before the page that stores it is released:
    var next = it.next
    osDeallocPages(it, PageSize)
    it = next
  # do not keep a pointer into released memory: ``llAlloc`` tests
  # ``a.llmem == nil`` to decide whether a fresh chunk has to be requested
  a.llmem = nil
proc IntSetGet(t: TIntSet, key: int): PTrunk =
var it = t.data[key and high(t.data)]
while it != nil:
@@ -218,6 +231,24 @@ proc Excl(s: var TIntSet, key: int) =
var u = key and TrunkMask
t.bits[u shr IntShift] = t.bits[u shr IntShift] and not
(1 shl (u and IntMask))
iterator elements(t: TIntSet): int {.inline.} =
  # Yields the value that belongs to every bit that is set in `t`.
  # While traversing it is forbidden to change the set!
  for bucket in 0..high(t.data):
    var trunk = t.data[bucket]
    while trunk != nil:
      for wordIdx in 0..high(trunk.bits):
        # working on a copy of the word is correct, because modifying
        # operations are not allowed during traversal
        var word = trunk.bits[wordIdx]
        var bitIdx = 0
        while word != 0: # stop as soon as no set bit remains in the word
          if (word and 1) != 0: # the lowest remaining bit is set!
            yield (trunk.key shl TrunkShift) or
                  (wordIdx shl IntShift +% bitIdx)
          inc(bitIdx)
          word = word shr 1
      trunk = trunk.next
# ------------- chunk management ----------------------------------------------
proc pageIndex(c: PChunk): int {.inline.} =
@@ -508,9 +539,21 @@ proc isAllocatedPtr(a: TAllocator, p: pointer): bool =
var c = cast[PBigChunk](c)
result = p == addr(c.data) and cast[ptr TFreeCell](p).zeroField >% 1
proc deallocOsPages(a: var TAllocator) =
  # Releases the OS pages of allocator `a` in two steps: first the chunks
  # recorded in ``a.chunkStarts``, then the low-level pages.
  for pageNo in elements(a.chunkStarts):
    var chunk = cast[PChunk](pageNo shl pageShift)
    # never release less than one page, even if the chunk's recorded size
    # is smaller than that:
    var bytes = chunk.size
    if bytes < PageSize: bytes = PageSize
    osDeallocPages(chunk, bytes)
  # The pages that back the page bits themselves have to go last, because
  # the loop above still reads them:
  llDeallocAll(a)
var
allocator {.rtlThreadVar.}: TAllocator
proc deallocOsPages =
  # convenience overload that operates on the module-level ``allocator``
  deallocOsPages(allocator)
# ---------------------- interface to programs -------------------------------
when not defined(useNimRtl):

View File

@@ -212,8 +212,10 @@ proc quitOrDebug() {.inline.} =
endbStep() # call the debugger
proc raiseException(e: ref E_Base, ename: CString) {.compilerRtl.} =
GC_disable() # a bad thing is an error in the GC while raising an exception
e.name = ename
if raiseHook != nil:
if not raiseHook(e): return
GC_disable() # a bad thing is an error in the GC while raising an exception
ThreadGlobals()
if ||excHandler != nil:
pushCurrentException(e)

View File

@@ -107,9 +107,6 @@ proc writeln[Ty](f: TFile, x: openArray[Ty]) =
for i in items(x): write(f, i)
write(f, "\n")
proc rawEcho(x: string) {.inline, compilerproc.} = write(stdout, x)
proc rawEchoNL() {.inline, compilerproc.} = write(stdout, "\n")
# interface to the C procs:
proc fopen(filename, mode: CString): pointer {.importc: "fopen", noDecl.}

View File

@@ -102,24 +102,15 @@ when defined(Windows):
stdcall, dynlib: "kernel32", importc: "TerminateThread".}
type
TThreadVarSlot {.compilerproc.} = distinct int32
TThreadVarSlot = distinct int32
proc TlsAlloc(): TThreadVarSlot {.
proc ThreadVarAlloc(): TThreadVarSlot {.
importc: "TlsAlloc", stdcall, dynlib: "kernel32".}
proc TlsSetValue(dwTlsIndex: TThreadVarSlot, lpTlsValue: pointer) {.
proc ThreadVarSetValue(dwTlsIndex: TThreadVarSlot, lpTlsValue: pointer) {.
importc: "TlsSetValue", stdcall, dynlib: "kernel32".}
proc TlsGetValue(dwTlsIndex: TThreadVarSlot): pointer {.
proc ThreadVarGetValue(dwTlsIndex: TThreadVarSlot): pointer {.
importc: "TlsGetValue", stdcall, dynlib: "kernel32".}
proc ThreadVarAlloc(): TThreadVarSlot {.compilerproc, inline.} =
result = TlsAlloc()
proc ThreadVarSetValue(s: TThreadVarSlot, value: pointer) {.
compilerproc, inline.} =
TlsSetValue(s, value)
proc ThreadVarGetValue(s: TThreadVarSlot): pointer {.
compilerproc, inline.} =
result = TlsGetValue(s)
else:
{.passL: "-pthread".}
{.passC: "-pthread".}
@@ -225,7 +216,7 @@ type
TGcThread {.pure.} = object
sys: TSysThread
next, prev: PGcThread
stackBottom, stackTop: pointer
stackBottom, stackTop, threadLocalStorage: pointer
stackSize: int
g: TGlobals
locksLen: int
@@ -242,6 +233,9 @@ var globalsSlot = ThreadVarAlloc()
proc ThisThread(): PGcThread {.compilerRtl, inl.} =
result = cast[PGcThread](ThreadVarGetValue(globalsSlot))
proc GetThreadLocalVars(): pointer {.compilerRtl, inl.} =
result = cast[PGcThread](ThreadVarGetValue(globalsSlot)).threadLocalStorage
# create for the main thread. Note: do not insert this data into the list
# of all threads; it's not to be stopped etc.
when not defined(useNimRtl):
@@ -295,11 +289,14 @@ type
TThread* {.pure, final.}[TParam] = object of TGcThread ## Nimrod thread.
fn: proc (p: TParam)
data: TParam
when not defined(boehmgc) and not hasSharedHeap:
proc deallocOsPages()
template ThreadProcWrapperBody(closure: expr) =
ThreadVarSetValue(globalsSlot, closure)
var t = cast[ptr TThread[TParam]](closure)
when not hasSharedHeap:
when not defined(boehmgc) and not hasSharedHeap:
# init the GC for this thread:
setStackBottom(addr(t))
initGC()
@@ -309,6 +306,7 @@ template ThreadProcWrapperBody(closure: expr) =
t.fn(t.data)
finally:
unregisterThread(t)
when defined(deallocOsPages): deallocOsPages()
{.push stack_trace:off.}
when defined(windows):

View File

@@ -1,19 +1,9 @@
* improve ``echo`` code generation for multi-threading
* codegen for threadvars
* two issues for thread local heaps:
- must prevent to construct a data structure that contains memory
from different heaps: n.next = otherHeapPtr
- must prevent that GC cleans up memory that other threads can still read...
this can be prevented if the shared heap is simply uncollected (at least
for now)
* add --deadlock_prevention:on|off switch? timeout for locks?
* make GC fully thread-safe; needs:
- thread must store its stack boundaries
- GC must traverse these stacks: Even better each thread traverses its
stack! No need to stop if you can help the GC ;-)
- isOnStack() needs to take them into account (SLOW?)
- GC must stop the world
* implicit ref/ptr->var conversion; the compiler may store an object
implicitly on the heap for write barrier efficiency! (Especially

View File

@@ -83,6 +83,8 @@ Additions
- Added a wrapper for ``0mq`` via the ``zmq`` module.
- The compiler now supports array, sequence and string slicing.
- Added ``system.newStringOfCap``.
- Added ``system.raiseHook``.
- ``system.echo`` is guaranteed to be thread-safe.
2010-10-20 Version 0.8.10 released