From 3aac4b1a3e7c7afe7ee94ec0c2aead0895920331 Mon Sep 17 00:00:00 2001
From: flysand7
Date: Sat, 27 Jul 2024 22:15:27 +1100
Subject: [PATCH] [sync]: Document all procedures

---
 core/sync/atomic.odin     | 472 +++++++++++++++++++++++++++++++++++---
 core/sync/doc.odin        |  21 ++
 core/sync/extended.odin   | 469 ++++++++++++++++++++++++++++++-------
 core/sync/primitives.odin | 470 ++++++++++++++++++++++++++++++++-----
 4 files changed, 1262 insertions(+), 170 deletions(-)
 create mode 100644 core/sync/doc.odin

diff --git a/core/sync/atomic.odin b/core/sync/atomic.odin
index 65d063f15..7e514a6b4 100644
--- a/core/sync/atomic.odin
+++ b/core/sync/atomic.odin
@@ -2,44 +2,452 @@ package sync
 
 import "base:intrinsics"
 
+/*
+This procedure may lower CPU consumption or yield to a hyperthreaded twin
+processor. Its exact effect is architecture-specific, but the intent is to
+signal that the current thread is not doing much work on the CPU.
+*/
 cpu_relax :: intrinsics.cpu_relax
 
 /*
-Atomic_Memory_Order :: enum {
-	Relaxed = 0, // Unordered
-	Consume = 1, // Monotonic
-	Acquire = 2,
-	Release = 3,
-	Acq_Rel = 4,
-	Seq_Cst = 5,
-}
+Describes memory ordering for an atomic operation.
+
+Modern CPUs contain multiple cores, each with caches specific to that core.
+When a core performs a write to memory, the value is written to a cache first,
+and a core typically can't see what's inside the caches of other cores. To
+make memory operations consistent, CPUs implement mechanisms that synchronize
+them across cores, by asking other cores or by pushing data about writes to
+other cores.
+
+Due to how these mechanisms are implemented, the stores and loads performed by
+one core may appear to happen in a different order to another core. A core may
+also reorder its own stores and loads (independently of the order in which the
+compiler emitted them into the machine code). This can cause issues when
+trying to synchronize multiple memory locations between two cores, which is
+why CPUs allow for stronger memory ordering guarantees when certain
+instructions or instruction variants are used.
+
+In Odin there are six memory ordering guarantees that can be provided to an
+atomic operation:
+
+- `Relaxed`: The memory access (load or store) is unordered with respect to
+  other memory accesses. This can be used to implement an atomic counter:
+  multiple threads access a single variable, but it doesn't matter when
+  exactly it gets incremented, because the value becomes eventually consistent.
+- `Consume`: No loads or stores dependent on a memory location can be
+  reordered before a load of that memory location with consume memory
+  ordering. If other threads released the same memory location, their writes
+  become visible.
+- `Acquire`: No loads or stores on a memory location can be reordered before a
+  load of that memory location with acquire memory ordering. If other threads
+  release the same memory location, their writes become visible.
+- `Release`: No loads or stores on a memory location can be reordered after a
+  store of that memory location with release memory ordering. All threads that
+  acquire the same memory location will see all writes done by the current
+  thread.
+- `Acq_Rel`: Acquire-release memory ordering: combines acquire and release
+  memory orderings in the same operation.
+- `Seq_Cst`: Sequential consistency. The strongest memory ordering. A load will
+  always be an acquire operation, a store will always be a release operation,
+  and, in addition, all threads observe the same order of writes.
+
+Non-explicit atomics are always sequentially consistent.
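+
+For example, a store with release ordering can be paired with loads with
+acquire ordering to publish data from one thread to another. A minimal sketch
+(`data` and `ready` are illustrative shared variables, with `ready` a `u32`):
+
+	// Thread 1: write the data, then set the flag with release ordering.
+	data = 42
+	atomic_store_explicit(&ready, 1, .Release)
+
+	// Thread 2: spin until the flag is seen with acquire ordering; after
+	// that, the write to `data` made by thread 1 is guaranteed to be visible.
+	for atomic_load_explicit(&ready, .Acquire) == 0 {
+		cpu_relax()
+	}
+	assert(data == 42)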
+
+	Atomic_Memory_Order :: enum {
+		Relaxed = 0, // Unordered
+		Consume = 1, // Monotonic
+		Acquire = 2,
+		Release = 3,
+		Acq_Rel = 4,
+		Seq_Cst = 5,
+	}
+
+**Note(i386, x64)**: x86 has a very strong memory model by default: stores are
+not reordered with other stores, and loads are not reordered with other loads.
+In a sense, every load is at least an acquire operation and every store is at
+least a release operation. Operations using the `lock` prefix are sequentially
+consistent. If you use explicit atomics, make sure you specify the correct
+atomic memory order, because bugs will likely not show up on x86, but may show
+up on e.g. ARM. More on x86 memory ordering can be found
+[[here; https://www.cs.cmu.edu/~410-f10/doc/Intel_Reordering_318147.pdf]]
 */
 Atomic_Memory_Order :: intrinsics.Atomic_Memory_Order
 
+/*
+Establish memory ordering.
 
-atomic_thread_fence :: intrinsics.atomic_thread_fence
-atomic_signal_fence :: intrinsics.atomic_signal_fence
-atomic_store :: intrinsics.atomic_store
-atomic_store_explicit :: intrinsics.atomic_store_explicit
-atomic_load :: intrinsics.atomic_load
-atomic_load_explicit :: intrinsics.atomic_load_explicit
-atomic_add :: intrinsics.atomic_add
-atomic_add_explicit :: intrinsics.atomic_add_explicit
-atomic_sub :: intrinsics.atomic_sub
-atomic_sub_explicit :: intrinsics.atomic_sub_explicit
-atomic_and :: intrinsics.atomic_and
-atomic_and_explicit :: intrinsics.atomic_and_explicit
-atomic_nand :: intrinsics.atomic_nand
-atomic_nand_explicit :: intrinsics.atomic_nand_explicit
-atomic_or :: intrinsics.atomic_or
-atomic_or_explicit :: intrinsics.atomic_or_explicit
-atomic_xor :: intrinsics.atomic_xor
-atomic_xor_explicit :: intrinsics.atomic_xor_explicit
-atomic_exchange :: intrinsics.atomic_exchange
-atomic_exchange_explicit :: intrinsics.atomic_exchange_explicit
+This procedure establishes memory ordering without an associated atomic
+operation.
+*/
+atomic_thread_fence :: intrinsics.atomic_thread_fence
 
-// Returns value and optional ok boolean
-atomic_compare_exchange_strong :: intrinsics.atomic_compare_exchange_strong
+/*
+Establish memory ordering between the current thread and a signal handler.
+
+This procedure establishes memory ordering between a thread and a signal
+handler that runs on the same thread, without an associated atomic operation.
+This procedure is equivalent to `atomic_thread_fence`, except it doesn't
+issue any CPU instructions for memory ordering.
+*/
+atomic_signal_fence :: intrinsics.atomic_signal_fence
+
+/*
+Atomically store a value into memory.
+
+This procedure stores a value into a memory location in such a way that no
+other thread is able to observe a partial (torn) write. The memory ordering of
+this operation is sequentially consistent.
+*/
+atomic_store :: intrinsics.atomic_store
+
+/*
+Atomically store a value into memory with explicit memory ordering.
+
+This procedure stores a value into a memory location in such a way that no
+other thread is able to observe a partial (torn) write. The memory ordering of
+this operation is as specified by the `order` parameter.
+*/
+atomic_store_explicit :: intrinsics.atomic_store_explicit
+
+/*
+Atomically load a value from memory.
+
+This procedure loads a value from a memory location in such a way that the
+received value is not a partial (torn) read. The memory ordering of this
+operation is sequentially consistent.
+*/
+atomic_load :: intrinsics.atomic_load
+
+/*
+Atomically load a value from memory with explicit memory ordering.
+
+This procedure loads a value from a memory location in such a way that the
+received value is not a partial (torn) read. The memory ordering of this
+operation is as specified by the `order` parameter.
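+
+For example, one might spin until another thread publishes a flag (a minimal
+sketch; `flag` is an illustrative shared variable):
+
+	for atomic_load_explicit(&flag, .Acquire) == 0 {
+		cpu_relax()
+	}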
+*/
+atomic_load_explicit :: intrinsics.atomic_load_explicit
+
+/*
+Atomically add a value to the value stored in memory.
+
+This procedure loads a value from memory, adds the specified value to it, and
+stores the result back as a single atomic operation. This operation is an
+atomic equivalent of the following:
+
+	dst^ += val
+
+The memory ordering of this operation is sequentially consistent.
+*/
+atomic_add :: intrinsics.atomic_add
+
+/*
+Atomically add a value to the value stored in memory, with explicit memory
+ordering.
+
+This procedure loads a value from memory, adds the specified value to it, and
+stores the result back as a single atomic operation. This operation is an
+atomic equivalent of the following:
+
+	dst^ += val
+
+The memory ordering of this operation is as specified by the `order` parameter.
+*/
+atomic_add_explicit :: intrinsics.atomic_add_explicit
+
+/*
+Atomically subtract a value from the value stored in memory.
+
+This procedure loads a value from memory, subtracts the specified value from
+it, and stores the result back as a single atomic operation. This operation is
+an atomic equivalent of the following:
+
+	dst^ -= val
+
+The memory ordering of this operation is sequentially consistent.
+*/
+atomic_sub :: intrinsics.atomic_sub
+
+/*
+Atomically subtract a value from the value stored in memory, with explicit
+memory ordering.
+
+This procedure loads a value from memory, subtracts the specified value from
+it, and stores the result back as a single atomic operation. This operation is
+an atomic equivalent of the following:
+
+	dst^ -= val
+
+The memory ordering of this operation is as specified by the `order` parameter.
+*/
+atomic_sub_explicit :: intrinsics.atomic_sub_explicit
+
+/*
+Atomically replace the memory location with the result of the AND operation
+with the specified value.
+
+This procedure loads a value from memory, calculates the result of the AND
+operation between the loaded value and the specified value, and stores it back
+into the same memory location as a single atomic operation. This operation is
+an atomic equivalent of the following:
+
+	dst^ &= val
+
+The memory ordering of this operation is sequentially consistent.
+*/
+atomic_and :: intrinsics.atomic_and
+
+/*
+Atomically replace the memory location with the result of the AND operation
+with the specified value, with explicit memory ordering.
+
+This procedure loads a value from memory, calculates the result of the AND
+operation between the loaded value and the specified value, and stores it back
+into the same memory location as a single atomic operation. This operation is
+an atomic equivalent of the following:
+
+	dst^ &= val
+
+The memory ordering of this operation is as specified by the `order` parameter.
+*/
+atomic_and_explicit :: intrinsics.atomic_and_explicit
+
+/*
+Atomically replace the memory location with the result of the NAND operation
+with the specified value.
+
+This procedure loads a value from memory, calculates the result of the NAND
+operation between the loaded value and the specified value, and stores it back
+into the same memory location as a single atomic operation. This operation is
+an atomic equivalent of the following:
+
+	dst^ = ~(dst^ & val)
+
+The memory ordering of this operation is sequentially consistent.
+*/
+atomic_nand :: intrinsics.atomic_nand
+
+/*
+Atomically replace the memory location with the result of the NAND operation
+with the specified value, with explicit memory ordering.
+
+This procedure loads a value from memory, calculates the result of the NAND
+operation between the loaded value and the specified value, and stores it back
+into the same memory location as a single atomic operation. This operation is
+an atomic equivalent of the following:
+
+	dst^ = ~(dst^ & val)
+
+The memory ordering of this operation is as specified by the `order` parameter.
+*/
+atomic_nand_explicit :: intrinsics.atomic_nand_explicit
+
+/*
+Atomically replace the memory location with the result of the OR operation
+with the specified value.
+
+This procedure loads a value from memory, calculates the result of the OR
+operation between the loaded value and the specified value, and stores it back
+into the same memory location as a single atomic operation. This operation is
+an atomic equivalent of the following:
+
+	dst^ |= val
+
+The memory ordering of this operation is sequentially consistent.
+*/
+atomic_or :: intrinsics.atomic_or
+
+/*
+Atomically replace the memory location with the result of the OR operation
+with the specified value, with explicit memory ordering.
+
+This procedure loads a value from memory, calculates the result of the OR
+operation between the loaded value and the specified value, and stores it back
+into the same memory location as a single atomic operation. This operation is
+an atomic equivalent of the following:
+
+	dst^ |= val
+
+The memory ordering of this operation is as specified by the `order` parameter.
+*/
+atomic_or_explicit :: intrinsics.atomic_or_explicit
+
+/*
+Atomically replace the memory location with the result of the XOR operation
+with the specified value.
+
+This procedure loads a value from memory, calculates the result of the XOR
+operation between the loaded value and the specified value, and stores it back
+into the same memory location as a single atomic operation. This operation is
+an atomic equivalent of the following:
+
+	dst^ ~= val
+
+The memory ordering of this operation is sequentially consistent.
+*/
+atomic_xor :: intrinsics.atomic_xor
+
+/*
+Atomically replace the memory location with the result of the XOR operation
+with the specified value, with explicit memory ordering.
+
+This procedure loads a value from memory, calculates the result of the XOR
+operation between the loaded value and the specified value, and stores it back
+into the same memory location as a single atomic operation. This operation is
+an atomic equivalent of the following:
+
+	dst^ ~= val
+
+The memory ordering of this operation is as specified by the `order` parameter.
+*/
+atomic_xor_explicit :: intrinsics.atomic_xor_explicit
+
+/*
+Atomically exchange the value in a memory location with the specified value.
+
+This procedure loads a value from the specified memory location and stores the
+specified value into that memory location. Then the loaded value is returned,
+all done in a single atomic operation. This operation is an atomic equivalent
+of the following:
+
+	tmp := dst^
+	dst^ = val
+	return tmp
+
+The memory ordering of this operation is sequentially consistent.
+*/
+atomic_exchange :: intrinsics.atomic_exchange
+
+/*
+Atomically exchange the value in a memory location with the specified value,
+with explicit memory ordering.
+
+This procedure loads a value from the specified memory location and stores the
+specified value into that memory location. Then the loaded value is returned,
+all done in a single atomic operation. This operation is an atomic equivalent
+of the following:
+
+	tmp := dst^
+	dst^ = val
+	return tmp
+
+The memory ordering of this operation is as specified by the `order` parameter.
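+
+For example, atomic exchange can be used to build a minimal test-and-set
+spinlock (a sketch; `locked` is an illustrative shared `u32`, where 1 means
+locked):
+
+	// Acquire: spin until the previous value was 0 (unlocked).
+	for atomic_exchange_explicit(&locked, 1, .Acquire) == 1 {
+		cpu_relax()
+	}
+	// ... critical section ...
+	atomic_store_explicit(&locked, 0, .Release)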
+*/
+atomic_exchange_explicit :: intrinsics.atomic_exchange_explicit
+
+/*
+Atomically compare and exchange the value in a memory location.
+
+This procedure checks if the value pointed to by the `dst` parameter is equal
+to `old`, and if it is, stores the value `new` into the memory location, all
+done in a single atomic operation. This procedure returns the old value stored
+in the memory location and a boolean value signifying whether the stored value
+was equal to `old` (i.e. whether the exchange took place).
+
+This procedure is an atomic equivalent of the following operation:
+
+	old_dst := dst^
+	if old_dst == old {
+		dst^ = new
+		return old_dst, true
+	} else {
+		return old_dst, false
+	}
+
+The strong version of compare exchange always succeeds (returns `true`) when
+the value stored in the location pointed to by `dst` is equal to the `old`
+parameter.
+
+Atomic compare exchange has two memory orderings: one is for the
+read-modify-write operation, if the comparison succeeds, and the other is for
+the load operation, if the comparison fails. The memory ordering for both of
+these operations is sequentially consistent.
+*/
+atomic_compare_exchange_strong :: intrinsics.atomic_compare_exchange_strong
+
+/*
+Atomically compare and exchange the value in a memory location, with explicit
+memory ordering.
+
+This procedure checks if the value pointed to by the `dst` parameter is equal
+to `old`, and if it is, stores the value `new` into the memory location, all
+done in a single atomic operation. This procedure returns the old value stored
+in the memory location and a boolean value signifying whether the stored value
+was equal to `old` (i.e. whether the exchange took place).
+
+This procedure is an atomic equivalent of the following operation:
+
+	old_dst := dst^
+	if old_dst == old {
+		dst^ = new
+		return old_dst, true
+	} else {
+		return old_dst, false
+	}
+
+The strong version of compare exchange always succeeds (returns `true`) when
+the value stored in the location pointed to by `dst` is equal to the `old`
+parameter.
+
+Atomic compare exchange has two memory orderings: one is for the
+read-modify-write operation, if the comparison succeeds, and the other is for
+the load operation, if the comparison fails. The memory ordering for these
+operations is as specified by the `success` and `failure` parameters
+respectively.
+*/
 atomic_compare_exchange_strong_explicit :: intrinsics.atomic_compare_exchange_strong_explicit
-atomic_compare_exchange_weak :: intrinsics.atomic_compare_exchange_weak
-atomic_compare_exchange_weak_explicit :: intrinsics.atomic_compare_exchange_weak_explicit
\ No newline at end of file
+
+/*
+Atomically compare and exchange the value in a memory location.
+
+This procedure checks if the value pointed to by the `dst` parameter is equal
+to `old`, and if it is, stores the value `new` into the memory location, all
+done in a single atomic operation. This procedure returns the old value stored
+in the memory location and a boolean value signifying whether the stored value
+was equal to `old` (i.e. whether the exchange took place).
+
+This procedure is an atomic equivalent of the following operation:
+
+	old_dst := dst^
+	if old_dst == old {
+		// may return false here
+		dst^ = new
+		return old_dst, true
+	} else {
+		return old_dst, false
+	}
+
+The weak version of compare exchange may return `false` even if `dst^ == old`.
+On some platforms, running weak compare exchange in a loop is faster than the
+strong version.
+
+Atomic compare exchange has two memory orderings: one is for the
+read-modify-write operation, if the comparison succeeds, and the other is for
+the load operation, if the comparison fails. The memory ordering for both of
+these operations is sequentially consistent.
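+
+The typical usage pattern is a retry loop. For example, atomically raising a
+stored maximum (a minimal sketch; `maximum` and `new_value` are illustrative):
+
+	for {
+		cur := atomic_load(&maximum)
+		if new_value <= cur {
+			break
+		}
+		if _, ok := atomic_compare_exchange_weak(&maximum, cur, new_value); ok {
+			break
+		}
+		// The value changed or the exchange failed spuriously: retry.
+	}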
+*/
+atomic_compare_exchange_weak :: intrinsics.atomic_compare_exchange_weak
+
+/*
+Atomically compare and exchange the value in a memory location, with explicit
+memory ordering.
+
+This procedure checks if the value pointed to by the `dst` parameter is equal
+to `old`, and if it is, stores the value `new` into the memory location, all
+done in a single atomic operation. This procedure returns the old value stored
+in the memory location and a boolean value signifying whether the stored value
+was equal to `old` (i.e. whether the exchange took place).
+
+This procedure is an atomic equivalent of the following operation:
+
+	old_dst := dst^
+	if old_dst == old {
+		// may return false here
+		dst^ = new
+		return old_dst, true
+	} else {
+		return old_dst, false
+	}
+
+The weak version of compare exchange may return `false` even if `dst^ == old`.
+On some platforms, running weak compare exchange in a loop is faster than the
+strong version.
+
+Atomic compare exchange has two memory orderings: one is for the
+read-modify-write operation, if the comparison succeeds, and the other is for
+the load operation, if the comparison fails. The memory ordering for these
+operations is as specified by the `success` and `failure` parameters
+respectively.
+*/
+atomic_compare_exchange_weak_explicit :: intrinsics.atomic_compare_exchange_weak_explicit
\ No newline at end of file
diff --git a/core/sync/doc.odin b/core/sync/doc.odin
new file mode 100644
index 000000000..9876c46fb
--- /dev/null
+++ b/core/sync/doc.odin
@@ -0,0 +1,21 @@
+/*
+Synchronization primitives
+
+This package implements various synchronization primitives that can be used to
+synchronize threads' access to shared memory.
+
+To limit or control threads' access to shared memory, the following approaches
+are typically used:
+
+* Locks
+* Lock-free
+
+When using locks, sections of code that access shared memory (also known as
+**critical sections**) are guarded by locks, which grant access to a limited
+number of threads and block the execution of any other threads.
+
+In lock-free programming, the data itself is organized in such a way that
+threads don't interfere with each other much. This can be achieved by
+segmenting the data between threads and/or by using atomic operations.
+*/
+package sync
\ No newline at end of file
diff --git a/core/sync/extended.odin b/core/sync/extended.odin
index 781ed816e..b446fefa0 100644
--- a/core/sync/extended.odin
+++ b/core/sync/extended.odin
@@ -4,15 +4,41 @@ import "core:time"
 import vg "core:sys/valgrind"
 _ :: vg
 
-// A Wait_Group waits for a collection of threads to finish
-//
-// A Wait_Group must not be copied after first use
+/*
+Wait group.
+
+A wait group is a synchronization primitive used by a waiting thread to wait
+until all worker threads finish their work.
+
+The waiting thread first sets the number of worker threads it expects to wait
+for using the `wait_group_add` procedure, and starts waiting using the
+`wait_group_wait` procedure. When worker threads complete their work, each of
+them calls `wait_group_done`; after all worker threads have called this
+procedure, the waiting thread resumes execution.
+
+To keep track of whether all worker threads have finished their work, the wait
+group keeps an internal atomic counter. Initially, the waiting thread sets it
+to a non-zero amount. As each worker thread completes its work, the internal
+counter is atomically decremented. When it reaches zero, the waiting thread is
+unblocked. The counter is not allowed to become negative.
+
+**Note**: Just like any other synchronization primitive, a wait group cannot
+be copied after first use. See the documentation for `Mutex` or `Cond`.
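+
+**Example**: a minimal sketch, in the style of the barrier example below; `wg`
+is a file-scope global, since Odin procedure literals don't capture locals:
+
+	wg: sync.Wait_Group
+
+	main :: proc() {
+		THREAD_COUNT :: 4
+		sync.wait_group_add(&wg, THREAD_COUNT)
+		for _ in 0..<THREAD_COUNT {
+			thread.create_and_start(proc(t: ^thread.Thread) {
+				// ... do the work ...
+				sync.wait_group_done(&wg)
+			})
+		}
+		// Blocks until every worker has called wait_group_done.
+		sync.wait_group_wait(&wg)
+	}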
+*/
 Wait_Group :: struct #no_copy {
 	counter: int,
 	mutex:   Mutex,
 	cond:    Cond,
 }
 
+/*
+Increment the internal counter of a wait group.
+
+This procedure atomically increments the specified wait group's internal
+counter by the specified amount. This operation can be performed on any
+thread.
+*/
 wait_group_add :: proc "contextless" (wg: ^Wait_Group, delta: int) {
 	if delta == 0 {
 		return
@@ -32,10 +58,23 @@ wait_group_add :: proc "contextless" (wg: ^Wait_Group, delta: int) {
 	}
 }
 
+/*
+Signal work done by a thread in a wait group.
+
+This procedure decrements the internal counter of the specified wait group.
+Once the internal counter reaches zero, the waiting thread is woken up and
+resumes execution.
+*/
 wait_group_done :: proc "contextless" (wg: ^Wait_Group) {
 	wait_group_add(wg, -1)
 }
 
+/*
+Wait for all worker threads in the wait group.
+
+This procedure blocks the execution of the current thread until the specified
+wait group's internal counter reaches zero.
+*/
 wait_group_wait :: proc "contextless" (wg: ^Wait_Group) {
 	guard(&wg.mutex)
 
@@ -47,6 +86,14 @@ wait_group_wait :: proc "contextless" (wg: ^Wait_Group) {
 	}
 }
 
+/*
+Wait for all worker threads in the wait group, or until the timeout is reached.
+
+This procedure blocks the execution of the current thread until the specified
+wait group's internal counter reaches zero, or until the timeout is reached.
+
+This procedure returns `false` if the timeout was reached, and `true` otherwise.
+*/
 wait_group_wait_with_timeout :: proc "contextless" (wg: ^Wait_Group, duration: time.Duration) -> bool {
 	if duration <= 0 {
 		return false
@@ -64,41 +111,43 @@ wait_group_wait_with_timeout :: proc "contextless" (wg: ^Wait_Group, duration: t
 	return true
 }
 
-
 /*
-A barrier enabling multiple threads to synchronize the beginning of some computation
+Barrier.
 
-Example:
-	package example
+A barrier is a synchronization primitive enabling multiple threads to
+synchronize the beginning of some computation.
 
-	import "core:fmt"
-	import "core:sync"
-	import "core:thread"
+When the `barrier_wait` procedure is called by any thread, that thread blocks
+until all threads associated with the barrier reach the same point of
+execution and also call `barrier_wait`.
 
-	barrier := &sync.Barrier{}
+When a barrier is initialized, a `thread_count` parameter is passed, specifying
+the number of participating threads. The barrier also keeps track of an
+internal atomic counter. When a thread calls `barrier_wait`, the internal
+counter is incremented. When the internal counter reaches `thread_count`, it is
+reset and all threads waiting on the barrier are unblocked.
 
-	main :: proc "contextless" () {
-		fmt.println("Start")
-
-		THREAD_COUNT :: 4
-		threads: [THREAD_COUNT]^thread.Thread
+This type of synchronization primitive can be used to synchronize "staged"
+workloads, where the workload is split into stages, and no thread is allowed
+to start work on the next stage until all threads have completed the previous
+one. In this case, a `barrier_wait` call shall be inserted after each stage in
+the thread procedure.
+
- THREAD_COUNT :: 4 - threads: [THREAD_COUNT]^thread.Thread +**Example**: - sync.barrier_init(barrier, THREAD_COUNT) - - for _, i in threads { - threads[i] = thread.create_and_start(proc(t: ^thread.Thread) { - // Same messages will be printed together but without any interleaving - fmt.println("Getting ready!") - sync.barrier_wait(barrier) - fmt.println("Off their marks they go!") - }) - } - - for t in threads { - thread.destroy(t) // join and free thread - } - fmt.println("Finished") + THREAD_COUNT :: 4 + threads: [THREAD_COUNT]^thread.Thread + sync.barrier_init(barrier, THREAD_COUNT) + for _, i in threads { + threads[i] = thread.create_and_start(proc(t: ^thread.Thread) { + // Same messages will be printed together but without any interleaving + fmt.println("Getting ready!") + sync.barrier_wait(barrier) + fmt.println("Off their marks they go!") + }) + } + for t in threads { + thread.destroy(t) } */ Barrier :: struct #no_copy { @@ -109,6 +158,13 @@ Barrier :: struct #no_copy { thread_count: int, } +/* +Initialize a barrier. + + +This procedure initializes the barrier for the specified amount of participant +threads. +*/ barrier_init :: proc "contextless" (b: ^Barrier, thread_count: int) { when ODIN_VALGRIND_SUPPORT { vg.helgrind_barrier_resize_pre(b, uint(thread_count)) @@ -118,8 +174,13 @@ barrier_init :: proc "contextless" (b: ^Barrier, thread_count: int) { b.thread_count = thread_count } -// Block the current thread until all threads have rendezvoused -// Barrier can be reused after all threads rendezvoused once, and can be used continuously +/* +Block the current thread until all threads have rendezvoused. + +This procedure blocks the execution of the current thread, until all threads +have reached the same point in the execution of the thread proc. Multiple calls +to `barrier_wait` are allowed within the thread procedure. +*/ barrier_wait :: proc "contextless" (b: ^Barrier) -> (is_leader: bool) { when ODIN_VALGRIND_SUPPORT { vg.helgrind_barrier_wait_pre(b) @@ -140,15 +201,31 @@ barrier_wait :: proc "contextless" (b: ^Barrier) -> (is_leader: bool) { return true } +/* +Auto-reset event. +Represents a thread synchronization primitive that, when signalled, releases one +single waiting thread and then resets automatically to a state where it can be +signalled again. + +When a thread calls `auto_reset_event_wait`, it's execution will be blocked, +until the event is signalled by another thread. The call to +`auto_reset_event_signal` wakes up exactly one thread waiting for the event. +*/ Auto_Reset_Event :: struct #no_copy { // status == 0: Event is reset and no threads are waiting - // status == 1: Event is signaled + // status == 1: Event is signalled // status == -N: Event is reset and N threads are waiting status: i32, sema: Sema, } +/* +Signal an auto-reset event. + +This procedure signals an auto-reset event, waking up exactly one waiting +thread. +*/ auto_reset_event_signal :: proc "contextless" (e: ^Auto_Reset_Event) { old_status := atomic_load_explicit(&e.status, .Relaxed) for { @@ -163,6 +240,12 @@ auto_reset_event_signal :: proc "contextless" (e: ^Auto_Reset_Event) { } } +/* +Wait on an auto-reset event. + +This procedure blocks the execution of the current thread, until the event is +signalled by another thread. +*/ auto_reset_event_wait :: proc "contextless" (e: ^Auto_Reset_Event) { old_status := atomic_sub_explicit(&e.status, 1, .Acquire) if old_status < 1 { @@ -170,13 +253,35 @@ auto_reset_event_wait :: proc "contextless" (e: ^Auto_Reset_Event) { } } +/* +Ticket lock. 
+
+A ticket lock is a mutual exclusion lock that uses "tickets" to control which
+thread is allowed into a critical section.
+
+This synchronization primitive works just like a spinlock, except that it
+provides a fairness guarantee: threads are admitted into the critical section
+one at a time, in the order in which they requested the lock.
+
+This type of synchronization primitive is applicable for short critical
+sections in low-contention systems, as it spins while waiting.
+*/
 Ticket_Mutex :: struct #no_copy {
 	ticket:  uint,
 	serving: uint,
 }
 
+/*
+Acquire a lock on a ticket mutex.
+
+This procedure acquires a lock on a ticket mutex. If the ticket mutex is held
+by another thread, this procedure also blocks the execution until the lock
+can be acquired.
+
+Once the lock is acquired, any other thread calling `ticket_mutex_lock` will
+be blocked from entering any critical sections associated with the same ticket
+mutex, until the lock is released.
+*/
 ticket_mutex_lock :: #force_inline proc "contextless" (m: ^Ticket_Mutex) {
 	ticket := atomic_add_explicit(&m.ticket, 1, .Relaxed)
 	for ticket != atomic_load_explicit(&m.serving, .Acquire) {
 		cpu_relax()
 	}
 }
 
+/*
+Release a lock on a ticket mutex.
+
+This procedure releases the lock on a ticket mutex. If any threads are waiting
+to acquire the lock, exactly one of them (the next in ticket order) is
+unblocked and allowed into the critical section.
+*/
 ticket_mutex_unlock :: #force_inline proc "contextless" (m: ^Ticket_Mutex) {
 	atomic_add_explicit(&m.serving, 1, .Relaxed)
 }
+
+/*
+Guard the current scope with a lock on a ticket mutex.
+
+This procedure acquires a lock on a ticket mutex. The lock is automatically
+released at the end of the caller's scope. If the mutex was already locked,
+this procedure also blocks until the lock can be acquired.
+
+When a lock has been acquired, all threads attempting to acquire a lock will be
+blocked from entering any critical sections associated with the ticket mutex,
+until the lock is released.
+
+This procedure always returns `true`. This makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
+	if ticket_mutex_guard(&m) {
+		...
+	}
+*/
 @(deferred_in=ticket_mutex_unlock)
 ticket_mutex_guard :: proc "contextless" (m: ^Ticket_Mutex) -> bool {
 	ticket_mutex_lock(m)
 	return true
 }
 
+/*
+Benaphore.
+
+A benaphore is a combination of an atomic variable and a semaphore that can
+improve locking efficiency under low contention. Acquiring a benaphore lock
+doesn't call into the internal semaphore if no other thread is in the middle
+of a critical section.
+
+Once a lock on a benaphore is acquired by a thread, no other thread is allowed
+into any critical sections associated with the same benaphore, until the lock
+is released.
+*/
 Benaphore :: struct #no_copy {
 	counter: i32,
 	sema:    Sema,
 }
 
+/*
+Acquire a lock on a benaphore.
+
+This procedure acquires a lock on the specified benaphore. If the lock on the
+benaphore is already held, this procedure also blocks the execution of the
+current thread until the lock can be acquired.
+
+Once a lock is acquired, all threads attempting to take a lock will be blocked
+from entering any critical sections associated with the same benaphore, until
+the lock is released.
+*/
 benaphore_lock :: proc "contextless" (b: ^Benaphore) {
 	if atomic_add_explicit(&b.counter, 1, .Acquire) > 1 {
 		sema_wait(&b.sema)
 	}
 }
 
+/*
+Try to acquire a lock on a benaphore.
+
+This procedure tries to acquire a lock on the specified benaphore. If it was
+already locked, then the returned value is `false`; otherwise the lock is
+acquired and the procedure returns `true`.
+
+If the lock is acquired, all threads that attempt to acquire a lock will be
+blocked from entering any critical sections associated with the same benaphore,
+until the lock is released.
+*/
 benaphore_try_lock :: proc "contextless" (b: ^Benaphore) -> bool {
 	v, _ := atomic_compare_exchange_strong_explicit(&b.counter, 0, 1, .Acquire, .Acquire)
 	return v == 0
 }
 
+/*
+Release a lock on a benaphore.
+
+This procedure releases a lock on the specified benaphore. If any threads are
+waiting on the lock, exactly one thread is allowed into the critical sections
+associated with the same benaphore.
+*/
 benaphore_unlock :: proc "contextless" (b: ^Benaphore) {
 	if atomic_sub_explicit(&b.counter, 1, .Release) > 0 {
 		sema_post(&b.sema)
 	}
 }
 
+/*
+Guard the current scope with a lock on a benaphore.
+
+This procedure acquires a lock on a benaphore. The lock is automatically
+released at the end of the caller's scope. If the benaphore was already
+locked, this procedure also blocks until the lock can be acquired.
+
+When a lock has been acquired, all threads attempting to acquire a lock will be
+blocked from entering any critical sections associated with the same benaphore,
+until the lock is released.
+
+This procedure always returns `true`. This makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
+	if benaphore_guard(&m) {
+		...
+	}
+*/
 @(deferred_in=benaphore_unlock)
 benaphore_guard :: proc "contextless" (m: ^Benaphore) -> bool {
 	benaphore_lock(m)
 	return true
 }
 
+/*
+Recursive benaphore.
+
+A recursive benaphore is just like a plain benaphore, except it allows
+reentrancy into the critical section.
+
+When a lock is acquired on a recursive benaphore, all other threads attempting
+to acquire a lock on the same benaphore will be blocked from entering any
+critical sections associated with it.
+
+A thread that holds a lock on a recursive benaphore is allowed to acquire the
+lock again. The benaphore will stay locked until the thread releases the lock
+as many times as it has acquired it.
+*/
 Recursive_Benaphore :: struct #no_copy {
 	counter:   int,
 	owner:     int,
 	recursion: int,
 	sema:      Sema,
 }
 
+/*
+Acquire a lock on a recursive benaphore.
+
+This procedure acquires a lock on a recursive benaphore. If the benaphore is
+held by another thread, this procedure blocks until the lock can be acquired.
+
+Once a lock is acquired, all other threads attempting to acquire a lock will
+be blocked from entering any critical sections associated with the same
+recursive benaphore, until the lock is released.
+*/
 recursive_benaphore_lock :: proc "contextless" (b: ^Recursive_Benaphore) {
 	tid := current_thread_id()
 	if atomic_add_explicit(&b.counter, 1, .Acquire) > 1 {
 		if tid != b.owner {
 			sema_wait(&b.sema)
 		}
 	}
 	// inside the lock
 	b.owner = tid
 	b.recursion += 1
 }
 
+/*
+Try to acquire a lock on a recursive benaphore.
+
+This procedure attempts to acquire a lock on a recursive benaphore. If the
+benaphore is already held by a different thread, this procedure returns
+`false`. Otherwise the lock is acquired and the procedure returns `true`.
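+
+**Example**: a minimal sketch of guarding a critical section with the
+try-lock:
+
+	if recursive_benaphore_try_lock(&b) {
+		defer recursive_benaphore_unlock(&b)
+		// ... critical section ...
+	}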
+
+If the lock is acquired, all other threads attempting to acquire a lock will
+be blocked from entering any critical sections associated with the same
+recursive benaphore, until the lock is released.
+*/
 recursive_benaphore_try_lock :: proc "contextless" (b: ^Recursive_Benaphore) -> bool {
 	tid := current_thread_id()
 	if b.owner == tid {
 		atomic_add_explicit(&b.counter, 1, .Acquire)
@@ -256,6 +485,13 @@ recursive_benaphore_try_lock :: proc "contextless" (b: ^Recursive_Benaphore) ->
 	return true
 }
 
+/*
+Release a lock on a recursive benaphore.
+
+This procedure releases a lock on the specified recursive benaphore. Once the
+benaphore has been unlocked as many times as it was locked, the critical
+sections associated with it become open for other threads to enter.
+*/
 recursive_benaphore_unlock :: proc "contextless" (b: ^Recursive_Benaphore) {
 	tid := current_thread_id()
 	_assert(tid == b.owner, "tid != b.owner")
@@ -272,24 +508,50 @@ recursive_benaphore_unlock :: proc "contextless" (b: ^Recursive_Benaphore) {
 	// outside the lock
 }
 
+/*
+Guard the current scope with a recursive benaphore.
+
+This procedure acquires a lock on the specified recursive benaphore and
+automatically releases it at the end of the caller's scope. If the recursive
+benaphore was already held by another thread, this procedure also blocks until
+the lock can be acquired.
+
+When the lock is acquired, all other threads attempting to take a lock will be
+blocked from entering any critical sections associated with the same benaphore,
+until the lock is released.
+
+This procedure always returns `true`, which makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
+	if recursive_benaphore_guard(&m) {
+		...
+	}
+*/
 @(deferred_in=recursive_benaphore_unlock)
 recursive_benaphore_guard :: proc "contextless" (m: ^Recursive_Benaphore) -> bool {
 	recursive_benaphore_lock(m)
 	return true
 }
 
+/*
+Once action.
 
-
-
-// Once is a data value that will perform exactly on action.
-//
-// A Once must not be copied after first use.
+`Once` is a synchronization primitive that allows an action to be performed
+exactly once: the first call is allowed through, and every subsequent call is
+a no-op.
+*/
 Once :: struct #no_copy {
 	m:    Mutex,
 	done: bool,
 }
 
-// once_do calls the procedure fn if and only if once_do is being called for the first for this instance of Once.
+/*
+Call a function once.
+
+The `once_do` procedure group calls the specified function, if it hasn't
+already been called from the perspective of the specific `Once` struct.
+*/
 once_do :: proc{
 	once_do_without_data,
 	once_do_without_data_contextless,
@@ -297,7 +559,9 @@ once_do :: proc{
 	once_do_with_data_contextless,
 }
 
-// once_do_without_data calls the procedure fn if and only if once_do_without_data is being called for the first for this instance of Once.
+/*
+Call a function with no data once.
+*/
 once_do_without_data :: proc(o: ^Once, fn: proc()) {
 	@(cold)
 	do_slow :: proc(o: ^Once, fn: proc()) {
@@ -313,7 +577,9 @@ once_do_without_data :: proc(o: ^Once, fn: proc()) {
 	}
 }
 
-// once_do_without_data calls the procedure fn if and only if once_do_without_data is being called for the first for this instance of Once.
+/*
+Call a contextless function with no data once.
+*/
 once_do_without_data_contextless :: proc(o: ^Once, fn: proc "contextless" ()) {
 	@(cold)
 	do_slow :: proc(o: ^Once, fn: proc "contextless" ()) {
@@ -329,7 +595,9 @@ once_do_without_data_contextless :: proc(o: ^Once, fn: proc "contextless" ()) {
 	}
 }
 
-// once_do_with_data calls the procedure fn if and only if once_do_with_data is being called for the first for this instance of Once.
+/*
+Call a function with data once.
+*/
 once_do_with_data :: proc(o: ^Once, fn: proc(data: rawptr), data: rawptr) {
 	@(cold)
 	do_slow :: proc(o: ^Once, fn: proc(data: rawptr), data: rawptr) {
@@ -345,7 +613,9 @@ once_do_with_data :: proc(o: ^Once, fn: proc(data: rawptr), data: rawptr) {
 	}
 }
 
-// once_do_with_data_contextless calls the procedure fn if and only if once_do_with_data_contextless is being called for the first for this instance of Once.
+/*
+Call a contextless function with data once.
+*/
 once_do_with_data_contextless :: proc "contextless" (o: ^Once, fn: proc "contextless" (data: rawptr), data: rawptr) {
 	@(cold)
 	do_slow :: proc "contextless" (o: ^Once, fn: proc "contextless" (data: rawptr), data: rawptr) {
@@ -361,83 +631,112 @@ once_do_with_data_contextless :: proc "contextless" (o: ^Once, fn: proc "context
 	}
 }
 
+/*
+A Parker is an associated token which is initially not present:
 
-
-
-// A Parker is an associated token which is initially not present:
-// * The `park` procedure blocks the current thread unless or until the token
-//   is available, at which point the token is consumed.
-// * The `park_with_timeout` procedures works the same as `park` but only
-//   blocks for the specified duration.
-// * The `unpark` procedure automatically makes the token available if it
-//   was not already.
+* The `park` procedure blocks the current thread unless or until the token
+  is available, at which point the token is consumed.
+* The `park_with_timeout` procedure works the same as `park` but only
+  blocks for the specified duration.
+* The `unpark` procedure atomically makes the token available if it was not
+  already.
+*/
 Parker :: struct #no_copy {
 	state: Futex,
 }
 
-// Blocks the current thread until the token is made available.
-//
-// Assumes this is only called by the thread that owns the Parker.
+@(private="file") PARKER_EMPTY    :: 0
+@(private="file") PARKER_NOTIFIED :: 1
+@(private="file") PARKER_PARKED   :: max(u32)
+
+/*
+Block until the token is available.
+
+This procedure blocks the execution of the current thread until a token is
+made available.
+
+**Note**: This procedure assumes it is only called by the thread that owns
+the Parker.
+*/
 park :: proc "contextless" (p: ^Parker) {
-	EMPTY    :: 0
-	NOTIFIED :: 1
-	PARKED   :: max(u32)
-	if atomic_sub_explicit(&p.state, 1, .Acquire) == NOTIFIED {
+	if atomic_sub_explicit(&p.state, 1, .Acquire) == PARKER_NOTIFIED {
 		return
 	}
 	for {
-		futex_wait(&p.state, PARKED)
-		if _, ok := atomic_compare_exchange_strong_explicit(&p.state, NOTIFIED, EMPTY, .Acquire, .Acquire); ok {
+		futex_wait(&p.state, PARKER_PARKED)
+		if _, ok := atomic_compare_exchange_strong_explicit(&p.state, PARKER_NOTIFIED, PARKER_EMPTY, .Acquire, .Acquire); ok {
 			return
 		}
 	}
 }
 
-// Blocks the current thread until the token is made available, but only
-// for a limited duration.
-//
-// Assumes this is only called by the thread that owns the Parker
+/*
+Block until the token is available, with a timeout.
+
+This procedure blocks the execution of the current thread until a token is
+made available, or until the timeout has expired, whichever happens first.
+
+**Note**: This procedure assumes it is only called by the thread that owns
+the Parker.
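+
+**Example**: a minimal sketch; `p` is an illustrative `Parker` shared between
+two threads:
+
+	// Waiting thread: block for up to one second waiting for the token.
+	park_with_timeout(&p, time.Second)
+
+	// Another thread: make the token available, waking the parked thread.
+	unpark(&p)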
+*/
 park_with_timeout :: proc "contextless" (p: ^Parker, duration: time.Duration) {
-	EMPTY    :: 0
-	NOTIFIED :: 1
-	PARKED   :: max(u32)
-	if atomic_sub_explicit(&p.state, 1, .Acquire) == NOTIFIED {
+	start_tick := time.tick_now()
+	remaining_duration := duration
+	if atomic_sub_explicit(&p.state, 1, .Acquire) == PARKER_NOTIFIED {
 		return
 	}
-	futex_wait_with_timeout(&p.state, PARKED, duration)
-	atomic_exchange_explicit(&p.state, EMPTY, .Acquire)
+	for {
+		if !futex_wait_with_timeout(&p.state, PARKER_PARKED, remaining_duration) {
+			return
+		}
+		old, ok := atomic_compare_exchange_weak_explicit((^u32)(&p.state), PARKER_PARKED, PARKER_EMPTY, .Acquire, .Relaxed)
+		if ok || old == PARKER_PARKED {
+			return
+		}
+		end_tick := time.tick_now()
+		remaining_duration -= time.tick_diff(start_tick, end_tick)
+		start_tick = end_tick
+	}
 }
 
-// Automatically makes thee token available if it was not already.
+/*
+Make the token available.
+*/
 unpark :: proc "contextless" (p: ^Parker) {
-	EMPTY    :: 0
-	NOTIFIED :: 1
-	PARKED   :: max(Futex)
-	if atomic_exchange_explicit(&p.state, NOTIFIED, .Release) == PARKED {
+	if atomic_exchange_explicit((^u32)(&p.state), PARKER_NOTIFIED, .Release) == PARKER_PARKED {
 		futex_signal(&p.state)
 	}
 }
 
+/*
+One-shot event.
 
-// A One_Shot_Event is an associated token which is initially not present:
-// * The `one_shot_event_wait` blocks the current thread until the event
-//   is made available
-// * The `one_shot_event_signal` procedure automatically makes the token
-//   available if its was not already.
+A one-shot event is an associated token which is initially not present:
+
+* The `one_shot_event_wait` procedure blocks the current thread until the
+  event is made available.
+* The `one_shot_event_signal` procedure atomically makes the token available
+  if it was not already.
+*/
 One_Shot_Event :: struct #no_copy {
 	state: Futex,
 }
 
-// Blocks the current thread until the event is made available with `one_shot_event_signal`.
+/*
+Block until the event is made available.
+
+This procedure blocks the execution of the current thread until the event is
+made available with `one_shot_event_signal`.
+*/
 one_shot_event_wait :: proc "contextless" (e: ^One_Shot_Event) {
 	for atomic_load_explicit(&e.state, .Acquire) == 0 {
 		futex_wait(&e.state, 0)
 	}
 }
 
-// Releases any threads that are currently blocked by this event with `one_shot_event_wait`.
+/*
+Make the event available.
+
+This procedure makes the event available, releasing any threads that are
+currently blocked in `one_shot_event_wait`.
+*/
 one_shot_event_signal :: proc "contextless" (e: ^One_Shot_Event) {
 	atomic_store_explicit(&e.state, 1, .Release)
 	futex_broadcast(&e.state)
diff --git a/core/sync/primitives.odin b/core/sync/primitives.odin
index 8fa3dd232..a22824481 100644
--- a/core/sync/primitives.odin
+++ b/core/sync/primitives.odin
@@ -3,46 +3,108 @@ package sync
 import "base:runtime"
 import "core:time"
 
+/*
+Obtain the current thread ID.
+*/
 current_thread_id :: proc "contextless" () -> int {
 	return _current_thread_id()
 }
 
-// A Mutex is a [[mutual exclusion lock; https://en.wikipedia.org/wiki/Mutual_exclusion]]
-// It can be used to prevent more than one thread from executing the same piece of code,
-// and thus prevent access to same piece of memory by multiple threads, at the same time.
-//
-// A Mutex's zero value represents an initial, *unlocked* state.
-//
-// If another thread tries to take the lock while another thread holds it, it will pause
-// until the lock is released. Code or memory that is "surrounded" by a mutex lock is said
-// to be "guarded by a mutex".
-//
-// A Mutex must not be copied after first use (e.g., after locking it the first time).
-// This is because, in order to coordinate with other threads, all threads must watch
-// the same memory address to know when the lock has been released. Trying to use a
-// copy of the lock at a different memory address will result in broken and unsafe
-// behavior. For this reason, Mutexes are marked as `#no_copy`.
+/*
+Mutual exclusion lock.
+
+A Mutex is a [[mutual exclusion lock; https://en.wikipedia.org/wiki/Mutual_exclusion]].
+It can be used to prevent more than one thread from entering the critical
+section, and thus prevent multiple threads from accessing the same piece of
+memory at the same time.
+
+A Mutex's zero-initialized value represents an initial, *unlocked* state.
+
+If a thread tries to acquire the lock while it's already held (typically by
+another thread), the thread's execution will be blocked until the lock is
+released. Code or memory that is "surrounded" by mutex lock and unlock
+operations is said to be "guarded by a mutex".
+
+**Note**: A Mutex must not be copied after first use (e.g., after locking it
+the first time). This is because, in order to coordinate with other threads,
+all threads must watch the same memory address to know when the lock has been
+released. Trying to use a copy of the lock at a different memory address will
+result in broken and unsafe behavior. For this reason, Mutexes are marked as
+`#no_copy`.
+
+**Note**: If the current thread attempts to lock a mutex that it is already
+holding, that causes a trivial case of deadlock. Do not use `Mutex` in
+recursive procedures. In case multiple locks by the same thread are desired,
+use `Recursive_Mutex`.
+*/
 Mutex :: struct #no_copy {
 	impl: _Mutex,
 }
 
-// mutex_lock locks m
+/*
+Acquire a lock on a mutex.
+
+This procedure acquires a lock on the specified mutex. If the mutex has
+already been locked by any thread, this procedure also blocks until the lock
+can be acquired.
+
+Once the lock is acquired, all other threads that attempt to acquire a lock
+will be blocked from entering any critical sections associated with the same
+mutex, until the lock is released.
+
+**Note**: If the mutex is already locked by the current thread, a call to this
+procedure will block indefinitely. Do not use this in recursive procedures.
+*/
 mutex_lock :: proc "contextless" (m: ^Mutex) {
 	_mutex_lock(m)
 }
 
-// mutex_unlock unlocks m
+/*
+Release a lock on a mutex.
+
+This procedure releases the lock associated with the specified mutex. If the
+mutex was not locked, this operation is a no-op.
+
+When the thread that holds the lock calls `mutex_unlock`, one other thread
+waiting on the mutex is allowed to enter the critical sections associated with
+the mutex. If no threads are waiting on the mutex, the critical sections
+simply remain open.
+*/
 mutex_unlock :: proc "contextless" (m: ^Mutex) {
 	_mutex_unlock(m)
 }
 
-// mutex_try_lock tries to lock m, will return true on success, and false on failure
+/*
+Try to acquire a lock on a mutex.
+
+This procedure tries to acquire a lock on the specified mutex. If it was
+already locked, then the returned value is `false`; otherwise the lock is
+acquired and the procedure returns `true`.
+
+If the lock is acquired, all threads that attempt to acquire a lock will be
+blocked from entering any critical sections associated with the same mutex,
+until the lock is released.
+*/
 mutex_try_lock :: proc "contextless" (m: ^Mutex) -> bool {
 	return _mutex_try_lock(m)
 }
 
 /*
-Example:
+Guard the current scope with a lock on a mutex.
+
+This procedure acquires a lock on the mutex. The lock is automatically
+released at the end of the caller's scope. If the mutex was already locked,
+this procedure also blocks until the lock can be acquired.
+
+When a lock has been acquired, all threads attempting to acquire a lock will be
+blocked from entering any critical sections associated with the mutex, until
+the lock is released.
+
+This procedure always returns `true`. This makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
 	if mutex_guard(&m) {
 		...
 	}
 */
 @(deferred_in=mutex_unlock)
 mutex_guard :: proc "contextless" (m: ^Mutex) -> bool {
 	mutex_lock(m)
 	return true
 }
 
-// A RW_Mutex is a reader/writer mutual exclusion lock
-// The lock can be held by any arbitrary number of readers or a single writer
-// The zero value for a RW_Mutex is an unlocked mutex
-//
-// A RW_Mutex must not be copied after first use
+/*
+Read-write mutual exclusion lock.
+
+An `RW_Mutex` is a reader/writer mutual exclusion lock. The lock can be held
+by any number of readers or a single writer.
+
+This type of synchronization primitive supports two kinds of lock operations:
+
+- Exclusive lock (write lock)
+- Shared lock (read lock)
+
+When an exclusive lock is acquired by any thread, all other threads attempting
+to acquire either an exclusive or a shared lock will be blocked from entering
+the critical sections associated with the read-write mutex, until the
+exclusive owner of the lock releases the lock.
+
+When a shared lock is acquired by any thread, any other thread attempting to
+acquire a shared lock will also be able to enter all the critical sections
+associated with the read-write mutex. However, threads attempting to acquire
+an exclusive lock will be blocked from entering those critical sections, until
+all shared locks are released.
+
+**Note**: A read-write mutex must not be copied after first use (e.g., after
+acquiring a lock). This is because, in order to coordinate with other threads,
+all threads must watch the same memory address to know when the lock has been
+released. Trying to use a copy of the lock at a different memory address will
+result in broken and unsafe behavior. For this reason, mutexes are marked as
+`#no_copy`.
+
+**Note**: A read-write mutex is not recursive. Do not attempt to acquire an
+exclusive lock more than once from the same thread, or an exclusive and a
+shared lock on the same thread. Taking a shared lock multiple times is
+acceptable.
+*/
 RW_Mutex :: struct #no_copy {
 	impl: _RW_Mutex,
 }
 
-// rw_mutex_lock locks rw for writing (with a single writer)
-// If the mutex is already locked for reading or writing, the mutex blocks until the mutex is available.
+/*
+Acquire an exclusive lock.
+
+This procedure acquires an exclusive lock on the specified read-write mutex.
+If the lock is already held by any thread, this procedure also blocks until
+the lock can be acquired.
+
+After the lock has been acquired, any thread attempting to acquire any lock
+will be blocked from entering any critical sections associated with the same
+read-write mutex, until the exclusive lock is released.
+*/
 rw_mutex_lock :: proc "contextless" (rw: ^RW_Mutex) {
 	_rw_mutex_lock(rw)
 }
 
-// rw_mutex_unlock unlocks rw for writing (with a single writer)
+/*
+Release an exclusive lock.
+
+This procedure releases an exclusive lock associated with the specified
+read-write mutex.
+
+When the exclusive lock is released, all critical sections associated with the
+same read-write mutex become open to other threads.
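+
+**Example**: a minimal sketch of a writer guarding its updates with the
+exclusive lock (`rw` is an illustrative shared `RW_Mutex`):
+
+	rw_mutex_lock(&rw)
+	defer rw_mutex_unlock(&rw)
+	// ... modify the shared data ...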
+*/
 rw_mutex_unlock :: proc "contextless" (rw: ^RW_Mutex) {
 	_rw_mutex_unlock(rw)
 }
 
-// rw_mutex_try_lock tries to lock rw for writing (with a single writer)
+/*
+Try to acquire an exclusive lock on a read-write mutex.
+
+This procedure tries to acquire an exclusive lock on the specified read-write
+mutex. If the mutex was already locked, the procedure returns `false`.
+Otherwise it acquires the exclusive lock and returns `true`.
+
+If the lock has been acquired, all threads attempting to acquire any lock
+will be blocked from entering any critical sections associated with the same
+read-write mutex, until the exclusive lock is released.
+*/
 rw_mutex_try_lock :: proc "contextless" (rw: ^RW_Mutex) -> bool {
 	return _rw_mutex_try_lock(rw)
 }
 
-// rw_mutex_shared_lock locks rw for reading (with arbitrary number of readers)
+/*
+Acquire a shared lock on a read-write mutex.
+
+This procedure acquires a shared lock on the specified read-write mutex. If
+the mutex already has an exclusive lock held, this procedure also blocks until
+the lock can be acquired.
+
+After the shared lock is obtained, all threads attempting to acquire an
+exclusive lock will be blocked from entering any critical sections associated
+with the same read-write mutex, until all shared locks associated with the
+specified read-write mutex are released.
+*/
 rw_mutex_shared_lock :: proc "contextless" (rw: ^RW_Mutex) {
 	_rw_mutex_shared_lock(rw)
 }
 
-// rw_mutex_shared_unlock unlocks rw for reading (with arbitrary number of readers)
+/*
+Release a shared lock on a read-write mutex.
+
+This procedure releases a shared lock on the specified read-write mutex. When
+all shared locks are released, all critical sections associated with the same
+read-write mutex become open to other threads.
+*/
 rw_mutex_shared_unlock :: proc "contextless" (rw: ^RW_Mutex) {
 	_rw_mutex_shared_unlock(rw)
 }
 
-// rw_mutex_try_shared_lock tries to lock rw for reading (with arbitrary number of readers)
+/*
+Try to acquire a shared lock on a read-write mutex.
+
+This procedure attempts to acquire a shared lock on the specified read-write
+mutex. If the mutex already has an exclusive lock held, this procedure returns
+`false`. Otherwise, it acquires the shared lock and returns `true`.
+
+If the shared lock has been acquired, all threads attempting to acquire the
+exclusive lock will be blocked from entering any critical sections associated
+with the same read-write mutex, until all shared locks are released.
+*/
 rw_mutex_try_shared_lock :: proc "contextless" (rw: ^RW_Mutex) -> bool {
 	return _rw_mutex_try_shared_lock(rw)
 }
+
 /*
-Example:
+Guard the current scope with an exclusive lock on a read-write mutex.
+
+This procedure acquires an exclusive lock on the specified read-write mutex
+and automatically releases it at the end of the caller's scope. If the mutex
+was already locked by readers or a writer, this procedure blocks until the
+lock can be acquired.
+
+When an exclusive lock is acquired, all other threads attempting to acquire
+any lock will be blocked from entering any critical sections associated with
+the same read-write mutex, until the exclusive lock is released.
+
+This procedure always returns `true`, which makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
+	if rw_mutex_guard(&m) {
+		...
+	}
+*/
 @(deferred_in=rw_mutex_unlock)
 rw_mutex_guard :: proc "contextless" (m: ^RW_Mutex) -> bool {
 	rw_mutex_lock(m)
 	return true
 }
 
 /*
-Example:
-	if rw_mutex_shared_guard(&m) {
+Guard the current scope with a shared lock on a read-write mutex.
+
+This procedure acquires a shared lock on the specified read-write mutex and
+automatically releases it at the end of the caller's scope. If the mutex
+already has an exclusive lock held, this procedure blocks until a lock can be
+acquired.
+
+When a shared lock is obtained, all other threads attempting to obtain an
+exclusive lock will be blocked from entering any critical sections associated
+with the same read-write mutex, until all shared locks are released.
+
+This procedure always returns `true`, which makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
+	if rw_mutex_shared_guard(&m) {
 		...
 	}
 */
 @(deferred_in=rw_mutex_shared_unlock)
 rw_mutex_shared_guard :: proc "contextless" (m: ^RW_Mutex) -> bool {
 	rw_mutex_shared_lock(m)
 	return true
 }
 
+/*
+Recursive mutual exclusion lock.
 
-// A Recursive_Mutex is a recursive mutual exclusion lock
-// The zero value for a Recursive_Mutex is an unlocked mutex
-//
-// A Recursive_Mutex must not be copied after first use
+A recursive mutex is just like a plain mutex, except it allows reentrancy. In
+order for a thread to release the mutex for other threads, the mutex needs to
+be unlocked as many times as it was locked.
+
+When a lock is acquired on a recursive mutex, all other threads attempting to
+acquire a lock on the same mutex will be blocked from entering any critical
+sections associated with the same recursive mutex.
+
+A thread that holds a lock on a recursive mutex is allowed to acquire the lock
+again. The recursive mutex will stay locked until the thread releases the lock
+as many times as it has acquired it.
+
+**Note**: A recursive mutex must not be copied after first use (e.g., after
+acquiring a lock). This is because, in order to coordinate with other threads,
+all threads must watch the same memory address to know when the lock has been
+released. Trying to use a copy of the lock at a different memory address will
+result in broken and unsafe behavior. For this reason, mutexes are marked as
+`#no_copy`.
+*/
 Recursive_Mutex :: struct #no_copy {
 	impl: _Recursive_Mutex,
 }
 
+/*
+Acquire a lock on a recursive mutex.
+
+This procedure acquires a lock on the specified recursive mutex. If the lock
+is held by a different thread, this procedure also blocks until the lock can
+be acquired.
+
+When the lock is acquired, all other threads attempting to acquire a lock will
+be blocked from entering any critical sections associated with the same mutex,
+until the lock is released.
+*/
 recursive_mutex_lock :: proc "contextless" (m: ^Recursive_Mutex) {
 	_recursive_mutex_lock(m)
 }
 
+/*
+Release a lock on a recursive mutex.
+
+This procedure releases a lock on the specified recursive mutex. Once the
+mutex has been unlocked as many times as it was locked, the critical sections
+associated with the same mutex become open for other threads to enter.
+*/
 recursive_mutex_unlock :: proc "contextless" (m: ^Recursive_Mutex) {
 	_recursive_mutex_unlock(m)
 }
 
+/*
+Try to acquire a lock on a recursive mutex.
+
+This procedure attempts to acquire a lock on the specified recursive mutex. If
+the recursive mutex is locked by another thread, this procedure returns
+`false`. Otherwise it locks the mutex and returns `true`.
+
+If the lock is acquired, all other threads attempting to obtain a lock will be
+blocked from entering any critical sections associated with the same mutex,
+until the lock is released.
+*/
 recursive_mutex_try_lock :: proc "contextless" (m: ^Recursive_Mutex) -> bool {
 	return _recursive_mutex_try_lock(m)
 }
 
 /*
-Example:
+Guard the current scope with a recursive mutex lock.
+
+This procedure acquires a lock on the specified recursive mutex and
+automatically releases it at the end of the current scope. If the recursive
+mutex was already held by another thread, this procedure blocks until the lock
+can be acquired.
+
+When the lock is acquired, all other threads attempting to take a lock will be
+blocked from entering any critical sections associated with the same mutex,
+until the lock is released.
+
+This procedure always returns `true`, which makes it easy to define a critical
+section by calling this procedure inside an `if` statement.
+
+**Example**:
+
 	if recursive_mutex_guard(&m) {
 		...
 	}
 */
@@ -150,19 +386,69 @@ recursive_mutex_guard :: proc "contextless" (m: ^Recursive_Mutex) -> bool {
 	return true
 }
 
+/*
+A condition variable.
+
-// Cond implements a condition variable, a rendezvous point for threads
-// waiting for signalling the occurence of an event
-//
-// A Cond must not be copied after first use
+`Cond` implements a condition variable, a rendezvous point for threads waiting
+for the occurrence of an event to be signalled. Condition variables are used in
+conjunction with mutexes to provide shared access to one or more shared
+variables.
+
+A typical usage of a condition variable is as follows. A thread that intends to
+modify a shared variable shall:
+
+1. Acquire a lock on a mutex.
+2. Modify the shared memory.
+3. Release the lock.
+4. Call `cond_signal` or `cond_broadcast`.
+
+A thread that intends to wait on a shared variable shall:
+
+1. Acquire a lock on a mutex.
+2. Call `cond_wait` or `cond_wait_with_timeout` (the mutex is released for the
+duration of the wait).
+3. Check the condition and keep waiting in a loop until it is satisfied.
+
+**Note**: A condition variable must not be copied after first use (e.g., after
+waiting on it the first time). This is because, in order to coordinate with
+other threads, all threads must watch the same memory address to know when the
+condition variable has been signalled. Trying to use a copy at a different
+memory address will result in broken and unsafe behavior. For this reason,
+condition variables are marked as `#no_copy`.
+*/
 Cond :: struct #no_copy {
 	impl: _Cond,
 }
 
+/*
+Wait until the condition variable is signalled, releasing the associated mutex
+during the wait.
+
+This procedure blocks the current thread until the specified condition variable
+is signalled, or until a spurious wakeup occurs. The mutex is released for the
+duration of the wait and is reacquired before this procedure returns.
+
+The mutex must be held by the calling thread before this procedure is called.
+
+**Note**: This procedure can return on a spurious wakeup, even if the condition
+variable was not signalled by a thread.
+*/
 cond_wait :: proc "contextless" (c: ^Cond, m: ^Mutex) {
 	_cond_wait(c, m)
 }
 
+/*
+Wait until the condition variable is signalled or the timeout is reached,
+releasing the associated mutex during the wait.
+
+This procedure blocks the current thread until the specified condition variable
+is signalled, the timeout is reached, or a spurious wakeup occurs. The mutex is
+released for the duration of the wait and is reacquired before this procedure
+returns.
+
+If the timeout was reached, this procedure returns `false`. Otherwise it
+returns `true`.
+
+The mutex must be held by the calling thread before this procedure is called.
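+
+Since wakeups can be spurious, the condition should be re-checked in a loop. A
+minimal sketch (assuming `c: Cond` and `m: Mutex` protect a hypothetical shared
+`ready` flag):
+
+	mutex_lock(&m)
+	for !ready {
+		if !cond_wait_with_timeout(&c, &m, time.Second) {
+			break // Timed out; the mutex is still held here.
+		}
+	}
+	mutex_unlock(&m)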
+*/
 cond_wait_with_timeout :: proc "contextless" (c: ^Cond, m: ^Mutex, duration: time.Duration) -> bool {
 	if duration <= 0 {
 		return false
@@ -170,51 +456,123 @@ cond_wait_with_timeout :: proc "contextless" (c: ^Cond, m: ^Mutex, dura
 	return _cond_wait_with_timeout(c, m, duration)
 }
 
+/*
+Wake up one thread that waits on a condition variable.
+
+This procedure causes one of the threads waiting on the condition variable, if
+any, to wake up.
+*/
 cond_signal :: proc "contextless" (c: ^Cond) {
 	_cond_signal(c)
 }
 
+/*
+Wake up all threads that wait on a condition variable.
+
+This procedure causes all threads waiting on the condition variable to wake up.
+*/
 cond_broadcast :: proc "contextless" (c: ^Cond) {
 	_cond_broadcast(c)
 }
 
+/*
+Semaphore.
+
-// When waited upon, blocks until the internal count is greater than zero, then subtracts one.
-// Posting to the semaphore increases the count by one, or the provided amount.
-//
-// A Sema must not be copied after first use
+When waited upon, a semaphore blocks until the internal count is greater than
+zero, then decrements the internal counter by one. Posting to the semaphore
+increases the count by one, or by the provided amount.
+
+This type of synchronization primitive can be useful for implementing queues.
+The internal counter of the semaphore can be thought of as the number of items
+in the queue. After an item has been pushed to the queue, the pushing thread
+shall call `sema_post`, increasing the counter. When a thread takes an item
+from the queue, it shall call `sema_wait`, which waits for the counter to
+become non-zero and then decrements it.
+
+**Note**: A semaphore must not be copied after first use (e.g., after posting
+to it). This is because, in order to coordinate with other threads, all threads
+must watch the same memory address to know when the counter has changed. Trying
+to use a copy of the semaphore at a different memory address will result in
+broken and unsafe behavior. For this reason, semaphores are marked as
+`#no_copy`.
+*/
 Sema :: struct #no_copy {
 	impl: _Sema,
 }
 
+/*
+Increment the internal counter of a semaphore by the specified amount.
+
+This procedure increments the internal counter of the semaphore. If any threads
+were waiting on the semaphore, up to `count` of them will resume execution and
+enter their critical sections.
+*/
 sema_post :: proc "contextless" (s: ^Sema, count := 1) {
 	_sema_post(s, count)
 }
 
+/*
+Wait on a semaphore until the internal counter is non-zero.
+
+This procedure blocks the execution of the current thread until the semaphore
+counter is non-zero, then atomically decrements the counter by one.
+*/
 sema_wait :: proc "contextless" (s: ^Sema) {
 	_sema_wait(s)
 }
 
+/*
+Wait on a semaphore until the internal counter is non-zero or the timeout is
+reached.
+
+This procedure blocks the execution of the current thread until the semaphore
+counter is non-zero, and, once the wait has ended, atomically decrements the
+counter by one. If the specified timeout is reached, this procedure returns
+`false`, otherwise it returns `true`.
+*/
 sema_wait_with_timeout :: proc "contextless" (s: ^Sema, duration: time.Duration) -> bool {
 	return _sema_wait_with_timeout(s, duration)
 }
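+
+// A queue handoff sketch using `Sema` (the `jobs` queue and the `push`/`pop`
+// procedures are hypothetical, not part of this package). Access to the queue
+// itself must still be synchronized, e.g. with a `Mutex`:
+//
+//	// Producer: publish an item, then post once per item.
+//	push(&jobs, job)
+//	sema_post(&queue_sema)
+//
+//	// Consumer: wait until at least one item is available, then take it.
+//	sema_wait(&queue_sema)
+//	job := pop(&jobs)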
+/*
+Fast userspace mutual exclusion lock.
+
+Futex is a fast userspace mutual exclusion lock that uses a pointer to a 32-bit
+value as the identifier of the queue of waiting threads. The value pointed to
+by that pointer can be used to store extra data.
 
-// Futex is a fast userspace mutual exclusion lock, using a 32-bit memory address as a hint
-//
-// An Futex must not be copied after first use
+**IMPORTANT**: A futex must not be copied after first use (e.g., after waiting
+on it the first time, or signalling it). This is because, in order to
+coordinate with other threads, all threads must watch the same memory address.
+Trying to use a copy of the lock at a different memory address will result in
+broken and unsafe behavior.
+*/
 Futex :: distinct u32
 
+/*
+Sleep if the futex contains the expected value until it's signalled.
+
+If the value of the futex is `expected`, this procedure blocks the execution of
+the current thread until the futex is signalled, or until a spurious wakeup
+occurs.
+*/
 futex_wait :: proc "contextless" (f: ^Futex, expected: u32) {
 	if u32(atomic_load_explicit(f, .Acquire)) != expected {
 		return
 	}
-
-	_assert(_futex_wait(f, expected), "futex_wait failure")
+	ok := _futex_wait(f, expected)
+	_assert(ok, "futex_wait failure")
 }
 
-// returns true if the wait happened within the duration, false if it exceeded the time duration
+/*
+Sleep if the futex contains the expected value until it's signalled or the
+timeout is reached.
+
+If the value of the futex is `expected`, this procedure blocks the execution of
+the current thread until the futex is signalled, the timeout is reached, or a
+spurious wakeup occurs.
+
+This procedure returns `false` if the timeout was reached, `true` otherwise.
+*/
 futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool {
 	if u32(atomic_load_explicit(f, .Acquire)) != expected {
 		return true
@@ -226,10 +584,16 @@ futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duratio
 	return _futex_wait_with_timeout(f, expected, duration)
 }
 
+/*
+Wake up a single thread waiting on a futex.
+*/
 futex_signal :: proc "contextless" (f: ^Futex) {
 	_futex_signal(f)
 }
 
+/*
+Wake up all threads waiting on a futex.
+*/
 futex_broadcast :: proc "contextless" (f: ^Futex) {
 	_futex_broadcast(f)
 }
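+
+// A minimal sketch of composing these procedures into a one-shot event (not
+// an API of this package): waiters sleep until `flag` is set by another
+// thread. The re-check loop guards against spurious wakeups.
+//
+//	flag: Futex // 0 = unset, 1 = set
+//
+//	wait_for_flag :: proc "contextless" () {
+//		for atomic_load(&flag) == 0 {
+//			futex_wait(&flag, 0)
+//		}
+//	}
+//
+//	set_flag :: proc "contextless" () {
+//		atomic_store(&flag, 1)
+//		futex_broadcast(&flag)
+//	}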