From 571170fd30b116c2b6cf40835c2f4779631741cf Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sat, 7 Aug 2021 14:25:48 +0100 Subject: [PATCH] Improve and simplify the memory layout of `MPMCQueue` --- src/array.cpp | 15 ++++++--- src/common.cpp | 22 ++++++++++++ src/queue.cpp | 90 ++++++++++++++++++++++++-------------------------- 3 files changed, 76 insertions(+), 51 deletions(-) diff --git a/src/array.cpp b/src/array.cpp index a7c9204b0..a2c802168 100644 --- a/src/array.cpp +++ b/src/array.cpp @@ -326,12 +326,17 @@ void array_set_capacity(Array *array, isize capacity) { array_resize(array, capacity); } - T *new_data = nullptr; - if (capacity > 0) { - new_data = gb_alloc_array(array->allocator, T, capacity); - gb_memmove(new_data, array->data, gb_size_of(T) * array->capacity); + // NOTE(bill): try gb_resize_align first, and then fallback to alloc+memmove+free + isize old_size = array->capacity * gb_size_of(T); + isize new_size = capacity * gb_size_of(T); + T *new_data = cast(T *)gb_resize_align(array->allocator, array->data, old_size, new_size, gb_align_of(T)); + if (new_data == nullptr) { + if (capacity > 0) { + new_data = gb_alloc_array(array->allocator, T, capacity); + gb_memmove(new_data, array->data, gb_size_of(T) * array->capacity); + } + gb_free(array->allocator, array->data); } - gb_free(array->allocator, array->data); array->data = new_data; array->capacity = capacity; } diff --git a/src/common.cpp b/src/common.cpp index 4aea379c3..5e5ec3490 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -228,6 +228,28 @@ GB_ALLOCATOR_PROC(heap_allocator_proc) { return ptr; } + +template +void resize_array_raw(T **array, gbAllocator const &a, isize old_count, isize new_count) { + GB_ASSERT(new_count >= 0); + if (new_count == 0) { + gb_free(a, *array); + *array = nullptr; + return; + } + if (new_count < old_count) { + return; + } + isize old_size = old_count * gb_size_of(T); + isize new_size = new_count * gb_size_of(T); + isize alignment = gb_align_of(T); + auto new_data = cast(T *)gb_resize_align(a, *array, old_size, new_size, alignment); + GB_ASSERT(new_data != nullptr); + *array = new_data; +} + + + #include "unicode.cpp" #include "array.cpp" #include "string.cpp" diff --git a/src/queue.cpp b/src/queue.cpp index f30c9cd8c..a9bbbeee7 100644 --- a/src/queue.cpp +++ b/src/queue.cpp @@ -1,26 +1,16 @@ -template -struct MPMCQueueNode { - std::atomic idx; - T data; -}; - -template -struct MPMCQueueNodeNonAtomic { - i32 idx; - T data; -}; - #define MPMC_CACHE_LINE_SIZE 64 // Multiple Producer Multiple Consumer Queue template struct MPMCQueue { - static size_t const PAD0_OFFSET = (sizeof(Array>) + sizeof(BlockingMutex) + sizeof(i32) + sizeof(i32)); + static size_t const PAD0_OFFSET = (sizeof(T *) + sizeof(std::atomic *) + sizeof(gbAllocator) + sizeof(BlockingMutex) + sizeof(i32) + sizeof(i32)); - Array> buffer; - BlockingMutex mutex; - std::atomic count; - i32 mask; + T * nodes; + std::atomic *indices; + gbAllocator allocator; + BlockingMutex mutex; + std::atomic count; + i32 mask; char pad0[(MPMC_CACHE_LINE_SIZE*2 - PAD0_OFFSET) % MPMC_CACHE_LINE_SIZE]; std::atomic head_idx; @@ -30,23 +20,22 @@ struct MPMCQueue { }; -template -void mpmc_internal_init_buffer(Array> *buffer, i32 offset) { - i32 size = cast(i32)buffer->count; + +void mpmc_internal_init_indices(std::atomic *indices, i32 offset, i32 size) { GB_ASSERT(offset % 8 == 0); GB_ASSERT(size % 8 == 0); // NOTE(bill): pretend it's not atomic for performance - auto *raw_data = cast(MPMCQueueNodeNonAtomic *)buffer->data; + auto *raw_data = cast(i32 *)indices; for (i32 i = offset; i < size; i += 8) { - raw_data[i+0].idx = i+0; - raw_data[i+1].idx = i+1; - raw_data[i+2].idx = i+2; - raw_data[i+3].idx = i+3; - raw_data[i+4].idx = i+4; - raw_data[i+5].idx = i+5; - raw_data[i+6].idx = i+6; - raw_data[i+7].idx = i+7; + raw_data[i+0] = i+0; + raw_data[i+1] = i+1; + raw_data[i+2] = i+2; + raw_data[i+3] = i+3; + raw_data[i+4] = i+4; + raw_data[i+5] = i+5; + raw_data[i+6] = i+6; + raw_data[i+7] = i+7; } } @@ -63,9 +52,11 @@ void mpmc_init(MPMCQueue *q, gbAllocator a, isize size_i) { mutex_init(&q->mutex); q->mask = size-1; - array_init(&q->buffer, a, size); + q->allocator = a; + q->nodes = gb_alloc_array(a, T, size); + q->indices = cast(std::atomic *)gb_alloc_array(a, i32, size); - mpmc_internal_init_buffer(&q->buffer, 0); + mpmc_internal_init_indices(q->indices, 0, q->mask+1); } @@ -73,7 +64,8 @@ void mpmc_init(MPMCQueue *q, gbAllocator a, isize size_i) { template void mpmc_destroy(MPMCQueue *q) { mutex_destroy(&q->mutex); - gb_free(q->buffer.allocator, q->buffer.data); + gb_free(q->allocator, q->nodes); + gb_free(q->allocator, q->indices); } @@ -84,30 +76,35 @@ i32 mpmc_enqueue(MPMCQueue *q, T const &data) { i32 head_idx = q->head_idx.load(std::memory_order_relaxed); for (;;) { - auto node = &q->buffer.data[head_idx & q->mask]; - i32 node_idx = node->idx.load(std::memory_order_acquire); + auto node = &q->nodes[head_idx & q->mask]; + auto node_idx_ptr = &q->indices[head_idx & q->mask]; + i32 node_idx = node_idx_ptr->load(std::memory_order_acquire); i32 diff = node_idx - head_idx; if (diff == 0) { i32 next_head_idx = head_idx+1; if (q->head_idx.compare_exchange_weak(head_idx, next_head_idx)) { - node->data = data; - node->idx.store(next_head_idx, std::memory_order_release); + *node = data; + node_idx_ptr->store(next_head_idx, std::memory_order_release); return q->count.fetch_add(1, std::memory_order_release); } } else if (diff < 0) { mutex_lock(&q->mutex); - i32 old_size = cast(i32)q->buffer.count; + i32 old_size = q->mask+1; i32 new_size = old_size*2; - array_resize(&q->buffer, new_size); - if (q->buffer.data == nullptr) { + resize_array_raw(&q->nodes, q->allocator, old_size, new_size); + if (q->nodes == nullptr) { GB_PANIC("Unable to resize enqueue: %td -> %td", old_size, new_size); mutex_unlock(&q->mutex); return -1; } - // NOTE(bill): pretend it's not atomic for performance - auto *raw_data = cast(MPMCQueueNodeNonAtomic *)q->buffer.data; - mpmc_internal_init_buffer(&q->buffer, old_size); + resize_array_raw(&q->indices, q->allocator, old_size, new_size); + if (q->indices == nullptr) { + GB_PANIC("Unable to resize enqueue: %td -> %td", old_size, new_size); + mutex_unlock(&q->mutex); + return -1; + } + mpmc_internal_init_indices(q->indices, old_size, new_size); q->mask = new_size-1; mutex_unlock(&q->mutex); } else { @@ -125,15 +122,16 @@ bool mpmc_dequeue(MPMCQueue *q, T *data_) { i32 tail_idx = q->tail_idx.load(std::memory_order_relaxed); for (;;) { - auto node = &q->buffer.data[tail_idx & q->mask]; - i32 node_idx = node->idx.load(std::memory_order_acquire); + auto node_ptr = &q->nodes[tail_idx & q->mask]; + auto node_idx_ptr = &q->indices[tail_idx & q->mask]; + i32 node_idx = node_idx_ptr->load(std::memory_order_acquire); i32 diff = node_idx - (tail_idx+1); if (diff == 0) { i32 next_tail_idx = tail_idx+1; if (q->tail_idx.compare_exchange_weak(tail_idx, next_tail_idx)) { - if (data_) *data_ = node->data; - node->idx.store(tail_idx + q->mask + 1, std::memory_order_release); + if (data_) *data_ = *node_ptr; + node_idx_ptr->store(tail_idx + q->mask + 1, std::memory_order_release); q->count.fetch_sub(1, std::memory_order_release); return true; }