hashtable: Redesign the hashtable API.

This was intended to make the API public, so SDL_hashtable.h got an extreme
documentation makeover, but for now this remains a private header.

This makes several significant interface changes to SDL_HashTable, and
improves code that makes use of it in various ways.

- The ability to make "stackable" tables is removed. Apparently this still
  worked with the current implementation, but I could see a future
  implementation struggle mightily to support this. It'll be better for
  something external to build on top of the table if it needs it, inserting a
  linked list of stacked items as the hash values and managing them separately.
  There was only one place in SDL using this, unnecessarily, and that has also
  been cleaned up to not need it.
- You no longer specify "buckets" when creating a table, but rather an
  estimated number of items the table is meant to hold. The bucket count was
  crucial to our classic hashtable implementation, but meant less once we
  moved to an Open Addressing implementation anyhow, since the bucket count
  isn't static (and they aren't really "buckets" anymore either). Now you
  can just report how many items you think the hash will hold and SDL will
  allocate a reasonable default for you...or 0 to not guess, and SDL will
  start small and grow as necessary, which is often the correct thing to do.
- There's no more SDL_IterateHashTableKey because there's no more "stackable"
  hash tables.
- SDL_IterateHashTable() now uses a callback, which matches other parts of SDL,
  and also lets us hold the read-lock for the entire iteration and get rid of
  the goofy iterator state variable.
- SDL_InsertIntoHashTable() now lets you specify whether to replace existing
  keys or fail if the key already exists.
- Callbacks now use SDL conventions (userdata as the first param).
- Other naming convention fixes.

I discovered we use a lot of hash tables in SDL3 internally. :) So the bulk
of this work is fixing up that code to use the new interfaces, and simplifying
things (like checking for an item to remove it if it already exists before
inserting a replacement...just do the insert atomically, it'll do all that
for you!).
This commit is contained in:
Ryan C. Gordon
2025-02-13 16:13:43 -05:00
parent 4a9b579195
commit 84a236c92e
13 changed files with 1344 additions and 866 deletions

View File

@@ -19,34 +19,6 @@
3. This notice may not be removed or altered from any source distribution.
*/
#include "SDL_internal.h"
#include "SDL_hashtable.h"
// XXX: We can't use SDL_assert here because it's going to call into hashtable code
#ifdef NDEBUG
#define HT_ASSERT(x) (void)(0)
#else
#if (defined(_WIN32) || defined(SDL_PLATFORM_CYGWIN)) && !defined(SDL_PLATFORM_XBOXONE) && !defined(SDL_PLATFORM_XBOXSERIES)
#include <windows.h>
#endif
/* This is not declared in any header, although it is shared between some
parts of SDL, because we don't want anything calling it without an
extremely good reason. */
extern SDL_NORETURN void SDL_ExitProcess(int exitcode);
static void HT_ASSERT_FAIL(const char *msg)
{
const char *caption = "SDL_HashTable Assertion Failure!";
(void)caption;
#if (defined(_WIN32) || defined(SDL_PLATFORM_CYGWIN)) && !defined(SDL_PLATFORM_XBOXONE) && !defined(SDL_PLATFORM_XBOXSERIES)
MessageBoxA(NULL, msg, caption, MB_OK | MB_ICONERROR);
#elif defined(HAVE_STDIO_H)
fprintf(stderr, "\n\n%s\n%s\n\n", caption, msg);
fflush(stderr);
#endif
SDL_TriggerBreakpoint();
SDL_ExitProcess(-1);
}
#define HT_ASSERT(x) if (!(x)) HT_ASSERT_FAIL("SDL_HashTable Assertion Failure: " #x)
#endif
typedef struct SDL_HashItem
{
@@ -67,47 +39,49 @@ SDL_COMPILE_TIME_ASSERT(sizeof_SDL_HashItem, sizeof(SDL_HashItem) <= MAX_HASHITE
struct SDL_HashTable
{
SDL_RWLock *lock;
SDL_RWLock *lock; // NULL if not created threadsafe
SDL_HashItem *table;
SDL_HashTable_HashFn hash;
SDL_HashTable_KeyMatchFn keymatch;
SDL_HashTable_NukeFn nuke;
void *data;
SDL_HashCallback hash;
SDL_HashKeyMatchCallback keymatch;
SDL_HashDestroyCallback destroy;
void *userdata;
Uint32 hash_mask;
Uint32 max_probe_len;
Uint32 num_occupied_slots;
bool stackable;
};
SDL_HashTable *SDL_CreateHashTable(void *data,
Uint32 num_buckets,
SDL_HashTable_HashFn hashfn,
SDL_HashTable_KeyMatchFn keymatchfn,
SDL_HashTable_NukeFn nukefn,
bool threadsafe,
bool stackable)
static Uint32 CalculateHashBucketsFromEstimate(int estimated_capacity)
{
SDL_HashTable *table;
// num_buckets must be a power of two so we can derive the bucket index with just a bit-and.
if ((num_buckets < 1) || !SDL_HasExactlyOneBitSet32(num_buckets)) {
SDL_SetError("num_buckets must be a power of two");
return NULL;
if (estimated_capacity <= 0) {
return 4; // start small, grow as necessary.
}
if (num_buckets > MAX_HASHTABLE_SIZE) {
SDL_SetError("num_buckets is too large");
return NULL;
const Uint32 estimated32 = (Uint32) estimated_capacity;
Uint32 buckets = ((Uint32) 1) << SDL_MostSignificantBitIndex32(estimated32);
if (!SDL_HasExactlyOneBitSet32(estimated32)) {
buckets <<= 1; // need next power of two up to fit overflow capacity bits.
}
table = (SDL_HashTable *)SDL_calloc(1, sizeof(SDL_HashTable));
return SDL_min(buckets, MAX_HASHTABLE_SIZE);
}
SDL_HashTable *SDL_CreateHashTable(int estimated_capacity, bool threadsafe, SDL_HashCallback hash,
SDL_HashKeyMatchCallback keymatch,
SDL_HashDestroyCallback destroy, void *userdata)
{
const Uint32 num_buckets = CalculateHashBucketsFromEstimate(estimated_capacity);
SDL_HashTable *table = (SDL_HashTable *)SDL_calloc(1, sizeof(SDL_HashTable));
if (!table) {
return NULL;
}
if (threadsafe) {
// Don't fail if we can't create a lock (single threaded environment?)
table->lock = SDL_CreateRWLock();
if (!table->lock) {
SDL_DestroyHashTable(table);
return NULL;
}
}
table->table = (SDL_HashItem *)SDL_calloc(num_buckets, sizeof(SDL_HashItem));
@@ -117,24 +91,22 @@ SDL_HashTable *SDL_CreateHashTable(void *data,
}
table->hash_mask = num_buckets - 1;
table->stackable = stackable;
table->data = data;
table->hash = hashfn;
table->keymatch = keymatchfn;
table->nuke = nukefn;
table->userdata = userdata;
table->hash = hash;
table->keymatch = keymatch;
table->destroy = destroy;
return table;
}
static SDL_INLINE Uint32 calc_hash(const SDL_HashTable *table, const void *key)
{
const Uint32 BitMixer = 0x9E3779B1u;
return table->hash(key, table->data) * BitMixer;
return table->hash(table->userdata, key) * BitMixer;
}
static SDL_INLINE Uint32 get_probe_length(Uint32 zero_idx, Uint32 actual_idx, Uint32 num_buckets)
{
// returns the probe sequence length from zero_idx to actual_idx
if (actual_idx < zero_idx) {
return num_buckets - zero_idx + actual_idx;
}
@@ -149,7 +121,7 @@ static SDL_HashItem *find_item(const SDL_HashTable *ht, const void *key, Uint32
SDL_HashItem *table = ht->table;
for (;;) {
while (true) {
SDL_HashItem *item = table + *i;
Uint32 item_hash = item->hash;
@@ -157,12 +129,12 @@ static SDL_HashItem *find_item(const SDL_HashTable *ht, const void *key, Uint32
return NULL;
}
if (item_hash == hash && ht->keymatch(item->key, key, ht->data)) {
if (item_hash == hash && ht->keymatch(ht->userdata, item->key, key)) {
return item;
}
Uint32 item_probe_len = item->probe_len;
HT_ASSERT(item_probe_len == get_probe_length(item_hash & hash_mask, (Uint32)(item - table), hash_mask + 1));
SDL_assert(item_probe_len == get_probe_length(item_hash & hash_mask, (Uint32)(item - table), hash_mask + 1));
if (*probe_len > item_probe_len) {
return NULL;
@@ -185,23 +157,23 @@ static SDL_HashItem *find_first_item(const SDL_HashTable *ht, const void *key, U
static SDL_HashItem *insert_item(SDL_HashItem *item_to_insert, SDL_HashItem *table, Uint32 hash_mask, Uint32 *max_probe_len_ptr)
{
const Uint32 num_buckets = hash_mask + 1;
Uint32 idx = item_to_insert->hash & hash_mask;
SDL_HashItem temp_item, *target = NULL;
Uint32 num_buckets = hash_mask + 1;
SDL_HashItem *target = NULL;
SDL_HashItem temp_item;
for (;;) {
while (true) {
SDL_HashItem *candidate = table + idx;
if (!candidate->live) {
// Found an empty slot. Put it here and we're done.
*candidate = *item_to_insert;
if (target == NULL) {
target = candidate;
}
Uint32 probe_len = get_probe_length(candidate->hash & hash_mask, idx, num_buckets);
const Uint32 probe_len = get_probe_length(candidate->hash & hash_mask, idx, num_buckets);
candidate->probe_len = probe_len;
if (*max_probe_len_ptr < probe_len) {
@@ -211,9 +183,9 @@ static SDL_HashItem *insert_item(SDL_HashItem *item_to_insert, SDL_HashItem *tab
break;
}
Uint32 candidate_probe_len = candidate->probe_len;
HT_ASSERT(candidate_probe_len == get_probe_length(candidate->hash & hash_mask, idx, num_buckets));
Uint32 new_probe_len = get_probe_length(item_to_insert->hash & hash_mask, idx, num_buckets);
const Uint32 candidate_probe_len = candidate->probe_len;
SDL_assert(candidate_probe_len == get_probe_length(candidate->hash & hash_mask, idx, num_buckets));
const Uint32 new_probe_len = get_probe_length(item_to_insert->hash & hash_mask, idx, num_buckets);
if (candidate_probe_len < new_probe_len) {
// Robin Hood hashing: the item at idx has a better probe length than our item would at this position.
@@ -229,7 +201,7 @@ static SDL_HashItem *insert_item(SDL_HashItem *item_to_insert, SDL_HashItem *tab
*item_to_insert = temp_item;
HT_ASSERT(new_probe_len == get_probe_length(candidate->hash & hash_mask, idx, num_buckets));
SDL_assert(new_probe_len == get_probe_length(candidate->hash & hash_mask, idx, num_buckets));
candidate->probe_len = new_probe_len;
if (*max_probe_len_ptr < new_probe_len) {
@@ -245,17 +217,19 @@ static SDL_HashItem *insert_item(SDL_HashItem *item_to_insert, SDL_HashItem *tab
static void delete_item(SDL_HashTable *ht, SDL_HashItem *item)
{
Uint32 hash_mask = ht->hash_mask;
const Uint32 hash_mask = ht->hash_mask;
SDL_HashItem *table = ht->table;
if (ht->nuke) {
ht->nuke(item->key, item->value, ht->data);
if (ht->destroy) {
ht->destroy(ht->userdata, item->key, item->value);
}
SDL_assert(ht->num_occupied_slots > 0);
ht->num_occupied_slots--;
Uint32 idx = (Uint32)(item - ht->table);
for (;;) {
while (true) {
idx = (idx + 1) & hash_mask;
SDL_HashItem *next_item = table + idx;
@@ -266,22 +240,23 @@ static void delete_item(SDL_HashTable *ht, SDL_HashItem *item)
*item = *next_item;
item->probe_len -= 1;
HT_ASSERT(item->probe_len < ht->max_probe_len);
SDL_assert(item->probe_len < ht->max_probe_len);
item = next_item;
}
}
static bool resize(SDL_HashTable *ht, Uint32 new_size)
{
SDL_HashItem *old_table = ht->table;
Uint32 old_size = ht->hash_mask + 1;
Uint32 new_hash_mask = new_size - 1;
const Uint32 new_hash_mask = new_size - 1;
SDL_HashItem *new_table = SDL_calloc(new_size, sizeof(*new_table));
if (!new_table) {
return false;
}
SDL_HashItem *old_table = ht->table;
const Uint32 old_size = ht->hash_mask + 1;
ht->max_probe_len = 0;
ht->hash_mask = new_hash_mask;
ht->table = new_table;
@@ -299,14 +274,14 @@ static bool resize(SDL_HashTable *ht, Uint32 new_size)
static bool maybe_resize(SDL_HashTable *ht)
{
Uint32 capacity = ht->hash_mask + 1;
const Uint32 capacity = ht->hash_mask + 1;
if (capacity >= MAX_HASHTABLE_SIZE) {
return false;
}
Uint32 max_load_factor = 217; // range: 0-255; 217 is roughly 85%
Uint32 resize_threshold = (Uint32)((max_load_factor * (Uint64)capacity) >> 8);
const Uint32 max_load_factor = 217; // range: 0-255; 217 is roughly 85%
const Uint32 resize_threshold = (Uint32)((max_load_factor * (Uint64)capacity) >> 8);
if (ht->num_occupied_slots > resize_threshold) {
return resize(ht, capacity * 2);
@@ -315,72 +290,66 @@ static bool maybe_resize(SDL_HashTable *ht)
return true;
}
bool SDL_InsertIntoHashTable(SDL_HashTable *table, const void *key, const void *value)
bool SDL_InsertIntoHashTable(SDL_HashTable *table, const void *key, const void *value, bool replace)
{
SDL_HashItem *item;
Uint32 hash;
if (!table) {
return SDL_InvalidParamError("table");
}
bool result = false;
if (!table) {
return false;
SDL_LockRWLockForWriting(table->lock);
const Uint32 hash = calc_hash(table, key);
SDL_HashItem *item = find_first_item(table, key, hash);
bool do_insert = true;
if (item) {
if (replace) {
delete_item(table, item);
} else {
SDL_SetError("key already exists and replace is disabled");
do_insert = false;
}
}
if (table->lock) {
SDL_LockRWLockForWriting(table->lock);
if (do_insert) {
SDL_HashItem new_item;
new_item.key = key;
new_item.value = value;
new_item.hash = hash;
new_item.live = true;
new_item.probe_len = 0;
table->num_occupied_slots++;
if (!maybe_resize(table)) {
table->num_occupied_slots--;
} else {
// This never returns NULL
insert_item(&new_item, table->table, table->hash_mask, &table->max_probe_len);
result = true;
}
}
hash = calc_hash(table, key);
item = find_first_item(table, key, hash);
if (item && !table->stackable) {
// Allow overwrites, this might have been inserted on another thread
delete_item(table, item);
}
SDL_HashItem new_item;
new_item.key = key;
new_item.value = value;
new_item.hash = hash;
new_item.live = true;
new_item.probe_len = 0;
table->num_occupied_slots++;
if (!maybe_resize(table)) {
table->num_occupied_slots--;
goto done;
}
// This never returns NULL
insert_item(&new_item, table->table, table->hash_mask, &table->max_probe_len);
result = true;
done:
if (table->lock) {
SDL_UnlockRWLock(table->lock);
}
SDL_UnlockRWLock(table->lock);
return result;
}
bool SDL_FindInHashTable(const SDL_HashTable *table, const void *key, const void **value)
{
Uint32 hash;
SDL_HashItem *i;
bool result = false;
if (!table) {
if (value) {
*value = NULL;
}
return false;
return SDL_InvalidParamError("table");
}
if (table->lock) {
SDL_LockRWLockForReading(table->lock);
}
SDL_LockRWLockForReading(table->lock);
hash = calc_hash(table, key);
i = find_first_item(table, key, hash);
bool result = false;
const Uint32 hash = calc_hash(table, key);
SDL_HashItem *i = find_first_item(table, key, hash);
if (i) {
if (value) {
*value = i->value;
@@ -388,156 +357,91 @@ bool SDL_FindInHashTable(const SDL_HashTable *table, const void *key, const void
result = true;
}
if (table->lock) {
SDL_UnlockRWLock(table->lock);
}
SDL_UnlockRWLock(table->lock);
return result;
}
bool SDL_RemoveFromHashTable(SDL_HashTable *table, const void *key)
{
Uint32 hash;
SDL_HashItem *item;
bool result = false;
if (!table) {
return false;
return SDL_InvalidParamError("table");
}
if (table->lock) {
SDL_LockRWLockForWriting(table->lock);
SDL_LockRWLockForWriting(table->lock);
bool result = false;
const Uint32 hash = calc_hash(table, key);
SDL_HashItem *item = find_first_item(table, key, hash);
if (item) {
delete_item(table, item);
result = true;
}
// FIXME: what to do for stacking hashtables?
// The original code removes just one item.
// This hashtable happens to preserve the insertion order of multi-value keys,
// so deleting the first one will always delete the least-recently inserted one.
// But maybe it makes more sense to remove all matching items?
hash = calc_hash(table, key);
item = find_first_item(table, key, hash);
if (!item) {
goto done;
}
delete_item(table, item);
result = true;
done:
if (table->lock) {
SDL_UnlockRWLock(table->lock);
}
SDL_UnlockRWLock(table->lock);
return result;
}
bool SDL_IterateHashTableKey(const SDL_HashTable *table, const void *key, const void **_value, void **iter)
bool SDL_IterateHashTable(const SDL_HashTable *table, SDL_HashTableIterateCallback callback, void *userdata)
{
SDL_HashItem *item = (SDL_HashItem *)*iter;
if (!table) {
return false;
return SDL_InvalidParamError("table");
} else if (!callback) {
return SDL_InvalidParamError("callback");
}
Uint32 i, probe_len, hash;
if (item) {
HT_ASSERT(item >= table->table);
HT_ASSERT(item < table->table + (table->hash_mask + 1));
hash = item->hash;
probe_len = item->probe_len + 1;
i = ((Uint32)(item - table->table) + 1) & table->hash_mask;
item = table->table + i;
} else {
hash = calc_hash(table, key);
i = hash & table->hash_mask;
probe_len = 0;
}
item = find_item(table, key, hash, &i, &probe_len);
if (!item) {
*_value = NULL;
return false;
}
*_value = item->value;
*iter = item;
return true;
}
bool SDL_IterateHashTable(const SDL_HashTable *table, const void **_key, const void **_value, void **iter)
{
SDL_HashItem *item = (SDL_HashItem *)*iter;
if (!table) {
return false;
}
if (!item) {
item = table->table;
} else {
item++;
}
HT_ASSERT(item >= table->table);
SDL_LockRWLockForReading(table->lock);
SDL_HashItem *end = table->table + (table->hash_mask + 1);
Uint32 num_iterated = 0;
while (item < end && !item->live) {
++item;
}
HT_ASSERT(item <= end);
if (item == end) {
if (_key) {
*_key = NULL;
for (SDL_HashItem *item = table->table; item < end; item++) {
if (item->live) {
if (!callback(userdata, table, item->key, item->value)) {
break; // callback requested iteration stop.
} else if (++num_iterated >= table->num_occupied_slots) {
break; // we can drop out early because we've seen all the live items.
}
}
if (_value) {
*_value = NULL;
}
return false;
}
if (_key) {
*_key = item->key;
}
if (_value) {
*_value = item->value;
}
*iter = item;
SDL_UnlockRWLock(table->lock);
return true;
}
bool SDL_HashTableEmpty(SDL_HashTable *table)
{
return !(table && table->num_occupied_slots);
if (!table) {
return SDL_InvalidParamError("table");
}
SDL_LockRWLockForReading(table->lock);
const bool retval = (table->num_occupied_slots == 0);
SDL_UnlockRWLock(table->lock);
return retval;
}
static void nuke_all(SDL_HashTable *table)
{
void *data = table->data;
SDL_HashItem *end = table->table + (table->hash_mask + 1);
SDL_HashItem *i;
for (i = table->table; i < end; ++i) {
if (i->live) {
table->nuke(i->key, i->value, data);
static void destroy_all(SDL_HashTable *table)
{
SDL_HashDestroyCallback destroy = table->destroy;
if (destroy) {
void *userdata = table->userdata;
SDL_HashItem *end = table->table + (table->hash_mask + 1);
for (SDL_HashItem *i = table->table; i < end; ++i) {
if (i->live) {
i->live = false;
destroy(userdata, i->key, i->value);
}
}
}
}
void SDL_EmptyHashTable(SDL_HashTable *table)
void SDL_ClearHashTable(SDL_HashTable *table)
{
if (table) {
SDL_LockRWLockForWriting(table->lock);
{
if (table->nuke) {
nuke_all(table);
}
destroy_all(table);
SDL_memset(table->table, 0, sizeof(*table->table) * (table->hash_mask + 1));
table->num_occupied_slots = 0;
}
@@ -548,9 +452,10 @@ void SDL_EmptyHashTable(SDL_HashTable *table)
void SDL_DestroyHashTable(SDL_HashTable *table)
{
if (table) {
SDL_EmptyHashTable(table);
SDL_DestroyRWLock(table->lock);
destroy_all(table);
if (table->lock) {
SDL_DestroyRWLock(table->lock);
}
SDL_free(table->table);
SDL_free(table);
}
@@ -566,26 +471,26 @@ static SDL_INLINE Uint32 hash_string_djbxor(const char *str, size_t len)
return hash;
}
Uint32 SDL_HashPointer(const void *key, void *unused)
Uint32 SDL_HashPointer(void *unused, const void *key)
{
(void)unused;
return SDL_murmur3_32(&key, sizeof(key), 0);
}
bool SDL_KeyMatchPointer(const void *a, const void *b, void *unused)
bool SDL_KeyMatchPointer(void *unused, const void *a, const void *b)
{
(void)unused;
return (a == b);
}
Uint32 SDL_HashString(const void *key, void *unused)
Uint32 SDL_HashString(void *unused, const void *key)
{
(void)unused;
const char *str = (const char *)key;
return hash_string_djbxor(str, SDL_strlen(str));
}
bool SDL_KeyMatchString(const void *a, const void *b, void *unused)
bool SDL_KeyMatchString(void *unused, const void *a, const void *b)
{
const char *a_string = (const char *)a;
const char *b_string = (const char *)b;
@@ -604,26 +509,33 @@ bool SDL_KeyMatchString(const void *a, const void *b, void *unused)
// We assume we can fit the ID in the key directly
SDL_COMPILE_TIME_ASSERT(SDL_HashID_KeySize, sizeof(Uint32) <= sizeof(const void *));
Uint32 SDL_HashID(const void *key, void *unused)
Uint32 SDL_HashID(void *unused, const void *key)
{
(void)unused;
return (Uint32)(uintptr_t)key;
}
bool SDL_KeyMatchID(const void *a, const void *b, void *unused)
bool SDL_KeyMatchID(void *unused, const void *a, const void *b)
{
(void)unused;
return (a == b);
}
void SDL_NukeFreeKey(const void *key, const void *value, void *unused)
void SDL_DestroyHashKeyAndValue(void *unused, const void *key, const void *value)
{
(void)unused;
SDL_free((void *)key);
SDL_free((void *)value);
}
void SDL_DestroyHashKey(void *unused, const void *key, const void *value)
{
(void)value;
(void)unused;
SDL_free((void *)key);
}
void SDL_NukeFreeValue(const void *key, const void *value, void *unused)
void SDL_DestroyHashValue(void *unused, const void *key, const void *value)
{
(void)key;
(void)unused;