Minimize StringMap structure usage

This commit is contained in:
gingerBill
2023-01-14 12:33:42 +00:00
parent 1064bcd060
commit 1ab90de493
9 changed files with 146 additions and 119 deletions

View File

@@ -1,3 +1,5 @@
// Compile-time check that MapIndex has exactly the same size as u32.
GB_STATIC_ASSERT(sizeof(MapIndex) == sizeof(u32));
struct StringHashKey {
u32 hash;
String string;
@@ -9,101 +11,108 @@ struct StringHashKey {
return this->string;
}
};
// Hashes the bytes of `s` with fnv32a and clears the top bit,
// so the returned value always lies in [0, 0x7fffffff].
gb_internal gb_inline u32 string_hash(String const &s) {
	u32 const top_bit_cleared_mask = 0x7fffffff;
	return fnv32a(s.text, s.len) & top_bit_cleared_mask;
}
// Builds a StringHashKey for `s`: the hash comes from string_hash(s) and the
// string member is a copy of `s` itself.
gb_internal gb_inline StringHashKey string_hash_string(String const &s) {
	StringHashKey hash_key = {};
	// The hash is assigned exactly once; the earlier raw fnv32a store was
	// immediately overwritten and has been dropped.
	hash_key.hash   = string_hash(s);
	hash_key.string = s;
	return hash_key;
}
// Two keys are equal when their hashes match and their strings match.
gb_internal gb_inline bool string_hash_key_equal(StringHashKey const &a, StringHashKey const &b) {
	// NOTE(bill): The string comparison only runs when the hashes are equal,
	// which resolves hash collisions between different strings.
	return (a.hash == b.hash) && (a.string == b.string);
}
// One key/value slot stored in a StringMap's entry array.
template <typename T>
struct StringMapEntry {
	String   key;   // the entry's string key
	u32      hash;  // hash stored with the key; lookups compare it before comparing the strings
	MapIndex next;  // index of the next entry in the same bucket chain, MAP_SENTINEL at the end
	T        value; // the mapped value
};
// Hash map from String keys to T, stored as a bucket table of entry indices
// (`hashes`) plus a contiguous array of entries chained through `next`.
template <typename T>
struct StringMap {
	MapIndex *         hashes;           // bucket heads: an index into `entries`, or MAP_SENTINEL when empty
	usize              hashes_count;     // number of slots in `hashes`; lookups mask the hash with (hashes_count-1)
	StringMapEntry<T> *entries;          // entry storage; the first `count` elements are in use
	u32                count;            // number of entries currently stored
	u32                entries_capacity; // capacity of `entries` as reported by the last resize
};
// Forward declarations of the StringMap API.
// Each function is declared exactly once; capacities and counts are `usize`.

template <typename T> gb_internal void string_map_init    (StringMap<T> *h, usize capacity = 16);
template <typename T> gb_internal void string_map_destroy (StringMap<T> *h);

// Lookup: returns a pointer to the stored value, or nullptr when the key is absent.
template <typename T> gb_internal T *  string_map_get     (StringMap<T> *h, char const *key);
template <typename T> gb_internal T *  string_map_get     (StringMap<T> *h, String const &key);
template <typename T> gb_internal T *  string_map_get     (StringMap<T> *h, StringHashKey const &key);

// Lookup that asserts the key is present and returns a reference to the value.
template <typename T> gb_internal T &  string_map_must_get(StringMap<T> *h, char const *key);
template <typename T> gb_internal T &  string_map_must_get(StringMap<T> *h, String const &key);
template <typename T> gb_internal T &  string_map_must_get(StringMap<T> *h, StringHashKey const &key);

// Insert or overwrite.
template <typename T> gb_internal void string_map_set     (StringMap<T> *h, char const *key,          T const &value);
template <typename T> gb_internal void string_map_set     (StringMap<T> *h, String const &key,        T const &value);
template <typename T> gb_internal void string_map_set     (StringMap<T> *h, StringHashKey const &key, T const &value);

// template <typename T> gb_internal void string_map_remove  (StringMap<T> *h, StringHashKey const &key);
template <typename T> gb_internal void string_map_clear   (StringMap<T> *h);
template <typename T> gb_internal void string_map_grow    (StringMap<T> *h);
template <typename T> gb_internal void string_map_reserve (StringMap<T> *h, usize new_count);
// Allocator used for all StringMap storage; currently whatever heap_allocator() returns.
gb_internal gbAllocator string_map_allocator(void) {
	gbAllocator backing = heap_allocator();
	return backing;
}
// Prepares `h` to hold roughly `capacity` entries.
// The requested capacity is first passed through next_pow2_isize and the result
// is handed to string_map_reserve, which allocates the entry and bucket arrays.
// NOTE(review): string_map_reserve reads h->count and h->hashes_count, so `h`
// presumably has to be zero-initialised by the caller before this call - confirm.
template <typename T>
gb_internal gb_inline void string_map_init(StringMap<T> *h, usize capacity) {
	capacity = next_pow2_isize(capacity);
	string_map_reserve(h, capacity);
}
// Releases the bucket table and the entry array through string_map_allocator().
// The fields of `h` are not reset, so `h` must not be used afterwards without
// being re-initialised.
template <typename T>
gb_internal gb_inline void string_map_destroy(StringMap<T> *h) {
	gb_free(string_map_allocator(), h->hashes);
	gb_free(string_map_allocator(), h->entries);
}
// Resizes the bucket table to `count` slots via resize_array_raw and records
// the slot count that call returns.
// NOTE(review): the result is narrowed to u32 although hashes_count is a usize.
template <typename T>
gb_internal void string_map__resize_hashes(StringMap<T> *h, usize count) {
	h->hashes_count = cast(u32)resize_array_raw(&h->hashes, string_map_allocator(), h->hashes_count, count);
}
// Asks resize_array_raw to size the entry array for `capacity` elements and
// stores the capacity it reports back (narrowed to u32) in entries_capacity.
template <typename T>
gb_internal void string_map__reserve_entries(StringMap<T> *h, usize capacity) {
	auto const reported = resize_array_raw(&h->entries, string_map_allocator(), h->entries_capacity, capacity);
	h->entries_capacity = cast(u32)reported;
}
// Appends a new entry for (`hash`, `key`) at the end of the entry array and
// returns its index. The entry's value is left zero-initialised and its `next`
// link is MAP_SENTINEL; this function does not link it into any bucket chain.
template <typename T>
gb_internal MapIndex string_map__add_entry(StringMap<T> *h, u32 hash, String const &key) {
	StringMapEntry<T> e = {};
	e.key  = key;
	e.hash = hash;
	e.next = MAP_SENTINEL;

	// Make sure there is room for one more entry before writing it.
	string_map__reserve_entries(h, h->count+1);
	h->entries[h->count++] = e;
	return cast(MapIndex)(h->count-1);
}
template <typename T>
gb_internal MapFindResult string_map__find(StringMap<T> *h, StringHashKey const &key) {
gb_internal MapFindResult string_map__find(StringMap<T> *h, u32 hash, String const &key) {
MapFindResult fr = {MAP_SENTINEL, MAP_SENTINEL, MAP_SENTINEL};
if (h->hashes.count != 0) {
fr.hash_index = cast(MapIndex)(key.hash & (h->hashes.count-1));
fr.entry_index = h->hashes.data[fr.hash_index];
if (h->hashes_count != 0) {
fr.hash_index = cast(MapIndex)(hash & (h->hashes_count-1));
fr.entry_index = h->hashes[fr.hash_index];
while (fr.entry_index != MAP_SENTINEL) {
if (string_hash_key_equal(h->entries.data[fr.entry_index].key, key)) {
auto *entry = &h->entries[fr.entry_index];
if (entry->hash == hash && entry->key == key) {
return fr;
}
fr.entry_prev = fr.entry_index;
fr.entry_index = h->entries.data[fr.entry_index].next;
fr.entry_index = entry->next;
}
}
return fr;
@@ -112,15 +121,16 @@ gb_internal MapFindResult string_map__find(StringMap<T> *h, StringHashKey const
template <typename T>
gb_internal MapFindResult string_map__find_from_entry(StringMap<T> *h, StringMapEntry<T> *e) {
MapFindResult fr = {MAP_SENTINEL, MAP_SENTINEL, MAP_SENTINEL};
if (h->hashes.count != 0) {
fr.hash_index = cast(MapIndex)(e->key.hash & (h->hashes.count-1));
fr.entry_index = h->hashes.data[fr.hash_index];
if (h->hashes_count != 0) {
fr.hash_index = cast(MapIndex)(e->hash & (h->hashes_count-1));
fr.entry_index = h->hashes[fr.hash_index];
while (fr.entry_index != MAP_SENTINEL) {
if (&h->entries.data[fr.entry_index] == e) {
auto *entry = &h->entries[fr.entry_index];
if (entry == e) {
return fr;
}
fr.entry_prev = fr.entry_index;
fr.entry_index = h->entries.data[fr.entry_index].next;
fr.entry_index = entry->next;
}
}
return fr;
@@ -128,24 +138,24 @@ gb_internal MapFindResult string_map__find_from_entry(StringMap<T> *h, StringMap
// Reports whether the map has reached its load limit: true once the number of
// stored entries is at least 75% of the number of bucket slots.
template <typename T>
gb_internal b32 string_map__full(StringMap<T> *h) {
	return 0.75f * h->hashes_count <= h->count;
}
// Grows the map: requests a reserve of twice the current bucket count,
// with a minimum request of 16.
template <typename T>
gb_inline void string_map_grow(StringMap<T> *h) {
	// usize matches both hashes_count and the parameter type of string_map_reserve.
	usize new_count = gb_max(h->hashes_count<<1, 16);
	string_map_reserve(h, new_count);
}
template <typename T>
gb_internal void string_map_reset_entries(StringMap<T> *h) {
for (isize i = 0; i < h->hashes.count; i++) {
h->hashes.data[i] = MAP_SENTINEL;
for (u32 i = 0; i < h->hashes_count; i++) {
h->hashes[i] = MAP_SENTINEL;
}
for (isize i = 0; i < h->entries.count; i++) {
for (isize i = 0; i < h->count; i++) {
MapFindResult fr;
StringMapEntry<T> *e = &h->entries.data[i];
StringMapEntry<T> *e = &h->entries[i];
e->next = MAP_SENTINEL;
fr = string_map__find_from_entry(h, e);
if (fr.entry_prev == MAP_SENTINEL) {
@@ -157,27 +167,24 @@ gb_internal void string_map_reset_entries(StringMap<T> *h) {
}
// Ensures storage for `cap` entries and resizes the bucket table to `cap*2` slots,
// then rebuilds every bucket chain with string_map_reset_entries.
// Does nothing when the bucket table is already more than twice the entry count.
// NOTE(review): lookups mask the hash with (hashes_count-1), which presumably
// requires `cap` to be a power of two - confirm for external callers.
template <typename T>
gb_internal void string_map_reserve(StringMap<T> *h, usize cap) {
	if (h->count*2 < h->hashes_count) {
		return;
	}
	string_map__reserve_entries(h, cap);
	string_map__resize_hashes(h, cap*2);
	string_map_reset_entries(h);
}
template <typename T>
gb_internal T *string_map_get(StringMap<T> *h, StringHashKey const &key) {
gb_internal T *string_map_get(StringMap<T> *h, u32 hash, String const &key) {
MapFindResult fr = {MAP_SENTINEL, MAP_SENTINEL, MAP_SENTINEL};
if (h->hashes.count != 0) {
fr.hash_index = cast(MapIndex)(key.hash & (h->hashes.count-1));
fr.entry_index = h->hashes.data[fr.hash_index];
if (h->hashes_count != 0) {
fr.hash_index = cast(MapIndex)(hash & (h->hashes_count-1));
fr.entry_index = h->hashes[fr.hash_index];
while (fr.entry_index != MAP_SENTINEL) {
auto *entry = &h->entries.data[fr.entry_index];
if (string_hash_key_equal(entry->key, key)) {
auto *entry = &h->entries[fr.entry_index];
if (entry->hash == hash && entry->key == key) {
return &entry->value;
}
fr.entry_prev = fr.entry_index;
@@ -187,52 +194,65 @@ gb_internal T *string_map_get(StringMap<T> *h, StringHashKey const &key) {
return nullptr;
}
// Lookup by pre-hashed key: unpacks the StringHashKey and forwards to the
// (hash, string) overload. Returns whatever that overload returns.
template <typename T>
gb_internal gb_inline T *string_map_get(StringMap<T> *h, StringHashKey const &key) {
	u32 const     stored_hash = key.hash;
	String const &text        = key.string;
	return string_map_get(h, stored_hash, text);
}
// Lookup by String: hashes `key` with string_hash and forwards to the
// (hash, string) overload, without building an intermediate StringHashKey.
template <typename T>
gb_internal gb_inline T *string_map_get(StringMap<T> *h, String const &key) {
	return string_map_get(h, string_hash(key), key);
}
// Lookup by C string: wraps `key` with make_string_c, hashes it with
// string_hash and forwards to the (hash, string) overload.
template <typename T>
gb_internal gb_inline T *string_map_get(StringMap<T> *h, char const *key) {
	String k = make_string_c(key);
	return string_map_get(h, string_hash(k), k);
}
// Finds the entry for (`hash`, `key`) and returns a reference to its value.
// Asserts that the entry exists.
template <typename T>
gb_internal T &string_map_must_get(StringMap<T> *h, u32 hash, String const &key) {
	MapFindResult const found = string_map__find(h, hash, key);
	isize index = found.entry_index;
	GB_ASSERT(index != MAP_SENTINEL);
	StringMapEntry<T> &slot = h->entries[index];
	return slot.value;
}
// Asserting lookup by pre-hashed key: forwards the key's hash and string to the
// (hash, string) overload, which performs the search and the presence assert.
template <typename T>
gb_internal T &string_map_must_get(StringMap<T> *h, StringHashKey const &key) {
	return string_map_must_get(h, key.hash, key.string);
}
// Asserting lookup by String: hashes `key` with string_hash and forwards to the
// (hash, string) overload.
template <typename T>
gb_internal gb_inline T &string_map_must_get(StringMap<T> *h, String const &key) {
	return string_map_must_get(h, string_hash(key), key);
}
// Asserting lookup by C string: wraps `key` with make_string_c, hashes it with
// string_hash and forwards to the (hash, string) overload.
template <typename T>
gb_internal gb_inline T &string_map_must_get(StringMap<T> *h, char const *key) {
	String k = make_string_c(key);
	return string_map_must_get(h, string_hash(k), k);
}
template <typename T>
gb_internal void string_map_set(StringMap<T> *h, StringHashKey const &key, T const &value) {
gb_internal void string_map_set(StringMap<T> *h, u32 hash, String const &key, T const &value) {
MapIndex index;
MapFindResult fr;
if (h->hashes.count == 0) {
if (h->hashes_count == 0) {
string_map_grow(h);
}
fr = string_map__find(h, key);
fr = string_map__find(h, hash, key);
if (fr.entry_index != MAP_SENTINEL) {
index = fr.entry_index;
} else {
index = string_map__add_entry(h, key);
index = string_map__add_entry(h, hash, key);
if (fr.entry_prev != MAP_SENTINEL) {
h->entries.data[fr.entry_prev].next = index;
h->entries[fr.entry_prev].next = index;
} else {
h->hashes.data[fr.hash_index] = index;
h->hashes[fr.hash_index] = index;
}
}
h->entries.data[index].value = value;
h->entries[index].value = value;
if (string_map__full(h)) {
string_map_grow(h);
@@ -249,25 +269,31 @@ gb_internal gb_inline void string_map_set(StringMap<T> *h, char const *key, T co
string_map_set(h, string_hash_string(make_string_c(key)), value);
}
// Insert/overwrite by pre-hashed key: unpacks the StringHashKey and forwards to
// the (hash, string, value) overload.
template <typename T>
gb_internal gb_inline void string_map_set(StringMap<T> *h, StringHashKey const &key, T const &value) {
	u32 const     stored_hash = key.hash;
	String const &text        = key.string;
	string_map_set(h, stored_hash, text, value);
}
// template <typename T>
// gb_internal void string_map__erase(StringMap<T> *h, MapFindResult const &fr) {
// MapFindResult last;
// if (fr.entry_prev == MAP_SENTINEL) {
// h->hashes.data[fr.hash_index] = h->entries.data[fr.entry_index].next;
// h->hashes[fr.hash_index] = h->entries[fr.entry_index].next;
// } else {
// h->entries.data[fr.entry_prev].next = h->entries.data[fr.entry_index].next;
// h->entries[fr.entry_prev].next = h->entries[fr.entry_index].next;
// }
// if (fr.entry_index == h->entries.count-1) {
// if (fr.entry_index == h->count-1) {
// array_pop(&h->entries);
// return;
// }
// h->entries.data[fr.entry_index] = h->entries.data[h->entries.count-1];
// last = string_map__find(h, h->entries.data[fr.entry_index].key);
// h->entries[fr.entry_index] = h->entries[h->count-1];
// last = string_map__find(h, h->entries[fr.entry_index].key);
// if (last.entry_prev != MAP_SENTINEL) {
// h->entries.data[last.entry_prev].next = fr.entry_index;
// h->entries[last.entry_prev].next = fr.entry_index;
// } else {
// h->hashes.data[last.hash_index] = fr.entry_index;
// h->hashes[last.hash_index] = fr.entry_index;
// }
// }
@@ -281,9 +307,9 @@ gb_internal gb_inline void string_map_set(StringMap<T> *h, char const *key, T co
// Removes every entry without freeing any storage: the entry count drops to
// zero and every bucket is reset to MAP_SENTINEL.
template <typename T>
gb_internal gb_inline void string_map_clear(StringMap<T> *h) {
	h->count = 0;
	// Index type matches hashes_count (usize) so the comparison cannot wrap.
	for (usize i = 0; i < h->hashes_count; i++) {
		h->hashes[i] = MAP_SENTINEL;
	}
}
@@ -291,20 +317,20 @@ gb_internal gb_inline void string_map_clear(StringMap<T> *h) {
// Range-for support: pointer to the first stored entry.
template <typename T>
gb_internal StringMapEntry<T> *begin(StringMap<T> &m) noexcept {
	return m.entries;
}
// Range-for support (const map): pointer to the first stored entry.
template <typename T>
gb_internal StringMapEntry<T> const *begin(StringMap<T> const &m) noexcept {
	return m.entries;
}
// Range-for support: pointer one past the last stored entry (entries + count).
template <typename T>
gb_internal StringMapEntry<T> *end(StringMap<T> &m) {
	return m.entries + m.count;
}
// Range-for support (const map): pointer one past the last stored entry (entries + count).
template <typename T>
gb_internal StringMapEntry<T> const *end(StringMap<T> const &m) noexcept {
	return m.entries + m.count;
}