Add hcode. Re-factor rawGet. Fix infinite loop.

Replace state enum with a cached hash code which has the same memory overhead
and locality as the enum, but can really speed things up with non-integer-like
keys (keys for which either hash() or == takes more than a couple of cycles, or where
the key data is "indirect" and might incur another cache miss).  To function as
both empty/filled state and a hash code cache, it only needs to be ensured that
hash codes are non-zero for any real key.  That is done at the one place in the
whole file hash() is called.  Keep convention clear via isFilled() & isEmpty().
An isDeleted state is no longer necessary, per the excl/infinite-loop fix below.

Since some use sites know hc and some do not, re-factor rawGet into two forms -
one with known hash code and one with an unknown HC that returns it. Both forms
still return <0 on missing, but now return the much more informative "-1 - index".
That return can be quickly inverted by -1 - result to recover the index where
insert should happen, provided no modifications are made to the table in the
meantime.  This protocol retains the prior <0 interface and also makes it easy
to avoid unnecessary duplicate search work in procs like containsOrInclImpl
(which formerly searched in the initial get and AGAIN in rawInsert).  Strip the
searching part out of rawInsert to "make it even more raw".  swap(s.data, n) a
bit earlier so rawGet and rawGetKnownHC can have similar parameter lists and
integrate well with rawInsert/code sharing between Set and OrderedSet impls.

This PR also fixes infinite looping upon too many deletes. [ The deleted state
(aka "tombstone") approach is vulnerable to the table filling up with deleted
items which forces giant scans for missing keys which could be anywhere.  In
the version prior to this PR, table wraparound wasn't even detected, yielding
infinite loops. ] This PR changes excl() from marking slots as deleted to Knuth
algo 6.4R, "local/incremental moveback rehashing" - adapted from Knuth's h->h-1
to the cache-friendlier h->h+1 probe sequence and adapted from "gotos" to a new
doWhile template.  This method restores the table to a state that would have
resulted from pure inserts (in some order).  Update nextTry accordingly.  Since
linear probing can degrade a little faster, 50% rather than 66% may be a better
default growth threshold, but users should be able to adjust threshold anyway.

Old unit tests all pass.  More extensive testing in this module is probably
warranted before taking similar enhancements over to collections.tables.
This commit is contained in:
Charles Blake
2015-02-06 09:24:20 -05:00
parent 53f4c7758b
commit 65ce08f38c

View File

@@ -24,9 +24,12 @@ import
when not defined(nimhygiene):
{.pragma: dirty.}
# For "integer-like A" that are too big for intsets/bit-vectors to be practical,
# it would be best to shrink hcode to the same size as the integer. Larger
# codes should never be needed, and this can pack more entries per cache-line.
# Losing hcode entirely is also possible - if some element value is forbidden.
type
SlotEnum = enum seEmpty, seFilled, seDeleted
KeyValuePair[A] = tuple[slot: SlotEnum, key: A]
KeyValuePair[A] = tuple[hcode: THash, key: A]
KeyValuePairSeq[A] = seq[KeyValuePair[A]]
HashSet* {.myShallow.}[A] = object ## \
## A generic hash set.
@@ -38,6 +41,14 @@ type
{.deprecated: [TSet: HashSet].}
# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These
# two procs retain clarity of that encoding without the space cost of an enum.
# Returns true when `hcode` is 0, the value reserved to mark an empty slot.
proc isEmpty(hcode: THash): bool {.inline.} =
result = hcode == 0
# Returns true when `hcode` is non-zero, i.e. the slot holds a real key.
proc isFilled(hcode: THash): bool {.inline.} =
result = hcode != 0
proc isValid*[A](s: HashSet[A]): bool =
## Returns `true` if the set has been initialized with `initSet <#initSet>`_.
##
@@ -94,7 +105,7 @@ iterator items*[A](s: HashSet[A]): A =
## # --> {(a: 1, b: 3), (a: 0, b: 4)}
assert s.isValid, "The set needs to be initialized."
for h in 0..high(s.data):
if s.data[h].slot == seFilled: yield s.data[h].key
if isFilled(s.data[h].hcode): yield s.data[h].key
const
growthFactor = 2
@@ -104,24 +115,34 @@ proc mustRehash(length, counter: int): bool {.inline.} =
result = (length * 2 < counter * 3) or (length - counter < 4)
proc nextTry(h, maxHash: THash): THash {.inline.} =
result = ((5 * h) + 1) and maxHash
result = (h + 1) and maxHash
template rawGetImpl() {.dirty.} =
var h: THash = hash(key) and high(s.data) # start with real hash value
while s.data[h].slot != seEmpty:
if s.data[h].key == key and s.data[h].slot == seFilled:
template rawGetKnownHCImpl() {.dirty.} =
var h: THash = hc and high(s.data) # start with real hash value
while isFilled(s.data[h].hcode):
# Compare hc THEN key with boolean short circuit. This makes the common case
# zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
# It does slow down succeeding lookups by one extra THash cmp&and..usually
# just a few clock cycles, generally worth it for any non-integer-like A.
if s.data[h].hcode == hc and s.data[h].key == key: # compare hc THEN key
return h
h = nextTry(h, high(s.data))
result = -1
result = -1 - h # < 0 => MISSING; insert idx = -1 - result
# Dirty template: uses `key` and an assignable `hc` from the expanding scope.
# Computes hash(key) into `hc`, remaps a zero hash to a non-zero constant so
# that 0 stays reserved for empty slots, then expands the known-hash-code
# lookup (rawGetKnownHCImpl) with that `hc`.
template rawGetImpl() {.dirty.} =
hc = hash(key)
if hc == 0: # This almost never taken branch should be very predictable.
hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine.
rawGetKnownHCImpl()
template rawInsertImpl() {.dirty.} =
var h: THash = hash(key) and high(data)
while data[h].slot == seFilled:
h = nextTry(h, high(data))
data[h].key = key
data[h].slot = seFilled
data[h].hcode = hc
proc rawGet[A](s: HashSet[A], key: A): int =
# Looks up `key` in `s` using the caller-supplied hash code `hc`.
# Expands rawGetKnownHCImpl: yields the slot index when the key is found,
# otherwise `-1 - h`, where `h` is the empty slot at which probing stopped.
proc rawGetKnownHC[A](s: HashSet[A], key: A, hc: THash): int {.inline.} =
rawGetKnownHCImpl()
# Looks up `key` in `s`, computing its hash code and storing it in `hc` so the
# caller can reuse it. Expands rawGetImpl; the result follows the same
# convention as rawGetKnownHCImpl (index if found, otherwise `-1 - h`).
proc rawGet[A](s: HashSet[A], key: A, hc: var THash): int {.inline.} =
rawGetImpl()
proc mget*[A](s: var HashSet[A], key: A): var A =
@@ -130,7 +151,8 @@ proc mget*[A](s: var HashSet[A], key: A): var A =
## when one overloaded 'hash' and '==' but still needs reference semantics
## for sharing.
assert s.isValid, "The set needs to be initialized."
var index = rawGet(s, key)
var hc: THash
var index = rawGet(s, key, hc)
if index >= 0: result = s.data[index].key
else: raise newException(KeyError, "key not found: " & $key)
@@ -147,33 +169,43 @@ proc contains*[A](s: HashSet[A], key: A): bool =
## values.excl(2)
## assert(not values.contains(2))
assert s.isValid, "The set needs to be initialized."
var index = rawGet(s, key)
var hc: THash
var index = rawGet(s, key, hc)
result = index >= 0
proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A) =
proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A,
hc: THash, h: THash) =
rawInsertImpl()
proc enlarge[A](s: var HashSet[A]) =
var n: KeyValuePairSeq[A]
newSeq(n, len(s.data) * growthFactor)
for i in countup(0, high(s.data)):
if s.data[i].slot == seFilled: rawInsert(s, n, s.data[i].key)
swap(s.data, n)
swap(s.data, n) # n is now old seq
for i in countup(0, high(n)):
if isFilled(n[i].hcode):
var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode)
rawInsert(s, s.data, n[i].key, n[i].hcode, j)
template inclImpl() {.dirty.} =
var index = rawGet(s, key)
var hc: THash
var index = rawGet(s, key, hc)
if index < 0:
if mustRehash(len(s.data), s.counter): enlarge(s)
rawInsert(s, s.data, key)
if mustRehash(len(s.data), s.counter):
enlarge(s)
index = rawGetKnownHC(s, key, hc)
rawInsert(s, s.data, key, hc, -1 - index)
inc(s.counter)
template containsOrInclImpl() {.dirty.} =
var index = rawGet(s, key)
var hc: THash
var index = rawGet(s, key, hc)
if index >= 0:
result = true
else:
if mustRehash(len(s.data), s.counter): enlarge(s)
rawInsert(s, s.data, key)
if mustRehash(len(s.data), s.counter):
enlarge(s)
index = rawGetKnownHC(s, key, hc)
rawInsert(s, s.data, key, hc, -1 - index)
inc(s.counter)
proc incl*[A](s: var HashSet[A], key: A) =
@@ -204,6 +236,10 @@ proc incl*[A](s: var HashSet[A], other: HashSet[A]) =
assert other.isValid, "The set `other` needs to be initialized."
for item in other: incl(s, item)
# Do-while loop: runs body `b` once unconditionally, then repeats it for as
# long as condition `a` holds. Note that `b` is expanded twice in the output.
template doWhile(a: expr, b: stmt): stmt =
b
while a: b
proc excl*[A](s: var HashSet[A], key: A) =
## Excludes `key` from the set `s`.
##
@@ -215,10 +251,22 @@ proc excl*[A](s: var HashSet[A], key: A) =
## s.excl(2)
## assert s.len == 3
assert s.isValid, "The set needs to be initialized."
var index = rawGet(s, key)
if index >= 0:
s.data[index].slot = seDeleted
var hc: THash
var i = rawGet(s, key, hc)
var msk = high(s.data)
if i >= 0:
s.data[i].hcode = 0
dec(s.counter)
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
var j = i # The correctness of this depends on (h+1) in nextTry,
var r = j # though may be adaptable to other simple sequences.
s.data[i].hcode = 0 # mark current EMPTY
doWhile ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
i = (i + 1) and msk # increment mod table size
if isEmpty(s.data[i].hcode): # end of collision cluster; So all done
return
r = s.data[i].hcode and msk # "home" location of key@i
s.data[j] = s.data[i] # data[j] will be marked EMPTY next loop
proc excl*[A](s: var HashSet[A], other: HashSet[A]) =
## Excludes everything in `other` from `s`.
@@ -494,7 +542,7 @@ proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] =
type
OrderedKeyValuePair[A] = tuple[
slot: SlotEnum, next: int, key: A]
hcode: THash, next: int, key: A]
OrderedKeyValuePairSeq[A] = seq[OrderedKeyValuePair[A]]
OrderedSet* {.myShallow.}[A] = object ## \
## A generic hash set that remembers insertion order.
@@ -546,7 +594,7 @@ template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} =
var h = s.first
while h >= 0:
var nxt = s.data[h].next
if s.data[h].slot == seFilled: yieldStmt
if isFilled(s.data[h].hcode): yieldStmt
h = nxt
iterator items*[A](s: OrderedSet[A]): A =
@@ -571,7 +619,10 @@ iterator items*[A](s: OrderedSet[A]): A =
forAllOrderedPairs:
yield s.data[h].key
proc rawGet[A](s: OrderedSet[A], key: A): int =
# OrderedSet counterpart of the known-hash-code lookup: expands
# rawGetKnownHCImpl over `s` with the caller-supplied `hc`, yielding the slot
# index when `key` is found and `-1 - h` (h = stopping empty slot) otherwise.
proc rawGetKnownHC[A](s: OrderedSet[A], key: A, hc: THash): int {.inline.} =
rawGetKnownHCImpl()
# OrderedSet lookup that also computes the key's hash code: expands rawGetImpl,
# which writes the (non-zero) hash of `key` into `hc` and then searches `s`.
proc rawGet[A](s: OrderedSet[A], key: A, hc: var THash): int {.inline.} =
rawGetImpl()
proc contains*[A](s: OrderedSet[A], key: A): bool =
@@ -585,11 +636,12 @@ proc contains*[A](s: OrderedSet[A], key: A): bool =
## values.incl(2)
## assert values.contains(2)
assert s.isValid, "The set needs to be initialized."
var index = rawGet(s, key)
var hc: THash
var index = rawGet(s, key, hc)
result = index >= 0
proc rawInsert[A](s: var OrderedSet[A],
data: var OrderedKeyValuePairSeq[A], key: A) =
proc rawInsert[A](s: var OrderedSet[A], data: var OrderedKeyValuePairSeq[A],
key: A, hc: THash, h: THash) =
rawInsertImpl()
data[h].next = -1
if s.first < 0: s.first = h
@@ -602,12 +654,13 @@ proc enlarge[A](s: var OrderedSet[A]) =
var h = s.first
s.first = -1
s.last = -1
while h >= 0:
var nxt = s.data[h].next
if s.data[h].slot == seFilled:
rawInsert(s, n, s.data[h].key)
h = nxt
swap(s.data, n)
while h >= 0:
var nxt = n[h].next
if isFilled(n[h].hcode):
var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode)
rawInsert(s, s.data, n[h].key, n[h].hcode, j)
h = nxt
proc incl*[A](s: var OrderedSet[A], key: A) =
## Includes an element `key` in `s`.
@@ -726,7 +779,7 @@ proc `==`*[A](s, t: OrderedSet[A]): bool =
while h >= 0 and g >= 0:
var nxh = s.data[h].next
var nxg = t.data[g].next
if s.data[h].slot == seFilled and s.data[g].slot == seFilled:
if isFilled(s.data[h].hcode) and isFilled(s.data[g].hcode):
if s.data[h].key == s.data[g].key:
inc compared
else: