mirror of
https://github.com/nim-lang/Nim.git
synced 2026-01-09 06:23:25 +00:00
Add hcode,rightSize,rawGetKnownHC. Fix inf loop.
Make changes similar to those made in sets.nim: add hcode and rightSize, adopt
the rawGet/rawGetKnownHC result protocol, and change the nextTry probe sequence
to the cache-friendlier h=h+1. That probe sequence in turn allows changing
deletion to local rehashing, which fixes the infinite loop bug. Local rehashing
also has the desirable properties of graceful table aging when deletes do happen
and of insert-only usage patterns no longer paying any time/space cost to check
deleted status.
Unlike collections.sets, this module has add() for duplicate key inserts and
a 3rd type of table, CountTable. The first wrinkle is handled by introducing
a rawGetDeep for unconditionally adding entries along collision chains. The
point of CountTable seems to be space efficiency at 2 items per slot. These
changes retain that by keeping the val==0 => EMPTY rule and not caching hash
codes. putImpl is expanded in-place for CountTable since the new putImpl() is
too different. { Depending on table size relative to caches & key expense,
regular Table[A,B] may become faster than CountTable, especially if the basic
count update could be something like inc(mGetOrPut(t, key, 0)). }
Unit tests pass, but in this module those are more of a demo than a probe for
bugs. This should be exercised/tested a little more before merging.
This commit is contained in:
@@ -71,8 +71,7 @@ import
|
|||||||
{.pragma: myShallow.}
|
{.pragma: myShallow.}
|
||||||
|
|
||||||
type
|
type
|
||||||
SlotEnum = enum seEmpty, seFilled, seDeleted
|
KeyValuePair[A, B] = tuple[hcode: THash, key: A, val: B]
|
||||||
KeyValuePair[A, B] = tuple[slot: SlotEnum, key: A, val: B]
|
|
||||||
KeyValuePairSeq[A, B] = seq[KeyValuePair[A, B]]
|
KeyValuePairSeq[A, B] = seq[KeyValuePair[A, B]]
|
||||||
Table* {.myShallow.}[A, B] = object ## generic hash table
|
Table* {.myShallow.}[A, B] = object ## generic hash table
|
||||||
data: KeyValuePairSeq[A, B]
|
data: KeyValuePairSeq[A, B]
|
||||||
@@ -84,6 +83,14 @@ type
|
|||||||
when not defined(nimhygiene):
|
when not defined(nimhygiene):
|
||||||
{.pragma: dirty.}
|
{.pragma: dirty.}
|
||||||
|
|
||||||
|
# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These
|
||||||
|
# two procs retain clarity of that encoding without the space cost of an enum.
|
||||||
|
proc isEmpty(hcode: THash): bool {.inline.} =
|
||||||
|
result = hcode == 0
|
||||||
|
|
||||||
|
proc isFilled(hcode: THash): bool {.inline.} =
|
||||||
|
result = hcode != 0
|
||||||
|
|
||||||
proc len*[A, B](t: Table[A, B]): int =
|
proc len*[A, B](t: Table[A, B]): int =
|
||||||
## returns the number of keys in `t`.
|
## returns the number of keys in `t`.
|
||||||
result = t.counter
|
result = t.counter
|
||||||
@@ -91,28 +98,28 @@ proc len*[A, B](t: Table[A, B]): int =
|
|||||||
iterator pairs*[A, B](t: Table[A, B]): tuple[key: A, val: B] =
|
iterator pairs*[A, B](t: Table[A, B]): tuple[key: A, val: B] =
|
||||||
## iterates over any (key, value) pair in the table `t`.
|
## iterates over any (key, value) pair in the table `t`.
|
||||||
for h in 0..high(t.data):
|
for h in 0..high(t.data):
|
||||||
if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val)
|
if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val)
|
||||||
|
|
||||||
iterator mpairs*[A, B](t: var Table[A, B]): tuple[key: A, val: var B] =
|
iterator mpairs*[A, B](t: var Table[A, B]): tuple[key: A, val: var B] =
|
||||||
## iterates over any (key, value) pair in the table `t`. The values
|
## iterates over any (key, value) pair in the table `t`. The values
|
||||||
## can be modified.
|
## can be modified.
|
||||||
for h in 0..high(t.data):
|
for h in 0..high(t.data):
|
||||||
if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val)
|
if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val)
|
||||||
|
|
||||||
iterator keys*[A, B](t: Table[A, B]): A =
|
iterator keys*[A, B](t: Table[A, B]): A =
|
||||||
## iterates over any key in the table `t`.
|
## iterates over any key in the table `t`.
|
||||||
for h in 0..high(t.data):
|
for h in 0..high(t.data):
|
||||||
if t.data[h].slot == seFilled: yield t.data[h].key
|
if isFilled(t.data[h].hcode): yield t.data[h].key
|
||||||
|
|
||||||
iterator values*[A, B](t: Table[A, B]): B =
|
iterator values*[A, B](t: Table[A, B]): B =
|
||||||
## iterates over any value in the table `t`.
|
## iterates over any value in the table `t`.
|
||||||
for h in 0..high(t.data):
|
for h in 0..high(t.data):
|
||||||
if t.data[h].slot == seFilled: yield t.data[h].val
|
if isFilled(t.data[h].hcode): yield t.data[h].val
|
||||||
|
|
||||||
iterator mvalues*[A, B](t: var Table[A, B]): var B =
|
iterator mvalues*[A, B](t: var Table[A, B]): var B =
|
||||||
## iterates over any value in the table `t`. The values can be modified.
|
## iterates over any value in the table `t`. The values can be modified.
|
||||||
for h in 0..high(t.data):
|
for h in 0..high(t.data):
|
||||||
if t.data[h].slot == seFilled: yield t.data[h].val
|
if isFilled(t.data[h].hcode): yield t.data[h].val
|
||||||
|
|
||||||
const
|
const
|
||||||
growthFactor = 2
|
growthFactor = 2
|
||||||
@@ -121,26 +128,57 @@ proc mustRehash(length, counter: int): bool {.inline.} =
|
|||||||
assert(length > counter)
|
assert(length > counter)
|
||||||
result = (length * 2 < counter * 3) or (length - counter < 4)
|
result = (length * 2 < counter * 3) or (length - counter < 4)
|
||||||
|
|
||||||
proc nextTry(h, maxHash: THash): THash {.inline.} =
|
proc rightSize*(count: int): int {.inline.} =
|
||||||
result = ((5 * h) + 1) and maxHash
|
## Return the value of `initialSize` to support `count` items.
|
||||||
|
##
|
||||||
|
## If more items are expected to be added, simply add that
|
||||||
|
## expected extra amount to the parameter before calling this.
|
||||||
|
##
|
||||||
|
## Internally, we want mustRehash(rightSize(x), x) == false.
|
||||||
|
result = nextPowerOfTwo(count * 3 div 2 + 4)
|
||||||
|
|
||||||
template rawGetImpl() {.dirty.} =
|
proc nextTry(h, maxHash: THash): THash {.inline.} =
|
||||||
var h: THash = hash(key) and high(t.data) # start with real hash value
|
result = (h + 1) and maxHash
|
||||||
while t.data[h].slot != seEmpty:
|
|
||||||
if t.data[h].key == key and t.data[h].slot == seFilled:
|
template rawGetKnownHCImpl() {.dirty.} =
|
||||||
|
var h: THash = hc and high(t.data) # start with real hash value
|
||||||
|
while isFilled(t.data[h].hcode):
|
||||||
|
# Compare hc THEN key with boolean short circuit. This makes the common case
|
||||||
|
# zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
|
||||||
|
# It does slow down succeeding lookups by one extra THash cmp&and..usually
|
||||||
|
# just a few clock cycles, generally worth it for any non-integer-like A.
|
||||||
|
if t.data[h].hcode == hc and t.data[h].key == key:
|
||||||
return h
|
return h
|
||||||
h = nextTry(h, high(t.data))
|
h = nextTry(h, high(t.data))
|
||||||
result = -1
|
result = -1 - h # < 0 => MISSING; insert idx = -1 - result
|
||||||
|
|
||||||
|
template rawGetImpl() {.dirty.} =
|
||||||
|
hc = hash(key)
|
||||||
|
if hc == 0: # This almost never taken branch should be very predictable.
|
||||||
|
hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine.
|
||||||
|
rawGetKnownHCImpl()
|
||||||
|
|
||||||
|
template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add
|
||||||
|
hc = hash(key)
|
||||||
|
if hc == 0:
|
||||||
|
hc = 314159265
|
||||||
|
var h: THash = hc and high(t.data)
|
||||||
|
while isFilled(t.data[h].hcode):
|
||||||
|
h = nextTry(h, high(t.data))
|
||||||
|
result = h
|
||||||
|
|
||||||
template rawInsertImpl() {.dirty.} =
|
template rawInsertImpl() {.dirty.} =
|
||||||
var h: THash = hash(key) and high(data)
|
|
||||||
while data[h].slot == seFilled:
|
|
||||||
h = nextTry(h, high(data))
|
|
||||||
data[h].key = key
|
data[h].key = key
|
||||||
data[h].val = val
|
data[h].val = val
|
||||||
data[h].slot = seFilled
|
data[h].hcode = hc
|
||||||
|
|
||||||
proc rawGet[A, B](t: Table[A, B], key: A): int =
|
proc rawGetKnownHC[A, B](t: Table[A, B], key: A, hc: THash): int {.inline.} =
|
||||||
|
rawGetKnownHCImpl()
|
||||||
|
|
||||||
|
proc rawGetDeep[A, B](t: Table[A, B], key: A, hc: var THash): int {.inline.} =
|
||||||
|
rawGetDeepImpl()
|
||||||
|
|
||||||
|
proc rawGet[A, B](t: Table[A, B], key: A, hc: var THash): int {.inline.} =
|
||||||
rawGetImpl()
|
rawGetImpl()
|
||||||
|
|
||||||
proc `[]`*[A, B](t: Table[A, B], key: A): B =
|
proc `[]`*[A, B](t: Table[A, B], key: A): B =
|
||||||
@@ -148,50 +186,62 @@ proc `[]`*[A, B](t: Table[A, B], key: A): B =
|
|||||||
## default empty value for the type `B` is returned
|
## default empty value for the type `B` is returned
|
||||||
## and no exception is raised. One can check with ``hasKey`` whether the key
|
## and no exception is raised. One can check with ``hasKey`` whether the key
|
||||||
## exists.
|
## exists.
|
||||||
var index = rawGet(t, key)
|
var hc: THash
|
||||||
|
var index = rawGet(t, key, hc)
|
||||||
if index >= 0: result = t.data[index].val
|
if index >= 0: result = t.data[index].val
|
||||||
|
|
||||||
proc mget*[A, B](t: var Table[A, B], key: A): var B =
|
proc mget*[A, B](t: var Table[A, B], key: A): var B =
|
||||||
## retrieves the value at ``t[key]``. The value can be modified.
|
## retrieves the value at ``t[key]``. The value can be modified.
|
||||||
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised.
|
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised.
|
||||||
var index = rawGet(t, key)
|
var hc: THash
|
||||||
|
var index = rawGet(t, key, hc)
|
||||||
if index >= 0: result = t.data[index].val
|
if index >= 0: result = t.data[index].val
|
||||||
else: raise newException(KeyError, "key not found: " & $key)
|
else: raise newException(KeyError, "key not found: " & $key)
|
||||||
|
|
||||||
iterator allValues*[A, B](t: Table[A, B]; key: A): B =
|
iterator allValues*[A, B](t: Table[A, B]; key: A): B =
|
||||||
## iterates over any value in the table `t` that belongs to the given `key`.
|
## iterates over any value in the table `t` that belongs to the given `key`.
|
||||||
var h: THash = hash(key) and high(t.data)
|
var h: THash = hash(key) and high(t.data)
|
||||||
while t.data[h].slot != seEmpty:
|
while isFilled(t.data[h].hcode):
|
||||||
if t.data[h].key == key and t.data[h].slot == seFilled:
|
if t.data[h].key == key:
|
||||||
yield t.data[h].val
|
yield t.data[h].val
|
||||||
h = nextTry(h, high(t.data))
|
h = nextTry(h, high(t.data))
|
||||||
|
|
||||||
proc hasKey*[A, B](t: Table[A, B], key: A): bool =
|
proc hasKey*[A, B](t: Table[A, B], key: A): bool =
|
||||||
## returns true iff `key` is in the table `t`.
|
## returns true iff `key` is in the table `t`.
|
||||||
result = rawGet(t, key) >= 0
|
var hc: THash
|
||||||
|
result = rawGet(t, key, hc) >= 0
|
||||||
|
|
||||||
proc rawInsert[A, B](t: var Table[A, B], data: var KeyValuePairSeq[A, B],
|
proc rawInsert[A, B](t: var Table[A, B], data: var KeyValuePairSeq[A, B],
|
||||||
key: A, val: B) =
|
key: A, val: B, hc: THash, h: THash) =
|
||||||
rawInsertImpl()
|
rawInsertImpl()
|
||||||
|
|
||||||
proc enlarge[A, B](t: var Table[A, B]) =
|
proc enlarge[A, B](t: var Table[A, B]) =
|
||||||
var n: KeyValuePairSeq[A, B]
|
var n: KeyValuePairSeq[A, B]
|
||||||
newSeq(n, len(t.data) * growthFactor)
|
newSeq(n, len(t.data) * growthFactor)
|
||||||
for i in countup(0, high(t.data)):
|
|
||||||
if t.data[i].slot == seFilled: rawInsert(t, n, t.data[i].key, t.data[i].val)
|
|
||||||
swap(t.data, n)
|
swap(t.data, n)
|
||||||
|
for i in countup(0, high(n)):
|
||||||
|
if isFilled(n[i].hcode):
|
||||||
|
var j = -1 - rawGetKnownHC(t, n[i].key, n[i].hcode)
|
||||||
|
rawInsert(t, t.data, n[i].key, n[i].val, n[i].hcode, j)
|
||||||
|
|
||||||
template addImpl() {.dirty.} =
|
template addImpl() {.dirty.} =
|
||||||
if mustRehash(len(t.data), t.counter): enlarge(t)
|
if mustRehash(len(t.data), t.counter): enlarge(t)
|
||||||
rawInsert(t, t.data, key, val)
|
var hc: THash
|
||||||
|
var j = rawGetDeep(t, key, hc)
|
||||||
|
rawInsert(t, t.data, key, val, hc, j)
|
||||||
inc(t.counter)
|
inc(t.counter)
|
||||||
|
|
||||||
template putImpl() {.dirty.} =
|
template putImpl() {.dirty.} =
|
||||||
var index = rawGet(t, key)
|
var hc: THash
|
||||||
|
var index = rawGet(t, key, hc)
|
||||||
if index >= 0:
|
if index >= 0:
|
||||||
t.data[index].val = val
|
t.data[index].val = val
|
||||||
else:
|
else:
|
||||||
addImpl()
|
if mustRehash(len(t.data), t.counter):
|
||||||
|
enlarge(t)
|
||||||
|
index = rawGetKnownHC(t, key, hc)
|
||||||
|
rawInsert(t, t.data, key, val, hc, -1 - index)
|
||||||
|
inc(t.counter)
|
||||||
|
|
||||||
when false:
|
when false:
|
||||||
# not yet used:
|
# not yet used:
|
||||||
@@ -213,13 +263,30 @@ proc `[]=`*[A, B](t: var Table[A, B], key: A, val: B) =
|
|||||||
proc add*[A, B](t: var Table[A, B], key: A, val: B) =
|
proc add*[A, B](t: var Table[A, B], key: A, val: B) =
|
||||||
## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists.
|
## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists.
|
||||||
addImpl()
|
addImpl()
|
||||||
|
|
||||||
|
template doWhile(a: expr, b: stmt): stmt =
|
||||||
|
while true:
|
||||||
|
b
|
||||||
|
if not a: break
|
||||||
|
|
||||||
proc del*[A, B](t: var Table[A, B], key: A) =
|
proc del*[A, B](t: var Table[A, B], key: A) =
|
||||||
## deletes `key` from hash table `t`.
|
## deletes `key` from hash table `t`.
|
||||||
let index = rawGet(t, key)
|
var hc: THash
|
||||||
if index >= 0:
|
var i = rawGet(t, key, hc)
|
||||||
t.data[index].slot = seDeleted
|
let msk = high(t.data)
|
||||||
|
if i >= 0:
|
||||||
|
t.data[i].hcode = 0
|
||||||
dec(t.counter)
|
dec(t.counter)
|
||||||
|
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
|
||||||
|
var j = i # The correctness of this depends on (h+1) in nextTry,
|
||||||
|
var r = j # though may be adaptable to other simple sequences.
|
||||||
|
t.data[i].hcode = 0 # mark current EMPTY
|
||||||
|
doWhile ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
|
||||||
|
i = (i + 1) and msk # increment mod table size
|
||||||
|
if isEmpty(t.data[i].hcode): # end of collision cluster; So all done
|
||||||
|
return
|
||||||
|
r = t.data[i].hcode and msk # "home" location of key@i
|
||||||
|
t.data[j] = t.data[i] # data[j] will be marked EMPTY next loop
|
||||||
|
|
||||||
proc initTable*[A, B](initialSize=64): Table[A, B] =
|
proc initTable*[A, B](initialSize=64): Table[A, B] =
|
||||||
## creates a new hash table that is empty.
|
## creates a new hash table that is empty.
|
||||||
@@ -234,7 +301,7 @@ proc initTable*[A, B](initialSize=64): Table[A, B] =
|
|||||||
proc toTable*[A, B](pairs: openArray[tuple[key: A,
|
proc toTable*[A, B](pairs: openArray[tuple[key: A,
|
||||||
val: B]]): Table[A, B] =
|
val: B]]): Table[A, B] =
|
||||||
## creates a new hash table that contains the given `pairs`.
|
## creates a new hash table that contains the given `pairs`.
|
||||||
result = initTable[A, B](nextPowerOfTwo(pairs.len+10))
|
result = initTable[A, B](rightSize(pairs.len))
|
||||||
for key, val in items(pairs): result[key] = val
|
for key, val in items(pairs): result[key] = val
|
||||||
|
|
||||||
template dollarImpl(): stmt {.dirty.} =
|
template dollarImpl(): stmt {.dirty.} =
|
||||||
@@ -252,7 +319,7 @@ template dollarImpl(): stmt {.dirty.} =
|
|||||||
proc `$`*[A, B](t: Table[A, B]): string =
|
proc `$`*[A, B](t: Table[A, B]): string =
|
||||||
## The `$` operator for hash tables.
|
## The `$` operator for hash tables.
|
||||||
dollarImpl()
|
dollarImpl()
|
||||||
|
|
||||||
template equalsImpl() =
|
template equalsImpl() =
|
||||||
if s.counter == t.counter:
|
if s.counter == t.counter:
|
||||||
# different insertion orders mean different 'data' seqs, so we have
|
# different insertion orders mean different 'data' seqs, so we have
|
||||||
@@ -262,10 +329,10 @@ template equalsImpl() =
|
|||||||
if not t.hasKey(key): return false
|
if not t.hasKey(key): return false
|
||||||
if t[key] != val: return false
|
if t[key] != val: return false
|
||||||
return true
|
return true
|
||||||
|
|
||||||
proc `==`*[A, B](s, t: Table[A, B]): bool =
|
proc `==`*[A, B](s, t: Table[A, B]): bool =
|
||||||
equalsImpl()
|
equalsImpl()
|
||||||
|
|
||||||
proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): Table[C, B] =
|
proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): Table[C, B] =
|
||||||
## Index the collection with the proc provided.
|
## Index the collection with the proc provided.
|
||||||
# TODO: As soon as supported, change collection: A to collection: A[B]
|
# TODO: As soon as supported, change collection: A to collection: A[B]
|
||||||
@@ -280,28 +347,28 @@ proc len*[A, B](t: TableRef[A, B]): int =
|
|||||||
iterator pairs*[A, B](t: TableRef[A, B]): tuple[key: A, val: B] =
|
iterator pairs*[A, B](t: TableRef[A, B]): tuple[key: A, val: B] =
|
||||||
## iterates over any (key, value) pair in the table `t`.
|
## iterates over any (key, value) pair in the table `t`.
|
||||||
for h in 0..high(t.data):
|
for h in 0..high(t.data):
|
||||||
if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val)
|
if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val)
|
||||||
|
|
||||||
iterator mpairs*[A, B](t: TableRef[A, B]): tuple[key: A, val: var B] =
|
iterator mpairs*[A, B](t: TableRef[A, B]): tuple[key: A, val: var B] =
|
||||||
## iterates over any (key, value) pair in the table `t`. The values
|
## iterates over any (key, value) pair in the table `t`. The values
|
||||||
## can be modified.
|
## can be modified.
|
||||||
for h in 0..high(t.data):
|
for h in 0..high(t.data):
|
||||||
if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val)
|
if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val)
|
||||||
|
|
||||||
iterator keys*[A, B](t: TableRef[A, B]): A =
|
iterator keys*[A, B](t: TableRef[A, B]): A =
|
||||||
## iterates over any key in the table `t`.
|
## iterates over any key in the table `t`.
|
||||||
for h in 0..high(t.data):
|
for h in 0..high(t.data):
|
||||||
if t.data[h].slot == seFilled: yield t.data[h].key
|
if isFilled(t.data[h].hcode): yield t.data[h].key
|
||||||
|
|
||||||
iterator values*[A, B](t: TableRef[A, B]): B =
|
iterator values*[A, B](t: TableRef[A, B]): B =
|
||||||
## iterates over any value in the table `t`.
|
## iterates over any value in the table `t`.
|
||||||
for h in 0..high(t.data):
|
for h in 0..high(t.data):
|
||||||
if t.data[h].slot == seFilled: yield t.data[h].val
|
if isFilled(t.data[h].hcode): yield t.data[h].val
|
||||||
|
|
||||||
iterator mvalues*[A, B](t: TableRef[A, B]): var B =
|
iterator mvalues*[A, B](t: TableRef[A, B]): var B =
|
||||||
## iterates over any value in the table `t`. The values can be modified.
|
## iterates over any value in the table `t`. The values can be modified.
|
||||||
for h in 0..high(t.data):
|
for h in 0..high(t.data):
|
||||||
if t.data[h].slot == seFilled: yield t.data[h].val
|
if isFilled(t.data[h].hcode): yield t.data[h].val
|
||||||
|
|
||||||
proc `[]`*[A, B](t: TableRef[A, B], key: A): B =
|
proc `[]`*[A, B](t: TableRef[A, B], key: A): B =
|
||||||
## retrieves the value at ``t[key]``. If `key` is not in `t`,
|
## retrieves the value at ``t[key]``. If `key` is not in `t`,
|
||||||
@@ -326,7 +393,7 @@ proc `[]=`*[A, B](t: TableRef[A, B], key: A, val: B) =
|
|||||||
proc add*[A, B](t: TableRef[A, B], key: A, val: B) =
|
proc add*[A, B](t: TableRef[A, B], key: A, val: B) =
|
||||||
## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists.
|
## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists.
|
||||||
t[].add(key, val)
|
t[].add(key, val)
|
||||||
|
|
||||||
proc del*[A, B](t: TableRef[A, B], key: A) =
|
proc del*[A, B](t: TableRef[A, B], key: A) =
|
||||||
## deletes `key` from hash table `t`.
|
## deletes `key` from hash table `t`.
|
||||||
t[].del(key)
|
t[].del(key)
|
||||||
@@ -360,7 +427,7 @@ proc newTableFrom*[A, B, C](collection: A, index: proc(x: B): C): TableRef[C, B]
|
|||||||
|
|
||||||
type
|
type
|
||||||
OrderedKeyValuePair[A, B] = tuple[
|
OrderedKeyValuePair[A, B] = tuple[
|
||||||
slot: SlotEnum, next: int, key: A, val: B]
|
hcode: THash, next: int, key: A, val: B]
|
||||||
OrderedKeyValuePairSeq[A, B] = seq[OrderedKeyValuePair[A, B]]
|
OrderedKeyValuePairSeq[A, B] = seq[OrderedKeyValuePair[A, B]]
|
||||||
OrderedTable* {.
|
OrderedTable* {.
|
||||||
myShallow.}[A, B] = object ## table that remembers insertion order
|
myShallow.}[A, B] = object ## table that remembers insertion order
|
||||||
@@ -378,7 +445,7 @@ template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} =
|
|||||||
var h = t.first
|
var h = t.first
|
||||||
while h >= 0:
|
while h >= 0:
|
||||||
var nxt = t.data[h].next
|
var nxt = t.data[h].next
|
||||||
if t.data[h].slot == seFilled: yieldStmt
|
if isFilled(t.data[h].hcode): yieldStmt
|
||||||
h = nxt
|
h = nxt
|
||||||
|
|
||||||
iterator pairs*[A, B](t: OrderedTable[A, B]): tuple[key: A, val: B] =
|
iterator pairs*[A, B](t: OrderedTable[A, B]): tuple[key: A, val: B] =
|
||||||
@@ -409,7 +476,13 @@ iterator mvalues*[A, B](t: var OrderedTable[A, B]): var B =
|
|||||||
forAllOrderedPairs:
|
forAllOrderedPairs:
|
||||||
yield t.data[h].val
|
yield t.data[h].val
|
||||||
|
|
||||||
proc rawGet[A, B](t: OrderedTable[A, B], key: A): int =
|
proc rawGetKnownHC[A, B](t: OrderedTable[A, B], key: A, hc: THash): int =
|
||||||
|
rawGetKnownHCImpl()
|
||||||
|
|
||||||
|
proc rawGetDeep[A, B](t: OrderedTable[A, B], key: A, hc: var THash): int {.inline.} =
|
||||||
|
rawGetDeepImpl()
|
||||||
|
|
||||||
|
proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var THash): int =
|
||||||
rawGetImpl()
|
rawGetImpl()
|
||||||
|
|
||||||
proc `[]`*[A, B](t: OrderedTable[A, B], key: A): B =
|
proc `[]`*[A, B](t: OrderedTable[A, B], key: A): B =
|
||||||
@@ -433,7 +506,7 @@ proc hasKey*[A, B](t: OrderedTable[A, B], key: A): bool =
|
|||||||
|
|
||||||
proc rawInsert[A, B](t: var OrderedTable[A, B],
|
proc rawInsert[A, B](t: var OrderedTable[A, B],
|
||||||
data: var OrderedKeyValuePairSeq[A, B],
|
data: var OrderedKeyValuePairSeq[A, B],
|
||||||
key: A, val: B) =
|
key: A, val: B, hc: THash, h: THash) =
|
||||||
rawInsertImpl()
|
rawInsertImpl()
|
||||||
data[h].next = -1
|
data[h].next = -1
|
||||||
if t.first < 0: t.first = h
|
if t.first < 0: t.first = h
|
||||||
@@ -446,12 +519,13 @@ proc enlarge[A, B](t: var OrderedTable[A, B]) =
|
|||||||
var h = t.first
|
var h = t.first
|
||||||
t.first = -1
|
t.first = -1
|
||||||
t.last = -1
|
t.last = -1
|
||||||
while h >= 0:
|
|
||||||
var nxt = t.data[h].next
|
|
||||||
if t.data[h].slot == seFilled:
|
|
||||||
rawInsert(t, n, t.data[h].key, t.data[h].val)
|
|
||||||
h = nxt
|
|
||||||
swap(t.data, n)
|
swap(t.data, n)
|
||||||
|
while h >= 0:
|
||||||
|
var nxt = n[h].next
|
||||||
|
if isFilled(n[h].hcode):
|
||||||
|
var j = -1 - rawGetKnownHC(t, n[h].key, n[h].hcode)
|
||||||
|
rawInsert(t, t.data, n[h].key, n[h].val, n[h].hcode, j)
|
||||||
|
h = nxt
|
||||||
|
|
||||||
proc `[]=`*[A, B](t: var OrderedTable[A, B], key: A, val: B) =
|
proc `[]=`*[A, B](t: var OrderedTable[A, B], key: A, val: B) =
|
||||||
## puts a (key, value)-pair into `t`.
|
## puts a (key, value)-pair into `t`.
|
||||||
@@ -476,7 +550,7 @@ proc initOrderedTable*[A, B](initialSize=64): OrderedTable[A, B] =
|
|||||||
proc toOrderedTable*[A, B](pairs: openArray[tuple[key: A,
|
proc toOrderedTable*[A, B](pairs: openArray[tuple[key: A,
|
||||||
val: B]]): OrderedTable[A, B] =
|
val: B]]): OrderedTable[A, B] =
|
||||||
## creates a new ordered hash table that contains the given `pairs`.
|
## creates a new ordered hash table that contains the given `pairs`.
|
||||||
result = initOrderedTable[A, B](nextPowerOfTwo(pairs.len+10))
|
result = initOrderedTable[A, B](rightSize(pairs.len))
|
||||||
for key, val in items(pairs): result[key] = val
|
for key, val in items(pairs): result[key] = val
|
||||||
|
|
||||||
proc `$`*[A, B](t: OrderedTable[A, B]): string =
|
proc `$`*[A, B](t: OrderedTable[A, B]): string =
|
||||||
@@ -537,7 +611,7 @@ template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} =
|
|||||||
var h = t.first
|
var h = t.first
|
||||||
while h >= 0:
|
while h >= 0:
|
||||||
var nxt = t.data[h].next
|
var nxt = t.data[h].next
|
||||||
if t.data[h].slot == seFilled: yieldStmt
|
if isFilled(t.data[h].hcode): yieldStmt
|
||||||
h = nxt
|
h = nxt
|
||||||
|
|
||||||
iterator pairs*[A, B](t: OrderedTableRef[A, B]): tuple[key: A, val: B] =
|
iterator pairs*[A, B](t: OrderedTableRef[A, B]): tuple[key: A, val: B] =
|
||||||
@@ -604,7 +678,7 @@ proc newOrderedTable*[A, B](initialSize=64): OrderedTableRef[A, B] =
|
|||||||
proc newOrderedTable*[A, B](pairs: openArray[tuple[key: A,
|
proc newOrderedTable*[A, B](pairs: openArray[tuple[key: A,
|
||||||
val: B]]): OrderedTableRef[A, B] =
|
val: B]]): OrderedTableRef[A, B] =
|
||||||
## creates a new ordered hash table that contains the given `pairs`.
|
## creates a new ordered hash table that contains the given `pairs`.
|
||||||
result = newOrderedTable[A, B](nextPowerOfTwo(pairs.len+10))
|
result = newOrderedTable[A, B](rightSize(pairs.len))
|
||||||
for key, val in items(pairs): result[key] = val
|
for key, val in items(pairs): result[key] = val
|
||||||
|
|
||||||
proc `$`*[A, B](t: OrderedTableRef[A, B]): string =
|
proc `$`*[A, B](t: OrderedTableRef[A, B]): string =
|
||||||
@@ -665,7 +739,7 @@ proc rawGet[A](t: CountTable[A], key: A): int =
|
|||||||
while t.data[h].val != 0:
|
while t.data[h].val != 0:
|
||||||
if t.data[h].key == key: return h
|
if t.data[h].key == key: return h
|
||||||
h = nextTry(h, high(t.data))
|
h = nextTry(h, high(t.data))
|
||||||
result = -1
|
result = -1 - h # < 0 => MISSING; insert idx = -1 - result
|
||||||
|
|
||||||
proc `[]`*[A](t: CountTable[A], key: A): int =
|
proc `[]`*[A](t: CountTable[A], key: A): int =
|
||||||
## retrieves the value at ``t[key]``. If `key` is not in `t`,
|
## retrieves the value at ``t[key]``. If `key` is not in `t`,
|
||||||
@@ -702,21 +776,27 @@ proc enlarge[A](t: var CountTable[A]) =
|
|||||||
proc `[]=`*[A](t: var CountTable[A], key: A, val: int) =
|
proc `[]=`*[A](t: var CountTable[A], key: A, val: int) =
|
||||||
## puts a (key, value)-pair into `t`. `val` has to be positive.
|
## puts a (key, value)-pair into `t`. `val` has to be positive.
|
||||||
assert val > 0
|
assert val > 0
|
||||||
putImpl()
|
var h = rawGet(t, key)
|
||||||
|
if h >= 0:
|
||||||
|
t.data[h].val = val
|
||||||
|
else:
|
||||||
|
h = -1 - h
|
||||||
|
t.data[h].key = key
|
||||||
|
t.data[h].val = val
|
||||||
|
|
||||||
proc initCountTable*[A](initialSize=64): CountTable[A] =
|
proc initCountTable*[A](initialSize=64): CountTable[A] =
|
||||||
## creates a new count table that is empty.
|
## creates a new count table that is empty.
|
||||||
##
|
##
|
||||||
## `initialSize` needs to be a power of two. If you need to accept runtime
|
## `initialSize` needs to be a power of two. If you need to accept runtime
|
||||||
## values for this you could use the ``nextPowerOfTwo`` proc from the
|
## values for this you could use the ``nextPowerOfTwo`` proc from the
|
||||||
## `math <math.html>`_ module.
|
## `math <math.html>`_ module or the ``rightSize`` proc in this module.
|
||||||
assert isPowerOfTwo(initialSize)
|
assert isPowerOfTwo(initialSize)
|
||||||
result.counter = 0
|
result.counter = 0
|
||||||
newSeq(result.data, initialSize)
|
newSeq(result.data, initialSize)
|
||||||
|
|
||||||
proc toCountTable*[A](keys: openArray[A]): CountTable[A] =
|
proc toCountTable*[A](keys: openArray[A]): CountTable[A] =
|
||||||
## creates a new count table with every key in `keys` having a count of 1.
|
## creates a new count table with every key in `keys` having a count of 1.
|
||||||
result = initCountTable[A](nextPowerOfTwo(keys.len+10))
|
result = initCountTable[A](rightSize(keys.len))
|
||||||
for key in items(keys): result[key] = 1
|
for key in items(keys): result[key] = 1
|
||||||
|
|
||||||
proc `$`*[A](t: CountTable[A]): string =
|
proc `$`*[A](t: CountTable[A]): string =
|
||||||
@@ -827,13 +907,13 @@ proc newCountTable*[A](initialSize=64): CountTableRef[A] =
|
|||||||
##
|
##
|
||||||
## `initialSize` needs to be a power of two. If you need to accept runtime
|
## `initialSize` needs to be a power of two. If you need to accept runtime
|
||||||
## values for this you could use the ``nextPowerOfTwo`` proc from the
|
## values for this you could use the ``nextPowerOfTwo`` proc from the
|
||||||
## `math <math.html>`_ module.
|
## `math <math.html>`_ module or the ``rightSize`` proc in this module.
|
||||||
new(result)
|
new(result)
|
||||||
result[] = initCountTable[A](initialSize)
|
result[] = initCountTable[A](initialSize)
|
||||||
|
|
||||||
proc newCountTable*[A](keys: openArray[A]): CountTableRef[A] =
|
proc newCountTable*[A](keys: openArray[A]): CountTableRef[A] =
|
||||||
## creates a new count table with every key in `keys` having a count of 1.
|
## creates a new count table with every key in `keys` having a count of 1.
|
||||||
result = newCountTable[A](nextPowerOfTwo(keys.len+10))
|
result = newCountTable[A](rightSize(keys.len))
|
||||||
for key in items(keys): result[key] = 1
|
for key in items(keys): result[key] = 1
|
||||||
|
|
||||||
proc `$`*[A](t: CountTableRef[A]): string =
|
proc `$`*[A](t: CountTableRef[A]): string =
|
||||||
|
|||||||
Reference in New Issue
Block a user