mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-29 17:34:43 +00:00
195 lines
6.3 KiB
Nim
195 lines
6.3 KiB
Nim
import hashes, math
|
|
|
|
type
|
|
TSlotEnum = enum seEmpty, seFilled, seDeleted
|
|
TKeyValuePair[A, B] = tuple[slot: TSlotEnum, key: A, val: B]
|
|
TKeyValuePairSeq[A, B] = seq[TKeyValuePair[A, B]]
|
|
|
|
TOrderedKeyValuePair[A, B] = tuple[
|
|
slot: TSlotEnum, next: int, key: A, val: B]
|
|
TOrderedKeyValuePairSeq[A, B] = seq[TOrderedKeyValuePair[A, B]]
|
|
TOrderedTable*[A, B] = object ## table that remembers insertion order
|
|
data: TOrderedKeyValuePairSeq[A, B]
|
|
counter, first, last: int
|
|
|
|
const
|
|
growthFactor = 2
|
|
|
|
proc mustRehash(length, counter: int): bool {.inline.} =
|
|
assert(length > counter)
|
|
result = (length * 2 < counter * 3) or (length - counter < 4)
|
|
|
|
proc nextTry(h, maxHash: Hash): Hash {.inline.} =
|
|
result = ((5 * h) + 1) and maxHash
|
|
|
|
template rawGetImpl() {.dirty.} =
|
|
var h: Hash = hash(key) and high(t.data) # start with real hash value
|
|
while t.data[h].slot != seEmpty:
|
|
if t.data[h].key == key and t.data[h].slot == seFilled:
|
|
return h
|
|
h = nextTry(h, high(t.data))
|
|
result = -1
|
|
|
|
template rawInsertImpl() {.dirty.} =
|
|
var h: Hash = hash(key) and high(data)
|
|
while data[h].slot == seFilled:
|
|
h = nextTry(h, high(data))
|
|
data[h].key = key
|
|
data[h].val = val
|
|
data[h].slot = seFilled
|
|
|
|
template addImpl() {.dirty.} =
|
|
if mustRehash(len(t.data), t.counter): enlarge(t)
|
|
rawInsert(t, t.data, key, val)
|
|
inc(t.counter)
|
|
|
|
template putImpl() {.dirty.} =
|
|
var index = rawGet(t, key)
|
|
if index >= 0:
|
|
t.data[index].val = val
|
|
else:
|
|
addImpl()
|
|
|
|
proc len*[A, B](t: TOrderedTable[A, B]): int {.inline.} =
|
|
## returns the number of keys in `t`.
|
|
result = t.counter
|
|
|
|
template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} =
|
|
var h = t.first
|
|
while h >= 0:
|
|
var nxt = t.data[h].next
|
|
if t.data[h].slot == seFilled: yieldStmt
|
|
h = nxt
|
|
|
|
iterator pairs*[A, B](t: TOrderedTable[A, B]): tuple[key: A, val: B] =
|
|
## iterates over any (key, value) pair in the table `t` in insertion
|
|
## order.
|
|
forAllOrderedPairs:
|
|
yield (t.data[h].key, t.data[h].val)
|
|
|
|
iterator mpairs*[A, B](t: var TOrderedTable[A, B]): tuple[key: A, val: var B] =
|
|
## iterates over any (key, value) pair in the table `t` in insertion
|
|
## order. The values can be modified.
|
|
forAllOrderedPairs:
|
|
yield (t.data[h].key, t.data[h].val)
|
|
|
|
iterator keys*[A, B](t: TOrderedTable[A, B]): A =
|
|
## iterates over any key in the table `t` in insertion order.
|
|
forAllOrderedPairs:
|
|
yield t.data[h].key
|
|
|
|
iterator values*[A, B](t: TOrderedTable[A, B]): B =
|
|
## iterates over any value in the table `t` in insertion order.
|
|
forAllOrderedPairs:
|
|
yield t.data[h].val
|
|
|
|
iterator mvalues*[A, B](t: var TOrderedTable[A, B]): var B =
|
|
## iterates over any value in the table `t` in insertion order. The values
|
|
## can be modified.
|
|
forAllOrderedPairs:
|
|
yield t.data[h].val
|
|
|
|
proc rawGet[A, B](t: TOrderedTable[A, B], key: A): int =
|
|
rawGetImpl()
|
|
|
|
proc `[]`*[A, B](t: TOrderedTable[A, B], key: A): B =
|
|
## retrieves the value at ``t[key]``. If `key` is not in `t`,
|
|
## default empty value for the type `B` is returned
|
|
## and no exception is raised. One can check with ``hasKey`` whether the key
|
|
## exists.
|
|
var index = rawGet(t, key)
|
|
if index >= 0: result = t.data[index].val
|
|
|
|
proc mget*[A, B](t: var TOrderedTable[A, B], key: A): var B =
|
|
## retrieves the value at ``t[key]``. The value can be modified.
|
|
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised.
|
|
var index = rawGet(t, key)
|
|
if index >= 0: result = t.data[index].val
|
|
else: raise newException(KeyError, "key not found: " & $key)
|
|
|
|
proc hasKey*[A, B](t: TOrderedTable[A, B], key: A): bool =
|
|
## returns true iff `key` is in the table `t`.
|
|
result = rawGet(t, key) >= 0
|
|
|
|
proc rawInsert[A, B](t: var TOrderedTable[A, B],
|
|
data: var TOrderedKeyValuePairSeq[A, B],
|
|
key: A, val: B) =
|
|
rawInsertImpl()
|
|
data[h].next = -1
|
|
if t.first < 0: t.first = h
|
|
if t.last >= 0: data[t.last].next = h
|
|
t.last = h
|
|
|
|
proc enlarge[A, B](t: var TOrderedTable[A, B]) =
|
|
var n: TOrderedKeyValuePairSeq[A, B]
|
|
newSeq(n, len(t.data) * growthFactor)
|
|
var h = t.first
|
|
t.first = -1
|
|
t.last = -1
|
|
while h >= 0:
|
|
var nxt = t.data[h].next
|
|
if t.data[h].slot == seFilled:
|
|
rawInsert(t, n, t.data[h].key, t.data[h].val)
|
|
h = nxt
|
|
swap(t.data, n)
|
|
|
|
proc `[]=`*[A, B](t: var TOrderedTable[A, B], key: A, val: B) =
|
|
## puts a (key, value)-pair into `t`.
|
|
putImpl()
|
|
|
|
proc add*[A, B](t: var TOrderedTable[A, B], key: A, val: B) =
|
|
## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists.
|
|
addImpl()
|
|
|
|
proc initOrderedTable*[A, B](initialSize=64): TOrderedTable[A, B] =
|
|
## creates a new ordered hash table that is empty. `initialSize` needs to be
|
|
## a power of two.
|
|
assert isPowerOfTwo(initialSize)
|
|
result.counter = 0
|
|
result.first = -1
|
|
result.last = -1
|
|
newSeq(result.data, initialSize)
|
|
|
|
proc toOrderedTable*[A, B](pairs: openarray[tuple[key: A,
|
|
val: B]]): TOrderedTable[A, B] =
|
|
## creates a new ordered hash table that contains the given `pairs`.
|
|
result = initOrderedTable[A, B](nextPowerOfTwo(pairs.len+10))
|
|
for key, val in items(pairs): result[key] = val
|
|
|
|
proc sort*[A, B](t: var TOrderedTable[A,B],
|
|
cmp: proc (x, y: tuple[key: A, val: B]): int {.closure.}) =
|
|
## sorts the ordered table so that the entry with the highest counter comes
|
|
## first. This is destructive (with the advantage of being efficient)!
|
|
## You must not modify `t` afterwards!
|
|
## You can use the iterators `pairs`, `keys`, and `values` to iterate over
|
|
## `t` in the sorted order.
|
|
|
|
# we use shellsort here; fast enough and simple
|
|
var h = 1
|
|
while true:
|
|
h = 3 * h + 1
|
|
if h >= high(t.data): break
|
|
while true:
|
|
h = h div 3
|
|
for i in countup(h, high(t.data)):
|
|
var j = i
|
|
#echo(t.data.len, " ", j, " - ", h)
|
|
#echo(repr(t.data[j-h]))
|
|
proc rawCmp(x, y: TOrderedKeyValuePair[A, B]): int =
|
|
if x.slot in {seEmpty, seDeleted} and y.slot in {seEmpty, seDeleted}:
|
|
return 0
|
|
elif x.slot in {seEmpty, seDeleted}:
|
|
return -1
|
|
elif y.slot in {seEmpty, seDeleted}:
|
|
return 1
|
|
else:
|
|
let item1 = (x.key, x.val)
|
|
let item2 = (y.key, y.val)
|
|
return cmp(item1, item2)
|
|
|
|
while rawCmp(t.data[j-h], t.data[j]) <= 0:
|
|
swap(t.data[j], t.data[j-h])
|
|
j = j-h
|
|
if j < h: break
|
|
if h == 1: break
|