Mirror of https://github.com/nim-lang/Nim.git (synced 2026-04-19 22:10:33 +00:00)
Unwind just the "pseudorandom probing" part of recent sets,tables changes (#13816)
* Unwind just the "pseudorandom probing" (whole hash-code-keyed variable stride double hashing) part of the recent sets & tables changes. That part has still been causing bugs over a month later (e.g., two days ago: https://github.com/nim-lang/Nim/issues/13794) and still carries several "figure this out" implementation-question comments (see the diffs of this PR). The topic has been discussed in many places:
  https://github.com/nim-lang/Nim/issues/13393
  https://github.com/nim-lang/Nim/pull/13418
  https://github.com/nim-lang/Nim/pull/13440
  https://github.com/nim-lang/Nim/issues/13794
  Alternative, non-mandatory stronger integer hashes (or, vice versa, opt-in identity hashes) are a better solution: they are more general (no illusion of one hard-coded probe sequence solving all problems) while retaining the virtues of linear probing such as cache obliviousness and age-less tables under delete-heavy workloads (still untested after a month of this change). The only real solution for truly adversarial keys is a hash keyed off of data unobservable to attackers. That all fits better with a few families of user-pluggable/define-switchable hashes, which can be provided in a separate PR more about `hashes.nim`.
  This PR carefully preserves the better (but still hard-coded!) probing of `intsets` and other recent fixes such as `move` annotations, hash-order-invariant tests, the `intsets.missingOrExcl` fix, and the move of `rightSize` into `hashcommon.nim`.
* Fix the `data.len` -> `dataLen` problem.
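For orientation before the diff, here is a minimal, self-contained Nim sketch of the two probe sequences involved. The proc names `nextTryLinear` and `nextTryPerturbed` are invented for this illustration only; the two formulas themselves are the ones visible in the `nextTry` procs in the diff below (the perturbed variant is what this PR removes, the linear one is what it restores):

```nim
import std/hashes   # Hash is an alias for int

# Linear probing (restored by this PR): advance one slot and wrap with a mask
# (table sizes are powers of two, so `maxHash == len - 1`).
proc nextTryLinear(h, maxHash: Hash): Hash {.inline.} =
  (h + 1) and maxHash

# CPython-style "pseudorandom" probing (removed by this PR): the stride is
# driven by the not-yet-consumed high bits of the hash code via `perturb`.
proc nextTryPerturbed(h, maxHash: Hash, perturb: var uint): Hash {.inline.} =
  const PERTURB_SHIFT = 5
  result = cast[Hash]((5 * cast[uint](h) + 1 + perturb) and cast[uint](maxHash))
  perturb = perturb shr PERTURB_SHIFT

when isMainModule:
  const maxHash = 15                     # a 16-slot table
  var perturb = cast[uint](hash("key"))  # high bits of the hash drive the stride
  var h = hash("key") and maxHash        # initial slot for this key
  for _ in 1 .. 4:
    echo "linear -> ", nextTryLinear(h, maxHash)
    echo "perturbed -> ", nextTryPerturbed(h, maxHash, perturb)
    h = nextTryLinear(h, maxHash)
```

Consecutive probes of the linear variant stay in adjacent slots (good cache locality), whereas the perturbed variant scatters probes across the table; that is the trade-off discussed above.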
@@ -18,87 +18,43 @@ when not defined(nimHasDefault):
var v: T
v

const freeMarker = 0
const deletedMarker = -1

type UHash = uint

# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These
# two procs retain clarity of that encoding without the space cost of an enum.
proc isFilledAndValid(hcode: Hash): bool {.inline.} =
result = hcode != 0 and hcode != deletedMarker
# performance: we could use bit magic if needed
proc isEmpty(hcode: Hash): bool {.inline.} =
result = hcode == 0

proc isFilled(hcode: Hash): bool {.inline.} =
result = hcode != 0


proc translateBits(a: UHash, numBitsMask: int): UHash {.inline.} =
result = (a shr numBitsMask) or (a shl (UHash.sizeof * 8 - numBitsMask))

proc nextTry(h, maxHash: Hash, perturb: var UHash): Hash {.inline.} =
# FACTOR between hashcommon.nextTry, intsets.nextTry
# an optimization would be to use `(h + 1) and maxHash` for a few iterations
# and then switch to the formula below, to get "best of both worlds": good
# cache locality, except when a collision cluster is detected (ie, large number
# of iterations).
const PERTURB_SHIFT = 5 # consider tying this to `numBitsMask = fastLog2(t.dataLen)`
result = cast[Hash]((5*cast[uint](h) + 1 + perturb) and cast[uint](maxHash))
perturb = perturb shr PERTURB_SHIFT
proc nextTry(h, maxHash: Hash): Hash {.inline.} =
result = (h + 1) and maxHash

proc mustRehash[T](t: T): bool {.inline.} =
# FACTOR between hashcommon.mustRehash, intsets.mustRehash
let counter2 = t.counter + t.countDeleted
let length = t.dataLen
assert(length > counter2)
result = (length * 2 < counter2 * 3) or (length - counter2 < 4) # synchronize with `rightSize`
assert(t.dataLen > t.counter)
result = (t.dataLen * 2 < t.counter * 3) or (t.dataLen - t.counter < 4)

proc rightSize*(count: Natural): int {.inline.} =
## Return the value of `initialSize` to support `count` items.
## Return the value of ``initialSize`` to support ``count`` items.
##
## If more items are expected to be added, simply add that
## expected extra amount to the parameter before calling this.
##
## Internally, we want `mustRehash(t) == false` for t that was just resized.
#
# Make sure to synchronize with `mustRehash`
result = nextPowerOfTwo(count * 3 div 2 + 4)
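As a side note (not part of the patch), the "synchronize with `mustRehash`" comment above can be checked with a small standalone sketch. It copies the two formulas from this hunk; the free-standing `mustRehash(length, counter)` signature is adapted for the sketch and is not the library's generic `mustRehash[T](t: T)`:

```nim
import std/math   # nextPowerOfTwo

proc rightSize(count: Natural): int =
  # same formula as in the hunk above
  nextPowerOfTwo(count * 3 div 2 + 4)

proc mustRehash(length, counter: int): bool =
  # post-PR formula, with the (length, counter) pair passed in directly
  assert length > counter
  (length * 2 < counter * 3) or (length - counter < 4)

when isMainModule:
  # a table just resized to rightSize(count) slots and holding count items
  # should never immediately demand another rehash
  for count in 0 .. 100_000:
    doAssert not mustRehash(rightSize(count), count)
  echo "rightSize and mustRehash agree for 0 .. 100_000 items"
```

The check passes because `rightSize` always leaves roughly a third of the slots (and at least 4 of them) free, which is exactly what `mustRehash` tests for.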
template getPerturb(t: typed, hc: Hash): UHash =
# we can't use `fastLog2(dataLen(t))` because importing `bitops` would cause codegen errors
# so we use a practical value of half the bit width (eg 64 / 2 = 32 on 64bit machines)
let numBitsMask = sizeof(Hash) * 4 # ie, sizeof(Hash) * 8 / 2
# this makes a major difference for cases like #13393; it causes the bits
# that were masked out in 1st position so they'll be masked in instead, and
# influence the recursion in nextTry earlier rather than later.
translateBits(cast[uint](hc), numBitsMask)

template rawGetKnownHCImpl() {.dirty.} =
if t.dataLen == 0:
return -1
var h: Hash = hc and maxHash(t) # start with real hash value
var perturb = t.getPerturb(hc)
var deletedIndex = -1
while true:
if isFilledAndValid(t.data[h].hcode):
# Compare hc THEN key with boolean short circuit. This makes the common case
# zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
# It does slow down succeeding lookups by one extra Hash cmp&and..usually
# just a few clock cycles, generally worth it for any non-integer-like A.
# performance: we optimize this: depending on type(key), skip hc comparison
if t.data[h].hcode == hc and t.data[h].key == key:
return h
h = nextTry(h, maxHash(t), perturb)
elif t.data[h].hcode == deletedMarker:
if deletedIndex == -1:
deletedIndex = h
h = nextTry(h, maxHash(t), perturb)
else:
break
if deletedIndex == -1:
result = -1 - h # < 0 => MISSING; insert idx = -1 - result
else:
# we prefer returning a (in fact the 1st found) deleted index
result = -1 - deletedIndex
while isFilled(t.data[h].hcode):
# Compare hc THEN key with boolean short circuit. This makes the common case
# zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
# It does slow down succeeding lookups by one extra Hash cmp&and..usually
# just a few clock cycles, generally worth it for any non-integer-like A.
if t.data[h].hcode == hc and t.data[h].key == key:
return h
h = nextTry(h, maxHash(t))
result = -1 - h # < 0 => MISSING; insert idx = -1 - result

proc rawGetKnownHC[X, A](t: X, key: A, hc: Hash): int {.inline.} =
rawGetKnownHCImpl()
@@ -107,8 +63,6 @@ template genHashImpl(key, hc: typed) =
hc = hash(key)
if hc == 0: # This almost never taken branch should be very predictable.
hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine.
elif hc == deletedMarker:
hc = 214159261

template genHash(key: typed): Hash =
var res: Hash
@@ -46,20 +46,16 @@ type
IntSet* = object ## An efficient set of `int` implemented as a sparse bit set.
elems: int # only valid for small numbers
counter, max: int
countDeleted: int
head: PTrunk
data: TrunkSeq
a: array[0..33, int] # profiling shows that 34 elements are enough

proc mustRehash[T](t: T): bool {.inline.} =
# FACTOR between hashcommon.mustRehash, intsets.mustRehash
let counter2 = t.counter + t.countDeleted
let length = t.max + 1
assert length > counter2
result = (length * 2 < counter2 * 3) or (length - counter2 < 4)
assert length > t.counter
result = (length * 2 < t.counter * 3) or (length - t.counter < 4)

proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} =
# FACTOR between hashcommon.nextTry, intsets.nextTry
const PERTURB_SHIFT = 5
var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT
perturb = cast[Hash](perturb2)
@@ -35,11 +35,10 @@ proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A,

proc enlarge[A](s: var HashSet[A]) =
var n: KeyValuePairSeq[A]
newSeq(n, s.counter.rightSize)
newSeq(n, len(s.data) * growthFactor)
swap(s.data, n) # n is now old seq
s.countDeleted = 0
for i in countup(0, high(n)):
if isFilledAndValid(n[i].hcode):
if isFilled(n[i].hcode):
var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode)
rawInsert(s, s.data, n[i].key, n[i].hcode, j)

@@ -69,6 +68,11 @@ template containsOrInclImpl() {.dirty.} =
rawInsert(s, s.data, key, hc, -1 - index)
inc(s.counter)

template doWhile(a, b) =
while true:
b
if not a: break

proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
var hc: Hash
var i = rawGet(s, key, hc)
@@ -78,9 +82,17 @@ proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
if i >= 0:
result = false
dec(s.counter)
inc(s.countDeleted)
s.data[i].hcode = deletedMarker
s.data[i].key = default(type(s.data[i].key))
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
var j = i # The correctness of this depends on (h+1) in nextTry,
var r = j # though may be adaptable to other simple sequences.
s.data[i].hcode = 0 # mark current EMPTY
s.data[i].key = default(type(s.data[i].key))
doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
i = (i + 1) and msk # increment mod table size
if isEmpty(s.data[i].hcode): # end of collision cluster; So all done
return
r = s.data[i].hcode and msk # "home" location of key@i
s.data[j] = move(s.data[i]) # data[i] will be marked EMPTY next loop

template dollarImpl() {.dirty.} =
result = "{"
@@ -113,7 +125,7 @@ proc enlarge[A](s: var OrderedSet[A]) =
swap(s.data, n)
while h >= 0:
var nxt = n[h].next
if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used
if isFilled(n[h].hcode):
var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode)
rawInsert(s, s.data, n[h].key, n[h].hcode, j)
h = nxt
@@ -131,7 +143,7 @@ proc exclImpl[A](s: var OrderedSet[A], key: A): bool {.inline.} =
result = true
while h >= 0:
var nxt = n[h].next
if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used
if isFilled(n[h].hcode):
if n[h].hcode == hc and n[h].key == key:
dec s.counter
result = false

@@ -68,7 +68,6 @@ type
## before calling other procs on it.
data: KeyValuePairSeq[A]
counter: int
countDeleted: int

type
OrderedKeyValuePair[A] = tuple[
@@ -81,7 +80,6 @@ type
## <#initOrderedSet,int>`_ before calling other procs on it.
data: OrderedKeyValuePairSeq[A]
counter, first, last: int
countDeleted: int

const
defaultInitialSize* = 64
@@ -249,7 +247,7 @@ iterator items*[A](s: HashSet[A]): A =
## echo b
## # --> {(a: 1, b: 3), (a: 0, b: 4)}
for h in 0 .. high(s.data):
if isFilledAndValid(s.data[h].hcode): yield s.data[h].key
if isFilled(s.data[h].hcode): yield s.data[h].key

proc containsOrIncl*[A](s: var HashSet[A], key: A): bool =
## Includes `key` in the set `s` and tells if `key` was already in `s`.
@@ -341,7 +339,7 @@ proc pop*[A](s: var HashSet[A]): A =
doAssertRaises(KeyError, echo s.pop)

for h in 0 .. high(s.data):
if isFilledAndValid(s.data[h].hcode):
if isFilled(s.data[h].hcode):
result = s.data[h].key
excl(s, result)
return result
@@ -574,8 +572,7 @@ proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] =
proc hash*[A](s: HashSet[A]): Hash =
## Hashing of HashSet.
for h in 0 .. high(s.data):
if isFilledAndValid(s.data[h].hcode):
result = result xor s.data[h].hcode
result = result xor s.data[h].hcode
result = !$result

proc `$`*[A](s: HashSet[A]): string =
@@ -593,6 +590,7 @@ proc `$`*[A](s: HashSet[A]): string =
## # --> {no, esc'aping, is " provided}
dollarImpl()


proc initSet*[A](initialSize = defaultInitialSize): HashSet[A] {.deprecated:
"Deprecated since v0.20, use 'initHashSet'".} = initHashSet[A](initialSize)

@@ -624,7 +622,7 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
var idx = 0
while h >= 0:
var nxt = s.data[h].next
if isFilledAndValid(s.data[h].hcode):
if isFilled(s.data[h].hcode):
yieldStmt
inc(idx)
h = nxt
@@ -858,7 +856,7 @@ proc `==`*[A](s, t: OrderedSet[A]): bool =
while h >= 0 and g >= 0:
var nxh = s.data[h].next
var nxg = t.data[g].next
if isFilledAndValid(s.data[h].hcode) and isFilledAndValid(t.data[g].hcode):
if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode):
if s.data[h].key == t.data[g].key:
inc compared
else:
@@ -25,7 +25,6 @@ type
SharedTable*[A, B] = object ## generic hash SharedTable
data: KeyValuePairSeq[A, B]
counter, dataLen: int
countDeleted: int
lock: Lock

template maxHash(t): untyped = t.dataLen-1
@@ -50,10 +49,9 @@ proc enlarge[A, B](t: var SharedTable[A, B]) =
for i in 0..<oldSize:
let eh = n[i].hcode
if isFilled(eh):
var perturb = t.getPerturb(eh)
var j: Hash = eh and maxHash(t)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t), perturb)
j = nextTry(j, maxHash(t))
rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
deallocShared(n)
@@ -14,20 +14,13 @@ include hashcommon
template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add
genHashImpl(key, hc)
var h: Hash = hc and maxHash(t)
var perturb = t.getPerturb(hc)
while true:
let hcode = t.data[h].hcode
if hcode == deletedMarker or hcode == freeMarker:
break
else:
h = nextTry(h, maxHash(t), perturb)
while isFilled(t.data[h].hcode):
h = nextTry(h, maxHash(t))
result = h

template rawInsertImpl(t) {.dirty.} =
template rawInsertImpl() {.dirty.} =
data[h].key = key
data[h].val = val
if data[h].hcode == deletedMarker:
t.countDeleted.dec
data[h].hcode = hc

proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} =
@@ -35,7 +28,7 @@ proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} =

proc rawInsert[X, A, B](t: var X, data: var KeyValuePairSeq[A, B],
key: A, val: B, hc: Hash, h: Hash) =
rawInsertImpl(t)
rawInsertImpl()

template checkIfInitialized() =
when compiles(defaultInitialSize):
@@ -51,6 +44,7 @@ template addImpl(enlarge) {.dirty.} =
inc(t.counter)

template maybeRehashPutImpl(enlarge) {.dirty.} =
checkIfInitialized()
if mustRehash(t):
enlarge(t)
index = rawGetKnownHC(t, key, hc)
@@ -88,11 +82,24 @@ template delImplIdx(t, i) =
let msk = maxHash(t)
if i >= 0:
dec(t.counter)
inc(t.countDeleted)
t.data[i].hcode = deletedMarker
t.data[i].key = default(type(t.data[i].key))
t.data[i].val = default(type(t.data[i].val))
# mustRehash + enlarge not needed because counter+countDeleted doesn't change
block outer:
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
var j = i # The correctness of this depends on (h+1) in nextTry,
var r = j # though may be adaptable to other simple sequences.
t.data[i].hcode = 0 # mark current EMPTY
t.data[i].key = default(type(t.data[i].key))
t.data[i].val = default(type(t.data[i].val))
while true:
i = (i + 1) and msk # increment mod table size
if isEmpty(t.data[i].hcode): # end of collision cluster; So all done
break outer
r = t.data[i].hcode and msk # "home" location of key@i
if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
break
when defined(js):
t.data[j] = t.data[i]
else:
t.data[j] = move(t.data[i]) # data[j] will be marked EMPTY next loop

template delImpl() {.dirty.} =
var hc: Hash
@@ -108,7 +115,6 @@ template clearImpl() {.dirty.} =
t.counter = 0

template ctAnd(a, b): bool =
# pending https://github.com/nim-lang/Nim/issues/13502
when a:
when b: true
else: false
@@ -126,7 +132,7 @@ template initImpl(result: typed, size: int) =
result.last = -1

template insertImpl() = # for CountTable
checkIfInitialized()
if t.dataLen == 0: initImpl(t, defaultInitialSize)
if mustRehash(t): enlarge(t)
ctRawInsert(t, t.data, key, val)
inc(t.counter)
@@ -233,7 +233,6 @@ type
## For creating an empty Table, use `initTable proc<#initTable,int>`_.
data: KeyValuePairSeq[A, B]
counter: int
countDeleted: int
TableRef*[A, B] = ref Table[A, B] ## Ref version of `Table<#Table>`_.
##
## For creating a new empty TableRef, use `newTable proc
@@ -266,16 +265,14 @@ template get(t, key): untyped =

proc enlarge[A, B](t: var Table[A, B]) =
var n: KeyValuePairSeq[A, B]
newSeq(n, t.counter.rightSize)
newSeq(n, len(t.data) * growthFactor)
swap(t.data, n)
t.countDeleted = 0
for i in countup(0, high(n)):
let eh = n[i].hcode
if isFilledAndValid(eh):
if isFilled(eh):
var j: Hash = eh and maxHash(t)
var perturb = t.getPerturb(eh)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t), perturb)
j = nextTry(j, maxHash(t))
when defined(js):
rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
else:
@@ -662,7 +659,7 @@ iterator pairs*[A, B](t: Table[A, B]): (A, B) =
## # value: [1, 5, 7, 9]
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -684,7 +681,7 @@ iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -705,7 +702,7 @@ iterator keys*[A, B](t: Table[A, B]): A =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].key
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -726,7 +723,7 @@ iterator values*[A, B](t: Table[A, B]): B =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -748,27 +745,10 @@ iterator mvalues*[A, B](t: var Table[A, B]): var B =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")

template hasKeyOrPutCache(cache, h): bool =
# using `IntSet` would be an option worth considering to avoid quadratic
# behavior in case user misuses Table with lots of duplicate keys; but it
# has overhead in the common case of small number of duplicates.
# However: when lots of duplicates are used, all operations would be slow
# anyway because the full `hash(key)` is identical for these, which makes
# `nextTry` follow the exact same path for each key, resulting in large
# collision clusters. Alternatives could involve modifying the hash/retrieval
# based on duplicate key count.
var ret = false
for hi in cache:
if hi == h:
ret = true
break
if not ret: cache.add h
ret

iterator allValues*[A, B](t: Table[A, B]; key: A): B =
## Iterates over any value in the table ``t`` that belongs to the given ``key``.
##
@@ -782,20 +762,13 @@ iterator allValues*[A, B](t: Table[A, B]; key: A): B =
for i in 1..3: a.add('z', 10*i)
doAssert toSeq(a.pairs).sorted == @[('a', 3), ('b', 5), ('z', 10), ('z', 20), ('z', 30)]
doAssert sorted(toSeq(a.allValues('z'))) == @[10, 20, 30]

let hc = genHash(key)
var h: Hash = hc and high(t.data)
var h: Hash = genHash(key) and high(t.data)
let L = len(t)
var perturb = t.getPerturb(hc)

var num = 0
var cache: seq[Hash]
while isFilled(t.data[h].hcode): # `isFilledAndValid` would be incorrect, see test for `allValues`
if t.data[h].hcode == hc and t.data[h].key == key:
if not hasKeyOrPutCache(cache, h):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
h = nextTry(h, high(t.data), perturb)
while isFilled(t.data[h].hcode):
if t.data[h].key == key:
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
h = nextTry(h, high(t.data))
@@ -1118,7 +1091,7 @@ iterator pairs*[A, B](t: TableRef[A, B]): (A, B) =
## # value: [1, 5, 7, 9]
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -1140,7 +1113,7 @@ iterator mpairs*[A, B](t: TableRef[A, B]): (A, var B) =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -1161,7 +1134,7 @@ iterator keys*[A, B](t: TableRef[A, B]): A =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].key
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -1182,7 +1155,7 @@ iterator values*[A, B](t: TableRef[A, B]): B =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -1203,7 +1176,7 @@ iterator mvalues*[A, B](t: TableRef[A, B]): var B =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -1229,8 +1202,6 @@ type
## <#initOrderedTable,int>`_.
data: OrderedKeyValuePairSeq[A, B]
counter, first, last: int
countDeleted: int

OrderedTableRef*[A, B] = ref OrderedTable[A, B] ## Ref version of
## `OrderedTable<#OrderedTable>`_.
##
@@ -1252,7 +1223,7 @@ proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var Hash): int =
proc rawInsert[A, B](t: var OrderedTable[A, B],
data: var OrderedKeyValuePairSeq[A, B],
key: A, val: B, hc: Hash, h: Hash) =
rawInsertImpl(t)
rawInsertImpl()
data[h].next = -1
if t.first < 0: t.first = h
if t.last >= 0: data[t.last].next = h
@@ -1260,20 +1231,18 @@ proc rawInsert[A, B](t: var OrderedTable[A, B],

proc enlarge[A, B](t: var OrderedTable[A, B]) =
var n: OrderedKeyValuePairSeq[A, B]
newSeq(n, t.counter.rightSize)
newSeq(n, len(t.data) * growthFactor)
var h = t.first
t.first = -1
t.last = -1
swap(t.data, n)
t.countDeleted = 0
while h >= 0:
var nxt = n[h].next
let eh = n[h].hcode
if isFilledAndValid(eh):
if isFilled(eh):
var j: Hash = eh and maxHash(t)
var perturb = t.getPerturb(eh)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t), perturb)
j = nextTry(j, maxHash(t))
rawInsert(t, t.data, move n[h].key, move n[h].val, n[h].hcode, j)
h = nxt

@@ -1282,10 +1251,6 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
var h = t.first
while h >= 0:
var nxt = t.data[h].next
# For OrderedTable/OrderedTableRef, isFilled is ok because `del` is O(n)
# and doesn't create tombsones, but if it does start using tombstones,
# carefully replace `isFilled` by `isFilledAndValid` as appropriate for these
# table types only, ditto with `OrderedSet`.
if isFilled(t.data[h].hcode):
yieldStmt
h = nxt
@@ -2229,7 +2194,6 @@ type
## <#initCountTable,int>`_.
data: seq[tuple[key: A, val: int]]
counter: int
countDeleted: int
isSorted: bool
CountTableRef*[A] = ref CountTable[A] ## Ref version of
## `CountTable<#CountTable>`_.
@@ -2242,10 +2206,8 @@ type

proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]],
key: A, val: int) =
let hc = hash(key)
var perturb = t.getPerturb(hc)
var h: Hash = hc and high(data)
while data[h].val != 0: h = nextTry(h, high(data), perturb) # TODO: handle deletedMarker
var h: Hash = hash(key) and high(data)
while data[h].val != 0: h = nextTry(h, high(data))
data[h].key = key
data[h].val = val

@@ -2272,12 +2234,10 @@ proc remove[A](t: var CountTable[A], key: A) =
proc rawGet[A](t: CountTable[A], key: A): int =
if t.data.len == 0:
return -1
let hc = hash(key)
var perturb = t.getPerturb(hc)
var h: Hash = hc and high(t.data) # start with real hash value
while t.data[h].val != 0: # TODO: may need to handle t.data[h].hcode == deletedMarker?
var h: Hash = hash(key) and high(t.data) # start with real hash value
while t.data[h].val != 0:
if t.data[h].key == key: return h
h = nextTry(h, high(t.data), perturb)
h = nextTry(h, high(t.data))
result = -1 - h # < 0 => MISSING; insert idx = -1 - result

template ctget(t, key, default: untyped): untyped =
@@ -1,7 +1,7 @@
discard """
cmd: "nim c --gc:arc $file"
nimout: '''(a: true, n: doAssert)
Table[system.string, trepr.MyType](data: @[], counter: 0, countDeleted: 0)
Table[system.string, trepr.MyType](data: @[], counter: 0)
nil
'''
"""