Unwind just the "pseudorandom probing" part of recent sets,tables changes (#13816)

* Unwind just the "pseudorandom probing" (whole hash-code-keyed variable
stride double hashing) part of recent sets & tables changes (which has
still been causing bugs over a month later (e.g., two days ago
https://github.com/nim-lang/Nim/issues/13794) as well as still having
several "figure this out" implementation question comments in them (see
just diffs of this PR).

This topic has been discussed in many places:
  https://github.com/nim-lang/Nim/issues/13393
  https://github.com/nim-lang/Nim/pull/13418
  https://github.com/nim-lang/Nim/pull/13440
  https://github.com/nim-lang/Nim/issues/13794

Alternative/non-mandatory stronger integer hashes (or vice-versa opt-in
identity hashes) are a better solution that is more general (no illusion
of one hard-coded sequence solving all problems) while retaining the
virtues of linear probing such as cache obliviousness and age-less tables
under delete-heavy workloads (still untested after a month of this change).

The only real solution for truly adversarial keys is a hash keyed off of
data unobservable to attackers.  That all fits better with a few families
of user-pluggable/define-switchable hashes which can be provided in a
separate PR more about `hashes.nim`.

This PR carefully preserves the better (but still hard coded!) probing
of the  `intsets` and other recent fixes like `move` annotations, hash
order invariant tests, `intsets.missingOrExcl` fixing, and the move of
`rightSize` into `hashcommon.nim`.

* Fix `data.len` -> `dataLen` problem.
This commit is contained in:
c-blake
2020-03-31 13:18:45 -04:00
committed by GitHub
parent 7abeba6aeb
commit b1aa3b1eea
8 changed files with 99 additions and 175 deletions

View File

@@ -18,87 +18,43 @@ when not defined(nimHasDefault):
var v: T
v
const freeMarker = 0
const deletedMarker = -1
type UHash = uint
# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These
# two procs retain clarity of that encoding without the space cost of an enum.
proc isFilledAndValid(hcode: Hash): bool {.inline.} =
result = hcode != 0 and hcode != deletedMarker
# performance: we could use bit magic if needed
proc isEmpty(hcode: Hash): bool {.inline.} =
result = hcode == 0
proc isFilled(hcode: Hash): bool {.inline.} =
result = hcode != 0
proc translateBits(a: UHash, numBitsMask: int): UHash {.inline.} =
result = (a shr numBitsMask) or (a shl (UHash.sizeof * 8 - numBitsMask))
proc nextTry(h, maxHash: Hash, perturb: var UHash): Hash {.inline.} =
# FACTOR between hashcommon.nextTry, intsets.nextTry
# an optimization would be to use `(h + 1) and maxHash` for a few iterations
# and then switch to the formula below, to get "best of both worlds": good
# cache locality, except when a collision cluster is detected (ie, large number
# of iterations).
const PERTURB_SHIFT = 5 # consider tying this to `numBitsMask = fastLog2(t.dataLen)`
result = cast[Hash]((5*cast[uint](h) + 1 + perturb) and cast[uint](maxHash))
perturb = perturb shr PERTURB_SHIFT
proc nextTry(h, maxHash: Hash): Hash {.inline.} =
result = (h + 1) and maxHash
proc mustRehash[T](t: T): bool {.inline.} =
# FACTOR between hashcommon.mustRehash, intsets.mustRehash
let counter2 = t.counter + t.countDeleted
let length = t.dataLen
assert(length > counter2)
result = (length * 2 < counter2 * 3) or (length - counter2 < 4) # synchronize with `rightSize`
assert(t.dataLen > t.counter)
result = (t.dataLen * 2 < t.counter * 3) or (t.dataLen - t.counter < 4)
proc rightSize*(count: Natural): int {.inline.} =
## Return the value of `initialSize` to support `count` items.
## Return the value of ``initialSize`` to support ``count`` items.
##
## If more items are expected to be added, simply add that
## expected extra amount to the parameter before calling this.
##
## Internally, we want `mustRehash(t) == false` for t that was just resized.
#
# Make sure to synchronize with `mustRehash`
result = nextPowerOfTwo(count * 3 div 2 + 4)
template getPerturb(t: typed, hc: Hash): UHash =
# we can't use `fastLog2(dataLen(t))` because importing `bitops` would cause codegen errors
# so we use a practical value of half the bit width (eg 64 / 2 = 32 on 64bit machines)
let numBitsMask = sizeof(Hash) * 4 # ie, sizeof(Hash) * 8 / 2
# this makes a major difference for cases like #13393; it causes the bits
# that were masked out in 1st position so they'll be masked in instead, and
# influence the recursion in nextTry earlier rather than later.
translateBits(cast[uint](hc), numBitsMask)
template rawGetKnownHCImpl() {.dirty.} =
if t.dataLen == 0:
return -1
var h: Hash = hc and maxHash(t) # start with real hash value
var perturb = t.getPerturb(hc)
var deletedIndex = -1
while true:
if isFilledAndValid(t.data[h].hcode):
# Compare hc THEN key with boolean short circuit. This makes the common case
# zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
# It does slow down succeeding lookups by one extra Hash cmp&and..usually
# just a few clock cycles, generally worth it for any non-integer-like A.
# performance: we optimize this: depending on type(key), skip hc comparison
if t.data[h].hcode == hc and t.data[h].key == key:
return h
h = nextTry(h, maxHash(t), perturb)
elif t.data[h].hcode == deletedMarker:
if deletedIndex == -1:
deletedIndex = h
h = nextTry(h, maxHash(t), perturb)
else:
break
if deletedIndex == -1:
result = -1 - h # < 0 => MISSING; insert idx = -1 - result
else:
# we prefer returning a (in fact the 1st found) deleted index
result = -1 - deletedIndex
while isFilled(t.data[h].hcode):
# Compare hc THEN key with boolean short circuit. This makes the common case
# zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
# It does slow down succeeding lookups by one extra Hash cmp&and..usually
# just a few clock cycles, generally worth it for any non-integer-like A.
if t.data[h].hcode == hc and t.data[h].key == key:
return h
h = nextTry(h, maxHash(t))
result = -1 - h # < 0 => MISSING; insert idx = -1 - result
proc rawGetKnownHC[X, A](t: X, key: A, hc: Hash): int {.inline.} =
rawGetKnownHCImpl()
@@ -107,8 +63,6 @@ template genHashImpl(key, hc: typed) =
hc = hash(key)
if hc == 0: # This almost never taken branch should be very predictable.
hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine.
elif hc == deletedMarker:
hc = 214159261
template genHash(key: typed): Hash =
var res: Hash

View File

@@ -46,20 +46,16 @@ type
IntSet* = object ## An efficient set of `int` implemented as a sparse bit set.
elems: int # only valid for small numbers
counter, max: int
countDeleted: int
head: PTrunk
data: TrunkSeq
a: array[0..33, int] # profiling shows that 34 elements are enough
proc mustRehash[T](t: T): bool {.inline.} =
# FACTOR between hashcommon.mustRehash, intsets.mustRehash
let counter2 = t.counter + t.countDeleted
let length = t.max + 1
assert length > counter2
result = (length * 2 < counter2 * 3) or (length - counter2 < 4)
assert length > t.counter
result = (length * 2 < t.counter * 3) or (length - t.counter < 4)
proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} =
# FACTOR between hashcommon.nextTry, intsets.nextTry
const PERTURB_SHIFT = 5
var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT
perturb = cast[Hash](perturb2)

View File

@@ -35,11 +35,10 @@ proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A,
proc enlarge[A](s: var HashSet[A]) =
var n: KeyValuePairSeq[A]
newSeq(n, s.counter.rightSize)
newSeq(n, len(s.data) * growthFactor)
swap(s.data, n) # n is now old seq
s.countDeleted = 0
for i in countup(0, high(n)):
if isFilledAndValid(n[i].hcode):
if isFilled(n[i].hcode):
var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode)
rawInsert(s, s.data, n[i].key, n[i].hcode, j)
@@ -69,6 +68,11 @@ template containsOrInclImpl() {.dirty.} =
rawInsert(s, s.data, key, hc, -1 - index)
inc(s.counter)
template doWhile(a, b) =
while true:
b
if not a: break
proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
var hc: Hash
var i = rawGet(s, key, hc)
@@ -78,9 +82,17 @@ proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
if i >= 0:
result = false
dec(s.counter)
inc(s.countDeleted)
s.data[i].hcode = deletedMarker
s.data[i].key = default(type(s.data[i].key))
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
var j = i # The correctness of this depends on (h+1) in nextTry,
var r = j # though may be adaptable to other simple sequences.
s.data[i].hcode = 0 # mark current EMPTY
s.data[i].key = default(type(s.data[i].key))
doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
i = (i + 1) and msk # increment mod table size
if isEmpty(s.data[i].hcode): # end of collision cluster; So all done
return
r = s.data[i].hcode and msk # "home" location of key@i
s.data[j] = move(s.data[i]) # data[i] will be marked EMPTY next loop
template dollarImpl() {.dirty.} =
result = "{"
@@ -113,7 +125,7 @@ proc enlarge[A](s: var OrderedSet[A]) =
swap(s.data, n)
while h >= 0:
var nxt = n[h].next
if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used
if isFilled(n[h].hcode):
var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode)
rawInsert(s, s.data, n[h].key, n[h].hcode, j)
h = nxt
@@ -131,7 +143,7 @@ proc exclImpl[A](s: var OrderedSet[A], key: A): bool {.inline.} =
result = true
while h >= 0:
var nxt = n[h].next
if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used
if isFilled(n[h].hcode):
if n[h].hcode == hc and n[h].key == key:
dec s.counter
result = false

View File

@@ -68,7 +68,6 @@ type
## before calling other procs on it.
data: KeyValuePairSeq[A]
counter: int
countDeleted: int
type
OrderedKeyValuePair[A] = tuple[
@@ -81,7 +80,6 @@ type
## <#initOrderedSet,int>`_ before calling other procs on it.
data: OrderedKeyValuePairSeq[A]
counter, first, last: int
countDeleted: int
const
defaultInitialSize* = 64
@@ -249,7 +247,7 @@ iterator items*[A](s: HashSet[A]): A =
## echo b
## # --> {(a: 1, b: 3), (a: 0, b: 4)}
for h in 0 .. high(s.data):
if isFilledAndValid(s.data[h].hcode): yield s.data[h].key
if isFilled(s.data[h].hcode): yield s.data[h].key
proc containsOrIncl*[A](s: var HashSet[A], key: A): bool =
## Includes `key` in the set `s` and tells if `key` was already in `s`.
@@ -341,7 +339,7 @@ proc pop*[A](s: var HashSet[A]): A =
doAssertRaises(KeyError, echo s.pop)
for h in 0 .. high(s.data):
if isFilledAndValid(s.data[h].hcode):
if isFilled(s.data[h].hcode):
result = s.data[h].key
excl(s, result)
return result
@@ -574,8 +572,7 @@ proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] =
proc hash*[A](s: HashSet[A]): Hash =
## Hashing of HashSet.
for h in 0 .. high(s.data):
if isFilledAndValid(s.data[h].hcode):
result = result xor s.data[h].hcode
result = result xor s.data[h].hcode
result = !$result
proc `$`*[A](s: HashSet[A]): string =
@@ -593,6 +590,7 @@ proc `$`*[A](s: HashSet[A]): string =
## # --> {no, esc'aping, is " provided}
dollarImpl()
proc initSet*[A](initialSize = defaultInitialSize): HashSet[A] {.deprecated:
"Deprecated since v0.20, use 'initHashSet'".} = initHashSet[A](initialSize)
@@ -624,7 +622,7 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
var idx = 0
while h >= 0:
var nxt = s.data[h].next
if isFilledAndValid(s.data[h].hcode):
if isFilled(s.data[h].hcode):
yieldStmt
inc(idx)
h = nxt
@@ -858,7 +856,7 @@ proc `==`*[A](s, t: OrderedSet[A]): bool =
while h >= 0 and g >= 0:
var nxh = s.data[h].next
var nxg = t.data[g].next
if isFilledAndValid(s.data[h].hcode) and isFilledAndValid(t.data[g].hcode):
if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode):
if s.data[h].key == t.data[g].key:
inc compared
else:

View File

@@ -25,7 +25,6 @@ type
SharedTable*[A, B] = object ## generic hash SharedTable
data: KeyValuePairSeq[A, B]
counter, dataLen: int
countDeleted: int
lock: Lock
template maxHash(t): untyped = t.dataLen-1
@@ -50,10 +49,9 @@ proc enlarge[A, B](t: var SharedTable[A, B]) =
for i in 0..<oldSize:
let eh = n[i].hcode
if isFilled(eh):
var perturb = t.getPerturb(eh)
var j: Hash = eh and maxHash(t)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t), perturb)
j = nextTry(j, maxHash(t))
rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
deallocShared(n)

View File

@@ -14,20 +14,13 @@ include hashcommon
template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add
genHashImpl(key, hc)
var h: Hash = hc and maxHash(t)
var perturb = t.getPerturb(hc)
while true:
let hcode = t.data[h].hcode
if hcode == deletedMarker or hcode == freeMarker:
break
else:
h = nextTry(h, maxHash(t), perturb)
while isFilled(t.data[h].hcode):
h = nextTry(h, maxHash(t))
result = h
template rawInsertImpl(t) {.dirty.} =
template rawInsertImpl() {.dirty.} =
data[h].key = key
data[h].val = val
if data[h].hcode == deletedMarker:
t.countDeleted.dec
data[h].hcode = hc
proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} =
@@ -35,7 +28,7 @@ proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} =
proc rawInsert[X, A, B](t: var X, data: var KeyValuePairSeq[A, B],
key: A, val: B, hc: Hash, h: Hash) =
rawInsertImpl(t)
rawInsertImpl()
template checkIfInitialized() =
when compiles(defaultInitialSize):
@@ -51,6 +44,7 @@ template addImpl(enlarge) {.dirty.} =
inc(t.counter)
template maybeRehashPutImpl(enlarge) {.dirty.} =
checkIfInitialized()
if mustRehash(t):
enlarge(t)
index = rawGetKnownHC(t, key, hc)
@@ -88,11 +82,24 @@ template delImplIdx(t, i) =
let msk = maxHash(t)
if i >= 0:
dec(t.counter)
inc(t.countDeleted)
t.data[i].hcode = deletedMarker
t.data[i].key = default(type(t.data[i].key))
t.data[i].val = default(type(t.data[i].val))
# mustRehash + enlarge not needed because counter+countDeleted doesn't change
block outer:
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
var j = i # The correctness of this depends on (h+1) in nextTry,
var r = j # though may be adaptable to other simple sequences.
t.data[i].hcode = 0 # mark current EMPTY
t.data[i].key = default(type(t.data[i].key))
t.data[i].val = default(type(t.data[i].val))
while true:
i = (i + 1) and msk # increment mod table size
if isEmpty(t.data[i].hcode): # end of collision cluster; So all done
break outer
r = t.data[i].hcode and msk # "home" location of key@i
if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
break
when defined(js):
t.data[j] = t.data[i]
else:
t.data[j] = move(t.data[i]) # data[j] will be marked EMPTY next loop
template delImpl() {.dirty.} =
var hc: Hash
@@ -108,7 +115,6 @@ template clearImpl() {.dirty.} =
t.counter = 0
template ctAnd(a, b): bool =
# pending https://github.com/nim-lang/Nim/issues/13502
when a:
when b: true
else: false
@@ -126,7 +132,7 @@ template initImpl(result: typed, size: int) =
result.last = -1
template insertImpl() = # for CountTable
checkIfInitialized()
if t.dataLen == 0: initImpl(t, defaultInitialSize)
if mustRehash(t): enlarge(t)
ctRawInsert(t, t.data, key, val)
inc(t.counter)

View File

@@ -233,7 +233,6 @@ type
## For creating an empty Table, use `initTable proc<#initTable,int>`_.
data: KeyValuePairSeq[A, B]
counter: int
countDeleted: int
TableRef*[A, B] = ref Table[A, B] ## Ref version of `Table<#Table>`_.
##
## For creating a new empty TableRef, use `newTable proc
@@ -266,16 +265,14 @@ template get(t, key): untyped =
proc enlarge[A, B](t: var Table[A, B]) =
var n: KeyValuePairSeq[A, B]
newSeq(n, t.counter.rightSize)
newSeq(n, len(t.data) * growthFactor)
swap(t.data, n)
t.countDeleted = 0
for i in countup(0, high(n)):
let eh = n[i].hcode
if isFilledAndValid(eh):
if isFilled(eh):
var j: Hash = eh and maxHash(t)
var perturb = t.getPerturb(eh)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t), perturb)
j = nextTry(j, maxHash(t))
when defined(js):
rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
else:
@@ -662,7 +659,7 @@ iterator pairs*[A, B](t: Table[A, B]): (A, B) =
## # value: [1, 5, 7, 9]
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")
@@ -684,7 +681,7 @@ iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) =
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")
@@ -705,7 +702,7 @@ iterator keys*[A, B](t: Table[A, B]): A =
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].key
assert(len(t) == L, "the length of the table changed while iterating over it")
@@ -726,7 +723,7 @@ iterator values*[A, B](t: Table[A, B]): B =
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
@@ -748,27 +745,10 @@ iterator mvalues*[A, B](t: var Table[A, B]): var B =
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
template hasKeyOrPutCache(cache, h): bool =
# using `IntSet` would be an option worth considering to avoid quadratic
# behavior in case user misuses Table with lots of duplicate keys; but it
# has overhead in the common case of small number of duplicates.
# However: when lots of duplicates are used, all operations would be slow
# anyway because the full `hash(key)` is identical for these, which makes
# `nextTry` follow the exact same path for each key, resulting in large
# collision clusters. Alternatives could involve modifying the hash/retrieval
# based on duplicate key count.
var ret = false
for hi in cache:
if hi == h:
ret = true
break
if not ret: cache.add h
ret
iterator allValues*[A, B](t: Table[A, B]; key: A): B =
## Iterates over any value in the table ``t`` that belongs to the given ``key``.
##
@@ -782,20 +762,13 @@ iterator allValues*[A, B](t: Table[A, B]; key: A): B =
for i in 1..3: a.add('z', 10*i)
doAssert toSeq(a.pairs).sorted == @[('a', 3), ('b', 5), ('z', 10), ('z', 20), ('z', 30)]
doAssert sorted(toSeq(a.allValues('z'))) == @[10, 20, 30]
let hc = genHash(key)
var h: Hash = hc and high(t.data)
var h: Hash = genHash(key) and high(t.data)
let L = len(t)
var perturb = t.getPerturb(hc)
var num = 0
var cache: seq[Hash]
while isFilled(t.data[h].hcode): # `isFilledAndValid` would be incorrect, see test for `allValues`
if t.data[h].hcode == hc and t.data[h].key == key:
if not hasKeyOrPutCache(cache, h):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
h = nextTry(h, high(t.data), perturb)
while isFilled(t.data[h].hcode):
if t.data[h].key == key:
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
h = nextTry(h, high(t.data))
@@ -1118,7 +1091,7 @@ iterator pairs*[A, B](t: TableRef[A, B]): (A, B) =
## # value: [1, 5, 7, 9]
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")
@@ -1140,7 +1113,7 @@ iterator mpairs*[A, B](t: TableRef[A, B]): (A, var B) =
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")
@@ -1161,7 +1134,7 @@ iterator keys*[A, B](t: TableRef[A, B]): A =
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].key
assert(len(t) == L, "the length of the table changed while iterating over it")
@@ -1182,7 +1155,7 @@ iterator values*[A, B](t: TableRef[A, B]): B =
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
@@ -1203,7 +1176,7 @@ iterator mvalues*[A, B](t: TableRef[A, B]): var B =
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
@@ -1229,8 +1202,6 @@ type
## <#initOrderedTable,int>`_.
data: OrderedKeyValuePairSeq[A, B]
counter, first, last: int
countDeleted: int
OrderedTableRef*[A, B] = ref OrderedTable[A, B] ## Ref version of
## `OrderedTable<#OrderedTable>`_.
##
@@ -1252,7 +1223,7 @@ proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var Hash): int =
proc rawInsert[A, B](t: var OrderedTable[A, B],
data: var OrderedKeyValuePairSeq[A, B],
key: A, val: B, hc: Hash, h: Hash) =
rawInsertImpl(t)
rawInsertImpl()
data[h].next = -1
if t.first < 0: t.first = h
if t.last >= 0: data[t.last].next = h
@@ -1260,20 +1231,18 @@ proc rawInsert[A, B](t: var OrderedTable[A, B],
proc enlarge[A, B](t: var OrderedTable[A, B]) =
var n: OrderedKeyValuePairSeq[A, B]
newSeq(n, t.counter.rightSize)
newSeq(n, len(t.data) * growthFactor)
var h = t.first
t.first = -1
t.last = -1
swap(t.data, n)
t.countDeleted = 0
while h >= 0:
var nxt = n[h].next
let eh = n[h].hcode
if isFilledAndValid(eh):
if isFilled(eh):
var j: Hash = eh and maxHash(t)
var perturb = t.getPerturb(eh)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t), perturb)
j = nextTry(j, maxHash(t))
rawInsert(t, t.data, move n[h].key, move n[h].val, n[h].hcode, j)
h = nxt
@@ -1282,10 +1251,6 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
var h = t.first
while h >= 0:
var nxt = t.data[h].next
# For OrderedTable/OrderedTableRef, isFilled is ok because `del` is O(n)
# and doesn't create tombsones, but if it does start using tombstones,
# carefully replace `isFilled` by `isFilledAndValid` as appropriate for these
# table types only, ditto with `OrderedSet`.
if isFilled(t.data[h].hcode):
yieldStmt
h = nxt
@@ -2229,7 +2194,6 @@ type
## <#initCountTable,int>`_.
data: seq[tuple[key: A, val: int]]
counter: int
countDeleted: int
isSorted: bool
CountTableRef*[A] = ref CountTable[A] ## Ref version of
## `CountTable<#CountTable>`_.
@@ -2242,10 +2206,8 @@ type
proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]],
key: A, val: int) =
let hc = hash(key)
var perturb = t.getPerturb(hc)
var h: Hash = hc and high(data)
while data[h].val != 0: h = nextTry(h, high(data), perturb) # TODO: handle deletedMarker
var h: Hash = hash(key) and high(data)
while data[h].val != 0: h = nextTry(h, high(data))
data[h].key = key
data[h].val = val
@@ -2272,12 +2234,10 @@ proc remove[A](t: var CountTable[A], key: A) =
proc rawGet[A](t: CountTable[A], key: A): int =
if t.data.len == 0:
return -1
let hc = hash(key)
var perturb = t.getPerturb(hc)
var h: Hash = hc and high(t.data) # start with real hash value
while t.data[h].val != 0: # TODO: may need to handle t.data[h].hcode == deletedMarker?
var h: Hash = hash(key) and high(t.data) # start with real hash value
while t.data[h].val != 0:
if t.data[h].key == key: return h
h = nextTry(h, high(t.data), perturb)
h = nextTry(h, high(t.data))
result = -1 - h # < 0 => MISSING; insert idx = -1 - result
template ctget(t, key, default: untyped): untyped =

View File

@@ -1,7 +1,7 @@
discard """
cmd: "nim c --gc:arc $file"
nimout: '''(a: true, n: doAssert)
Table[system.string, trepr.MyType](data: @[], counter: 0, countDeleted: 0)
Table[system.string, trepr.MyType](data: @[], counter: 0)
nil
'''
"""