[backport] pseudorandom probing for hash collision (#13418)

Author: Timothee Cour
Date: 2020-02-19 08:19:55 -08:00
Committed by: GitHub
Parent: 273a93581f
Commit: 8c22518d67
12 changed files with 258 additions and 178 deletions


@@ -18,34 +18,87 @@ when not defined(nimHasDefault):
var v: T
v
const freeMarker = 0
const deletedMarker = -1
type UHash = uint
# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These
# procs retain clarity of that encoding without the space cost of an enum.
proc isEmpty(hcode: Hash): bool {.inline.} =
result = hcode == 0
proc isFilledAndValid(hcode: Hash): bool {.inline.} =
result = hcode != 0 and hcode != deletedMarker
# performance: we could use bit magic if needed
proc isFilled(hcode: Hash): bool {.inline.} =
result = hcode != 0
proc nextTry(h, maxHash: Hash): Hash {.inline.} =
result = (h + 1) and maxHash
proc mustRehash(length, counter: int): bool {.inline.} =
assert(length > counter)
result = (length * 2 < counter * 3) or (length - counter < 4)
proc translateBits(a: UHash, numBitsMask: int): UHash {.inline.} =
result = (a shr numBitsMask) or (a shl (UHash.sizeof * 8 - numBitsMask))
proc nextTry(h, maxHash: Hash, perturb: var UHash): Hash {.inline.} =
# FACTOR between hashcommon.nextTry, intsets.nextTry
# an optimization would be to use `(h + 1) and maxHash` for a few iterations
# and then switch to the formula below, to get the best of both worlds: good
# cache locality, except when a collision cluster is detected (i.e., a large
# number of iterations).
const PERTURB_SHIFT = 5 # consider tying this to `numBitsMask = fastLog2(t.dataLen)`
result = cast[Hash]((5*cast[uint](h) + 1 + perturb) and cast[uint](maxHash))
perturb = perturb shr PERTURB_SHIFT
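
To see how this probe sequence behaves, here is a minimal standalone sketch of the recurrence above (`nextTryDemo`, the start slot, and the initial `perturb` value are illustrative, not part of the diff). Once `perturb` decays to zero, the recurrence degenerates to `h = (5*h + 1) mod 2^k`, a full-period LCG, so every slot is eventually visited; individual slots may be revisited along the way, which is why `allValues` below has to deduplicate yielded slots:

const PERTURB_SHIFT = 5

proc nextTryDemo(h, maxHash: int, perturb: var uint): int =
  result = int((5'u * uint(h) + 1 + perturb) and uint(maxHash))
  perturb = perturb shr PERTURB_SHIFT

var
  h = 3                    # stand-in for hc and maxHash
  perturb = 0xdeadbeef'u   # stand-in for getPerturb(t, hc)
  seen: array[16, bool]    # table with maxHash = 15
  visited = 0
for _ in 0 ..< 64:
  if not seen[h]:
    seen[h] = true
    inc visited
  h = nextTryDemo(h, 15, perturb)
doAssert visited == 16     # every slot of the table is reachable
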
proc mustRehash[T](t: T): bool {.inline.} =
# FACTOR between hashcommon.mustRehash, intsets.mustRehash
let counter2 = t.counter + t.countDeleted
let length = t.dataLen
assert(length > counter2)
result = (length * 2 < counter2 * 3) or (length - counter2 < 4) # synchronize with `rightSize`
proc rightSize*(count: Natural): int {.inline.} =
## Return the value of `initialSize` to support `count` items.
##
## If more items are expected to be added, simply add that
## expected extra amount to the parameter before calling this.
##
## Internally, we want `mustRehash(t) == false` for t that was just resized.
# Make sure to synchronize with `mustRehash`
result = nextPowerOfTwo(count * 3 div 2 + 4)
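
A worked check of how the two stay synchronized, under the simplifying assumption of no tombstones (`countDeleted == 0`): `rightSize(10) = nextPowerOfTwo(10 * 3 div 2 + 4) = nextPowerOfTwo(19) = 32`, and a 32-slot table holding 10 items is safely below both rehash triggers. A standalone sketch:

import math   # nextPowerOfTwo

proc rightSizeDemo(count: Natural): int =
  nextPowerOfTwo(count * 3 div 2 + 4)

proc mustRehashDemo(length, counter: int): bool =
  (length * 2 < counter * 3) or (length - counter < 4)

doAssert rightSizeDemo(10) == 32
for count in 0 .. 64:
  # a freshly resized table never triggers an immediate enlarge
  doAssert not mustRehashDemo(rightSizeDemo(count), count)
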
template getPerturb(t: typed, hc: Hash): UHash =
# we can't use `fastLog2(dataLen(t))` because importing `bitops` would cause codegen errors
# so we use a practical value of half the bit width (e.g. 64 / 2 = 32 on 64-bit machines)
let numBitsMask = sizeof(Hash) * 4 # i.e., sizeof(Hash) * 8 / 2
# this makes a major difference for cases like #13393; it rotates the bits
# that `and maxHash` would mask out into the low positions, so they influence
# the probe sequence in nextTry earlier rather than later.
translateBits(cast[uint](hc), numBitsMask)
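
To see the effect, consider two hashes that differ only in their high bits: their home slot `hc and maxHash` is identical, but the rotated perturb differs from the very first probe. A standalone sketch, assuming 64-bit `uint` (names are illustrative):

proc translateBitsDemo(a: uint, numBitsMask: int): uint =
  (a shr numBitsMask) or (a shl (sizeof(uint) * 8 - numBitsMask))

when sizeof(uint) == 8:
  let
    hc1 = 0x0000_0001_0000_0003'u
    hc2 = 0x0000_0002_0000_0003'u
    maxHash = 0xFF'u
  doAssert (hc1 and maxHash) == (hc2 and maxHash)    # same home slot
  doAssert translateBitsDemo(hc1, 32) != translateBitsDemo(hc2, 32)
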
template rawGetKnownHCImpl() {.dirty.} =
if t.dataLen == 0:
return -1
var h: Hash = hc and maxHash(t) # start with real hash value
while isFilled(t.data[h].hcode):
# Compare hc THEN key with boolean short-circuit. This makes the common case
# cost zero `==key` comparisons for missing keys (e.g. inserts) and exactly
# one for present keys. It does slow down successful lookups by one extra
# Hash comparison, usually just a few clock cycles; generally worth it for
# any non-integer-like A.
if t.data[h].hcode == hc and t.data[h].key == key:
return h
h = nextTry(h, maxHash(t))
result = -1 - h # < 0 => MISSING; insert idx = -1 - result
var perturb = t.getPerturb(hc)
var deletedIndex = -1
while true:
if isFilledAndValid(t.data[h].hcode):
# Compare hc THEN key with boolean short-circuit. This makes the common case
# cost zero `==key` comparisons for missing keys (e.g. inserts) and exactly
# one for present keys. It does slow down successful lookups by one extra
# Hash comparison, usually just a few clock cycles; generally worth it for
# any non-integer-like A.
# performance: we could optimize this further by skipping the hc comparison,
# depending on type(key)
if t.data[h].hcode == hc and t.data[h].key == key:
return h
h = nextTry(h, maxHash(t), perturb)
elif t.data[h].hcode == deletedMarker:
if deletedIndex == -1:
deletedIndex = h
h = nextTry(h, maxHash(t), perturb)
else:
break
if deletedIndex == -1:
result = -1 - h # < 0 => MISSING; insert idx = -1 - result
else:
# we prefer returning a deleted index (in fact the first one found)
result = -1 - deletedIndex
proc rawGetKnownHC[X, A](t: X, key: A, hc: Hash): int {.inline.} =
rawGetKnownHCImpl()
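
Throughout these templates, a non-negative result means the key was found at that slot, while a negative result encodes the preferred insertion slot as `-1 - result`. A minimal decoding sketch with hypothetical names:

proc decodeRawGet(res: int): tuple[found: bool, slot: int] =
  if res >= 0: (found: true, slot: res)       # key present at res
  else: (found: false, slot: -1 - res)        # missing; insert at -1 - res

let r = decodeRawGet(-1 - 7)   # what rawGet returns when slot 7 is free
doAssert not r.found and r.slot == 7
doAssert decodeRawGet(5).found # non-negative: key present at slot 5
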
@@ -54,6 +107,8 @@ template genHashImpl(key, hc: typed) =
hc = hash(key)
if hc == 0: # This almost-never-taken branch should be very predictable.
hc = 314159265 # Value doesn't matter; any non-zero favorite is fine.
elif hc == deletedMarker:
hc = 214159261
template genHash(key: typed): Hash =
var res: Hash


@@ -46,30 +46,40 @@ type
IntSet* = object ## An efficient set of `int` implemented as a sparse bit set.
elems: int # only valid for small numbers
counter, max: int
countDeleted: int
head: PTrunk
data: TrunkSeq
a: array[0..33, int] # profiling shows that 34 elements are enough
proc mustRehash(length, counter: int): bool {.inline.} =
assert(length > counter)
result = (length * 2 < counter * 3) or (length - counter < 4)
proc mustRehash[T](t: T): bool {.inline.} =
# FACTOR between hashcommon.mustRehash, intsets.mustRehash
let counter2 = t.counter + t.countDeleted
let length = t.max + 1
assert length > counter2
result = (length * 2 < counter2 * 3) or (length - counter2 < 4)
proc nextTry(h, maxHash: Hash): Hash {.inline.} =
result = ((5 * h) + 1) and maxHash
proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} =
# FACTOR between hashcommon.nextTry, intsets.nextTry
const PERTURB_SHIFT = 5
var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT
perturb = cast[Hash](perturb2)
result = ((5*h) + 1 + perturb) and maxHash
proc intSetGet(t: IntSet, key: int): PTrunk =
var h = key and t.max
var perturb = key
while t.data[h] != nil:
if t.data[h].key == key:
return t.data[h]
h = nextTry(h, t.max)
h = nextTry(h, t.max, perturb)
result = nil
proc intSetRawInsert(t: IntSet, data: var TrunkSeq, desc: PTrunk) =
var h = desc.key and t.max
var perturb = desc.key
while data[h] != nil:
assert(data[h] != desc)
h = nextTry(h, t.max)
h = nextTry(h, t.max, perturb)
assert(data[h] == nil)
data[h] = desc
@@ -84,14 +94,16 @@ proc intSetEnlarge(t: var IntSet) =
proc intSetPut(t: var IntSet, key: int): PTrunk =
var h = key and t.max
var perturb = key
while t.data[h] != nil:
if t.data[h].key == key:
return t.data[h]
h = nextTry(h, t.max)
if mustRehash(t.max + 1, t.counter): intSetEnlarge(t)
h = nextTry(h, t.max, perturb)
if mustRehash(t): intSetEnlarge(t)
inc(t.counter)
h = key and t.max
while t.data[h] != nil: h = nextTry(h, t.max)
perturb = key
while t.data[h] != nil: h = nextTry(h, t.max, perturb)
assert(t.data[h] == nil)
new(result)
result.next = t.head
@@ -100,6 +112,7 @@ proc intSetPut(t: var IntSet, key: int): PTrunk =
t.data[h] = result
proc bitincl(s: var IntSet, key: int) {.inline.} =
var ret: PTrunk
var t = intSetPut(s, `shr`(key, TrunkShift))
var u = key and TrunkMask
t.bits[u shr IntShift] = t.bits[u shr IntShift] or
@@ -393,7 +406,8 @@ proc assign*(dest: var IntSet, src: IntSet) =
var it = src.head
while it != nil:
var h = it.key and dest.max
while dest.data[h] != nil: h = nextTry(h, dest.max)
var perturb = it.key
while dest.data[h] != nil: h = nextTry(h, dest.max, perturb)
assert(dest.data[h] == nil)
var n: PTrunk
new(n)


@@ -48,7 +48,7 @@ template inclImpl() {.dirty.} =
var hc: Hash
var index = rawGet(s, key, hc)
if index < 0:
if mustRehash(len(s.data), s.counter):
if mustRehash(s):
enlarge(s)
index = rawGetKnownHC(s, key, hc)
rawInsert(s, s.data, key, hc, -1 - index)
@@ -62,17 +62,12 @@ template containsOrInclImpl() {.dirty.} =
if index >= 0:
result = true
else:
if mustRehash(len(s.data), s.counter):
if mustRehash(s):
enlarge(s)
index = rawGetKnownHC(s, key, hc)
rawInsert(s, s.data, key, hc, -1 - index)
inc(s.counter)
template doWhile(a, b) =
while true:
b
if not a: break
proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
var hc: Hash
var i = rawGet(s, key, hc)
@@ -82,17 +77,9 @@ proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
if i >= 0:
result = false
dec(s.counter)
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
var j = i # The correctness of this depends on (h+1) in nextTry,
var r = j # though may be adaptable to other simple sequences.
s.data[i].hcode = 0 # mark current EMPTY
s.data[i].key = default(type(s.data[i].key))
doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
i = (i + 1) and msk # increment mod table size
if isEmpty(s.data[i].hcode): # end of collision cluster; So all done
return
r = s.data[i].hcode and msk # "home" location of key@i
s.data[j] = move(s.data[i]) # data[i] will be marked EMPTY next loop
inc(s.countDeleted)
s.data[i].hcode = deletedMarker
s.data[i].key = default(type(s.data[i].key))
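
This hunk changes the deletion strategy: Knuth's backward-shift loop (removed above) is only correct for the plain `(h + 1) and maxHash` probe, where all colliding keys share one contiguous cluster; with per-key perturbed probing that invariant no longer holds, so the slot is instead marked with `deletedMarker` (a tombstone) that lookups probe past. A standalone sketch of why the tombstone is needed, using a toy 4-slot table of bare hcodes and a linear probe for brevity:

const
  freeMarker = 0
  deletedMarker = -1

# hcodes 1 and 5 both have home slot 1 (mod 4); 5 was placed by probing to 2
var slots = @[freeMarker, 1, 5, freeMarker]

proc find(slots: seq[int], hc: int): int =
  var h = hc and 3
  while slots[h] != freeMarker:    # a tombstone does NOT stop the probe
    if slots[h] == hc: return h
    h = (h + 1) and 3
  result = -1

slots[1] = deletedMarker           # delete hcode 1; 5 must stay reachable
doAssert find(slots, 5) == 2       # probing walks over the tombstone
# had deletion written freeMarker instead, find(slots, 5) would stop at
# slot 1 and wrongly report 5 as missing
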
template dollarImpl() {.dirty.} =
result = "{"


@@ -68,6 +68,7 @@ type
## before calling other procs on it.
data: KeyValuePairSeq[A]
counter: int
countDeleted: int
type
OrderedKeyValuePair[A] = tuple[
@@ -80,15 +81,13 @@ type
## <#initOrderedSet,int>`_ before calling other procs on it.
data: OrderedKeyValuePairSeq[A]
counter, first, last: int
countDeleted: int
const
defaultInitialSize* = 64
include setimpl
proc rightSize*(count: Natural): int {.inline.}
# ---------------------------------------------------------------------
# ------------------------------ HashSet ------------------------------
# ---------------------------------------------------------------------
@@ -250,7 +249,7 @@ iterator items*[A](s: HashSet[A]): A =
## echo b
## # --> {(a: 1, b: 3), (a: 0, b: 4)}
for h in 0 .. high(s.data):
if isFilled(s.data[h].hcode): yield s.data[h].key
if isFilledAndValid(s.data[h].hcode): yield s.data[h].key
proc containsOrIncl*[A](s: var HashSet[A], key: A): bool =
## Includes `key` in the set `s` and tells if `key` was already in `s`.
@@ -342,7 +341,7 @@ proc pop*[A](s: var HashSet[A]): A =
doAssertRaises(KeyError, echo s.pop)
for h in 0 .. high(s.data):
if isFilled(s.data[h].hcode):
if isFilledAndValid(s.data[h].hcode):
result = s.data[h].key
excl(s, result)
return result
@@ -593,16 +592,6 @@ proc `$`*[A](s: HashSet[A]): string =
## # --> {no, esc'aping, is " provided}
dollarImpl()
proc rightSize*(count: Natural): int {.inline.} =
## Return the value of `initialSize` to support `count` items.
##
## If more items are expected to be added, simply add that
## expected extra amount to the parameter before calling this.
##
## Internally, we want `mustRehash(rightSize(x), x) == false`.
result = nextPowerOfTwo(count * 3 div 2 + 4)
proc initSet*[A](initialSize = defaultInitialSize): HashSet[A] {.deprecated:
"Deprecated since v0.20, use 'initHashSet'".} = initHashSet[A](initialSize)
@@ -634,7 +623,7 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
var idx = 0
while h >= 0:
var nxt = s.data[h].next
if isFilled(s.data[h].hcode):
if isFilledAndValid(s.data[h].hcode):
yieldStmt
inc(idx)
h = nxt
@@ -868,7 +857,7 @@ proc `==`*[A](s, t: OrderedSet[A]): bool =
while h >= 0 and g >= 0:
var nxh = s.data[h].next
var nxg = t.data[g].next
if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode):
if isFilledAndValid(s.data[h].hcode) and isFilledAndValid(t.data[g].hcode):
if s.data[h].key == t.data[g].key:
inc compared
else:
@@ -1146,10 +1135,19 @@ when isMainModule and not defined(release):
b.incl(2)
assert b.len == 1
for i in 0 .. 32:
var s = rightSize(i)
if s <= i or mustRehash(s, i):
echo "performance issue: rightSize() will not elide enlarge() at ", i
block:
type FakeTable = object
dataLen: int
counter: int
countDeleted: int
var t: FakeTable
for i in 0 .. 32:
var s = rightSize(i)
t.dataLen = s
t.counter = i
doAssert s > i and not mustRehash(t),
"performance issue: rightSize() will not elide enlarge() at: " & $i
block missingOrExcl:
var s = toOrderedSet([2, 3, 6, 7])


@@ -25,6 +25,7 @@ type
SharedTable*[A, B] = object ## generic hash SharedTable
data: KeyValuePairSeq[A, B]
counter, dataLen: int
countDeleted: int
lock: Lock
template maxHash(t): untyped = t.dataLen-1
@@ -32,7 +33,7 @@ template maxHash(t): untyped = t.dataLen-1
include tableimpl
template st_maybeRehashPutImpl(enlarge) {.dirty.} =
if mustRehash(t.dataLen, t.counter):
if mustRehash(t):
enlarge(t)
index = rawGetKnownHC(t, key, hc)
index = -1 - index # important to transform for mgetOrPutImpl
@@ -49,9 +50,10 @@ proc enlarge[A, B](t: var SharedTable[A, B]) =
for i in 0..<oldSize:
let eh = n[i].hcode
if isFilled(eh):
var perturb = t.getPerturb(eh)
var j: Hash = eh and maxHash(t)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t))
j = nextTry(j, maxHash(t), perturb)
rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
deallocShared(n)
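
Note that enlarging doubles as tombstone collection: only live entries are reinserted into the fresh array, so the deleted count effectively resets (the `Table` and `OrderedTable` versions below set `countDeleted = 0` explicitly). A simplified standalone sketch over bare hcodes, with a plain linear probe for brevity:

const deletedMarker = -1

proc rehash(old: seq[int], newLen: int): seq[int] =
  # reinsert only live hcodes; tombstones are dropped along the way
  result = newSeq[int](newLen)
  for hc in old:
    if hc != 0 and hc != deletedMarker:
      var h = hc and (newLen - 1)
      while result[h] != 0:
        h = (h + 1) and (newLen - 1)
      result[h] = hc

let grown = rehash(@[3, deletedMarker, 5, 0], 8)
var live = 0
for hc in grown:
  if hc != 0: inc live
doAssert live == 2   # the tombstone did not survive the resize
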


@@ -14,13 +14,20 @@ include hashcommon
template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add
genHashImpl(key, hc)
var h: Hash = hc and maxHash(t)
while isFilled(t.data[h].hcode):
h = nextTry(h, maxHash(t))
var perturb = t.getPerturb(hc)
while true:
let hcode = t.data[h].hcode
if hcode == deletedMarker or hcode == freeMarker:
break
else:
h = nextTry(h, maxHash(t), perturb)
result = h
template rawInsertImpl() {.dirty.} =
template rawInsertImpl(t) {.dirty.} =
data[h].key = key
data[h].val = val
if data[h].hcode == deletedMarker:
t.countDeleted.dec
data[h].hcode = hc
proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} =
@@ -28,7 +35,7 @@ proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} =
proc rawInsert[X, A, B](t: var X, data: var KeyValuePairSeq[A, B],
key: A, val: B, hc: Hash, h: Hash) =
rawInsertImpl()
rawInsertImpl(t)
template checkIfInitialized() =
when compiles(defaultInitialSize):
@@ -37,15 +44,14 @@ template checkIfInitialized() =
template addImpl(enlarge) {.dirty.} =
checkIfInitialized()
if mustRehash(t.dataLen, t.counter): enlarge(t)
if mustRehash(t): enlarge(t)
var hc: Hash
var j = rawGetDeep(t, key, hc)
rawInsert(t, t.data, key, val, hc, j)
inc(t.counter)
template maybeRehashPutImpl(enlarge) {.dirty.} =
checkIfInitialized()
if mustRehash(t.dataLen, t.counter):
if mustRehash(t):
enlarge(t)
index = rawGetKnownHC(t, key, hc)
index = -1 - index # important to transform for mgetOrPutImpl
@@ -82,24 +88,11 @@ template delImplIdx(t, i) =
let msk = maxHash(t)
if i >= 0:
dec(t.counter)
block outer:
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
var j = i # The correctness of this depends on (h+1) in nextTry,
var r = j # though may be adaptable to other simple sequences.
t.data[i].hcode = 0 # mark current EMPTY
t.data[i].key = default(type(t.data[i].key))
t.data[i].val = default(type(t.data[i].val))
while true:
i = (i + 1) and msk # increment mod table size
if isEmpty(t.data[i].hcode): # end of collision cluster; So all done
break outer
r = t.data[i].hcode and msk # "home" location of key@i
if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
break
when defined(js):
t.data[j] = t.data[i]
else:
t.data[j] = move(t.data[i]) # data[j] will be marked EMPTY next loop
inc(t.countDeleted)
t.data[i].hcode = deletedMarker
t.data[i].key = default(type(t.data[i].key))
t.data[i].val = default(type(t.data[i].val))
# mustRehash + enlarge not needed because counter+countDeleted doesn't change
template delImpl() {.dirty.} =
var hc: Hash
@@ -123,8 +116,8 @@ template initImpl(result: typed, size: int) =
result.last = -1
template insertImpl() = # for CountTable
if t.dataLen == 0: initImpl(t, defaultInitialSize)
if mustRehash(len(t.data), t.counter): enlarge(t)
checkIfInitialized()
if mustRehash(t): enlarge(t)
ctRawInsert(t, t.data, key, val)
inc(t.counter)


@@ -233,6 +233,7 @@ type
## For creating an empty Table, use `initTable proc<#initTable,int>`_.
data: KeyValuePairSeq[A, B]
counter: int
countDeleted: int
TableRef*[A, B] = ref Table[A, B] ## Ref version of `Table<#Table>`_.
##
## For creating a new empty TableRef, use `newTable proc
@@ -250,8 +251,6 @@ template dataLen(t): untyped = len(t.data)
include tableimpl
proc rightSize*(count: Natural): int {.inline.}
template get(t, key): untyped =
## retrieves the value at ``t[key]``. The value can be modified.
## If ``key`` is not in ``t``, the ``KeyError`` exception is raised.
@@ -267,14 +266,16 @@ template get(t, key): untyped =
proc enlarge[A, B](t: var Table[A, B]) =
var n: KeyValuePairSeq[A, B]
newSeq(n, len(t.data) * growthFactor)
newSeq(n, t.counter.rightSize)
swap(t.data, n)
t.countDeleted = 0
for i in countup(0, high(n)):
let eh = n[i].hcode
if isFilled(eh):
if isFilledAndValid(eh):
var j: Hash = eh and maxHash(t)
var perturb = t.getPerturb(eh)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t))
j = nextTry(j, maxHash(t), perturb)
when defined(js):
rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
else:
@@ -579,15 +580,6 @@ proc `==`*[A, B](s, t: Table[A, B]): bool =
equalsImpl(s, t)
proc rightSize*(count: Natural): int {.inline.} =
## Return the value of ``initialSize`` to support ``count`` items.
##
## If more items are expected to be added, simply add that
## expected extra amount to the parameter before calling this.
##
## Internally, we want mustRehash(rightSize(x), x) == false.
result = nextPowerOfTwo(count * 3 div 2 + 4)
proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): Table[C, B] =
## Index the collection with the proc provided.
# TODO: As soon as supported, change collection: A to collection: A[B]
@@ -670,7 +662,7 @@ iterator pairs*[A, B](t: Table[A, B]): (A, B) =
## # value: [1, 5, 7, 9]
let L = len(t)
for h in 0 .. high(t.data):
if isFilled(t.data[h].hcode):
if isFilledAndValid(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")
@@ -692,7 +684,7 @@ iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) =
let L = len(t)
for h in 0 .. high(t.data):
if isFilled(t.data[h].hcode):
if isFilledAndValid(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")
@@ -713,7 +705,7 @@ iterator keys*[A, B](t: Table[A, B]): A =
let L = len(t)
for h in 0 .. high(t.data):
if isFilled(t.data[h].hcode):
if isFilledAndValid(t.data[h].hcode):
yield t.data[h].key
assert(len(t) == L, "the length of the table changed while iterating over it")
@@ -734,7 +726,7 @@ iterator values*[A, B](t: Table[A, B]): B =
let L = len(t)
for h in 0 .. high(t.data):
if isFilled(t.data[h].hcode):
if isFilledAndValid(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
@@ -756,36 +748,53 @@ iterator mvalues*[A, B](t: var Table[A, B]): var B =
let L = len(t)
for h in 0 .. high(t.data):
if isFilled(t.data[h].hcode):
if isFilledAndValid(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
template hasKeyOrPutCache(cache, h): bool =
# Using an `IntSet` here would be worth considering to avoid quadratic
# behavior if a user misuses Table with lots of duplicate keys; but it adds
# overhead in the common case of few duplicates. However, with many
# duplicates all operations would be slow anyway, because the full
# `hash(key)` is identical for them, which makes `nextTry` follow the exact
# same path for each key, resulting in large collision clusters. Alternatives
# could involve modifying the hash/retrieval based on the duplicate key count.
var ret = false
for hi in cache:
if hi == h:
ret = true
break
if not ret: cache.add h
ret
iterator allValues*[A, B](t: Table[A, B]; key: A): B =
## Iterates over any value in the table ``t`` that belongs to the given ``key``.
##
## Used if you have a table with duplicate keys (as a result of using
## `add proc<#add,Table[A,B],A,B>`_).
##
## **Examples:**
##
## .. code-block::
## var a = {'a': 3, 'b': 5}.toTable
## for i in 1..3:
## a.add('z', 10*i)
## echo a # {'a': 3, 'b': 5, 'z': 10, 'z': 20, 'z': 30}
##
## for v in a.allValues('z'):
## echo v
## # 10
## # 20
## # 30
var h: Hash = genHash(key) and high(t.data)
runnableExamples:
import testutils
var a = {'a': 3, 'b': 5}.toTable
for i in 1..3: a.add('z', 10*i)
doAssert a.sortedPairs == @[('a', 3), ('b', 5), ('z', 10), ('z', 20), ('z', 30)]
doAssert sortedItems(a.allValues('z')) == @[10, 20, 30]
let hc = genHash(key)
var h: Hash = hc and high(t.data)
let L = len(t)
while isFilled(t.data[h].hcode):
if t.data[h].key == key:
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
h = nextTry(h, high(t.data))
var perturb = t.getPerturb(hc)
var num = 0
var cache: seq[Hash]
while isFilled(t.data[h].hcode): # `isFilledAndValid` would be incorrect, see test for `allValues`
if t.data[h].hcode == hc and t.data[h].key == key:
if not hasKeyOrPutCache(cache, h):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
h = nextTry(h, high(t.data), perturb)
@@ -1219,6 +1228,8 @@ type
## <#initOrderedTable,int>`_.
data: OrderedKeyValuePairSeq[A, B]
counter, first, last: int
countDeleted: int
OrderedTableRef*[A, B] = ref OrderedTable[A, B] ## Ref version of
## `OrderedTable<#OrderedTable>`_.
##
@@ -1240,7 +1251,7 @@ proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var Hash): int =
proc rawInsert[A, B](t: var OrderedTable[A, B],
data: var OrderedKeyValuePairSeq[A, B],
key: A, val: B, hc: Hash, h: Hash) =
rawInsertImpl()
rawInsertImpl(t)
data[h].next = -1
if t.first < 0: t.first = h
if t.last >= 0: data[t.last].next = h
@@ -1248,18 +1259,20 @@ proc rawInsert[A, B](t: var OrderedTable[A, B],
proc enlarge[A, B](t: var OrderedTable[A, B]) =
var n: OrderedKeyValuePairSeq[A, B]
newSeq(n, len(t.data) * growthFactor)
newSeq(n, t.counter.rightSize)
var h = t.first
t.first = -1
t.last = -1
swap(t.data, n)
t.countDeleted = 0
while h >= 0:
var nxt = n[h].next
let eh = n[h].hcode
if isFilled(eh):
if isFilledAndValid(eh):
var j: Hash = eh and maxHash(t)
var perturb = t.getPerturb(eh)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t))
j = nextTry(j, maxHash(t), perturb)
rawInsert(t, t.data, n[h].key, n[h].val, n[h].hcode, j)
h = nxt
@@ -2211,6 +2224,7 @@ type
## <#initCountTable,int>`_.
data: seq[tuple[key: A, val: int]]
counter: int
countDeleted: int
isSorted: bool
CountTableRef*[A] = ref CountTable[A] ## Ref version of
## `CountTable<#CountTable>`_.
@@ -2223,8 +2237,10 @@ type
proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]],
key: A, val: int) =
var h: Hash = hash(key) and high(data)
while data[h].val != 0: h = nextTry(h, high(data))
let hc = hash(key)
var perturb = t.getPerturb(hc)
var h: Hash = hc and high(data)
while data[h].val != 0: h = nextTry(h, high(data), perturb) # TODO: handle deletedMarker
data[h].key = key
data[h].val = val
@@ -2251,10 +2267,12 @@ proc remove[A](t: var CountTable[A], key: A) =
proc rawGet[A](t: CountTable[A], key: A): int =
if t.data.len == 0:
return -1
var h: Hash = hash(key) and high(t.data) # start with real hash value
while t.data[h].val != 0:
let hc = hash(key)
var perturb = t.getPerturb(hc)
var h: Hash = hc and high(t.data) # start with real hash value
while t.data[h].val != 0: # TODO: may need to handle t.data[h].hcode == deletedMarker?
if t.data[h].key == key: return h
h = nextTry(h, high(t.data))
h = nextTry(h, high(t.data), perturb)
result = -1 - h # < 0 => MISSING; insert idx = -1 - result
template ctget(t, key, default: untyped): untyped =


@@ -112,38 +112,14 @@ proc hash*[T: proc](x: T): Hash {.inline.} =
else:
result = hash(pointer(x))
const
prime = uint(11)
proc hash*(x: int): Hash {.inline.} =
proc hash*(x: int|int64|uint|uint64|char|Ordinal): Hash {.inline.} =
## Efficient hashing of integers.
result = cast[Hash](cast[uint](x) * prime)
proc hash*(x: int64): Hash {.inline.} =
## Efficient hashing of `int64` integers.
result = cast[Hash](cast[uint](x) * prime)
proc hash*(x: uint): Hash {.inline.} =
## Efficient hashing of unsigned integers.
result = cast[Hash](x * prime)
proc hash*(x: uint64): Hash {.inline.} =
## Efficient hashing of `uint64` integers.
result = cast[Hash](cast[uint](x) * prime)
proc hash*(x: char): Hash {.inline.} =
## Efficient hashing of characters.
result = cast[Hash](cast[uint](ord(x)) * prime)
proc hash*[T: Ordinal](x: T): Hash {.inline.} =
## Efficient hashing of other ordinal types (e.g. enums).
result = cast[Hash](cast[uint](ord(x)) * prime)
cast[Hash](ord(x))
proc hash*(x: float): Hash {.inline.} =
## Efficient hashing of floats.
var y = x + 1.0
result = cast[ptr Hash](addr(y))[]
var y = x + 0.0 # canonicalizes -0.0 to +0.0 so that equal floats hash equally
result = hash(cast[ptr Hash](addr(y))[])
# Forward declarations before methods that hash containers. This allows
# containers to contain other containers
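
Two observable consequences of this hunk, assuming `std/hashes` as of this commit: integers now hash to their own value (`cast[Hash](ord(x))`, the `prime` multiplication is gone; later Nim versions revisited integer hashing again), and `-0.0` and `+0.0`, which compare equal, now hash alike because `x + 0.0` canonicalizes the sign of zero before its bit pattern is hashed:

import hashes

let negZero = -0.0
doAssert negZero == 0.0               # IEEE 754: the two zeros compare equal
doAssert hash(negZero) == hash(0.0)   # -0.0 + 0.0 == +0.0, so same bits hashed
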

lib/pure/testutils.nim (new file, +15 lines)

@@ -0,0 +1,15 @@
## Utilities to help writing tests
##
## Unstable, experimental API.
import std/[sequtils, algorithm]
proc sortedPairs*[T](t: T): auto =
## helps when writing tests involving tables in a way that's robust to
## changes in hashing functions / table implementation.
toSeq(t.pairs).sorted
template sortedItems*(t: untyped): untyped =
## helps when writing tests involving tables in a way that's robust to
## changes in hashing functions / table implementation.
sorted(toSeq(t))
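
For example, assuming the module is on the import path (as in the test changes below):

import tables, testutils

var t = {"b": 2, "a": 1}.toTable
doAssert t.sortedPairs == @[("a", 1), ("b", 2)]
doAssert sortedItems(t.values) == @[1, 2]
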


@@ -1,7 +1,7 @@
discard """
cmd: "nim c --gc:arc $file"
nimout: '''(a: true, n: doAssert)
Table[system.string, trepr.MyType](data: @[], counter: 0)
Table[system.string, trepr.MyType](data: @[], counter: 0, countDeleted: 0)
nil
'''
"""


@@ -8,7 +8,12 @@ And we get here
'''
joinable: false
"""
import hashes, sequtils, tables, algorithm
import hashes, sequtils, tables, algorithm, testutils
block tableDollar:
# other tests should use `sortedPairs` to be robust to future table/hash
# implementation changes
doAssert ${1: 'a', 2: 'b'}.toTable in ["{1: 'a', 2: 'b'}", "{2: 'b', 1: 'a'}"]
# test should not be joined because it takes too long.
block tableadds:
@@ -188,6 +193,23 @@ block ttables2:
run1()
echo "2"
block allValues:
var t: Table[int, string]
var key = 0
let n = 1000
for i in 0..<n: t.add(i, $i)
const keys = [0, -1, 12]
for i in 0..1:
for key in keys:
t.add(key, $key & ":" & $i)
for i in 0..<n:
if i notin keys:
t.del(i)
doAssert t.sortedPairs == @[(-1, "-1:0"), (-1, "-1:1"), (0, "0"), (0, "0:0"), (0, "0:1"), (12, "12"), (12, "12:0"), (12, "12:1")]
doAssert sortedItems(t.allValues(0)) == @["0", "0:0", "0:1"]
doAssert sortedItems(t.allValues(-1)) == @["-1:0", "-1:1"]
doAssert sortedItems(t.allValues(12)) == @["12", "12:0", "12:1"]
doAssert sortedItems(t.allValues(1)) == @[]
block tablesref:
const
@@ -232,8 +254,8 @@ block tablesref:
for x in 0..1:
for y in 0..1:
assert t[(x,y)] == $x & $y
assert($t ==
"{(x: 1, y: 1): \"11\", (x: 0, y: 0): \"00\", (x: 0, y: 1): \"01\", (x: 1, y: 0): \"10\"}")
assert t.sortedPairs ==
@[((x: 0, y: 0), "00"), ((x: 0, y: 1), "01"), ((x: 1, y: 0), "10"), ((x: 1, y: 1), "11")]
block tableTest2:
var t = newTable[string, float]()
@@ -340,7 +362,7 @@ block tablesref:
block anonZipTest:
let keys = @['a','b','c']
let values = @[1, 2, 3]
doAssert "{'c': 3, 'a': 1, 'b': 2}" == $ toTable zip(keys, values)
doAssert zip(keys, values).toTable.sortedPairs == @[('a', 1), ('b', 2), ('c', 3)]
block clearTableTest:
var t = newTable[string, float]()
@@ -369,3 +391,4 @@ block tablesref:
orderedTableSortTest()
echo "3"


@@ -3,7 +3,7 @@ discard """
output: '''true'''
"""
import hashes, tables, sharedtables
import hashes, tables, sharedtables, testutils
const
data = {
@@ -47,8 +47,7 @@ block tableTest1:
for x in 0..1:
for y in 0..1:
assert t[(x,y)] == $x & $y
assert($t ==
"{(x: 1, y: 1): \"11\", (x: 0, y: 0): \"00\", (x: 0, y: 1): \"01\", (x: 1, y: 0): \"10\"}")
assert t.sortedPairs == @[((x: 0, y: 0), "00"), ((x: 0, y: 1), "01"), ((x: 1, y: 0), "10"), ((x: 1, y: 1), "11")]
block tableTest2:
var t = initTable[string, float]()