Mirror of https://github.com/nim-lang/Nim.git (synced 2026-04-19 22:10:33 +00:00)
Unwind just the "pseudorandom probing" part of recent sets,tables changes (#13816)
* Unwind just the "pseudorandom probing" (whole hash-code-keyed variable stride double hashing) part of the recent sets & tables changes. That part has still been causing bugs over a month later (e.g., two days ago: https://github.com/nim-lang/Nim/issues/13794) and still carries several "figure this out" implementation-question comments (see the diffs of this PR). The topic has been discussed in many places:
  https://github.com/nim-lang/Nim/issues/13393
  https://github.com/nim-lang/Nim/pull/13418
  https://github.com/nim-lang/Nim/pull/13440
  https://github.com/nim-lang/Nim/issues/13794
  Alternative, non-mandatory stronger integer hashes (or, vice versa, opt-in identity hashes) are a better solution: they are more general (no illusion of one hard-coded probe sequence solving all problems) while retaining the virtues of linear probing such as cache obliviousness and age-less tables under delete-heavy workloads (still untested after a month of this change). The only real solution for truly adversarial keys is a hash keyed off of data unobservable to attackers. That all fits better with a few families of user-pluggable/define-switchable hashes, which can be provided in a separate PR more about `hashes.nim`.
  This PR carefully preserves the better (but still hard-coded!) probing of `intsets` and other recent fixes such as `move` annotations, hash-order-invariant tests, the `intsets.missingOrExcl` fix, and the move of `rightSize` into `hashcommon.nim`.
* Fix the `data.len` -> `dataLen` problem.
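For orientation before the diff, here is a minimal, self-contained Nim sketch of the two probe sequences involved. The proc names `nextTryLinear` and `nextTryPerturbed` are invented for this illustration only; the two formulas themselves are the ones visible in the `nextTry` procs in the diff below (the perturbed variant is what this PR removes, the linear one is what it restores):

```nim
import std/hashes   # Hash is an alias for int

# Linear probing (restored by this PR): advance one slot and wrap with a mask
# (table sizes are powers of two, so `maxHash == len - 1`).
proc nextTryLinear(h, maxHash: Hash): Hash {.inline.} =
  (h + 1) and maxHash

# CPython-style "pseudorandom" probing (removed by this PR): the stride is
# driven by the not-yet-consumed high bits of the hash code via `perturb`.
proc nextTryPerturbed(h, maxHash: Hash, perturb: var uint): Hash {.inline.} =
  const PERTURB_SHIFT = 5
  result = cast[Hash]((5 * cast[uint](h) + 1 + perturb) and cast[uint](maxHash))
  perturb = perturb shr PERTURB_SHIFT

when isMainModule:
  const maxHash = 15                     # a 16-slot table
  var perturb = cast[uint](hash("key"))  # high bits of the hash drive the stride
  var h = hash("key") and maxHash        # initial slot for this key
  for _ in 1 .. 4:
    echo "linear -> ", nextTryLinear(h, maxHash)
    echo "perturbed -> ", nextTryPerturbed(h, maxHash, perturb)
    h = nextTryLinear(h, maxHash)
```

Consecutive probes of the linear variant stay in adjacent slots (good cache locality), whereas the perturbed variant scatters probes across the table; that is the trade-off discussed above.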
@@ -18,87 +18,43 @@ when not defined(nimHasDefault):
var v: T
v

const freeMarker = 0
const deletedMarker = -1

type UHash = uint

# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These
# two procs retain clarity of that encoding without the space cost of an enum.
proc isFilledAndValid(hcode: Hash): bool {.inline.} =
result = hcode != 0 and hcode != deletedMarker
# performance: we could use bit magic if needed
proc isEmpty(hcode: Hash): bool {.inline.} =
result = hcode == 0

proc isFilled(hcode: Hash): bool {.inline.} =
result = hcode != 0


proc translateBits(a: UHash, numBitsMask: int): UHash {.inline.} =
result = (a shr numBitsMask) or (a shl (UHash.sizeof * 8 - numBitsMask))

proc nextTry(h, maxHash: Hash, perturb: var UHash): Hash {.inline.} =
# FACTOR between hashcommon.nextTry, intsets.nextTry
# an optimization would be to use `(h + 1) and maxHash` for a few iterations
# and then switch to the formula below, to get "best of both worlds": good
# cache locality, except when a collision cluster is detected (ie, large number
# of iterations).
const PERTURB_SHIFT = 5 # consider tying this to `numBitsMask = fastLog2(t.dataLen)`
result = cast[Hash]((5*cast[uint](h) + 1 + perturb) and cast[uint](maxHash))
perturb = perturb shr PERTURB_SHIFT
proc nextTry(h, maxHash: Hash): Hash {.inline.} =
result = (h + 1) and maxHash

proc mustRehash[T](t: T): bool {.inline.} =
# FACTOR between hashcommon.mustRehash, intsets.mustRehash
let counter2 = t.counter + t.countDeleted
let length = t.dataLen
assert(length > counter2)
result = (length * 2 < counter2 * 3) or (length - counter2 < 4) # synchronize with `rightSize`
assert(t.dataLen > t.counter)
result = (t.dataLen * 2 < t.counter * 3) or (t.dataLen - t.counter < 4)

proc rightSize*(count: Natural): int {.inline.} =
## Return the value of `initialSize` to support `count` items.
## Return the value of ``initialSize`` to support ``count`` items.
##
## If more items are expected to be added, simply add that
## expected extra amount to the parameter before calling this.
##
## Internally, we want `mustRehash(t) == false` for t that was just resized.
#
# Make sure to synchronize with `mustRehash`
result = nextPowerOfTwo(count * 3 div 2 + 4)
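As a side note (not part of the patch), the "synchronize with `mustRehash`" comment above can be checked with a small standalone sketch. It copies the two formulas from this hunk; the free-standing `mustRehash(length, counter)` signature is adapted for the sketch and is not the library's generic `mustRehash[T](t: T)`:

```nim
import std/math   # nextPowerOfTwo

proc rightSize(count: Natural): int =
  # same formula as in the hunk above
  nextPowerOfTwo(count * 3 div 2 + 4)

proc mustRehash(length, counter: int): bool =
  # post-PR formula, with the (length, counter) pair passed in directly
  assert length > counter
  (length * 2 < counter * 3) or (length - counter < 4)

when isMainModule:
  # a table just resized to rightSize(count) slots and holding count items
  # should never immediately demand another rehash
  for count in 0 .. 100_000:
    doAssert not mustRehash(rightSize(count), count)
  echo "rightSize and mustRehash agree for 0 .. 100_000 items"
```

The check passes because `rightSize` always leaves roughly a third of the slots (and at least 4 of them) free, which is exactly what `mustRehash` tests for.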
template getPerturb(t: typed, hc: Hash): UHash =
# we can't use `fastLog2(dataLen(t))` because importing `bitops` would cause codegen errors
# so we use a practical value of half the bit width (eg 64 / 2 = 32 on 64bit machines)
let numBitsMask = sizeof(Hash) * 4 # ie, sizeof(Hash) * 8 / 2
# this makes a major difference for cases like #13393; it causes the bits
# that were masked out in 1st position so they'll be masked in instead, and
# influence the recursion in nextTry earlier rather than later.
translateBits(cast[uint](hc), numBitsMask)

template rawGetKnownHCImpl() {.dirty.} =
if t.dataLen == 0:
return -1
var h: Hash = hc and maxHash(t) # start with real hash value
var perturb = t.getPerturb(hc)
var deletedIndex = -1
while true:
if isFilledAndValid(t.data[h].hcode):
# Compare hc THEN key with boolean short circuit. This makes the common case
# zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
# It does slow down succeeding lookups by one extra Hash cmp&and..usually
# just a few clock cycles, generally worth it for any non-integer-like A.
# performance: we optimize this: depending on type(key), skip hc comparison
if t.data[h].hcode == hc and t.data[h].key == key:
return h
h = nextTry(h, maxHash(t), perturb)
elif t.data[h].hcode == deletedMarker:
if deletedIndex == -1:
deletedIndex = h
h = nextTry(h, maxHash(t), perturb)
else:
break
if deletedIndex == -1:
result = -1 - h # < 0 => MISSING; insert idx = -1 - result
else:
# we prefer returning a (in fact the 1st found) deleted index
result = -1 - deletedIndex
while isFilled(t.data[h].hcode):
# Compare hc THEN key with boolean short circuit. This makes the common case
# zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
# It does slow down succeeding lookups by one extra Hash cmp&and..usually
# just a few clock cycles, generally worth it for any non-integer-like A.
if t.data[h].hcode == hc and t.data[h].key == key:
return h
h = nextTry(h, maxHash(t))
result = -1 - h # < 0 => MISSING; insert idx = -1 - result

proc rawGetKnownHC[X, A](t: X, key: A, hc: Hash): int {.inline.} =
rawGetKnownHCImpl()
@@ -107,8 +63,6 @@ template genHashImpl(key, hc: typed) =
hc = hash(key)
if hc == 0: # This almost never taken branch should be very predictable.
hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine.
elif hc == deletedMarker:
hc = 214159261

template genHash(key: typed): Hash =
var res: Hash
@@ -46,20 +46,16 @@ type
IntSet* = object ## An efficient set of `int` implemented as a sparse bit set.
elems: int # only valid for small numbers
counter, max: int
countDeleted: int
head: PTrunk
data: TrunkSeq
a: array[0..33, int] # profiling shows that 34 elements are enough

proc mustRehash[T](t: T): bool {.inline.} =
# FACTOR between hashcommon.mustRehash, intsets.mustRehash
let counter2 = t.counter + t.countDeleted
let length = t.max + 1
assert length > counter2
result = (length * 2 < counter2 * 3) or (length - counter2 < 4)
assert length > t.counter
result = (length * 2 < t.counter * 3) or (length - t.counter < 4)

proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} =
# FACTOR between hashcommon.nextTry, intsets.nextTry
const PERTURB_SHIFT = 5
var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT
perturb = cast[Hash](perturb2)
@@ -35,11 +35,10 @@ proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A,

proc enlarge[A](s: var HashSet[A]) =
var n: KeyValuePairSeq[A]
newSeq(n, s.counter.rightSize)
newSeq(n, len(s.data) * growthFactor)
swap(s.data, n) # n is now old seq
s.countDeleted = 0
for i in countup(0, high(n)):
if isFilledAndValid(n[i].hcode):
if isFilled(n[i].hcode):
var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode)
rawInsert(s, s.data, n[i].key, n[i].hcode, j)

@@ -69,6 +68,11 @@ template containsOrInclImpl() {.dirty.} =
rawInsert(s, s.data, key, hc, -1 - index)
inc(s.counter)

template doWhile(a, b) =
while true:
b
if not a: break

proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
var hc: Hash
var i = rawGet(s, key, hc)
@@ -78,9 +82,17 @@ proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
if i >= 0:
result = false
dec(s.counter)
inc(s.countDeleted)
s.data[i].hcode = deletedMarker
s.data[i].key = default(type(s.data[i].key))
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
var j = i # The correctness of this depends on (h+1) in nextTry,
var r = j # though may be adaptable to other simple sequences.
s.data[i].hcode = 0 # mark current EMPTY
s.data[i].key = default(type(s.data[i].key))
doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
i = (i + 1) and msk # increment mod table size
if isEmpty(s.data[i].hcode): # end of collision cluster; So all done
return
r = s.data[i].hcode and msk # "home" location of key@i
s.data[j] = move(s.data[i]) # data[i] will be marked EMPTY next loop

template dollarImpl() {.dirty.} =
result = "{"
@@ -113,7 +125,7 @@ proc enlarge[A](s: var OrderedSet[A]) =
swap(s.data, n)
while h >= 0:
var nxt = n[h].next
if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used
if isFilled(n[h].hcode):
var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode)
rawInsert(s, s.data, n[h].key, n[h].hcode, j)
h = nxt
@@ -131,7 +143,7 @@ proc exclImpl[A](s: var OrderedSet[A], key: A): bool {.inline.} =
result = true
while h >= 0:
var nxt = n[h].next
if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used
if isFilled(n[h].hcode):
if n[h].hcode == hc and n[h].key == key:
dec s.counter
result = false

@@ -68,7 +68,6 @@ type
## before calling other procs on it.
data: KeyValuePairSeq[A]
counter: int
countDeleted: int

type
OrderedKeyValuePair[A] = tuple[
@@ -81,7 +80,6 @@ type
## <#initOrderedSet,int>`_ before calling other procs on it.
data: OrderedKeyValuePairSeq[A]
counter, first, last: int
countDeleted: int

const
defaultInitialSize* = 64
@@ -249,7 +247,7 @@ iterator items*[A](s: HashSet[A]): A =
## echo b
## # --> {(a: 1, b: 3), (a: 0, b: 4)}
for h in 0 .. high(s.data):
if isFilledAndValid(s.data[h].hcode): yield s.data[h].key
if isFilled(s.data[h].hcode): yield s.data[h].key

proc containsOrIncl*[A](s: var HashSet[A], key: A): bool =
## Includes `key` in the set `s` and tells if `key` was already in `s`.
@@ -341,7 +339,7 @@ proc pop*[A](s: var HashSet[A]): A =
doAssertRaises(KeyError, echo s.pop)

for h in 0 .. high(s.data):
if isFilledAndValid(s.data[h].hcode):
if isFilled(s.data[h].hcode):
result = s.data[h].key
excl(s, result)
return result
@@ -574,8 +572,7 @@ proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] =
proc hash*[A](s: HashSet[A]): Hash =
## Hashing of HashSet.
for h in 0 .. high(s.data):
if isFilledAndValid(s.data[h].hcode):
result = result xor s.data[h].hcode
result = result xor s.data[h].hcode
result = !$result

proc `$`*[A](s: HashSet[A]): string =
@@ -593,6 +590,7 @@ proc `$`*[A](s: HashSet[A]): string =
## # --> {no, esc'aping, is " provided}
dollarImpl()


proc initSet*[A](initialSize = defaultInitialSize): HashSet[A] {.deprecated:
"Deprecated since v0.20, use 'initHashSet'".} = initHashSet[A](initialSize)

@@ -624,7 +622,7 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
var idx = 0
while h >= 0:
var nxt = s.data[h].next
if isFilledAndValid(s.data[h].hcode):
if isFilled(s.data[h].hcode):
yieldStmt
inc(idx)
h = nxt
@@ -858,7 +856,7 @@ proc `==`*[A](s, t: OrderedSet[A]): bool =
while h >= 0 and g >= 0:
var nxh = s.data[h].next
var nxg = t.data[g].next
if isFilledAndValid(s.data[h].hcode) and isFilledAndValid(t.data[g].hcode):
if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode):
if s.data[h].key == t.data[g].key:
inc compared
else:
@@ -25,7 +25,6 @@ type
SharedTable*[A, B] = object ## generic hash SharedTable
data: KeyValuePairSeq[A, B]
counter, dataLen: int
countDeleted: int
lock: Lock

template maxHash(t): untyped = t.dataLen-1
@@ -50,10 +49,9 @@ proc enlarge[A, B](t: var SharedTable[A, B]) =
for i in 0..<oldSize:
let eh = n[i].hcode
if isFilled(eh):
var perturb = t.getPerturb(eh)
var j: Hash = eh and maxHash(t)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t), perturb)
j = nextTry(j, maxHash(t))
rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
deallocShared(n)
@@ -14,20 +14,13 @@ include hashcommon
template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add
genHashImpl(key, hc)
var h: Hash = hc and maxHash(t)
var perturb = t.getPerturb(hc)
while true:
let hcode = t.data[h].hcode
if hcode == deletedMarker or hcode == freeMarker:
break
else:
h = nextTry(h, maxHash(t), perturb)
while isFilled(t.data[h].hcode):
h = nextTry(h, maxHash(t))
result = h

template rawInsertImpl(t) {.dirty.} =
template rawInsertImpl() {.dirty.} =
data[h].key = key
data[h].val = val
if data[h].hcode == deletedMarker:
t.countDeleted.dec
data[h].hcode = hc

proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} =
@@ -35,7 +28,7 @@ proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} =

proc rawInsert[X, A, B](t: var X, data: var KeyValuePairSeq[A, B],
key: A, val: B, hc: Hash, h: Hash) =
rawInsertImpl(t)
rawInsertImpl()

template checkIfInitialized() =
when compiles(defaultInitialSize):
@@ -51,6 +44,7 @@ template addImpl(enlarge) {.dirty.} =
inc(t.counter)

template maybeRehashPutImpl(enlarge) {.dirty.} =
checkIfInitialized()
if mustRehash(t):
enlarge(t)
index = rawGetKnownHC(t, key, hc)
@@ -88,11 +82,24 @@ template delImplIdx(t, i) =
let msk = maxHash(t)
if i >= 0:
dec(t.counter)
inc(t.countDeleted)
t.data[i].hcode = deletedMarker
t.data[i].key = default(type(t.data[i].key))
t.data[i].val = default(type(t.data[i].val))
# mustRehash + enlarge not needed because counter+countDeleted doesn't change
block outer:
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
var j = i # The correctness of this depends on (h+1) in nextTry,
var r = j # though may be adaptable to other simple sequences.
t.data[i].hcode = 0 # mark current EMPTY
t.data[i].key = default(type(t.data[i].key))
t.data[i].val = default(type(t.data[i].val))
while true:
i = (i + 1) and msk # increment mod table size
if isEmpty(t.data[i].hcode): # end of collision cluster; So all done
break outer
r = t.data[i].hcode and msk # "home" location of key@i
if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
break
when defined(js):
t.data[j] = t.data[i]
else:
t.data[j] = move(t.data[i]) # data[j] will be marked EMPTY next loop

template delImpl() {.dirty.} =
var hc: Hash
@@ -108,7 +115,6 @@ template clearImpl() {.dirty.} =
t.counter = 0

template ctAnd(a, b): bool =
# pending https://github.com/nim-lang/Nim/issues/13502
when a:
when b: true
else: false
@@ -126,7 +132,7 @@ template initImpl(result: typed, size: int) =
result.last = -1

template insertImpl() = # for CountTable
checkIfInitialized()
if t.dataLen == 0: initImpl(t, defaultInitialSize)
if mustRehash(t): enlarge(t)
ctRawInsert(t, t.data, key, val)
inc(t.counter)
@@ -233,7 +233,6 @@ type
## For creating an empty Table, use `initTable proc<#initTable,int>`_.
data: KeyValuePairSeq[A, B]
counter: int
countDeleted: int
TableRef*[A, B] = ref Table[A, B] ## Ref version of `Table<#Table>`_.
##
## For creating a new empty TableRef, use `newTable proc
@@ -266,16 +265,14 @@ template get(t, key): untyped =

proc enlarge[A, B](t: var Table[A, B]) =
var n: KeyValuePairSeq[A, B]
newSeq(n, t.counter.rightSize)
newSeq(n, len(t.data) * growthFactor)
swap(t.data, n)
t.countDeleted = 0
for i in countup(0, high(n)):
let eh = n[i].hcode
if isFilledAndValid(eh):
if isFilled(eh):
var j: Hash = eh and maxHash(t)
var perturb = t.getPerturb(eh)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t), perturb)
j = nextTry(j, maxHash(t))
when defined(js):
rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
else:
@@ -662,7 +659,7 @@ iterator pairs*[A, B](t: Table[A, B]): (A, B) =
## # value: [1, 5, 7, 9]
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -684,7 +681,7 @@ iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -705,7 +702,7 @@ iterator keys*[A, B](t: Table[A, B]): A =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].key
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -726,7 +723,7 @@ iterator values*[A, B](t: Table[A, B]): B =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -748,27 +745,10 @@ iterator mvalues*[A, B](t: var Table[A, B]): var B =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")

template hasKeyOrPutCache(cache, h): bool =
# using `IntSet` would be an option worth considering to avoid quadratic
# behavior in case user misuses Table with lots of duplicate keys; but it
# has overhead in the common case of small number of duplicates.
# However: when lots of duplicates are used, all operations would be slow
# anyway because the full `hash(key)` is identical for these, which makes
# `nextTry` follow the exact same path for each key, resulting in large
# collision clusters. Alternatives could involve modifying the hash/retrieval
# based on duplicate key count.
var ret = false
for hi in cache:
if hi == h:
ret = true
break
if not ret: cache.add h
ret

iterator allValues*[A, B](t: Table[A, B]; key: A): B =
## Iterates over any value in the table ``t`` that belongs to the given ``key``.
##
@@ -782,20 +762,13 @@ iterator allValues*[A, B](t: Table[A, B]; key: A): B =
for i in 1..3: a.add('z', 10*i)
doAssert toSeq(a.pairs).sorted == @[('a', 3), ('b', 5), ('z', 10), ('z', 20), ('z', 30)]
doAssert sorted(toSeq(a.allValues('z'))) == @[10, 20, 30]

let hc = genHash(key)
var h: Hash = hc and high(t.data)
var h: Hash = genHash(key) and high(t.data)
let L = len(t)
var perturb = t.getPerturb(hc)

var num = 0
var cache: seq[Hash]
while isFilled(t.data[h].hcode): # `isFilledAndValid` would be incorrect, see test for `allValues`
if t.data[h].hcode == hc and t.data[h].key == key:
if not hasKeyOrPutCache(cache, h):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
h = nextTry(h, high(t.data), perturb)
while isFilled(t.data[h].hcode):
if t.data[h].key == key:
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
h = nextTry(h, high(t.data))
@@ -1118,7 +1091,7 @@ iterator pairs*[A, B](t: TableRef[A, B]): (A, B) =
## # value: [1, 5, 7, 9]
let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -1140,7 +1113,7 @@ iterator mpairs*[A, B](t: TableRef[A, B]): (A, var B) =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield (t.data[h].key, t.data[h].val)
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -1161,7 +1134,7 @@ iterator keys*[A, B](t: TableRef[A, B]): A =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].key
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -1182,7 +1155,7 @@ iterator values*[A, B](t: TableRef[A, B]): B =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -1203,7 +1176,7 @@ iterator mvalues*[A, B](t: TableRef[A, B]): var B =

let L = len(t)
for h in 0 .. high(t.data):
if isFilledAndValid(t.data[h].hcode):
if isFilled(t.data[h].hcode):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")

@@ -1229,8 +1202,6 @@ type
## <#initOrderedTable,int>`_.
data: OrderedKeyValuePairSeq[A, B]
counter, first, last: int
countDeleted: int

OrderedTableRef*[A, B] = ref OrderedTable[A, B] ## Ref version of
## `OrderedTable<#OrderedTable>`_.
##
@@ -1252,7 +1223,7 @@ proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var Hash): int =
proc rawInsert[A, B](t: var OrderedTable[A, B],
data: var OrderedKeyValuePairSeq[A, B],
key: A, val: B, hc: Hash, h: Hash) =
rawInsertImpl(t)
rawInsertImpl()
data[h].next = -1
if t.first < 0: t.first = h
if t.last >= 0: data[t.last].next = h
@@ -1260,20 +1231,18 @@ proc rawInsert[A, B](t: var OrderedTable[A, B],

proc enlarge[A, B](t: var OrderedTable[A, B]) =
var n: OrderedKeyValuePairSeq[A, B]
newSeq(n, t.counter.rightSize)
newSeq(n, len(t.data) * growthFactor)
var h = t.first
t.first = -1
t.last = -1
swap(t.data, n)
t.countDeleted = 0
while h >= 0:
var nxt = n[h].next
let eh = n[h].hcode
if isFilledAndValid(eh):
if isFilled(eh):
var j: Hash = eh and maxHash(t)
var perturb = t.getPerturb(eh)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t), perturb)
j = nextTry(j, maxHash(t))
rawInsert(t, t.data, move n[h].key, move n[h].val, n[h].hcode, j)
h = nxt

@@ -1282,10 +1251,6 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
var h = t.first
while h >= 0:
var nxt = t.data[h].next
# For OrderedTable/OrderedTableRef, isFilled is ok because `del` is O(n)
# and doesn't create tombsones, but if it does start using tombstones,
# carefully replace `isFilled` by `isFilledAndValid` as appropriate for these
# table types only, ditto with `OrderedSet`.
if isFilled(t.data[h].hcode):
yieldStmt
h = nxt
@@ -2229,7 +2194,6 @@ type
## <#initCountTable,int>`_.
data: seq[tuple[key: A, val: int]]
counter: int
countDeleted: int
isSorted: bool
CountTableRef*[A] = ref CountTable[A] ## Ref version of
## `CountTable<#CountTable>`_.
@@ -2242,10 +2206,8 @@ type

proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]],
key: A, val: int) =
let hc = hash(key)
var perturb = t.getPerturb(hc)
var h: Hash = hc and high(data)
while data[h].val != 0: h = nextTry(h, high(data), perturb) # TODO: handle deletedMarker
var h: Hash = hash(key) and high(data)
while data[h].val != 0: h = nextTry(h, high(data))
data[h].key = key
data[h].val = val

@@ -2272,12 +2234,10 @@ proc remove[A](t: var CountTable[A], key: A) =
proc rawGet[A](t: CountTable[A], key: A): int =
if t.data.len == 0:
return -1
let hc = hash(key)
var perturb = t.getPerturb(hc)
var h: Hash = hc and high(t.data) # start with real hash value
while t.data[h].val != 0: # TODO: may need to handle t.data[h].hcode == deletedMarker?
var h: Hash = hash(key) and high(t.data) # start with real hash value
while t.data[h].val != 0:
if t.data[h].key == key: return h
h = nextTry(h, high(t.data), perturb)
h = nextTry(h, high(t.data))
result = -1 - h # < 0 => MISSING; insert idx = -1 - result

template ctget(t, key, default: untyped): untyped =
@@ -1,7 +1,7 @@
discard """
cmd: "nim c --gc:arc $file"
nimout: '''(a: true, n: doAssert)
Table[system.string, trepr.MyType](data: @[], counter: 0, countDeleted: 0)
Table[system.string, trepr.MyType](data: @[], counter: 0)
nil
'''
"""