Nim/lib/pure/collections/sets.nim

#
#
#            Nim's Runtime Library
#        (c) Copyright 2012 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## The ``sets`` module implements an efficient `hash set`:idx: and
## ordered hash set.
##
## Hash sets are different from the `built in set type
## <manual.html#set-type>`_. Sets allow you to store any value that can be
## `hashed <hashes.html>`_ and they don't contain duplicate entries.
##
## **Note**: The data types declared here have *value semantics*: This means
## that ``=`` performs a copy of the set.

import
  hashes, math

{.pragma: myShallow.}
when not defined(nimhygiene):
  {.pragma: dirty.}

# For "integer-like A" that are too big for intsets/bit-vectors to be practical,
# it would be best to shrink hcode to the same size as the integer.  Larger
# codes should never be needed, and this can pack more entries per cache-line.
# Losing hcode entirely is also possible - if some element value is forbidden.
type
  KeyValuePair[A] = tuple[hcode: Hash, key: A]
  KeyValuePairSeq[A] = seq[KeyValuePair[A]]
  HashSet* {.myShallow.}[A] = object ## \
    ## A generic hash set.
    ##
    ## Use `init() <#init,HashSet[A],int>`_ or `initSet[type]() <#initSet>`_
    ## before calling other procs on it.
    data: KeyValuePairSeq[A]
    counter: int

{.deprecated: [TSet: HashSet].}

template default[T](t: typedesc[T]): T =
  ## Used by clear methods to get a default value.
  var v: T
  v

proc clear*[A](s: var HashSet[A]) =
  ## Clears the HashSet back to an empty state, without shrinking
  ## any of the existing storage. O(n) where n is the size of the hash bucket.
  s.counter = 0
  for i in 0..<s.data.len:
    s.data[i].hcode = 0
    s.data[i].key   = default(type(s.data[i].key))

# hcode for real keys cannot be zero.  hcode==0 signifies an empty slot.  These
# two procs retain clarity of that encoding without the space cost of an enum.
proc isEmpty(hcode: Hash): bool {.inline.} =
  result = hcode == 0

proc isFilled(hcode: Hash): bool {.inline.} =
  result = hcode != 0

proc isValid*[A](s: HashSet[A]): bool =
  ## Returns `true` if the set has been initialized with `initSet <#initSet>`_.
  ##
  ## Most operations over an uninitialized set will crash at runtime and
  ## `assert <system.html#assert>`_ in debug builds. You can use this proc in
  ## your own procs to verify that sets passed to your procs are correctly
  ## initialized. Example:
  ##
  ## .. code-block ::
  ##   proc savePreferences(options: HashSet[string]) =
  ##     assert options.isValid, "Pass an initialized set!"
  ##     # Do stuff here, may crash in release builds!
  result = not s.data.isNil

proc len*[A](s: HashSet[A]): int =
  ## Returns the number of keys in `s`.
  ##
  ## Due to an implementation detail you can call this proc on variables which
  ## have not been initialized yet. The proc will return zero as the length
  ## then. Example:
  ##
  ## .. code-block::
  ##
  ##   var values: HashSet[int]
  ##   assert(not values.isValid)
  ##   assert values.len == 0
  result = s.counter

proc card*[A](s: HashSet[A]): int =
  ## Alias for `len() <#len,TSet[A]>`_.
  ##
  ## Card stands for the `cardinality
  ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set.
  result = s.counter

iterator items*[A](s: HashSet[A]): A =
  ## Iterates over keys in the set `s`.
  ##
  ## If you need a sequence with the keys you can use `sequtils.toSeq()
  ## <sequtils.html#toSeq>`_ on the iterator. Usage example:
  ##
  ## .. code-block::
  ##   type
  ##     pair = tuple[a, b: int]
  ##   var
  ##     a, b = initSet[pair]()
  ##   a.incl((2, 3))
  ##   a.incl((3, 2))
  ##   a.incl((2, 3))
  ##   for x, y in a.items:
  ##     b.incl((x - 2, y + 1))
  ##   assert a.len == 2
  ##   echo b
  ##   # --> {(a: 1, b: 3), (a: 0, b: 4)}
  assert s.isValid, "The set needs to be initialized."
  for h in 0..high(s.data):
    if isFilled(s.data[h].hcode): yield s.data[h].key

const
  growthFactor = 2

proc mustRehash(length, counter: int): bool {.inline.} =
  assert(length > counter)
  result = (length * 2 < counter * 3) or (length - counter < 4)

proc rightSize*(count: Natural): int {.inline.} =
  ## Return the value of `initialSize` to support `count` items.
  ##
  ## If more items are expected to be added, simply add that
  ## expected extra amount to the parameter before calling this.
  ##
  ## Internally, we want mustRehash(rightSize(x), x) == false.
  result = nextPowerOfTwo(count * 3 div 2  +  4)

proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  result = (h + 1) and maxHash

template rawGetKnownHCImpl() {.dirty.} =
  var h: Hash = hc and high(s.data)  # start with real hash value
  while isFilled(s.data[h].hcode):
    # Compare hc THEN key with boolean short circuit. This makes the common case
    # zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
    # It does slow down succeeding lookups by one extra Hash cmp&and..usually
    # just a few clock cycles, generally worth it for any non-integer-like A.
    if s.data[h].hcode == hc and s.data[h].key == key:  # compare hc THEN key
      return h
    h = nextTry(h, high(s.data))
  result = -1 - h                   # < 0 => MISSING; insert idx = -1 - result

template rawGetImpl() {.dirty.} =
  hc = hash(key)
  if hc == 0:       # This almost never taken branch should be very predictable.
    hc = 314159265  # Value doesn't matter; Any non-zero favorite is fine.
  rawGetKnownHCImpl()

template rawInsertImpl() {.dirty.} =
  data[h].key = key
  data[h].hcode = hc

proc rawGetKnownHC[A](s: HashSet[A], key: A, hc: Hash): int {.inline.} =
  rawGetKnownHCImpl()

proc rawGet[A](s: HashSet[A], key: A, hc: var Hash): int {.inline.} =
  rawGetImpl()

proc `[]`*[A](s: var HashSet[A], key: A): var A =
  ## returns the element that is actually stored in 's' which has the same
  ## value as 'key' or raises the ``KeyError`` exception. This is useful
  ## when one overloaded 'hash' and '==' but still needs reference semantics
  ## for sharing.
  assert s.isValid, "The set needs to be initialized."
  var hc: Hash
  var index = rawGet(s, key, hc)
  if index >= 0: result = s.data[index].key
  else:
    when compiles($key):
      raise newException(KeyError, "key not found: " & $key)
    else:
      raise newException(KeyError, "key not found")

proc mget*[A](s: var HashSet[A], key: A): var A {.deprecated.} =
  ## returns the element that is actually stored in 's' which has the same
  ## value as 'key' or raises the ``KeyError`` exception. This is useful
  ## when one overloaded 'hash' and '==' but still needs reference semantics
  ## for sharing. Use ```[]``` instead.
  s[key]

proc contains*[A](s: HashSet[A], key: A): bool =
  ## Returns true iff `key` is in `s`.
  ##
  ## Example:
  ##
  ## .. code-block::
  ##   var values = initSet[int]()
  ##   assert(not values.contains(2))
  ##   values.incl(2)
  ##   assert values.contains(2)
  ##   values.excl(2)
  ##   assert(not values.contains(2))
  assert s.isValid, "The set needs to be initialized."
  var hc: Hash
  var index = rawGet(s, key, hc)
  result = index >= 0

proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A,
                  hc: Hash, h: Hash) =
  rawInsertImpl()

proc enlarge[A](s: var HashSet[A]) =
  var n: KeyValuePairSeq[A]
  newSeq(n, len(s.data) * growthFactor)
  swap(s.data, n)                   # n is now old seq
  for i in countup(0, high(n)):
    if isFilled(n[i].hcode):
      var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode)
      rawInsert(s, s.data, n[i].key, n[i].hcode, j)

template inclImpl() {.dirty.} =
  var hc: Hash
  var index = rawGet(s, key, hc)
  if index < 0:
    if mustRehash(len(s.data), s.counter):
      enlarge(s)
      index = rawGetKnownHC(s, key, hc)
    rawInsert(s, s.data, key, hc, -1 - index)
    inc(s.counter)

template containsOrInclImpl() {.dirty.} =
  var hc: Hash
  var index = rawGet(s, key, hc)
  if index >= 0:
    result = true
  else:
    if mustRehash(len(s.data), s.counter):
      enlarge(s)
      index = rawGetKnownHC(s, key, hc)
    rawInsert(s, s.data, key, hc, -1 - index)
    inc(s.counter)

proc incl*[A](s: var HashSet[A], key: A) =
  ## Includes an element `key` in `s`.
  ##
  ## This doesn't do anything if `key` is already in `s`. Example:
  ##
  ## .. code-block::
  ##   var values = initSet[int]()
  ##   values.incl(2)
  ##   values.incl(2)
  ##   assert values.len == 1
  assert s.isValid, "The set needs to be initialized."
  inclImpl()

proc incl*[A](s: var HashSet[A], other: HashSet[A]) =
  ## Includes all elements from `other` into `s`.
  ##
  ## Example:
  ##
  ## .. code-block::
  ##   var values = initSet[int]()
  ##   values.incl(2)
  ##   var others = toSet([6, 7])
  ##   values.incl(others)
  ##   assert values.len == 3
  assert s.isValid, "The set `s` needs to be initialized."
  assert other.isValid, "The set `other` needs to be initialized."
  for item in other: incl(s, item)

template doWhile(a, b) =
  while true:
    b
    if not a: break

template default[T](t: typedesc[T]): T =
  var v: T
  v

proc exclImpl[A](s: var HashSet[A], key: A) : bool {. inline .} =
  assert s.isValid, "The set needs to be initialized."
  var hc: Hash
  var i = rawGet(s, key, hc)
  var msk = high(s.data)
  result = true

  if i >= 0:
    result = false
    dec(s.counter)
    while true:         # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
      var j = i         # The correctness of this depends on (h+1) in nextTry,
      var r = j         # though may be adaptable to other simple sequences.
      s.data[i].hcode = 0              # mark current EMPTY
      s.data[i].key = default(type(s.data[i].key))
      doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
        i = (i + 1) and msk            # increment mod table size
        if isEmpty(s.data[i].hcode):   # end of collision cluster; So all done
          return
        r = s.data[i].hcode and msk    # "home" location of key@i
      shallowCopy(s.data[j], s.data[i]) # data[i] will be marked EMPTY next loop

proc missingOrExcl*[A](s: var HashSet[A], key: A): bool =
  ## Excludes `key` in the set `s` and tells if `key` was removed from `s`.
  ##
  ## The difference with regards to the `excl() <#excl,TSet[A],A>`_ proc is
  ## that this proc returns `true` if `key` was not present in `s`. Example:
  ##
  ## .. code-block::
  ##  var s = toSet([2, 3, 6, 7])
  ##  assert s.missingOrExcl(4) == true
  ##  assert s.missingOrExcl(6) == false
  exclImpl(s, key)

proc excl*[A](s: var HashSet[A], key: A) =
  ## Excludes `key` from the set `s`.
  ##
  ## This doesn't do anything if `key` is not found in `s`. Example:
  ##
  ## .. code-block::
  ##   var s = toSet([2, 3, 6, 7])
  ##   s.excl(2)
  ##   s.excl(2)
  ##   assert s.len == 3
  discard exclImpl(s, key)

proc excl*[A](s: var HashSet[A], other: HashSet[A]) =
  ## Excludes everything in `other` from `s`.
  ##
  ## Example:
  ##
  ## .. code-block::
  ##   var
  ##     numbers = toSet([1, 2, 3, 4, 5])
  ##     even = toSet([2, 4, 6, 8])
  ##   numbers.excl(even)
  ##   echo numbers
  ##   # --> {1, 3, 5}
  assert s.isValid, "The set `s` needs to be initialized."
  assert other.isValid, "The set `other` needs to be initialized."
  for item in other: discard exclImpl(s, item)

proc containsOrIncl*[A](s: var HashSet[A], key: A): bool =
  ## Includes `key` in the set `s` and tells if `key` was added to `s`.
  ##
  ## The difference with regards to the `incl() <#incl,TSet[A],A>`_ proc is
  ## that this proc returns `true` if `key` was already present in `s`. The
  ## proc will return false if `key` was added as a new value to `s` during
  ## this call. Example:
  ##
  ## .. code-block::
  ##   var values = initSet[int]()
  ##   assert values.containsOrIncl(2) == false
  ##   assert values.containsOrIncl(2) == true
  assert s.isValid, "The set needs to be initialized."
  containsOrInclImpl()

proc init*[A](s: var HashSet[A], initialSize=64) =
  ## Initializes a hash set.
  ##
  ## The `initialSize` parameter needs to be a power of two. You can use
  ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to
  ## guarantee that at runtime. All set variables must be initialized before
  ## use with other procs from this module with the exception of `isValid()
  ## <#isValid,TSet[A]>`_ and `len() <#len,TSet[A]>`_.
  ##
  ## You can call this proc on a previously initialized hash set, which will
  ## discard all its values. This might be more convenient than iterating over
  ## existing values and calling `excl() <#excl,TSet[A],A>`_ on them. Example:
  ##
  ## .. code-block ::
  ##   var a: HashSet[int]
  ##   a.init(4)
  ##   a.incl(2)
  ##   a.init
  ##   assert a.len == 0 and a.isValid
  assert isPowerOfTwo(initialSize)
  s.counter = 0
  newSeq(s.data, initialSize)

proc initSet*[A](initialSize=64): HashSet[A] =
  ## Wrapper around `init() <#init,TSet[A],int>`_ for initialization of hash
  ## sets.
  ##
  ## Returns an empty hash set you can assign directly in ``var`` blocks in a
  ## single line. Example:
  ##
  ## .. code-block ::
  ##   var a = initSet[int](4)
  ##   a.incl(2)
  result.init(initialSize)

proc toSet*[A](keys: openArray[A]): HashSet[A] =
  ## Creates a new hash set that contains the given `keys`.
  ##
  ## Example:
  ##
  ## .. code-block::
  ##   var numbers = toSet([1, 2, 3, 4, 5])
  ##   assert numbers.contains(2)
  ##   assert numbers.contains(4)
  result = initSet[A](rightSize(keys.len))
  for key in items(keys): result.incl(key)

template dollarImpl() {.dirty.} =
  result = "{"
  for key in items(s):
    if result.len > 1: result.add(", ")
    result.add($key)
  result.add("}")

proc `$`*[A](s: HashSet[A]): string =
  ## Converts the set `s` to a string, mostly for logging purposes.
  ##
  ## Don't use this proc for serialization, the representation may change at
  ## any moment and values are not escaped. Example:
  ##
  ## Example:
  ##
  ## .. code-block::
  ##   echo toSet([2, 4, 5])
  ##   # --> {2, 4, 5}
  ##   echo toSet(["no", "esc'aping", "is \" provided"])
  ##   # --> {no, esc'aping, is " provided}
  assert s.isValid, "The set needs to be initialized."
  dollarImpl()

proc union*[A](s1, s2: HashSet[A]): HashSet[A] =
  ## Returns the union of the sets `s1` and `s2`.
  ##
  ## The union of two sets is represented mathematically as *A ∪ B* and is the
  ## set of all objects that are members of `s1`, `s2` or both. Example:
  ##
  ## .. code-block::
  ##   var
  ##     a = toSet(["a", "b"])
  ##     b = toSet(["b", "c"])
  ##     c = union(a, b)
  ##   assert c == toSet(["a", "b", "c"])
  assert s1.isValid, "The set `s1` needs to be initialized."
  assert s2.isValid, "The set `s2` needs to be initialized."
  result = s1
  incl(result, s2)

proc intersection*[A](s1, s2: HashSet[A]): HashSet[A] =
  ## Returns the intersection of the sets `s1` and `s2`.
  ##
  ## The intersection of two sets is represented mathematically as *A ∩ B* and
  ## is the set of all objects that are members of `s1` and `s2` at the same
  ## time. Example:
  ##
  ## .. code-block::
  ##   var
  ##     a = toSet(["a", "b"])
  ##     b = toSet(["b", "c"])
  ##     c = intersection(a, b)
  ##   assert c == toSet(["b"])
  assert s1.isValid, "The set `s1` needs to be initialized."
  assert s2.isValid, "The set `s2` needs to be initialized."
  result = initSet[A](min(s1.data.len, s2.data.len))
  for item in s1:
    if item in s2: incl(result, item)

proc difference*[A](s1, s2: HashSet[A]): HashSet[A] =
  ## Returns the difference of the sets `s1` and `s2`.
  ##
  ## The difference of two sets is represented mathematically as *A \ B* and is
  ## the set of all objects that are members of `s1` and not members of `s2`.
  ## Example:
  ##
  ## .. code-block::
  ##   var
  ##     a = toSet(["a", "b"])
  ##     b = toSet(["b", "c"])
  ##     c = difference(a, b)
  ##   assert c == toSet(["a"])
  assert s1.isValid, "The set `s1` needs to be initialized."
  assert s2.isValid, "The set `s2` needs to be initialized."
  result = initSet[A]()
  for item in s1:
    if not contains(s2, item):
      incl(result, item)

proc symmetricDifference*[A](s1, s2: HashSet[A]): HashSet[A] =
  ## Returns the symmetric difference of the sets `s1` and `s2`.
  ##
  ## The symmetric difference of two sets is represented mathematically as *A △
  ## B* or *A ⊖ B* and is the set of all objects that are members of `s1` or
  ## `s2` but not both at the same time. Example:
  ##
  ## .. code-block::
  ##   var
  ##     a = toSet(["a", "b"])
  ##     b = toSet(["b", "c"])
  ##     c = symmetricDifference(a, b)
  ##   assert c == toSet(["a", "c"])
  assert s1.isValid, "The set `s1` needs to be initialized."
  assert s2.isValid, "The set `s2` needs to be initialized."
  result = s1
  for item in s2:
    if containsOrIncl(result, item): excl(result, item)

proc `+`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} =
  ## Alias for `union(s1, s2) <#union>`_.
  result = union(s1, s2)

proc `*`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} =
  ## Alias for `intersection(s1, s2) <#intersection>`_.
  result = intersection(s1, s2)

proc `-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} =
  ## Alias for `difference(s1, s2) <#difference>`_.
  result = difference(s1, s2)

proc `-+-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} =
  ## Alias for `symmetricDifference(s1, s2) <#symmetricDifference>`_.
  result = symmetricDifference(s1, s2)

proc disjoint*[A](s1, s2: HashSet[A]): bool =
  ## Returns true iff the sets `s1` and `s2` have no items in common.
  ##
  ## Example:
  ##
  ## .. code-block::
  ##   var
  ##     a = toSet(["a", "b"])
  ##     b = toSet(["b", "c"])
  ##   assert disjoint(a, b) == false
  ##   assert disjoint(a, b - a) == true
  assert s1.isValid, "The set `s1` needs to be initialized."
  assert s2.isValid, "The set `s2` needs to be initialized."
  for item in s1:
    if item in s2: return false
  return true

proc `<`*[A](s, t: HashSet[A]): bool =
  ## Returns true if `s` is a strict or proper subset of `t`.
  ##
  ## A strict or proper subset `s` has all of its members in `t` but `t` has
  ## more elements than `s`. Example:
  ##
  ## .. code-block::
  ##   var
  ##     a = toSet(["a", "b"])
  ##     b = toSet(["b", "c"])
  ##     c = intersection(a, b)
  ##   assert c < a and c < b
  ##   assert((a < a) == false)
  s.counter != t.counter and s <= t

proc `<=`*[A](s, t: HashSet[A]): bool =
  ## Returns true if `s` is subset of `t`.
  ##
  ## A subset `s` has all of its members in `t` and `t` doesn't necessarily
  ## have more members than `s`. That is, `s` can be equal to `t`. Example:
  ##
  ## .. code-block::
  ##   var
  ##     a = toSet(["a", "b"])
  ##     b = toSet(["b", "c"])
  ##     c = intersection(a, b)
  ##   assert c <= a and c <= b
  ##   assert((a <= a))
  result = false
  if s.counter > t.counter: return
  result = true
  for item in s:
    if not(t.contains(item)):
      result = false
      return

proc `==`*[A](s, t: HashSet[A]): bool =
  ## Returns true if both `s` and `t` have the same members and set size.
  ##
  ## Example:
  ##
  ## .. code-block::
  ##   var
  ##     a = toSet([1, 2])
  ##     b = toSet([1])
  ##   b.incl(2)
  ##   assert a == b
  s.counter == t.counter and s <= t

proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] =
  ## Returns a new set after applying `op` on each of the elements of `data`.
  ##
  ## You can use this proc to transform the elements from a set. Example:
  ##
  ## .. code-block::
  ##   var a = toSet([1, 2, 3])
  ##   var b = a.map(proc (x: int): string = $x)
  ##   assert b == toSet(["1", "2", "3"])
  result = initSet[B]()
  for item in data: result.incl(op(item))

# ------------------------------ ordered set ------------------------------

type
  OrderedKeyValuePair[A] = tuple[
    hcode: Hash, next: int, key: A]
  OrderedKeyValuePairSeq[A] = seq[OrderedKeyValuePair[A]]
  OrderedSet* {.myShallow.}[A] = object ## \
    ## A generic hash set that remembers insertion order.
    ##
    ## Use `init() <#init,OrderedSet[A],int>`_ or `initOrderedSet[type]()
    ## <#initOrderedSet>`_ before calling other procs on it.
    data: OrderedKeyValuePairSeq[A]
    counter, first, last: int

{.deprecated: [TOrderedSet: OrderedSet].}

proc clear*[A](s: var OrderedSet[A]) =
  ## Clears the OrderedSet back to an empty state, without shrinking
  ## any of the existing storage. O(n) where n is the size of the hash bucket.
  s.counter = 0
  s.first = -1
  s.last = -1
  for i in 0..<s.data.len:
    s.data[i].hcode = 0
    s.data[i].next = 0
    s.data[i].key = default(type(s.data[i].key))


proc isValid*[A](s: OrderedSet[A]): bool =
  ## Returns `true` if the ordered set has been initialized with `initSet
  ## <#initOrderedSet>`_.
  ##
  ## Most operations over an uninitialized ordered set will crash at runtime
  ## and `assert <system.html#assert>`_ in debug builds. You can use this proc
  ## in your own procs to verify that ordered sets passed to your procs are
  ## correctly initialized. Example:
  ##
  ## .. code-block::
  ##   proc saveTarotCards(cards: OrderedSet[int]) =
  ##     assert cards.isValid, "Pass an initialized set!"
  ##     # Do stuff here, may crash in release builds!
  result = not s.data.isNil

proc len*[A](s: OrderedSet[A]): int {.inline.} =
  ## Returns the number of keys in `s`.
  ##
  ## Due to an implementation detail you can call this proc on variables which
  ## have not been initialized yet. The proc will return zero as the length
  ## then. Example:
  ##
  ## .. code-block::
  ##
  ##   var values: OrderedSet[int]
  ##   assert(not values.isValid)
  ##   assert values.len == 0
  result = s.counter

proc card*[A](s: OrderedSet[A]): int {.inline.} =
  ## Alias for `len() <#len,TOrderedSet[A]>`_.
  ##
  ## Card stands for the `cardinality
  ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set.
  result = s.counter

template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
  var h = s.first
  var idx = 0
  while h >= 0:
    var nxt = s.data[h].next
    if isFilled(s.data[h].hcode):
      yieldStmt
      inc(idx)
    h = nxt

iterator items*[A](s: OrderedSet[A]): A =
  ## Iterates over keys in the ordered set `s` in insertion order.
  ##
  ## If you need a sequence with the keys you can use `sequtils.toSeq()
  ## <sequtils.html#toSeq>`_ on the iterator. Usage example:
  ##
  ## .. code-block::
  ##   var a = initOrderedSet[int]()
  ##   for value in [9, 2, 1, 5, 1, 8, 4, 2]:
  ##     a.incl(value)
  ##   for value in a.items:
  ##     echo "Got ", value
  ##   # --> Got 9
  ##   # --> Got 2
  ##   # --> Got 1
  ##   # --> Got 5
  ##   # --> Got 8
  ##   # --> Got 4
  assert s.isValid, "The set needs to be initialized."
  forAllOrderedPairs:
    yield s.data[h].key

iterator pairs*[A](s: OrderedSet[A]): tuple[a: int, b: A] =
  assert s.isValid, "The set needs to be initialized"
  forAllOrderedPairs:
    yield (idx, s.data[h].key)

proc rawGetKnownHC[A](s: OrderedSet[A], key: A, hc: Hash): int {.inline.} =
  rawGetKnownHCImpl()

proc rawGet[A](s: OrderedSet[A], key: A, hc: var Hash): int {.inline.} =
  rawGetImpl()

proc contains*[A](s: OrderedSet[A], key: A): bool =
  ## Returns true iff `key` is in `s`.
  ##
  ## Example:
  ##
  ## .. code-block::
  ##   var values = initOrderedSet[int]()
  ##   assert(not values.contains(2))
  ##   values.incl(2)
  ##   assert values.contains(2)
  assert s.isValid, "The set needs to be initialized."
  var hc: Hash
  var index = rawGet(s, key, hc)
  result = index >= 0

proc rawInsert[A](s: var OrderedSet[A], data: var OrderedKeyValuePairSeq[A],
                  key: A, hc: Hash, h: Hash) =
  rawInsertImpl()
  data[h].next = -1
  if s.first < 0: s.first = h
  if s.last >= 0: data[s.last].next = h
  s.last = h

proc enlarge[A](s: var OrderedSet[A]) =
  var n: OrderedKeyValuePairSeq[A]
  newSeq(n, len(s.data) * growthFactor)
  var h = s.first
  s.first = -1
  s.last = -1
  swap(s.data, n)
  while h >= 0:
    var nxt = n[h].next
    if isFilled(n[h].hcode):
      var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode)
      rawInsert(s, s.data, n[h].key, n[h].hcode, j)
    h = nxt

proc incl*[A](s: var OrderedSet[A], key: A) =
  ## Includes an element `key` in `s`.
  ##
  ## This doesn't do anything if `key` is already in `s`. Example:
  ##
  ## .. code-block::
  ##   var values = initOrderedSet[int]()
  ##   values.incl(2)
  ##   values.incl(2)
  ##   assert values.len == 1
  assert s.isValid, "The set needs to be initialized."
  inclImpl()

proc incl*[A](s: var HashSet[A], other: OrderedSet[A]) =
  ## Includes all elements from `other` into `s`.
  ##
  ## Example:
  ##
  ## .. code-block::
  ##   var values = initOrderedSet[int]()
  ##   values.incl(2)
  ##   var others = toOrderedSet([6, 7])
  ##   values.incl(others)
  ##   assert values.len == 3
  assert s.isValid, "The set `s` needs to be initialized."
  assert other.isValid, "The set `other` needs to be initialized."
  for item in other: incl(s, item)

proc exclImpl[A](s: var OrderedSet[A], key: A) : bool {. inline .} =
  assert s.isValid, "The set needs to be initialized."
  var hc: Hash
  var i = rawGet(s, key, hc)
  var msk = high(s.data)
  result = true

  if i >= 0:
    result = false
    # Fix ordering
    if s.first == i:
      s.first = s.data[i].next
    else:
      var itr = s.first
      while true:
        if (s.data[itr].next == i):
          s.data[itr].next = s.data[i].next
          if s.last == i:
            s.last = itr
          break
        itr = s.data[itr].next

    dec(s.counter)
    while true:         # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
      var j = i         # The correctness of this depends on (h+1) in nextTry,
      var r = j         # though may be adaptable to other simple sequences.
      s.data[i].hcode = 0              # mark current EMPTY
      s.data[i].key = default(type(s.data[i].key))
      s.data[i].next = 0
      doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
        i = (i + 1) and msk            # increment mod table size
        if isEmpty(s.data[i].hcode):   # end of collision cluster; So all done
          return
        r = s.data[i].hcode and msk    # "home" location of key@i
      shallowCopy(s.data[j], s.data[i]) # data[i] will be marked EMPTY next loop

proc missingOrExcl*[A](s: var OrderedSet[A], key: A): bool =
  ## Excludes `key` in the set `s` and tells if `key` was removed from `s`. Efficiency: O(n).
  ##
  ## The difference with regards to the `excl() <#excl,TOrderedSet[A],A>`_ proc is
  ## that this proc returns `true` if `key` was not present in `s`. Example:
  ##
  ## .. code-block::
  ##  var s = toOrderedSet([2, 3, 6, 7])
  ##  assert s.missingOrExcl(4) == true
  ##  assert s.missingOrExcl(6) == false
  exclImpl(s, key)


proc excl*[A](s: var OrderedSet[A], key: A) =
  ## Excludes `key` from the set `s`. Efficiency: O(n).
  ##
  ## This doesn't do anything if `key` is not found in `s`. Example:
  ##
  ## .. code-block::
  ##   var s = toOrderedSet([2, 3, 6, 7])
  ##   s.excl(2)
  ##   s.excl(2)
  ##   assert s.len == 3
  discard exclImpl(s, key)

proc containsOrIncl*[A](s: var OrderedSet[A], key: A): bool =
  ## Includes `key` in the set `s` and tells if `key` was added to `s`.
  ##
  ## The difference with regards to the `incl() <#incl,TOrderedSet[A],A>`_ proc
  ## is that this proc returns `true` if `key` was already present in `s`. The
  ## proc will return false if `key` was added as a new value to `s` during
  ## this call. Example:
  ##
  ## .. code-block::
  ##   var values = initOrderedSet[int]()
  ##   assert values.containsOrIncl(2) == false
  ##   assert values.containsOrIncl(2) == true
  assert s.isValid, "The set needs to be initialized."
  containsOrInclImpl()

proc init*[A](s: var OrderedSet[A], initialSize=64) =
  ## Initializes an ordered hash set.
  ##
  ## The `initialSize` parameter needs to be a power of two. You can use
  ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to
  ## guarantee that at runtime. All set variables must be initialized before
  ## use with other procs from this module with the exception of `isValid()
  ## <#isValid,TOrderedSet[A]>`_ and `len() <#len,TOrderedSet[A]>`_.
  ##
  ## You can call this proc on a previously initialized ordered hash set to
  ## discard its values. At the moment this is the only proc to remove elements
  ## from an ordered hash set. Example:
  ##
  ## .. code-block ::
  ##   var a: OrderedSet[int]
  ##   a.init(4)
  ##   a.incl(2)
  ##   a.init
  ##   assert a.len == 0 and a.isValid
  assert isPowerOfTwo(initialSize)
  s.counter = 0
  s.first = -1
  s.last = -1
  newSeq(s.data, initialSize)

proc initOrderedSet*[A](initialSize=64): OrderedSet[A] =
  ## Wrapper around `init() <#init,TOrderedSet[A],int>`_ for initialization of
  ## ordered hash sets.
  ##
  ## Returns an empty ordered hash set you can assign directly in ``var``
  ## blocks in a single line. Example:
  ##
  ## .. code-block ::
  ##   var a = initOrderedSet[int](4)
  ##   a.incl(2)
  result.init(initialSize)

proc toOrderedSet*[A](keys: openArray[A]): OrderedSet[A] =
  ## Creates a new ordered hash set that contains the given `keys`.
  ##
  ## Example:
  ##
  ## .. code-block::
  ##   var numbers = toOrderedSet([1, 2, 3, 4, 5])
  ##   assert numbers.contains(2)
  ##   assert numbers.contains(4)
  result = initOrderedSet[A](rightSize(keys.len))
  for key in items(keys): result.incl(key)

proc `$`*[A](s: OrderedSet[A]): string =
  ## Converts the ordered hash set `s` to a string, mostly for logging purposes.
  ##
  ## Don't use this proc for serialization, the representation may change at
  ## any moment and values are not escaped. Example:
  ##
  ## Example:
  ##
  ## .. code-block::
  ##   echo toOrderedSet([2, 4, 5])
  ##   # --> {2, 4, 5}
  ##   echo toOrderedSet(["no", "esc'aping", "is \" provided"])
  ##   # --> {no, esc'aping, is " provided}
  assert s.isValid, "The set needs to be initialized."
  dollarImpl()

proc `==`*[A](s, t: OrderedSet[A]): bool =
  ## Equality for ordered sets.
  if s.counter != t.counter: return false
  var h = s.first
  var g = s.first
  var compared = 0
  while h >= 0 and g >= 0:
    var nxh = s.data[h].next
    var nxg = t.data[g].next
    if isFilled(s.data[h].hcode) and isFilled(s.data[g].hcode):
      if s.data[h].key == s.data[g].key:
        inc compared
      else:
        return false
    h = nxh
    g = nxg
  result = compared == s.counter

when isMainModule and not defined(release):
  proc testModule() =
    ## Internal micro test to validate docstrings and such.
    block isValidTest:
      var options: HashSet[string]
      proc savePreferences(options: HashSet[string]) =
        assert options.isValid, "Pass an initialized set!"
      options = initSet[string]()
      options.savePreferences

    block lenTest:
      var values: HashSet[int]
      assert(not values.isValid)
      assert values.len == 0
      assert values.card == 0

    block setIterator:
      type pair = tuple[a, b: int]
      var a, b = initSet[pair]()
      a.incl((2, 3))
      a.incl((3, 2))
      a.incl((2, 3))
      for x, y in a.items:
        b.incl((x - 2, y + 1))
      assert a.len == b.card
      assert a.len == 2
      #echo b

    block setContains:
      var values = initSet[int]()
      assert(not values.contains(2))
      values.incl(2)
      assert values.contains(2)
      values.excl(2)
      assert(not values.contains(2))

      values.incl(4)
      var others = toSet([6, 7])
      values.incl(others)
      assert values.len == 3

      values.init
      assert values.containsOrIncl(2) == false
      assert values.containsOrIncl(2) == true
      var
        a = toSet([1, 2])
        b = toSet([1])
      b.incl(2)
      assert a == b

    block exclusions:
      var s = toSet([2, 3, 6, 7])
      s.excl(2)
      s.excl(2)
      assert s.len == 3

      var
        numbers = toSet([1, 2, 3, 4, 5])
        even = toSet([2, 4, 6, 8])
      numbers.excl(even)
      #echo numbers
      # --> {1, 3, 5}

    block toSeqAndString:
      var a = toSet([2, 4, 5])
      var b = initSet[int]()
      for x in [2, 4, 5]: b.incl(x)
      assert($a == $b)
      #echo a
      #echo toSet(["no", "esc'aping", "is \" provided"])

    #block orderedToSeqAndString:
    #  echo toOrderedSet([2, 4, 5])
    #  echo toOrderedSet(["no", "esc'aping", "is \" provided"])

    block setOperations:
      var
        a = toSet(["a", "b"])
        b = toSet(["b", "c"])
        c = union(a, b)
      assert c == toSet(["a", "b", "c"])
      var d = intersection(a, b)
      assert d == toSet(["b"])
      var e = difference(a, b)
      assert e == toSet(["a"])
      var f = symmetricDifference(a, b)
      assert f == toSet(["a", "c"])
      assert d < a and d < b
      assert((a < a) == false)
      assert d <= a and d <= b
      assert((a <= a))
      # Alias test.
      assert a + b == toSet(["a", "b", "c"])
      assert a * b == toSet(["b"])
      assert a - b == toSet(["a"])
      assert a -+- b == toSet(["a", "c"])
      assert disjoint(a, b) == false
      assert disjoint(a, b - a) == true

    block mapSet:
      var a = toSet([1, 2, 3])
      var b = a.map(proc (x: int): string = $x)
      assert b == toSet(["1", "2", "3"])

    block isValidTest:
      var cards: OrderedSet[string]
      proc saveTarotCards(cards: OrderedSet[string]) =
        assert cards.isValid, "Pass an initialized set!"
      cards = initOrderedSet[string]()
      cards.saveTarotCards

    block lenTest:
      var values: OrderedSet[int]
      assert(not values.isValid)
      assert values.len == 0
      assert values.card == 0

    block setIterator:
      type pair = tuple[a, b: int]
      var a, b = initOrderedSet[pair]()
      a.incl((2, 3))
      a.incl((3, 2))
      a.incl((2, 3))
      for x, y in a.items:
        b.incl((x - 2, y + 1))
      assert a.len == b.card
      assert a.len == 2

    block setPairsIterator:
      var s = toOrderedSet([1, 3, 5, 7])
      var items = newSeq[tuple[a: int, b: int]]()
      for idx, item in s: items.add((idx, item))
      assert items == @[(0, 1), (1, 3), (2, 5), (3, 7)]

    block exclusions:
      var s = toOrderedSet([1, 2, 3, 6, 7, 4])

      s.excl(3)
      s.excl(3)
      s.excl(1)
      s.excl(4)

      var items = newSeq[int]()
      for item in s: items.add item
      assert items == @[2, 6, 7]

    #block orderedSetIterator:
    #  var a = initOrderedSet[int]()
    #  for value in [9, 2, 1, 5, 1, 8, 4, 2]:
    #    a.incl(value)
    #  for value in a.items:
    #    echo "Got ", value

    block setContains:
      var values = initOrderedSet[int]()
      assert(not values.contains(2))
      values.incl(2)
      assert values.contains(2)

    block toSeqAndString:
      var a = toOrderedSet([2, 4, 5])
      var b = initOrderedSet[int]()
      for x in [2, 4, 5]: b.incl(x)
      assert($a == $b)
      assert(a == b) # https://github.com/Araq/Nim/issues/1413

    block initBlocks:
      var a: OrderedSet[int]
      a.init(4)
      a.incl(2)
      a.init
      assert a.len == 0 and a.isValid
      a = initOrderedSet[int](4)
      a.incl(2)
      assert a.len == 1

      var b: HashSet[int]
      b.init(4)
      b.incl(2)
      b.init
      assert b.len == 0 and b.isValid
      b = initSet[int](4)
      b.incl(2)
      assert b.len == 1

    for i in 0 .. 32:
      var s = rightSize(i)
      if s <= i or mustRehash(s, i):
        echo "performance issue: rightSize() will not elide enlarge() at ", i

    block missingOrExcl:
      var s = toOrderedSet([2, 3, 6, 7])
      assert s.missingOrExcl(4) == true
      assert s.missingOrExcl(6) == false

    when not defined(testing):
      echo "Micro tests run successfully."

  testModule()