basic generic collections implemented and tested

2026-07-22 00:41:28 +00:00 · 2011-06-07 03:37:36 +02:00
parent 42eb21be7b
commit 3bc821aa5c
10 changed files with 512 additions and 95 deletions
--- a/lib/pure/collections/lists.nim
+++ b/lib/pure/collections/lists.nim
@@ -12,28 +12,28 @@
 ## be manipulated directly for efficiency.

 type
-  TDoublyLinkedNode*[T] {.pure, 
-      final.} = object ## a node a doubly linked list consists of
+  TDoublyLinkedNode* {.pure, 
+      final.}[T] = object ## a node a doubly linked list consists of
    next*, prev*: ref TDoublyLinkedNode[T]
    value*: T
  PDoublyLinkedNode*[T] = ref TDoublyLinkedNode[T]

-  TSinglyLinkedNode*[T] {.pure, 
-      final.} = object ## a node a singly linked list consists of
+  TSinglyLinkedNode* {.pure, 
+      final.}[T] = object ## a node a singly linked list consists of
    next*: ref TSinglyLinkedNode[T]
    value*: T
  PSinglyLinkedNode*[T] = ref TSinglyLinkedNode[T]

-  TSinglyLinkedList*[T] {.pure, final.} = object ## a singly linked list
+  TSinglyLinkedList* {.pure, final.}[T] = object ## a singly linked list
    head*, tail*: PSinglyLinkedNode[T]
  
-  TDoublyLinkedList*[T] {.pure, final.} = object ## a doubly linked list
+  TDoublyLinkedList* {.pure, final.}[T] = object ## a doubly linked list
    head*, tail*: PDoublyLinkedNode[T]

-  TSinglyLinkedRing*[T] {.pure, final.} = object ## a singly linked ring
+  TSinglyLinkedRing* {.pure, final.}[T] = object ## a singly linked ring
    head*: PSinglyLinkedNode[T]
  
-  TDoublyLinkedRing*[T] {.pure, final.} = object ## a doubly linked ring
+  TDoublyLinkedRing* {.pure, final.}[T] = object ## a doubly linked ring
    head*: PDoublyLinkedNode[T]
    
 proc newDoublyLinkedNode*[T](value: T): PDoublyLinkedNode[T] =
@@ -240,17 +240,15 @@ proc prepend*[T](L: var TSinglyLinkedRing[T], value: T) =

 proc append*[T](L: var TDoublyLinkedRing[T], n: PDoublyLinkedNode[T]) = 
  ## appends a node `n` to `L`. Efficiency: O(1).
-  if L.tail != nil: 
-    L.tail.next = n
-    n.prev = L.tail
+  if L.head != nil:
    n.next = L.head
+    n.prev = L.head.prev
+    L.head.prev.next = n
+    L.head.prev = n
  else:
-    # both head and tail are nil:
-    assert L.head == nil
-    L.head = n
    n.prev = n
    n.next = n
-  L.tail = n
+    L.head = n

 proc append*[T](L: var TDoublyLinkedRing[T], value: T) = 
  ## appends a value to `L`. Efficiency: O(1).
@@ -259,13 +257,11 @@ proc append*[T](L: var TDoublyLinkedRing[T], value: T) =
 proc prepend*[T](L: var TDoublyLinkedRing[T], n: PDoublyLinkedNode[T]) = 
  ## prepends a node `n` to `L`. Efficiency: O(1).
  if L.head != nil: 
-    L.head.prev = n
-    n.prev = L.tail
    n.next = L.head
+    n.prev = L.head.prev
+    L.head.prev.next = n
+    L.head.prev = n
  else:
-    # both head and tail are nil:
-    assert L.tail == nil
-    L.tail = n
    n.prev = n
    n.next = n
  L.head = n
@@ -276,19 +272,14 @@ proc prepend*[T](L: var TDoublyLinkedRing[T], value: T) =
  
 proc remove*[T](L: var TDoublyLinkedRing[T], n: PDoublyLinkedNode[T]) = 
  ## removes `n` from `L`. Efficiency: O(1).
-  if n == L.tail: 
-    if n == L.head:
-      # only element:
-      L.tail = nil
-      L.head = nil
-    else:
-      L.tail = n.prev
-  elif n == L.head: 
-    L.head = n.next
  n.next.prev = n.prev
  n.prev.next = n.next
-  # break cycles for the GC; not necessary, but might help:
-  n.next = nil
-  n.prev = nil
+  if n == L.head: 
+    var p = L.head.prev
+    if p == L.head: 
+      # only one element left:
+      L.head = nil
+    else:
+      L.head = L.head.prev


--- a/lib/pure/collections/sets.nim
+++ b/lib/pure/collections/sets.nim
@@ -0,0 +1,231 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2011 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## The ``sets`` module implements an efficient hash set and ordered hash set.
+##
+## Note: The data types declared here have *value semantics*: This means that
+## ``=`` performs a copy of the hash table. If you are overly concerned with
+## efficiency and know what you do (!), you can define the symbol
+## ``shallowADT`` to compile a version that uses shallow copies instead.
+
+import
+  os, hashes, math
+
+when defined(shallowADT):
+  {.pragma: myShallow, shallow.}
+else:
+  {.pragma: myShallow.}
+
+type
+  TSlotEnum = enum seEmpty, seFilled, seDeleted
+  TKeyValuePair[A] = tuple[slot: TSlotEnum, key: A]
+  TKeyValuePairSeq[A] = seq[TKeyValuePair[A]]
+  TSet* {.final, myShallow.}[A] = object
+    data: TKeyValuePairSeq[A]
+    counter: int
+
+proc len*[A](s: TSet[A]): int =
+  ## returns the number of keys in `s`.
+  result = s.counter
+
+proc card*[A](s: TSet[A]): int =
+  ## alias for `len`.
+  result = s.counter
+
+iterator items*[A](s: TSet[A]): A =
+  ## iterates over any key in the table `t`.
+  for h in 0..high(s.data):
+    if s.data[h].slot == seFilled: yield s.data[h].key
+
+const
+  growthFactor = 2
+
+proc mustRehash(length, counter: int): bool {.inline.} =
+  assert(length > counter)
+  result = (length * 2 < counter * 3) or (length - counter < 4)
+
+proc nextTry(h, maxHash: THash): THash {.inline.} =
+  result = ((5 * h) + 1) and maxHash
+
+template rawGetImpl() =
+  var h: THash = hash(key) and high(s.data) # start with real hash value
+  while s.data[h].slot != seEmpty:
+    if s.data[h].key == key and s.data[h].slot == seFilled:
+      return h
+    h = nextTry(h, high(s.data))
+  result = -1
+
+template rawInsertImpl() =
+  var h: THash = hash(key) and high(data)
+  while data[h].slot == seFilled:
+    h = nextTry(h, high(data))
+  data[h].key = key
+  data[h].slot = seFilled
+
+proc RawGet[A](s: TSet[A], key: A): int =
+  rawGetImpl()
+
+proc contains*[A](s: TSet[A], key: A): bool =
+  ## returns true iff `key` is in `s`.
+  var index = RawGet(t, key)
+  result = index >= 0
+
+proc RawInsert[A](s: var TSet[A], data: var TKeyValuePairSeq[A], key: A) =
+  rawInsertImpl()
+
+proc Enlarge[A](s: var TSet[A]) =
+  var n: TKeyValuePairSeq[A]
+  newSeq(n, len(s.data) * growthFactor)
+  for i in countup(0, high(s.data)):
+    if s.data[i].slot == seFilled: RawInsert(s, n, s.data[i].key)
+  swap(s.data, n)
+
+template inclImpl() =
+  var index = RawGet(s, key)
+  if index < 0:
+    if mustRehash(len(s.data), s.counter): Enlarge(s)
+    RawInsert(s, s.data, key)
+    inc(s.counter)
+
+template containsOrInclImpl() =
+  var index = RawGet(s, key)
+  if index >= 0:
+    result = true
+  else:
+    if mustRehash(len(s.data), s.counter): Enlarge(s)
+    RawInsert(s, s.data, key)
+    inc(s.counter)
+
+proc incl*[A](s: var TSet[A], key: A) =
+  ## includes an element `key` in `s`.
+  inclImpl()
+
+proc excl*[A](s: var TSet[A], key: A) =
+  ## excludes `key` from the set `s`.
+  var index = RawGet(t, key)
+  if index >= 0:
+    s.data[index].slot = seDeleted
+    dec(s.counter)
+
+proc containsOrIncl*[A](s: var TSet[A], key: A): bool =
+  ## returns true if `s` contains `key`, otherwise `key` is included in `s`
+  ## and false is returned.
+  containsOrInclImpl()
+
+proc initSet*[A](initialSize=64): TSet[A] =
+  ## creates a new hash set that is empty. `initialSize` needs to be
+  ## a power of two.
+  assert isPowerOfTwo(initialSize)
+  result.counter = 0
+  newSeq(result.data, initialSize)
+
+proc toSet*[A](keys: openarray[A]): TSet[A] =
+  ## creates a new hash set that contains the given `keys`.
+  result = initSet[A](nextPowerOfTwo(keys.len+10))
+  for key in items(keys): result.incl(key)
+
+template dollarImpl(): stmt =
+  result = "{"
+  for key in items(s):
+    if result.len > 1: result.add(", ")
+    result.add($key)
+  result.add("}")
+
+proc `$`*[A](s: TSet[A]): string =
+  ## The `$` operator for hash sets.
+  dollarImpl()
+
+# ------------------------------ ordered table ------------------------------
+
+type
+  TOrderedKeyValuePair[A] = tuple[
+    slot: TSlotEnum, next: int, key: A]
+  TOrderedKeyValuePairSeq[A] = seq[TOrderedKeyValuePair[A]]
+  TOrderedSet* {.
+      final, myShallow.}[A] = object ## set that remembers insertion order
+    data: TOrderedKeyValuePairSeq[A]
+    counter, first, last: int
+
+proc len*[A](s: TOrderedSet[A]): int {.inline.} =
+  ## returns the number of keys in `s`.
+  result = t.counter
+
+proc card*[A](s: TOrderedSet[A]): int {.inline.} =
+  ## alias for `len`.
+  result = t.counter
+
+template forAllOrderedPairs(yieldStmt: stmt) =
+  var h = s.first
+  while h >= 0:
+    var nxt = s.data[h].next
+    if s.data[h].slot == seFilled: yieldStmt
+    h = nxt
+
+iterator items*[A](s: TOrderedSet[A]): A =
+  ## iterates over any key in the set `s` in insertion order.
+  forAllOrderedPairs:
+    yield s.data[h].key
+
+proc RawGet[A](s: TOrderedSet[A], key: A): int =
+  rawGetImpl()
+
+proc contains*[A](s: TOrderedSet[A], key: A): bool =
+  ## returns true iff `key` is in `s`.
+  var index = RawGet(s, key)
+  result = index >= 0
+
+proc RawInsert[A](s: var TOrderedSet[A], 
+                  data: var TOrderedKeyValuePairSeq[A], key: A) =
+  rawInsertImpl()
+  data[h].next = -1
+  if s.first < 0: s.first = h
+  if s.last >= 0: data[s.last].next = h
+  s.last = h
+
+proc Enlarge[A](s: var TOrderedSet[A]) =
+  var n: TOrderedKeyValuePairSeq[A]
+  newSeq(n, len(s.data) * growthFactor)
+  var h = s.first
+  s.first = -1
+  s.last = -1
+  while h >= 0:
+    var nxt = s.data[h].next
+    if s.data[h].slot == seFilled: 
+      RawInsert(s, n, s.data[h].key)
+    h = nxt
+  swap(s.data, n)
+
+proc incl*[A](s: var TOrderedSet[A], key: A) =
+  ## includes an element `key` in `s`.
+  inclImpl()
+
+proc containsOrIncl*[A](s: var TOrderedSet[A], key: A): bool =
+  ## returns true if `s` contains `key`, otherwise `key` is included in `s`
+  ## and false is returned.
+  containsOrInclImpl()
+
+proc initOrderedSet*[A](initialSize=64): TOrderedSet[A] =
+  ## creates a new ordered hash set that is empty. `initialSize` needs to be
+  ## a power of two.
+  assert isPowerOfTwo(initialSize)
+  result.counter = 0
+  result.first = -1
+  result.last = -1
+  newSeq(result.data, initialSize)
+
+proc toOrderedSet*[A](keys: openarray[A]): TOrderedSet[A] =
+  ## creates a new ordered hash set that contains the given `keys`.
+  result = initOrderedSet[A](nextPowerOfTwo(keys.len+10))
+  for key in items(keys): result.incl(key)
+
+proc `$`*[A](s: TOrderedSet[A]): string =
+  ## The `$` operator for ordered hash sets.
+  dollarImpl()
+
+
--- a/lib/pure/collections/tables.nim
+++ b/lib/pure/collections/tables.nim
@@ -1,7 +1,7 @@
 #
 #
 #            Nimrod's Runtime Library
-#        (c) Copyright 2011 Andreas Rumpf, Dominik Picheta
+#        (c) Copyright 2011 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -12,7 +12,7 @@
 ##
 ## Note: The data types declared here have *value semantics*: This means that
 ## ``=`` performs a copy of the hash table. If you are overly concerned with
-## efficiency and don't need this behaviour, you can define the symbol
+## efficiency and know what you do (!), you can define the symbol
 ## ``shallowADT`` to compile a version that uses shallow copies instead.

 import
@@ -123,7 +123,7 @@ proc del*[A, B](t: var TTable[A, B], key: A) =
    dec(t.counter)

 proc initTable*[A, B](initialSize=64): TTable[A, B] =
-  ## creates a new hash table table that is empty. `initialSize` needs to be
+  ## creates a new hash table that is empty. `initialSize` needs to be
  ## a power of two.
  assert isPowerOfTwo(initialSize)
  result.counter = 0
@@ -132,7 +132,7 @@ proc initTable*[A, B](initialSize=64): TTable[A, B] =
 proc toTable*[A, B](pairs: openarray[tuple[key: A, 
                    val: B]]): TTable[A, B] =
  ## creates a new hash table that contains the given `pairs`.
-  result = initTable[A](nextPowerOfTwo(pairs.len+10))
+  result = initTable[A, B](nextPowerOfTwo(pairs.len+10))
  for key, val in items(pairs): result[key] = val

 template dollarImpl(): stmt =
@@ -148,7 +148,7 @@ template dollarImpl(): stmt =
    result.add("}")

 proc `$`*[A, B](t: TTable[A, B]): string =
-  ## The `$` operator for string tables.
+  ## The `$` operator for hash tables.
  dollarImpl()

 # ------------------------------ ordered table ------------------------------
@@ -167,11 +167,11 @@ proc len*[A, B](t: TOrderedTable[A, B]): int {.inline.} =
  result = t.counter

 template forAllOrderedPairs(yieldStmt: stmt) =
-  var i = t.first
-  while i >= 0:
-    var nxt = t.data[i].next
+  var h = t.first
+  while h >= 0:
+    var nxt = t.data[h].next
    if t.data[h].slot == seFilled: yieldStmt
-    i = nxt
+    h = nxt

 iterator pairs*[A, B](t: TOrderedTable[A, B]): tuple[key: A, val: B] =
  ## iterates over any (key, value) pair in the table `t` in insertion
@@ -204,23 +204,29 @@ proc hasKey*[A, B](t: TOrderedTable[A, B], key: A): bool =
  ## returns true iff `key` is in the table `t`.
  result = rawGet(t, key) >= 0

-proc RawInsert[A, B](t: TOrderedTable[A, B], 
+proc RawInsert[A, B](t: var TOrderedTable[A, B], 
                     data: var TOrderedKeyValuePairSeq[A, B],
                     key: A, val: B) =
  rawInsertImpl()
  data[h].next = -1
-  if first < 0: first = h
-  if last >= 0: data[last].next = h
-  lastEntry = h
+  if t.first < 0: t.first = h
+  if t.last >= 0: data[t.last].next = h
+  t.last = h

-proc Enlarge[A, B](t: TOrderedTable[A, B]) =
+proc Enlarge[A, B](t: var TOrderedTable[A, B]) =
  var n: TOrderedKeyValuePairSeq[A, B]
  newSeq(n, len(t.data) * growthFactor)
-  forAllOrderedPairs:
-    RawInsert(t, n, t.data[h].key, t.data[h].val)
+  var h = t.first
+  t.first = -1
+  t.last = -1
+  while h >= 0:
+    var nxt = t.data[h].next
+    if t.data[h].slot == seFilled: 
+      RawInsert(t, n, t.data[h].key, t.data[h].val)
+    h = nxt
  swap(t.data, n)

-proc `[]=`*[A, B](t: TOrderedTable[A, B], key: A, val: B) =
+proc `[]=`*[A, B](t: var TOrderedTable[A, B], key: A, val: B) =
  ## puts a (key, value)-pair into `t`.
  putImpl()

@@ -240,7 +246,7 @@ proc toOrderedTable*[A, B](pairs: openarray[tuple[key: A,
  for key, val in items(pairs): result[key] = val

 proc `$`*[A, B](t: TOrderedTable[A, B]): string =
-  ## The `$` operator for hash tables.
+  ## The `$` operator for ordered hash tables.
  dollarImpl()

 # ------------------------------ count tables -------------------------------
@@ -295,14 +301,14 @@ proc RawInsert[A](t: TCountTable[A], data: var seq[tuple[key: A, val: int]],
  data[h].key = key
  data[h].val = val

-proc Enlarge[A](t: TCountTable[A]) =
+proc Enlarge[A](t: var TCountTable[A]) =
  var n: seq[tuple[key: A, val: int]]
  newSeq(n, len(t.data) * growthFactor)
  for i in countup(0, high(t.data)):
    if t.data[i].val != 0: RawInsert(t, n, t.data[i].key, t.data[i].val)
  swap(t.data, n)

-proc `[]=`*[A](t: TCountTable[A], key: A, val: int) =
+proc `[]=`*[A](t: var TCountTable[A], key: A, val: int) =
  ## puts a (key, value)-pair into `t`. `val` has to be positive.
  assert val > 0
  PutImpl()
@@ -323,7 +329,7 @@ proc `$`*[A](t: TCountTable[A]): string =
  ## The `$` operator for count tables.
  dollarImpl()

-proc inc*[A](t: TCountTable[A], key: A, val = 1) = 
+proc inc*[A](t: var TCountTable[A], key: A, val = 1) = 
  ## increments `t[key]` by `val`.
  var index = RawGet(t, key)
  if index >= 0:
@@ -351,8 +357,8 @@ proc Largest*[A](t: TCountTable[A]): tuple[key: A, val: int] =
  result.key = t.data[maxIdx].key
  result.val = t.data[maxIdx].val

-proc sort*[A](t: var TCountTable[A]) = 
-  ## sorts the count table so that the entry with the highest counter comes 
+proc sort*[A](t: var TCountTable[A]) =
+  ## sorts the count table so that the entry with the highest counter comes
  ## first. This is destructive! You must not modify `t` afterwards!
  ## You can use the iterators `pairs`,  `keys`, and `values` to iterate over
  ## `t` in the sorted order.
@@ -361,35 +367,14 @@ proc sort*[A](t: var TCountTable[A]) =
  var h = 1
  while true:
    h = 3 * h + 1
-    if h >= t.data.high: break
-  while true: 
+    if h >= high(t.data): break
+  while true:
    h = h div 3
-    for i in countup(h, t.data.high):
+    for i in countup(h, high(t.data)):
      var j = i
-      while t.data[j-h].val < t.data[j].val:
+      while t.data[j-h].val <= t.data[j].val:
        swap(t.data[j], t.data[j-h])
        j = j-h
        if j < h: break
    if h == 1: break

-when isMainModule:
-  var table = initHashTable[string, float]()
-  table["test"] = 1.2345
-  table["111"] = 1.000043
-  echo table
-  table.del("111")
-  echo table
-  #echo repr(table["111"])
-  #echo(repr(table["1212"]))
-  table["111"] = 1.5
-  table["011"] = 67.9
-  echo table
-  table.del("test")
-  table.del("111")
-  echo table
-
-
-  echo hash("test")
-  echo hash("test")
-
-