mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-30 18:02:05 +00:00
535 lines
17 KiB
Nim
535 lines
17 KiB
Nim
#
|
|
#
|
|
# Nim's Runtime Library
|
|
# (c) Copyright 2012 Andreas Rumpf
|
|
#
|
|
# See the file "copying.txt", included in this
|
|
# distribution, for details about the copyright.
|
|
#
|
|
|
|
## This module implements a `crit bit tree`:idx: which is an efficient
|
|
## container for a sorted set of strings, or for a sorted mapping of strings. Based on the
|
|
## [excellent paper by Adam Langley](https://www.imperialviolet.org/binary/critbit.pdf).
|
|
## (A crit bit tree is a form of `radix tree`:idx: or `patricia trie`:idx:.)
|
|
|
|
runnableExamples:
|
|
from std/sequtils import toSeq
|
|
|
|
var critbitAsSet: CritBitTree[void] = ["kitten", "puppy"].toCritBitTree
|
|
doAssert critbitAsSet.len == 2
|
|
critbitAsSet.incl("")
|
|
doAssert "" in critbitAsSet
|
|
critbitAsSet.excl("")
|
|
doAssert "" notin critbitAsSet
|
|
doAssert toSeq(critbitAsSet.items) == @["kitten", "puppy"]
|
|
let same = ["puppy", "kitten", "puppy"].toCritBitTree
|
|
doAssert toSeq(same.keys) == toSeq(critbitAsSet.keys)
|
|
|
|
var critbitAsDict: CritBitTree[int] = {"key1": 42}.toCritBitTree
|
|
doAssert critbitAsDict.len == 1
|
|
critbitAsDict["key2"] = 0
|
|
doAssert "key2" in critbitAsDict
|
|
doAssert critbitAsDict["key2"] == 0
|
|
critbitAsDict.excl("key1")
|
|
doAssert "key1" notin critbitAsDict
|
|
doAssert toSeq(critbitAsDict.pairs) == @[("key2", 0)]
|
|
|
|
import std/private/since
|
|
|
|
type
|
|
NodeObj[T] {.acyclic.} = object
|
|
byte: int ## byte index of the difference
|
|
otherBits: char
|
|
case isLeaf: bool
|
|
of false: child: array[0..1, ref NodeObj[T]]
|
|
of true:
|
|
key: string
|
|
when T isnot void:
|
|
val: T
|
|
|
|
Node[T] = ref NodeObj[T]
|
|
CritBitTree*[T] = object ## The crit bit tree can either be used
|
|
## as a mapping from strings to
|
|
## some type `T` or as a set of
|
|
## strings if `T` is `void`.
|
|
root: Node[T]
|
|
count: int
|
|
|
|
func len*[T](c: CritBitTree[T]): int {.inline.} =
|
|
## Returns the number of elements in `c` in O(1).
|
|
runnableExamples:
|
|
let c = ["key1", "key2"].toCritBitTree
|
|
doAssert c.len == 2
|
|
|
|
result = c.count
|
|
|
|
proc rawGet[T](c: CritBitTree[T], key: string): Node[T] =
|
|
var it = c.root
|
|
while it != nil:
|
|
if not it.isLeaf:
|
|
let ch = if it.byte < key.len: key[it.byte] else: '\0'
|
|
let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8
|
|
it = it.child[dir]
|
|
else:
|
|
return if it.key == key: it else: nil
|
|
|
|
func contains*[T](c: CritBitTree[T], key: string): bool {.inline.} =
|
|
## Returns true if `c` contains the given `key`.
|
|
runnableExamples:
|
|
var c: CritBitTree[void]
|
|
incl(c, "key")
|
|
doAssert c.contains("key")
|
|
|
|
result = rawGet(c, key) != nil
|
|
|
|
func hasKey*[T](c: CritBitTree[T], key: string): bool {.inline.} =
|
|
## Alias for `contains <#contains,CritBitTree[T],string>`_.
|
|
result = rawGet(c, key) != nil
|
|
|
|
proc rawInsert[T](c: var CritBitTree[T], key: string): Node[T] =
|
|
if c.root == nil:
|
|
c.root = Node[T](isleaf: true, key: key)
|
|
result = c.root
|
|
else:
|
|
var it = c.root
|
|
while not it.isLeaf:
|
|
let ch = if it.byte < key.len: key[it.byte] else: '\0'
|
|
let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8
|
|
it = it.child[dir]
|
|
|
|
var newOtherBits = 0
|
|
var newByte = 0
|
|
block blockX:
|
|
while newByte < key.len:
|
|
let ch = if newByte < it.key.len: it.key[newByte] else: '\0'
|
|
if ch != key[newByte]:
|
|
newOtherBits = ch.ord xor key[newByte].ord
|
|
break blockX
|
|
inc newByte
|
|
if newByte < it.key.len:
|
|
newOtherBits = it.key[newByte].ord
|
|
else:
|
|
return it
|
|
while (newOtherBits and (newOtherBits-1)) != 0:
|
|
newOtherBits = newOtherBits and (newOtherBits-1)
|
|
newOtherBits = newOtherBits xor 255
|
|
let ch = if newByte < it.key.len: it.key[newByte] else: '\0'
|
|
let dir = (1 + (ord(ch) or newOtherBits)) shr 8
|
|
|
|
var inner: Node[T]
|
|
new inner
|
|
result = Node[T](isLeaf: true, key: key)
|
|
inner.otherBits = chr(newOtherBits)
|
|
inner.byte = newByte
|
|
inner.child[1 - dir] = result
|
|
|
|
var wherep = addr(c.root)
|
|
while true:
|
|
var p = wherep[]
|
|
if p.isLeaf: break
|
|
if p.byte > newByte: break
|
|
if p.byte == newByte and p.otherBits.ord > newOtherBits: break
|
|
let ch = if p.byte < key.len: key[p.byte] else: '\0'
|
|
let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
|
|
wherep = addr(p.child[dir])
|
|
inner.child[dir] = wherep[]
|
|
wherep[] = inner
|
|
inc c.count
|
|
|
|
func exclImpl[T](c: var CritBitTree[T], key: string): int =
|
|
var p = c.root
|
|
var wherep = addr(c.root)
|
|
var whereq: ptr Node[T] = nil
|
|
if p == nil: return c.count
|
|
var dir = 0
|
|
var q: Node[T]
|
|
while not p.isLeaf:
|
|
whereq = wherep
|
|
q = p
|
|
let ch = if p.byte < key.len: key[p.byte] else: '\0'
|
|
dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
|
|
wherep = addr(p.child[dir])
|
|
p = wherep[]
|
|
if p.key == key:
|
|
# else: not in tree at all
|
|
if whereq == nil:
|
|
c.root = nil
|
|
else:
|
|
whereq[] = q.child[1 - dir]
|
|
dec c.count
|
|
|
|
return c.count
|
|
|
|
proc excl*[T](c: var CritBitTree[T], key: string) =
|
|
## Removes `key` (and its associated value) from the set `c`.
|
|
## If the `key` does not exist, nothing happens.
|
|
##
|
|
## **See also:**
|
|
## * `incl proc <#incl,CritBitTree[void],string>`_
|
|
## * `incl proc <#incl,CritBitTree[T],string,T>`_
|
|
runnableExamples:
|
|
var c: CritBitTree[void]
|
|
incl(c, "key")
|
|
excl(c, "key")
|
|
doAssert not c.contains("key")
|
|
|
|
discard exclImpl(c, key)
|
|
|
|
proc missingOrExcl*[T](c: var CritBitTree[T], key: string): bool =
|
|
## Returns true if `c` does not contain the given `key`. If the key
|
|
## does exist, `c.excl(key)` is performed.
|
|
##
|
|
## **See also:**
|
|
## * `excl proc <#excl,CritBitTree[T],string>`_
|
|
## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_
|
|
## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_
|
|
runnableExamples:
|
|
block:
|
|
var c: CritBitTree[void]
|
|
doAssert c.missingOrExcl("key")
|
|
block:
|
|
var c: CritBitTree[void]
|
|
incl(c, "key")
|
|
doAssert not c.missingOrExcl("key")
|
|
doAssert not c.contains("key")
|
|
|
|
let oldCount = c.count
|
|
discard exclImpl(c, key)
|
|
result = c.count == oldCount
|
|
|
|
proc containsOrIncl*[T](c: var CritBitTree[T], key: string, val: T): bool =
|
|
## Returns true if `c` contains the given `key`. If the key does not exist,
|
|
## `c[key] = val` is performed.
|
|
##
|
|
## **See also:**
|
|
## * `incl proc <#incl,CritBitTree[void],string>`_
|
|
## * `incl proc <#incl,CritBitTree[T],string,T>`_
|
|
## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_
|
|
## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_
|
|
runnableExamples:
|
|
block:
|
|
var c: CritBitTree[int]
|
|
doAssert not c.containsOrIncl("key", 42)
|
|
doAssert c.contains("key")
|
|
block:
|
|
var c: CritBitTree[int]
|
|
incl(c, "key", 21)
|
|
doAssert c.containsOrIncl("key", 42)
|
|
doAssert c["key"] == 21
|
|
|
|
let oldCount = c.count
|
|
var n = rawInsert(c, key)
|
|
result = c.count == oldCount
|
|
when T isnot void:
|
|
if not result: n.val = val
|
|
|
|
proc containsOrIncl*(c: var CritBitTree[void], key: string): bool =
|
|
## Returns true if `c` contains the given `key`. If the key does not exist,
|
|
## it is inserted into `c`.
|
|
##
|
|
## **See also:**
|
|
## * `incl proc <#incl,CritBitTree[void],string>`_
|
|
## * `incl proc <#incl,CritBitTree[T],string,T>`_
|
|
## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_
|
|
## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_
|
|
runnableExamples:
|
|
block:
|
|
var c: CritBitTree[void]
|
|
doAssert not c.containsOrIncl("key")
|
|
doAssert c.contains("key")
|
|
block:
|
|
var c: CritBitTree[void]
|
|
incl(c, "key")
|
|
doAssert c.containsOrIncl("key")
|
|
|
|
let oldCount = c.count
|
|
discard rawInsert(c, key)
|
|
result = c.count == oldCount
|
|
|
|
proc inc*(c: var CritBitTree[int]; key: string, val: int = 1) =
|
|
## Increments `c[key]` by `val`.
|
|
runnableExamples:
|
|
var c: CritBitTree[int]
|
|
c["key"] = 1
|
|
inc(c, "key")
|
|
doAssert c["key"] == 2
|
|
|
|
var n = rawInsert(c, key)
|
|
inc n.val, val
|
|
|
|
proc incl*(c: var CritBitTree[void], key: string) =
|
|
## Includes `key` in `c`.
|
|
##
|
|
## **See also:**
|
|
## * `excl proc <#excl,CritBitTree[T],string>`_
|
|
## * `incl proc <#incl,CritBitTree[T],string,T>`_
|
|
runnableExamples:
|
|
var c: CritBitTree[void]
|
|
incl(c, "key")
|
|
doAssert c.hasKey("key")
|
|
|
|
discard rawInsert(c, key)
|
|
|
|
proc incl*[T](c: var CritBitTree[T], key: string, val: T) =
|
|
## Inserts `key` with value `val` into `c`.
|
|
##
|
|
## **See also:**
|
|
## * `excl proc <#excl,CritBitTree[T],string>`_
|
|
## * `incl proc <#incl,CritBitTree[void],string>`_
|
|
runnableExamples:
|
|
var c: CritBitTree[int]
|
|
incl(c, "key", 42)
|
|
doAssert c["key"] == 42
|
|
|
|
var n = rawInsert(c, key)
|
|
n.val = val
|
|
|
|
proc `[]=`*[T](c: var CritBitTree[T], key: string, val: T) =
|
|
## Alias for `incl <#incl,CritBitTree[T],string,T>`_.
|
|
##
|
|
## **See also:**
|
|
## * `[] proc <#[],CritBitTree[T],string>`_
|
|
## * `[] proc <#[],CritBitTree[T],string_2>`_
|
|
var n = rawInsert(c, key)
|
|
n.val = val
|
|
|
|
template get[T](c: CritBitTree[T], key: string): T =
|
|
let n = rawGet(c, key)
|
|
if n == nil:
|
|
raise newException(KeyError, "key not found: " & key)
|
|
|
|
n.val
|
|
|
|
func `[]`*[T](c: CritBitTree[T], key: string): T {.inline.} =
|
|
## Retrieves the value at `c[key]`. If `key` is not in `t`, the
|
|
## `KeyError` exception is raised. One can check with `hasKey` whether
|
|
## the key exists.
|
|
##
|
|
## **See also:**
|
|
## * `[] proc <#[],CritBitTree[T],string_2>`_
|
|
## * `[]= proc <#[]=,CritBitTree[T],string,T>`_
|
|
get(c, key)
|
|
|
|
func `[]`*[T](c: var CritBitTree[T], key: string): var T {.inline.} =
|
|
## Retrieves the value at `c[key]`. The value can be modified.
|
|
## If `key` is not in `t`, the `KeyError` exception is raised.
|
|
##
|
|
## **See also:**
|
|
## * `[] proc <#[],CritBitTree[T],string>`_
|
|
## * `[]= proc <#[]=,CritBitTree[T],string,T>`_
|
|
get(c, key)
|
|
|
|
iterator leaves[T](n: Node[T]): Node[T] =
|
|
if n != nil:
|
|
# XXX actually we could compute the necessary stack size in advance:
|
|
# it's roughly log2(c.count).
|
|
var stack = @[n]
|
|
while stack.len > 0:
|
|
var it = stack.pop
|
|
while not it.isLeaf:
|
|
stack.add(it.child[1])
|
|
it = it.child[0]
|
|
assert(it != nil)
|
|
yield it
|
|
|
|
iterator keys*[T](c: CritBitTree[T]): string =
|
|
## Yields all keys in lexicographical order.
|
|
runnableExamples:
|
|
from std/sequtils import toSeq
|
|
|
|
let c = {"key1": 1, "key2": 2}.toCritBitTree
|
|
doAssert toSeq(c.keys) == @["key1", "key2"]
|
|
|
|
for x in leaves(c.root): yield x.key
|
|
|
|
iterator values*[T](c: CritBitTree[T]): T =
|
|
## Yields all values of `c` in the lexicographical order of the
|
|
## corresponding keys.
|
|
##
|
|
## **See also:**
|
|
## * `mvalues iterator <#mvalues.i,CritBitTree[T]>`_
|
|
runnableExamples:
|
|
from std/sequtils import toSeq
|
|
|
|
let c = {"key1": 1, "key2": 2}.toCritBitTree
|
|
doAssert toSeq(c.values) == @[1, 2]
|
|
|
|
for x in leaves(c.root): yield x.val
|
|
|
|
iterator mvalues*[T](c: var CritBitTree[T]): var T =
|
|
## Yields all values of `c` in the lexicographical order of the
|
|
## corresponding keys. The values can be modified.
|
|
##
|
|
## **See also:**
|
|
## * `values iterator <#values.i,CritBitTree[T]>`_
|
|
for x in leaves(c.root): yield x.val
|
|
|
|
iterator items*[T](c: CritBitTree[T]): string =
|
|
## Alias for `keys <#keys.i,CritBitTree[T]>`_.
|
|
for x in leaves(c.root): yield x.key
|
|
|
|
iterator pairs*[T](c: CritBitTree[T]): tuple[key: string, val: T] =
|
|
## Yields all `(key, value)`-pairs of `c` in the lexicographical order of the
|
|
## corresponding keys.
|
|
##
|
|
## **See also:**
|
|
## * `mpairs iterator <#mpairs.i,CritBitTree[T]>`_
|
|
runnableExamples:
|
|
from std/sequtils import toSeq
|
|
|
|
let c = {"key1": 1, "key2": 2}.toCritBitTree
|
|
doAssert toSeq(c.pairs) == @[(key: "key1", val: 1), (key: "key2", val: 2)]
|
|
|
|
for x in leaves(c.root): yield (x.key, x.val)
|
|
|
|
iterator mpairs*[T](c: var CritBitTree[T]): tuple[key: string, val: var T] =
|
|
## Yields all `(key, value)`-pairs of `c` in the lexicographical order of the
|
|
## corresponding keys. The yielded values can be modified.
|
|
##
|
|
## **See also:**
|
|
## * `pairs iterator <#pairs.i,CritBitTree[T]>`_
|
|
for x in leaves(c.root): yield (x.key, x.val)
|
|
|
|
proc allprefixedAux[T](c: CritBitTree[T], key: string): Node[T] =
|
|
var p = c.root
|
|
var top = p
|
|
if p != nil:
|
|
while not p.isLeaf:
|
|
var q = p
|
|
let ch = if p.byte < key.len: key[p.byte] else: '\0'
|
|
let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8
|
|
p = p.child[dir]
|
|
if q.byte < key.len: top = p
|
|
for i in 0 ..< key.len:
|
|
if i >= p.key.len or p.key[i] != key[i]: return
|
|
result = top
|
|
|
|
iterator keysWithPrefix*[T](c: CritBitTree[T], prefix: string): string =
|
|
## Yields all keys starting with `prefix`.
|
|
runnableExamples:
|
|
from std/sequtils import toSeq
|
|
|
|
let c = {"key1": 42, "key2": 43}.toCritBitTree
|
|
doAssert toSeq(c.keysWithPrefix("key")) == @["key1", "key2"]
|
|
|
|
let top = allprefixedAux(c, prefix)
|
|
for x in leaves(top): yield x.key
|
|
|
|
iterator valuesWithPrefix*[T](c: CritBitTree[T], prefix: string): T =
|
|
## Yields all values of `c` starting with `prefix` of the
|
|
## corresponding keys.
|
|
##
|
|
## **See also:**
|
|
## * `mvaluesWithPrefix iterator <#mvaluesWithPrefix.i,CritBitTree[T],string>`_
|
|
runnableExamples:
|
|
from std/sequtils import toSeq
|
|
|
|
let c = {"key1": 42, "key2": 43}.toCritBitTree
|
|
doAssert toSeq(c.valuesWithPrefix("key")) == @[42, 43]
|
|
|
|
let top = allprefixedAux(c, prefix)
|
|
for x in leaves(top): yield x.val
|
|
|
|
iterator mvaluesWithPrefix*[T](c: var CritBitTree[T], prefix: string): var T =
|
|
## Yields all values of `c` starting with `prefix` of the
|
|
## corresponding keys. The values can be modified.
|
|
##
|
|
## **See also:**
|
|
## * `valuesWithPrefix iterator <#valuesWithPrefix.i,CritBitTree[T],string>`_
|
|
let top = allprefixedAux(c, prefix)
|
|
for x in leaves(top): yield x.val
|
|
|
|
iterator itemsWithPrefix*[T](c: CritBitTree[T], prefix: string): string =
|
|
## Alias for `keysWithPrefix <#keysWithPrefix.i,CritBitTree[T],string>`_.
|
|
let top = allprefixedAux(c, prefix)
|
|
for x in leaves(top): yield x.key
|
|
|
|
iterator pairsWithPrefix*[T](c: CritBitTree[T],
|
|
prefix: string): tuple[key: string, val: T] =
|
|
## Yields all (key, value)-pairs of `c` starting with `prefix`.
|
|
##
|
|
## **See also:**
|
|
## * `mpairsWithPrefix iterator <#mpairsWithPrefix.i,CritBitTree[T],string>`_
|
|
runnableExamples:
|
|
from std/sequtils import toSeq
|
|
|
|
let c = {"key1": 42, "key2": 43}.toCritBitTree
|
|
doAssert toSeq(c.pairsWithPrefix("key")) == @[(key: "key1", val: 42), (key: "key2", val: 43)]
|
|
|
|
let top = allprefixedAux(c, prefix)
|
|
for x in leaves(top): yield (x.key, x.val)
|
|
|
|
iterator mpairsWithPrefix*[T](c: var CritBitTree[T],
|
|
prefix: string): tuple[key: string, val: var T] =
|
|
## Yields all (key, value)-pairs of `c` starting with `prefix`.
|
|
## The yielded values can be modified.
|
|
##
|
|
## **See also:**
|
|
## * `pairsWithPrefix iterator <#pairsWithPrefix.i,CritBitTree[T],string>`_
|
|
let top = allprefixedAux(c, prefix)
|
|
for x in leaves(top): yield (x.key, x.val)
|
|
|
|
func `$`*[T](c: CritBitTree[T]): string =
|
|
## Turns `c` into a string representation.
|
|
runnableExamples:
|
|
doAssert $CritBitTree[int].default == "{:}"
|
|
doAssert $toCritBitTree({"key1": 1, "key2": 2}) == """{"key1": 1, "key2": 2}"""
|
|
doAssert $CritBitTree[void].default == "{}"
|
|
doAssert $toCritBitTree(["key1", "key2"]) == """{"key1", "key2"}"""
|
|
|
|
if c.len == 0:
|
|
when T is void:
|
|
result = "{}"
|
|
else:
|
|
result = "{:}"
|
|
else:
|
|
# an educated guess is better than nothing:
|
|
when T is void:
|
|
const avgItemLen = 8
|
|
else:
|
|
const avgItemLen = 16
|
|
result = newStringOfCap(c.count * avgItemLen)
|
|
result.add("{")
|
|
when T is void:
|
|
for key in keys(c):
|
|
if result.len > 1: result.add(", ")
|
|
result.addQuoted(key)
|
|
else:
|
|
for key, val in pairs(c):
|
|
if result.len > 1: result.add(", ")
|
|
result.addQuoted(key)
|
|
result.add(": ")
|
|
result.addQuoted(val)
|
|
result.add("}")
|
|
|
|
func commonPrefixLen*[T](c: CritBitTree[T]): int {.inline, since((1, 3)).} =
|
|
## Returns the length of the longest common prefix of all keys in `c`.
|
|
## If `c` is empty, returns 0.
|
|
runnableExamples:
|
|
var c: CritBitTree[void]
|
|
doAssert c.commonPrefixLen == 0
|
|
incl(c, "key1")
|
|
doAssert c.commonPrefixLen == 4
|
|
incl(c, "key2")
|
|
doAssert c.commonPrefixLen == 3
|
|
|
|
if c.root != nil:
|
|
if c.root.isLeaf: len(c.root.key)
|
|
else: c.root.byte
|
|
else: 0
|
|
|
|
proc toCritBitTree*[T](pairs: openArray[(string, T)]): CritBitTree[T] {.since: (1, 3).} =
|
|
## Creates a new `CritBitTree` that contains the given `pairs`.
|
|
runnableExamples:
|
|
doAssert {"a": "0", "b": "1", "c": "2"}.toCritBitTree is CritBitTree[string]
|
|
doAssert {"a": 0, "b": 1, "c": 2}.toCritBitTree is CritBitTree[int]
|
|
|
|
for item in pairs: result.incl item[0], item[1]
|
|
|
|
proc toCritBitTree*(items: openArray[string]): CritBitTree[void] {.since: (1, 3).} =
|
|
## Creates a new `CritBitTree` that contains the given `items`.
|
|
runnableExamples:
|
|
doAssert ["a", "b", "c"].toCritBitTree is CritBitTree[void]
|
|
|
|
for item in items: result.incl item
|