mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-29 01:14:41 +00:00
added tests at bottom of file changed some doco layout Makes hashing iteratively through buffers faster when you don't have to pass copied portions of the buffer to the hash function
249 lines
7.1 KiB
Nim
249 lines
7.1 KiB
Nim
#
|
|
#
|
|
# Nim's Runtime Library
|
|
# (c) Copyright 2012 Andreas Rumpf
|
|
#
|
|
# See the file "copying.txt", included in this
|
|
# distribution, for details about the copyright.
|
|
#
|
|
|
|
## This module implements efficient computations of hash values for diverse
|
|
## Nim types. All the procs are based on these two building blocks:
|
|
## - `!& proc <#!&>`_ used to start or mix a hash value, and
|
|
## - `!$ proc <#!$>`_ used to *finish* the hash value.
|
|
## If you want to implement hash procs for
|
|
## your custom types you will end up writing the following kind of skeleton of
|
|
## code:
|
|
##
|
|
## .. code-block:: Nim
|
|
## proc hash(x: Something): Hash =
|
|
## ## Computes a Hash from `x`.
|
|
## var h: Hash = 0
|
|
## # Iterate over parts of `x`.
|
|
## for xAtom in x:
|
|
## # Mix the atom with the partial hash.
|
|
## h = h !& xAtom
|
|
## # Finish the hash.
|
|
## result = !$h
|
|
##
|
|
## If your custom types contain fields for which there already is a hash proc,
|
|
## like for example objects made up of ``strings``, you can simply hash
|
|
## together the hash value of the individual fields:
|
|
##
|
|
## .. code-block:: Nim
|
|
## proc hash(x: Something): Hash =
|
|
## ## Computes a Hash from `x`.
|
|
## var h: Hash = 0
|
|
## h = h !& hash(x.foo)
|
|
## h = h !& hash(x.bar)
|
|
## result = !$h
|
|
|
|
import
|
|
strutils, etcpriv
|
|
|
|
type
|
|
Hash* = int ## a hash value; hash tables using these values should
|
|
## always have a size of a power of two and can use the ``and``
|
|
## operator instead of ``mod`` for truncation of the hash value.
|
|
{.deprecated: [THash: Hash].}
|
|
|
|
proc `!&`*(h: Hash, val: int): Hash {.inline.} =
|
|
## mixes a hash value `h` with `val` to produce a new hash value. This is
|
|
## only needed if you need to implement a hash proc for a new datatype.
|
|
result = h +% val
|
|
result = result +% result shl 10
|
|
result = result xor (result shr 6)
|
|
|
|
proc `!$`*(h: Hash): Hash {.inline.} =
|
|
## finishes the computation of the hash value. This is
|
|
## only needed if you need to implement a hash proc for a new datatype.
|
|
result = h +% h shl 3
|
|
result = result xor (result shr 11)
|
|
result = result +% result shl 15
|
|
|
|
proc hashData*(data: pointer, size: int): Hash =
|
|
## hashes an array of bytes of size `size`
|
|
var h: Hash = 0
|
|
when defined(js):
|
|
var p: cstring
|
|
asm """`p` = `Data`;"""
|
|
else:
|
|
var p = cast[cstring](data)
|
|
var i = 0
|
|
var s = size
|
|
while s > 0:
|
|
h = h !& ord(p[i])
|
|
inc(i)
|
|
dec(s)
|
|
result = !$h
|
|
|
|
when defined(js):
|
|
var objectID = 0
|
|
|
|
proc hash*(x: pointer): Hash {.inline.} =
|
|
## efficient hashing of pointers
|
|
when defined(js):
|
|
asm """
|
|
if (typeof `x` == "object") {
|
|
if ("_NimID" in `x`)
|
|
`result` = `x`["_NimID"];
|
|
else {
|
|
`result` = ++`objectID`;
|
|
`x`["_NimID"] = `result`;
|
|
}
|
|
}
|
|
"""
|
|
else:
|
|
result = (cast[Hash](x)) shr 3 # skip the alignment
|
|
|
|
when not defined(booting):
|
|
proc hash*[T: proc](x: T): Hash {.inline.} =
|
|
## efficient hashing of proc vars; closures are supported too.
|
|
when T is "closure":
|
|
result = hash(rawProc(x)) !& hash(rawEnv(x))
|
|
else:
|
|
result = hash(pointer(x))
|
|
|
|
proc hash*(x: int): Hash {.inline.} =
|
|
## efficient hashing of integers
|
|
result = x
|
|
|
|
proc hash*(x: int64): Hash {.inline.} =
|
|
## efficient hashing of int64 integers
|
|
result = toU32(x)
|
|
|
|
proc hash*(x: char): Hash {.inline.} =
|
|
## efficient hashing of characters
|
|
result = ord(x)
|
|
|
|
proc hash*[T: Ordinal](x: T): Hash {.inline.} =
|
|
## efficient hashing of other ordinal types (e.g., enums)
|
|
result = ord(x)
|
|
|
|
proc hash*(x: string): Hash =
|
|
## efficient hashing of strings
|
|
var h: Hash = 0
|
|
for i in 0..x.len-1:
|
|
h = h !& ord(x[i])
|
|
result = !$h
|
|
|
|
proc hash*(sBuf: string, sPos, ePos: int): Hash =
|
|
## efficient hashing of a string buffer, from starting
|
|
## position `sPos` to ending position `ePos`
|
|
##
|
|
## ``hash(myStr, 0, myStr.high)`` is equivalent to ``hash(myStr)``
|
|
var h: Hash = 0
|
|
for i in sPos..ePos:
|
|
h = h !& ord(sBuf[i])
|
|
result = !$h
|
|
|
|
proc hashIgnoreStyle*(x: string): Hash =
|
|
## efficient hashing of strings; style is ignored
|
|
var h: Hash = 0
|
|
var i = 0
|
|
let xLen = x.len
|
|
while i < xLen:
|
|
var c = x[i]
|
|
if c == '_':
|
|
inc(i)
|
|
elif isMagicIdentSeparatorRune(cstring(x), i):
|
|
inc(i, magicIdentSeparatorRuneByteWidth)
|
|
else:
|
|
if c in {'A'..'Z'}:
|
|
c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
|
|
h = h !& ord(c)
|
|
inc(i)
|
|
|
|
result = !$h
|
|
|
|
proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash =
|
|
## efficient hashing of a string buffer, from starting
|
|
## position `sPos` to ending position `ePos`; style is ignored
|
|
##
|
|
## ``hashIgnoreStyle(myBuf, 0, myBuf.high)`` is equivalent
|
|
## to ``hashIgnoreStyle(myBuf)``
|
|
var h: Hash = 0
|
|
var i = sPos
|
|
while i <= ePos:
|
|
var c = sBuf[i]
|
|
if c == '_':
|
|
inc(i)
|
|
elif isMagicIdentSeparatorRune(cstring(sBuf), i):
|
|
inc(i, magicIdentSeparatorRuneByteWidth)
|
|
else:
|
|
if c in {'A'..'Z'}:
|
|
c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
|
|
h = h !& ord(c)
|
|
inc(i)
|
|
result = !$h
|
|
|
|
proc hashIgnoreCase*(x: string): Hash =
|
|
## efficient hashing of strings; case is ignored
|
|
var h: Hash = 0
|
|
for i in 0..x.len-1:
|
|
var c = x[i]
|
|
if c in {'A'..'Z'}:
|
|
c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
|
|
h = h !& ord(c)
|
|
result = !$h
|
|
|
|
proc hashIgnoreCase*(sBuf: string, sPos, ePos: int): Hash =
|
|
## efficient hashing of a string buffer, from starting
|
|
## position `sPos` to ending position `ePos`; case is ignored
|
|
##
|
|
## ``hashIgnoreCase(myBuf, 0, myBuf.high)`` is equivalent
|
|
## to ``hashIgnoreCase(myBuf)``
|
|
var h: Hash = 0
|
|
for i in sPos..ePos:
|
|
var c = sBuf[i]
|
|
if c in {'A'..'Z'}:
|
|
c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
|
|
h = h !& ord(c)
|
|
result = !$h
|
|
|
|
proc hash*(x: float): Hash {.inline.} =
|
|
## efficient hashing of floats.
|
|
var y = x + 1.0
|
|
result = cast[ptr Hash](addr(y))[]
|
|
|
|
|
|
# Forward declarations before methods that hash containers. This allows
|
|
# containers to contain other containers
|
|
proc hash*[A](x: openArray[A]): Hash
|
|
proc hash*[A](x: set[A]): Hash
|
|
|
|
|
|
proc hash*[T: tuple](x: T): Hash =
|
|
## efficient hashing of tuples.
|
|
for f in fields(x):
|
|
result = result !& hash(f)
|
|
result = !$result
|
|
|
|
proc hash*[A](x: openArray[A]): Hash =
|
|
## efficient hashing of arrays and sequences.
|
|
for it in items(x): result = result !& hash(it)
|
|
result = !$result
|
|
|
|
proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
|
|
## efficient hashing of portions of arrays and sequences.
|
|
##
|
|
## ``hash(myBuf, 0, myBuf.high)`` is equivalent to ``hash(myBuf)``
|
|
for i in sPos..ePos:
|
|
result = result !& hash(aBuf[i])
|
|
result = !$result
|
|
|
|
proc hash*[A](x: set[A]): Hash =
|
|
## efficient hashing of sets.
|
|
for it in items(x): result = result !& hash(it)
|
|
result = !$result
|
|
|
|
when isMainModule:
|
|
doAssert( hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13) )
|
|
doAssert( hashIgnoreCase("aa bb aaaa1234") == hash("aa bb aaaa1234") )
|
|
doAssert( hashIgnoreStyle("aa bb aaaa1234") == hashIgnoreCase("aa bb aaaa1234") )
|
|
let xx = @['H','e','l','l','o']
|
|
let ss = "Hello"
|
|
doAssert( hash(xx) == hash(ss) )
|
|
doAssert( hash(xx) == hash(xx, 0, xx.high) )
|
|
doAssert( hash(ss) == hash(ss, 0, ss.high) )
|