Files
Nim/lib/pure/hashes.nim
JamesP 07eaafca69 added hash procs for handling portions of strings/arrays/seqs.
added tests at bottom of file
changed some doco layout

Makes hashing iteratively through buffers faster when you
don't have to pass copied portions of the buffer to the
hash function
2015-10-07 13:03:31 +10:00

249 lines
7.1 KiB
Nim

#
#
# Nim's Runtime Library
# (c) Copyright 2012 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements efficient computations of hash values for diverse
## Nim types. All the procs are based on these two building blocks:
## - `!& proc <#!&>`_ used to start or mix a hash value, and
## - `!$ proc <#!$>`_ used to *finish* the hash value.
## If you want to implement hash procs for
## your custom types you will end up writing the following kind of skeleton of
## code:
##
## .. code-block:: Nim
## proc hash(x: Something): Hash =
## ## Computes a Hash from `x`.
## var h: Hash = 0
## # Iterate over parts of `x`.
## for xAtom in x:
## # Mix the atom with the partial hash.
## h = h !& xAtom
## # Finish the hash.
## result = !$h
##
## If your custom types contain fields for which there already is a hash proc,
## like for example objects made up of ``strings``, you can simply hash
## together the hash value of the individual fields:
##
## .. code-block:: Nim
## proc hash(x: Something): Hash =
## ## Computes a Hash from `x`.
## var h: Hash = 0
## h = h !& hash(x.foo)
## h = h !& hash(x.bar)
## result = !$h
import
strutils, etcpriv
type
Hash* = int ## a hash value; hash tables using these values should
## always have a size of a power of two and can use the ``and``
## operator instead of ``mod`` for truncation of the hash value.
{.deprecated: [THash: Hash].}
proc `!&`*(h: Hash, val: int): Hash {.inline.} =
## mixes a hash value `h` with `val` to produce a new hash value. This is
## only needed if you need to implement a hash proc for a new datatype.
result = h +% val
result = result +% result shl 10
result = result xor (result shr 6)
proc `!$`*(h: Hash): Hash {.inline.} =
## finishes the computation of the hash value. This is
## only needed if you need to implement a hash proc for a new datatype.
result = h +% h shl 3
result = result xor (result shr 11)
result = result +% result shl 15
proc hashData*(data: pointer, size: int): Hash =
## hashes an array of bytes of size `size`
var h: Hash = 0
when defined(js):
var p: cstring
asm """`p` = `Data`;"""
else:
var p = cast[cstring](data)
var i = 0
var s = size
while s > 0:
h = h !& ord(p[i])
inc(i)
dec(s)
result = !$h
when defined(js):
var objectID = 0
proc hash*(x: pointer): Hash {.inline.} =
## efficient hashing of pointers
when defined(js):
asm """
if (typeof `x` == "object") {
if ("_NimID" in `x`)
`result` = `x`["_NimID"];
else {
`result` = ++`objectID`;
`x`["_NimID"] = `result`;
}
}
"""
else:
result = (cast[Hash](x)) shr 3 # skip the alignment
when not defined(booting):
proc hash*[T: proc](x: T): Hash {.inline.} =
## efficient hashing of proc vars; closures are supported too.
when T is "closure":
result = hash(rawProc(x)) !& hash(rawEnv(x))
else:
result = hash(pointer(x))
proc hash*(x: int): Hash {.inline.} =
## efficient hashing of integers
result = x
proc hash*(x: int64): Hash {.inline.} =
## efficient hashing of int64 integers
result = toU32(x)
proc hash*(x: char): Hash {.inline.} =
## efficient hashing of characters
result = ord(x)
proc hash*[T: Ordinal](x: T): Hash {.inline.} =
## efficient hashing of other ordinal types (e.g., enums)
result = ord(x)
proc hash*(x: string): Hash =
## efficient hashing of strings
var h: Hash = 0
for i in 0..x.len-1:
h = h !& ord(x[i])
result = !$h
proc hash*(sBuf: string, sPos, ePos: int): Hash =
## efficient hashing of a string buffer, from starting
## position `sPos` to ending position `ePos`
##
## ``hash(myStr, 0, myStr.high)`` is equivalent to ``hash(myStr)``
var h: Hash = 0
for i in sPos..ePos:
h = h !& ord(sBuf[i])
result = !$h
proc hashIgnoreStyle*(x: string): Hash =
## efficient hashing of strings; style is ignored
var h: Hash = 0
var i = 0
let xLen = x.len
while i < xLen:
var c = x[i]
if c == '_':
inc(i)
elif isMagicIdentSeparatorRune(cstring(x), i):
inc(i, magicIdentSeparatorRuneByteWidth)
else:
if c in {'A'..'Z'}:
c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
h = h !& ord(c)
inc(i)
result = !$h
proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash =
## efficient hashing of a string buffer, from starting
## position `sPos` to ending position `ePos`; style is ignored
##
## ``hashIgnoreStyle(myBuf, 0, myBuf.high)`` is equivalent
## to ``hashIgnoreStyle(myBuf)``
var h: Hash = 0
var i = sPos
while i <= ePos:
var c = sBuf[i]
if c == '_':
inc(i)
elif isMagicIdentSeparatorRune(cstring(sBuf), i):
inc(i, magicIdentSeparatorRuneByteWidth)
else:
if c in {'A'..'Z'}:
c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
h = h !& ord(c)
inc(i)
result = !$h
proc hashIgnoreCase*(x: string): Hash =
## efficient hashing of strings; case is ignored
var h: Hash = 0
for i in 0..x.len-1:
var c = x[i]
if c in {'A'..'Z'}:
c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
h = h !& ord(c)
result = !$h
proc hashIgnoreCase*(sBuf: string, sPos, ePos: int): Hash =
## efficient hashing of a string buffer, from starting
## position `sPos` to ending position `ePos`; case is ignored
##
## ``hashIgnoreCase(myBuf, 0, myBuf.high)`` is equivalent
## to ``hashIgnoreCase(myBuf)``
var h: Hash = 0
for i in sPos..ePos:
var c = sBuf[i]
if c in {'A'..'Z'}:
c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
h = h !& ord(c)
result = !$h
proc hash*(x: float): Hash {.inline.} =
## efficient hashing of floats.
var y = x + 1.0
result = cast[ptr Hash](addr(y))[]
# Forward declarations before methods that hash containers. This allows
# containers to contain other containers
proc hash*[A](x: openArray[A]): Hash
proc hash*[A](x: set[A]): Hash
proc hash*[T: tuple](x: T): Hash =
## efficient hashing of tuples.
for f in fields(x):
result = result !& hash(f)
result = !$result
proc hash*[A](x: openArray[A]): Hash =
## efficient hashing of arrays and sequences.
for it in items(x): result = result !& hash(it)
result = !$result
proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
## efficient hashing of portions of arrays and sequences.
##
## ``hash(myBuf, 0, myBuf.high)`` is equivalent to ``hash(myBuf)``
for i in sPos..ePos:
result = result !& hash(aBuf[i])
result = !$result
proc hash*[A](x: set[A]): Hash =
## efficient hashing of sets.
for it in items(x): result = result !& hash(it)
result = !$result
when isMainModule:
doAssert( hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13) )
doAssert( hashIgnoreCase("aa bb aaaa1234") == hash("aa bb aaaa1234") )
doAssert( hashIgnoreStyle("aa bb aaaa1234") == hashIgnoreCase("aa bb aaaa1234") )
let xx = @['H','e','l','l','o']
let ss = "Hello"
doAssert( hash(xx) == hash(ss) )
doAssert( hash(xx) == hash(xx, 0, xx.high) )
doAssert( hash(ss) == hash(ss, 0, ss.high) )