added hash procs for handling portions of strings/arrays/seqs.

added tests at bottom of file
changed some doco layout

Makes hashing iteratively through buffers faster when you
don't have to pass copied portions of the buffer to the
hash function
This commit is contained in:
JamesP
2015-10-07 13:03:31 +10:00
parent 80ee72956a
commit 07eaafca69

View File

@@ -8,9 +8,10 @@
#
## This module implements efficient computations of hash values for diverse
## Nim types. All the procs are based on these two building blocks: the `!&
## proc <#!&>`_ used to start or mix a hash value, and the `!$ proc <#!$>`_
## used to *finish* the hash value. If you want to implement hash procs for
## Nim types. All the procs are based on these two building blocks:
## - `!& proc <#!&>`_ used to start or mix a hash value, and
## - `!$ proc <#!$>`_ used to *finish* the hash value.
## If you want to implement hash procs for
## your custom types you will end up writing the following kind of skeleton of
## code:
##
@@ -108,7 +109,7 @@ proc hash*(x: int): Hash {.inline.} =
result = x
proc hash*(x: int64): Hash {.inline.} =
## efficient hashing of integers
## efficient hashing of int64 integers
result = toU32(x)
proc hash*(x: char): Hash {.inline.} =
@@ -126,6 +127,16 @@ proc hash*(x: string): Hash =
h = h !& ord(x[i])
result = !$h
proc hash*(sBuf: string, sPos, ePos: int): Hash =
## efficient hashing of a string buffer, from starting
## position `sPos` to ending position `ePos`
##
## ``hash(myStr, 0, myStr.high)`` is equivalent to ``hash(myStr)``
var h: Hash = 0
for i in sPos..ePos:
h = h !& ord(sBuf[i])
result = !$h
proc hashIgnoreStyle*(x: string): Hash =
## efficient hashing of strings; style is ignored
var h: Hash = 0
@@ -145,6 +156,27 @@ proc hashIgnoreStyle*(x: string): Hash =
result = !$h
proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash =
## efficient hashing of a string buffer, from starting
## position `sPos` to ending position `ePos`; style is ignored
##
## ``hashIgnoreStyle(myBuf, 0, myBuf.high)`` is equivalent
## to ``hashIgnoreStyle(myBuf)``
var h: Hash = 0
var i = sPos
while i <= ePos:
var c = sBuf[i]
if c == '_':
inc(i)
elif isMagicIdentSeparatorRune(cstring(sBuf), i):
inc(i, magicIdentSeparatorRuneByteWidth)
else:
if c in {'A'..'Z'}:
c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
h = h !& ord(c)
inc(i)
result = !$h
proc hashIgnoreCase*(x: string): Hash =
## efficient hashing of strings; case is ignored
var h: Hash = 0
@@ -155,7 +187,22 @@ proc hashIgnoreCase*(x: string): Hash =
h = h !& ord(c)
result = !$h
proc hashIgnoreCase*(sBuf: string, sPos, ePos: int): Hash =
## efficient hashing of a string buffer, from starting
## position `sPos` to ending position `ePos`; case is ignored
##
## ``hashIgnoreCase(myBuf, 0, myBuf.high)`` is equivalent
## to ``hashIgnoreCase(myBuf)``
var h: Hash = 0
for i in sPos..ePos:
var c = sBuf[i]
if c in {'A'..'Z'}:
c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
h = h !& ord(c)
result = !$h
proc hash*(x: float): Hash {.inline.} =
## efficient hashing of floats.
var y = x + 1.0
result = cast[ptr Hash](addr(y))[]
@@ -173,10 +220,29 @@ proc hash*[T: tuple](x: T): Hash =
result = !$result
proc hash*[A](x: openArray[A]): Hash =
## efficient hashing of arrays and sequences.
for it in items(x): result = result !& hash(it)
result = !$result
proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
## efficient hashing of portions of arrays and sequences.
##
## ``hash(myBuf, 0, myBuf.high)`` is equivalent to ``hash(myBuf)``
for i in sPos..ePos:
result = result !& hash(aBuf[i])
result = !$result
proc hash*[A](x: set[A]): Hash =
## efficient hashing of sets.
for it in items(x): result = result !& hash(it)
result = !$result
when isMainModule:
doAssert( hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13) )
doAssert( hashIgnoreCase("aa bb aaaa1234") == hash("aa bb aaaa1234") )
doAssert( hashIgnoreStyle("aa bb aaaa1234") == hashIgnoreCase("aa bb aaaa1234") )
let xx = @['H','e','l','l','o']
let ss = "Hello"
doAssert( hash(xx) == hash(ss) )
doAssert( hash(xx) == hash(xx, 0, xx.high) )
doAssert( hash(ss) == hash(ss, 0, ss.high) )