mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-20 14:25:23 +00:00
faster hashing (#11203)
* faster hashing
* multibyte hashing for:
  * string and string slices
  * cstring
  * string, ignoring case
  * string, ignoring style
  * openArray of byte or char
* address the review comments
* use optimized version for all ints
* add more tests
* make it work in VM
* put warnings about differences between CT and runtime
* minor style tweaks
This commit is contained in:
@@ -53,6 +53,9 @@ type
|
||||
## always have a size of a power of two and can use the ``and``
|
||||
## operator instead of ``mod`` for truncation of the hash value.
|
||||
|
||||
const
|
||||
IntSize = sizeof(int)
|
||||
|
||||
proc `!&`*(h: Hash, val: int): Hash {.inline.} =
|
||||
## Mixes a hash value `h` with `val` to produce a new hash value.
|
||||
##
|
||||
@@ -136,6 +139,33 @@ proc hash*[T: Ordinal](x: T): Hash {.inline.} =
|
||||
## Efficient hashing of other ordinal types (e.g. enums).
|
||||
result = ord(x)
|
||||
|
||||
proc hash*(x: float): Hash {.inline.} =
  ## Efficient hashing of floats.
  ##
  ## The hash is the leading `sizeof(Hash)` bytes of the in-memory
  ## representation of `x + 1.0`, reinterpreted as a `Hash`.
  # Adding 1.0 maps both 0.0 and -0.0 to 1.0, so the two zeros (which compare
  # equal) share one hash -- presumably the reason for the offset.
  var shifted = x + 1.0
  let bits = cast[ptr Hash](addr(shifted))
  result = bits[]
|
||||
|
||||
template bytewiseHashing(result: Hash, x: typed, start, stop: int) =
  ## Mixes `hash(x[i])` into `result` for every index `i` in `start .. stop`
  ## (both ends included), one element at a time, and then finalizes
  ## `result` with `!$`. An empty range (`stop < start`) only finalizes.
  for pos in countup(start, stop):
    let elemHash = hash(x[pos])
    result = result !& elemHash
  result = !$result
|
||||
|
||||
template hashImpl(result: Hash, x: typed, start, stop: int) =
  ## Hashes the elements `x[start .. stop]` (both ends included) into
  ## `result` and finalizes it.
  ##
  ## Elements are consumed one machine word (`IntSize` bytes) at a time:
  ## `stepSize` elements are packed into a single `int` and mixed in with
  ## `!&`. Whatever does not fill a whole word is handed to
  ## `bytewiseHashing`, which also applies the finishing `!$` step.
  ##
  ## At compile time (`nimvm`) the word is assembled manually from
  ## `ord(x[i+j])`; at run time it is read directly from memory through a
  ## pointer cast.
  let
    first = start
    last = stop   # bind once: `stop` may be an expression such as `high(x)`
    elementSize = sizeof(x[start])
    stepSize = IntSize div elementSize
  var i = first
  # `stepSize` is 0 when a single element is wider than an `int` (for example
  # 8-byte integers on a target with a 4-byte `int`). The word loop would then
  # never advance `i`, so skip it and let the per-element tail do all the work.
  if stepSize > 0:
    while i <= last + 1 - stepSize:
      var n = 0
      when nimvm:
        # we cannot cast in VM, so we do it manually
        for j in countdown(stepSize - 1, 0):
          n = (n shl (8 * elementSize)) or ord(x[i + j])
      else:
        n = cast[ptr Hash](unsafeAddr x[i])[]
      result = result !& n
      i += stepSize
  bytewiseHashing(result, x, i, last) # hash the remaining elements and finish
|
||||
|
||||
proc hash*(x: string): Hash =
|
||||
## Efficient hashing of strings.
|
||||
##
|
||||
@@ -145,28 +175,16 @@ proc hash*(x: string): Hash =
|
||||
runnableExamples:
|
||||
doAssert hash("abracadabra") != hash("AbracadabrA")
|
||||
|
||||
var h: Hash = 0
|
||||
for i in 0..x.len-1:
|
||||
h = h !& ord(x[i])
|
||||
result = !$h
|
||||
hashImpl(result, x, 0, high(x))
|
||||
|
||||
proc hash*(x: cstring): Hash =
|
||||
## Efficient hashing of null-terminated strings.
|
||||
runnableExamples:
|
||||
doAssert hash(cstring"abracadabra") == hash("abracadabra")
|
||||
doAssert hash(cstring"AbracadabrA") == hash("AbracadabrA")
|
||||
doAssert hash(cstring"abracadabra") != hash(cstring"AbracadabrA")
|
||||
|
||||
var h: Hash = 0
|
||||
var i = 0
|
||||
when defined(js):
|
||||
while i < x.len:
|
||||
h = h !& ord(x[i])
|
||||
inc i
|
||||
else:
|
||||
while x[i] != 0.char:
|
||||
h = h !& ord(x[i])
|
||||
inc i
|
||||
result = !$h
|
||||
hashImpl(result, x, 0, high(x))
|
||||
|
||||
proc hash*(sBuf: string, sPos, ePos: int): Hash =
|
||||
## Efficient hashing of a string buffer, from starting
|
||||
@@ -177,18 +195,18 @@ proc hash*(sBuf: string, sPos, ePos: int): Hash =
|
||||
var a = "abracadabra"
|
||||
doAssert hash(a, 0, 3) == hash(a, 7, 10)
|
||||
|
||||
var h: Hash = 0
|
||||
for i in sPos..ePos:
|
||||
h = h !& ord(sBuf[i])
|
||||
result = !$h
|
||||
hashImpl(result, sBuf, sPos, ePos)
|
||||
|
||||
proc hashIgnoreStyle*(x: string): Hash =
|
||||
## Efficient hashing of strings; style is ignored.
|
||||
##
|
||||
## **Note:** This uses different hashing algorithm than `hash(string)`.
|
||||
##
|
||||
## See also:
|
||||
## * `hashIgnoreCase <#hashIgnoreCase,string>`_
|
||||
runnableExamples:
|
||||
doAssert hashIgnoreStyle("aBr_aCa_dAB_ra") == hash("abracadabra")
|
||||
doAssert hashIgnoreStyle("aBr_aCa_dAB_ra") == hashIgnoreStyle("abracadabra")
|
||||
doAssert hashIgnoreStyle("abcdefghi") != hash("abcdefghi")
|
||||
|
||||
var h: Hash = 0
|
||||
var i = 0
|
||||
@@ -202,13 +220,14 @@ proc hashIgnoreStyle*(x: string): Hash =
|
||||
c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
|
||||
h = h !& ord(c)
|
||||
inc(i)
|
||||
|
||||
result = !$h
|
||||
|
||||
proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash =
|
||||
## Efficient hashing of a string buffer, from starting
|
||||
## position `sPos` to ending position `ePos` (included); style is ignored.
|
||||
##
|
||||
## **Note:** This uses different hashing algorithm than `hash(string)`.
|
||||
##
|
||||
## ``hashIgnoreStyle(myBuf, 0, myBuf.high)`` is equivalent
|
||||
## to ``hashIgnoreStyle(myBuf)``.
|
||||
runnableExamples:
|
||||
@@ -231,10 +250,13 @@ proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash =
|
||||
proc hashIgnoreCase*(x: string): Hash =
|
||||
## Efficient hashing of strings; case is ignored.
|
||||
##
|
||||
## **Note:** This uses different hashing algorithm than `hash(string)`.
|
||||
##
|
||||
## See also:
|
||||
## * `hashIgnoreStyle <#hashIgnoreStyle,string>`_
|
||||
runnableExamples:
|
||||
doAssert hashIgnoreCase("ABRAcaDABRA") == hashIgnoreCase("abRACAdabra")
|
||||
doAssert hashIgnoreCase("abcdefghi") != hash("abcdefghi")
|
||||
|
||||
var h: Hash = 0
|
||||
for i in 0..x.len-1:
|
||||
@@ -248,6 +270,8 @@ proc hashIgnoreCase*(sBuf: string, sPos, ePos: int): Hash =
|
||||
## Efficient hashing of a string buffer, from starting
|
||||
## position `sPos` to ending position `ePos` (included); case is ignored.
|
||||
##
|
||||
## **Note:** This uses different hashing algorithm than `hash(string)`.
|
||||
##
|
||||
## ``hashIgnoreCase(myBuf, 0, myBuf.high)`` is equivalent
|
||||
## to ``hashIgnoreCase(myBuf)``.
|
||||
runnableExamples:
|
||||
@@ -262,11 +286,6 @@ proc hashIgnoreCase*(sBuf: string, sPos, ePos: int): Hash =
|
||||
h = h !& ord(c)
|
||||
result = !$h
|
||||
|
||||
proc hash*(x: float): Hash {.inline.} =
|
||||
## Efficient hashing of floats.
|
||||
var y = x + 1.0
|
||||
result = cast[ptr Hash](addr(y))[]
|
||||
|
||||
|
||||
# Forward declarations before methods that hash containers. This allows
|
||||
# containers to contain other containers
|
||||
@@ -282,8 +301,10 @@ proc hash*[T: tuple](x: T): Hash =
|
||||
|
||||
proc hash*[A](x: openArray[A]): Hash =
|
||||
## Efficient hashing of arrays and sequences.
|
||||
for it in items(x): result = result !& hash(it)
|
||||
result = !$result
|
||||
when A is char|SomeInteger:
|
||||
hashImpl(result, x, 0, x.high)
|
||||
else:
|
||||
bytewiseHashing(result, x, 0, x.high)
|
||||
|
||||
proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
|
||||
## Efficient hashing of portions of arrays and sequences, from starting
|
||||
@@ -294,23 +315,55 @@ proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
|
||||
let a = [1, 2, 5, 1, 2, 6]
|
||||
doAssert hash(a, 0, 1) == hash(a, 3, 4)
|
||||
|
||||
for i in sPos..ePos:
|
||||
result = result !& hash(aBuf[i])
|
||||
result = !$result
|
||||
when A is char|SomeInteger:
|
||||
hashImpl(result, aBuf, sPos, ePos)
|
||||
else:
|
||||
bytewiseHashing(result, aBuf, sPos, ePos)
|
||||
|
||||
proc hash*[A](x: set[A]): Hash =
|
||||
## Efficient hashing of sets.
|
||||
for it in items(x): result = result !& hash(it)
|
||||
for it in items(x):
|
||||
result = result !& hash(it)
|
||||
result = !$result
|
||||
|
||||
|
||||
when isMainModule:
|
||||
doAssert( hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13) )
|
||||
doAssert( hash("aa bb aaaa1234") == hash(cstring("aa bb aaaa1234")) )
|
||||
doAssert( hashIgnoreCase("aA bb aAAa1234") == hash("aa bb aaaa1234") )
|
||||
doAssert( hashIgnoreStyle("aa_bb_AAaa1234") == hashIgnoreCase("aaBBAAAa1234") )
|
||||
let xx = @['H','e','l','l','o']
|
||||
let ss = "Hello"
|
||||
doAssert( hash(xx) == hash(ss) )
|
||||
doAssert( hash(xx) == hash(xx, 0, xx.high) )
|
||||
doAssert( hash(ss) == hash(ss, 0, ss.high) )
|
||||
block empty:
|
||||
var
|
||||
a = ""
|
||||
b = newSeq[char]()
|
||||
c = newSeq[int]()
|
||||
doAssert hash(a) == 0
|
||||
doAssert hash(b) == 0
|
||||
doAssert hash(c) == 0
|
||||
doAssert hashIgnoreCase(a) == 0
|
||||
doAssert hashIgnoreStyle(a) == 0
|
||||
block sameButDifferent:
|
||||
doAssert hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13)
|
||||
doAssert hash("aa bb aaaa1234") == hash(cstring"aa bb aaaa1234")
|
||||
doAssert hashIgnoreCase("aA bb aAAa1234") == hashIgnoreCase("aa bb aaaa1234")
|
||||
doAssert hashIgnoreStyle("aa_bb_AAaa1234") == hashIgnoreCase("aaBBAAAa1234")
|
||||
block smallSize: # no multibyte hashing
|
||||
let
|
||||
xx = @['H','e','l','l','o']
|
||||
ii = @[72, 101, 108, 108, 111]
|
||||
ss = "Hello"
|
||||
doAssert hash(xx) == hash(ii)
|
||||
doAssert hash(xx) == hash(ss)
|
||||
doAssert hash(xx) == hash(xx, 0, xx.high)
|
||||
doAssert hash(ss) == hash(ss, 0, ss.high)
|
||||
block largeSize: # longer than 8 characters, should trigger multibyte hashing
|
||||
let
|
||||
xx = @['H','e','l','l','o']
|
||||
xxl = @['H','e','l','l','o','w','e','e','n','s']
|
||||
ssl = "Helloweens"
|
||||
doAssert hash(xxl) == hash(ssl)
|
||||
doAssert hash(xxl) == hash(xxl, 0, xxl.high)
|
||||
doAssert hash(ssl) == hash(ssl, 0, ssl.high)
|
||||
doAssert hash(xx) == hash(xxl, 0, 4)
|
||||
block misc:
|
||||
let
|
||||
a = [1'u8, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4]
|
||||
b = [1'i8, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4]
|
||||
doAssert hash(a) == hash(b)
|
||||
doAssert hash(a, 2, 5) == hash(b, 2, 5)
|
||||
|
||||
Reference in New Issue
Block a user