From 3e6ec65dd97698a9a676ba18998848a59f0ab88e Mon Sep 17 00:00:00 2001
From: Jeroen van Rijn
Date: Fri, 15 Jul 2022 13:02:07 +0200
Subject: [PATCH] Fix murmur64a's tail handling.

Also, split up the murmur64 proc into murmur64a and murmur64b as they're distinct hashes with their own output.
---
Review notes (text between "---" and the diff is not part of the commit message):

murmur64a consumes the input as whole u64 words and then mixes the remaining
len(data)&7 bytes, indexed from `offset`, the byte position just past the last
whole word.

murmur64b differs from the originally committed blob in two added lines:
  * h2 is seeded with u32(seed >> 32). `u32(seed) >> 32` truncated the seed
    first and then shifted the 32-bit value by its full width, so the upper
    half of the seed never reached h2. Seeds that fit in 32 bits (including
    the default) are expected to hash as before.
  * The tail bytes are read from data[i*size_of(u32):] instead of an unchecked
    3-byte view past data32; the same bytes are mixed.
The blob id after ".." on the index line therefore no longer names the
post-image. Indentation was reconstructed as tabs; if it does not match the
target file, apply with --ignore-whitespace.

 core/hash/hash.odin | 196 +++++++++++++++++++++++---------------------
 1 file changed, 101 insertions(+), 95 deletions(-)

diff --git a/core/hash/hash.odin b/core/hash/hash.odin
index 63708a096..870d6a638 100644
--- a/core/hash/hash.odin
+++ b/core/hash/hash.odin
@@ -172,108 +172,114 @@ murmur32 :: proc(data: []byte, seed := u32(0)) -> u32 {
 	return h1
 }
 
+// See https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp#L96
 @(optimization_mode="speed")
-murmur64 :: proc(data: []byte, seed := u64(0x9747b28c)) -> u64 {
-	when size_of(int) == 8 {
-		m :: 0xc6a4a7935bd1e995
-		r :: 47
+murmur64a :: proc(data: []byte, seed := u64(0x9747b28c)) -> u64 {
+	m :: 0xc6a4a7935bd1e995
+	r :: 47
 
-		h: u64 = seed ~ (u64(len(data)) * m)
-		data64 := mem.slice_ptr(cast(^u64)raw_data(data), len(data)/size_of(u64))
+	h: u64 = seed ~ (u64(len(data)) * m)
+	data64 := mem.slice_data_cast([]u64, data)
 
-		for _, i in data64 {
-			k := data64[i]
+	for _, i in data64 {
+		k := data64[i]
 
-			k *= m
-			k ~= k>>r
-			k *= m
+		k *= m
+		k ~= k>>r
+		k *= m
 
-			h ~= k
-			h *= m
-		}
-
-		switch len(data)&7 {
-		case 7: h ~= u64(data[6]) << 48; fallthrough
-		case 6: h ~= u64(data[5]) << 40; fallthrough
-		case 5: h ~= u64(data[4]) << 32; fallthrough
-		case 4: h ~= u64(data[3]) << 24; fallthrough
-		case 3: h ~= u64(data[2]) << 16; fallthrough
-		case 2: h ~= u64(data[1]) << 8; fallthrough
-		case 1:
-			h ~= u64(data[0])
-			h *= m
-		}
-
-		h ~= h>>r
+		h ~= k
 		h *= m
-		h ~= h>>r
-
-		return h
-	} else {
-		m :: 0x5bd1e995
-		r :: 24
-
-		h1 := u32(seed) ~ u32(len(data))
-		h2 := u32(seed) >> 32
-		data32 := mem.slice_ptr(cast(^u32)raw_data(data), len(data)/size_of(u32))
-		len := len(data)
-		i := 0
-
-		for len >= 8 {
-			k1, k2: u32
-			k1 = data32[i]; i += 1
-			k1 *= m
-			k1 ~= k1>>r
-			k1 *= m
-			h1 *= m
-			h1 ~= k1
-			len -= 4
-
-			k2 = data32[i]; i += 1
-			k2 *= m
-			k2 ~= k2>>r
-			k2 *= m
-			h2 *= m
-			h2 ~= k2
-			len -= 4
-		}
-
-		if len >= 4 {
-			k1: u32
-			k1 = data32[i]; i += 1
-			k1 *= m
-			k1 ~= k1>>r
-			k1 *= m
-			h1 *= m
-			h1 ~= k1
-			len -= 4
-		}
-
-		// TODO(bill): Fix this
-		#no_bounds_check data8 := mem.slice_to_bytes(data32[i:])[:3]
-		switch len {
-		case 3:
-			h2 ~= u32(data8[2]) << 16
-			fallthrough
-		case 2:
-			h2 ~= u32(data8[1]) << 8
-			fallthrough
-		case 1:
-			h2 ~= u32(data8[0])
-			h2 *= m
-		}
-
-		h1 ~= h2>>18
-		h1 *= m
-		h2 ~= h1>>22
-		h2 *= m
-		h1 ~= h2>>17
-		h1 *= m
-		h2 ~= h1>>19
-		h2 *= m
-
-		return u64(h1)<<32 | u64(h2)
 	}
+
+	offset := len(data64) * size_of(u64) // byte index just past the last whole u64 word
+
+	switch len(data)&7 {
+	case 7: h ~= u64(data[offset + 6]) << 48; fallthrough
+	case 6: h ~= u64(data[offset + 5]) << 40; fallthrough
+	case 5: h ~= u64(data[offset + 4]) << 32; fallthrough
+	case 4: h ~= u64(data[offset + 3]) << 24; fallthrough
+	case 3: h ~= u64(data[offset + 2]) << 16; fallthrough
+	case 2: h ~= u64(data[offset + 1]) << 8; fallthrough
+	case 1:
+		h ~= u64(data[offset + 0])
+		h *= m
+	}
+
+	h ~= h>>r
+	h *= m
+	h ~= h>>r
+
+	return h
+}
+
+// See https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp#L140
+@(optimization_mode="speed")
+murmur64b :: proc(data: []byte, seed := u64(0x9747b28c)) -> u64 {
+	m :: 0x5bd1e995
+	r :: 24
+
+	h1 := u32(seed) ~ u32(len(data)) // lower 32 bits of the seed, mixed with the length
+	h2 := u32(seed >> 32)            // upper 32 bits of the seed; shift before truncating
+
+	data32 := mem.slice_ptr(cast(^u32)raw_data(data), len(data)/size_of(u32))
+	len := len(data)
+	i := 0
+
+	for len >= 8 {
+		k1, k2: u32
+		k1 = data32[i]; i += 1
+		k1 *= m
+		k1 ~= k1>>r
+		k1 *= m
+		h1 *= m
+		h1 ~= k1
+		len -= 4
+
+		k2 = data32[i]; i += 1
+		k2 *= m
+		k2 ~= k2>>r
+		k2 *= m
+		h2 *= m
+		h2 ~= k2
+		len -= 4
+	}
+
+	if len >= 4 {
+		k1: u32
+		k1 = data32[i]; i += 1
+		k1 *= m
+		k1 ~= k1>>r
+		k1 *= m
+		h1 *= m
+		h1 ~= k1
+		len -= 4
+	}
+
+	// The 0..3 bytes left after the i whole u32 words consumed above.
+	data8 := data[i*size_of(u32):]
+	switch len {
+	case 3:
+		h2 ~= u32(data8[2]) << 16
+		fallthrough
+	case 2:
+		h2 ~= u32(data8[1]) << 8
+		fallthrough
+	case 1:
+		h2 ~= u32(data8[0])
+		h2 *= m
+	}
+
+	h1 ~= h2>>18
+	h1 *= m
+	h2 ~= h1>>22
+	h2 *= m
+	h1 ~= h2>>17
+	h1 *= m
+	h2 ~= h1>>19
+	h2 *= m
+
+	return u64(h1)<<32 | u64(h2)
 }
 
 @(optimization_mode="speed")