[xxhash] Add tests for large inputs

Test XXH32, XXH64, XXH3-64 and XXH3-128 for large inputs, with both all-at-once and streaming APIs.

XXH32_create_state and XXH64_create_state now implicitly call their "reset state" variants to simplify the streaming API to 3 steps:
- create state / defer destroy
- update
- digest (finalize)

These are tested with an array of 1, 2, 4, 8 and 16 megabytes worth of zeroes.
The streaming API returns the same hashes as the one-shot version, and both match the output of the official xxhsum tool.

3778/3778 tests successful.
This commit is contained in:
Jeroen van Rijn
2022-05-02 17:51:39 +02:00
parent 8023c8abc7
commit 6985181961
4 changed files with 158 additions and 4 deletions

View File

@@ -197,6 +197,7 @@ XXH32 :: proc(input: []u8, seed := XXH32_DEFAULT_SEED) -> (digest: XXH32_hash) {
*/
XXH32_create_state :: proc(allocator := context.allocator) -> (res: ^XXH32_state, err: Error) {
	// Allocate the streaming state with the caller-supplied allocator.
	state := new(XXH32_state, allocator)
	if state == nil {
		// Allocation failed: report it without touching the nil pointer.
		return nil, .Error
	}

	// Reset the fresh state so callers can go straight to update/digest.
	// Only the state is passed, so any further reset parameters take their defaults.
	XXH32_reset_state(state)
	return state, .None
}
@@ -258,7 +259,7 @@ XXH32_update :: proc(state: ^XXH32_state, input: []u8) -> (err: Error) {
v3 := state.v3
v4 := state.v4
for len(buf) >= 15 {
for len(buf) >= 16 {
#no_bounds_check v1 = XXH32_round(v1, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
#no_bounds_check v2 = XXH32_round(v2, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
#no_bounds_check v3 = XXH32_round(v3, XXH32_read32(buf, .Unaligned)); buf = buf[4:]

View File

@@ -163,6 +163,7 @@ XXH64 :: proc(input: []u8, seed := XXH64_DEFAULT_SEED) -> (digest: XXH64_hash) {
*/
XXH64_create_state :: proc(allocator := context.allocator) -> (res: ^XXH64_state, err: Error) {
	// Allocate the streaming state with the caller-supplied allocator.
	state := new(XXH64_state, allocator)
	if state == nil {
		// Allocation failed: report it without touching the nil pointer.
		return nil, .Error
	}

	// Reset the fresh state so callers can go straight to update/digest.
	// Only the state is passed, so any further reset parameters take their defaults.
	XXH64_reset_state(state)
	return state, .None
}

View File

@@ -31,8 +31,10 @@ when ODIN_TEST {
main :: proc() {
t := testing.T{}
test_benchmark_runner(&t)
test_xxhash_vectors(&t)
test_crc64_vectors(&t)
test_xxhash_vectors(&t)
test_xxhash_large(&t)
fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
if TEST_fail > 0 {
os.exit(1)
@@ -191,6 +193,88 @@ test_benchmark_runner :: proc(t: ^testing.T) {
benchmark_print(name, options)
}
@test
test_xxhash_large :: proc(t: ^testing.T) {
	// A single zero-filled buffer; every test case hashes a prefix of it,
	// with the prefix length taken from the key of each ZERO_VECTORS entry.
	zeroes := make([]u8, 16 * 1024 * 1024)
	defer delete(zeroes)

	// Pass 1: hash each prefix in a single call per algorithm.
	for size, want in ZERO_VECTORS {
		data := zeroes[:size]

		got_32    := xxhash.XXH32(data)
		got_64    := xxhash.XXH64(data)
		got_3_64  := xxhash.XXH3_64(data)
		got_3_128 := xxhash.XXH3_128(data)

		expect(t, got_32    == want.xxh_32,   fmt.tprintf("[ XXH32(%03d) ] Expected: %08x. Got: %08x.", size, want.xxh_32, got_32))
		expect(t, got_64    == want.xxh_64,   fmt.tprintf("[ XXH64(%03d) ] Expected: %16x. Got: %16x.", size, want.xxh_64, got_64))
		expect(t, got_3_64  == want.xxh3_64,  fmt.tprintf("[XXH3_64(%03d) ] Expected: %16x. Got: %16x.", size, want.xxh3_64, got_3_64))
		expect(t, got_3_128 == want.xxh3_128, fmt.tprintf("[XXH3_128(%03d) ] Expected: %32x. Got: %32x.", size, want.xxh3_128, got_3_128))
	}

	// Pass 2: feed the same prefixes through the streaming API, cycling
	// through this table of chunk sizes for successive updates.
	chunk_sizes := []int{1, 42, 13, 7, 16, 5, 23, 74, 1024, 511, 1023, 47}

	for size, want in ZERO_VECTORS {
		remaining := zeroes[:size]

		// One state per algorithm; each is destroyed when this iteration's scope ends.
		st_32, err_32 := xxhash.XXH32_create_state()
		defer xxhash.XXH32_destroy_state(st_32)
		expect(t, err_32 == nil, "Problem initializing XXH_32 state.")

		st_64, err_64 := xxhash.XXH64_create_state()
		defer xxhash.XXH64_destroy_state(st_64)
		expect(t, err_64 == nil, "Problem initializing XXH_64 state.")

		st_3_64, err_3_64 := xxhash.XXH3_create_state()
		defer xxhash.XXH3_destroy_state(st_3_64)
		expect(t, err_3_64 == nil, "Problem initializing XXH3_64 state.")

		st_3_128, err_3_128 := xxhash.XXH3_create_state()
		defer xxhash.XXH3_destroy_state(st_3_128)
		expect(t, err_3_128 == nil, "Problem initializing XXH3_128 state.")

		// Push the identical chunk into all four states until the input is used up.
		for step := 0; len(remaining) > 0; step += 1 {
			n     := min(len(remaining), chunk_sizes[step % len(chunk_sizes)])
			chunk := remaining[:n]

			xxhash.XXH32_update(st_32, chunk)
			xxhash.XXH64_update(st_64, chunk)
			xxhash.XXH3_64_update(st_3_64, chunk)
			xxhash.XXH3_128_update(st_3_128, chunk)

			remaining = remaining[n:]
		}

		// Finalize each state and compare against the expected digests.
		got_32    := xxhash.XXH32_digest(st_32)
		got_64    := xxhash.XXH64_digest(st_64)
		got_3_64  := xxhash.XXH3_64_digest(st_3_64)
		got_3_128 := xxhash.XXH3_128_digest(st_3_128)

		expect(t, got_32    == want.xxh_32,   fmt.tprintf("[ XXH32(%03d) ] Expected: %08x. Got: %08x.", size, want.xxh_32, got_32))
		expect(t, got_64    == want.xxh_64,   fmt.tprintf("[ XXH64(%03d) ] Expected: %16x. Got: %16x.", size, want.xxh_64, got_64))
		expect(t, got_3_64  == want.xxh3_64,  fmt.tprintf("[XXH3_64(%03d) ] Expected: %16x. Got: %16x.", size, want.xxh3_64, got_3_64))
		expect(t, got_3_128 == want.xxh3_128, fmt.tprintf("[XXH3_128(%03d) ] Expected: %32x. Got: %32x.", size, want.xxh3_128, got_3_128))
	}
}
@test
test_xxhash_vectors :: proc(t: ^testing.T) {
fmt.println("Verifying against XXHASH_TEST_VECTOR_SEEDED:")

View File

@@ -3,7 +3,7 @@
*/
package test_core_hash
XXHASH_Test_Vectors_With_Seed :: struct #packed {
XXHASH_Test_Vectors :: struct #packed {
/*
Old hashes
*/
@@ -17,7 +17,75 @@ XXHASH_Test_Vectors_With_Seed :: struct #packed {
xxh3_128: u128,
}
XXHASH_TEST_VECTOR_SEEDED := map[u64][257]XXHASH_Test_Vectors_With_Seed{
/*
	Expected hashes for all-zero inputs, keyed by input length in bytes.

	Per the original annotations, each value was produced with xxhsum:
	-H0 for xxh_32, -H1 for xxh_64, -H2 for xxh3_128, -H3 for xxh3_64.
*/
ZERO_VECTORS := map[int]XXHASH_Test_Vectors{
	// 1 MiB
	1 * 1024 * 1024 = {
		xxh_32   = 0x9430f97f,
		xxh_64   = 0x87d2a1b6e1163ef1,
		xxh3_64  = 0x918780b90550bf34,
		xxh3_128 = 0xb6ef17a3448492b6918780b90550bf34,
	},
	// 2 MiB
	2 * 1024 * 1024 = {
		xxh_32   = 0xeeb74ca1,
		xxh_64   = 0xeb8a7322f88e23db,
		xxh3_64  = 0x94e26d8e04364852,
		xxh3_128 = 0x7b3e6abe1456fd0094e26d8e04364852,
	},
	// 4 MiB
	4 * 1024 * 1024 = {
		xxh_32   = 0xa59010b8,
		xxh_64   = 0x639f9e1a7cbc9d28,
		xxh3_64  = 0x165f453a5f35c459,
		xxh3_128 = 0x34001ae2f947e773165f453a5f35c459,
	},
	// 8 MiB
	8 * 1024 * 1024 = {
		xxh_32   = 0xfed1d084,
		xxh_64   = 0x86823cbc61f6df0f,
		xxh3_64  = 0xe881a25e37e37b19,
		xxh3_128 = 0x9d6bf1a4e92df02ce881a25e37e37b19,
	},
	// 16 MiB
	16 * 1024 * 1024 = {
		xxh_32   = 0x0ee4ebf9,
		xxh_64   = 0x412f1e415ee2d80b,
		xxh3_64  = 0xc4979470a1b529a1,
		xxh3_128 = 0x14d914cac1f4c1b1c4979470a1b529a1,
	},
}
XXHASH_TEST_VECTOR_SEEDED := map[u64][257]XXHASH_Test_Vectors{
0 = {
{ // Length: 000
/* XXH32 with seed */ 0x02cc5d05,