From 980aa37bee30792bd8cd9cb082313f6633267840 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Thu, 24 Jun 2021 14:56:28 +0200 Subject: [PATCH] ZLIB: Another 10%+ faster. --- core/compress/common.odin | 5 ++++- core/compress/zlib/zlib.odin | 24 ++++++++++++++++++++---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/core/compress/common.odin b/core/compress/common.odin index e35365c81..df798e751 100644 --- a/core/compress/common.odin +++ b/core/compress/common.odin @@ -250,8 +250,11 @@ peek_back_byte :: #force_inline proc(cb: ^Code_Buffer, offset: i64) -> (res: u8, @(optimization_mode="speed") refill_lsb :: proc(z: ^Context, cb: ^Code_Buffer, width := i8(24)) { when #config(TRACY_ENABLE, false) { tracy.ZoneN("Refill LSB"); } + + refill := u64(width); + for { - if cb.num_bits > u64(width) { + if cb.num_bits > refill { break; } if cb.code_buffer == 0 && cb.num_bits > 63 { diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin index 41578e16b..ce15ea147 100644 --- a/core/compress/zlib/zlib.odin +++ b/core/compress/zlib/zlib.odin @@ -23,6 +23,16 @@ import "core:hash" Returns: Error. */ +/* + Do we do Adler32 as we write bytes to output? + It used to be faster to do it inline, now it's faster to do it at the end of `inflate`. + + We'll see what's faster after more optimization, and might end up removing + `Context.rolling_hash` if not inlining it is still faster. + +*/ +INLINE_ADLER :: false; + Context :: compress.Context; Code_Buffer :: compress.Code_Buffer; @@ -135,7 +145,7 @@ write_byte :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, c: u8) -> (err: when #config(TRACY_ENABLE, false) { tracy.ZoneN("Write Byte"); } c := c; buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1}; - z.rolling_hash = hash.adler32(buf, z.rolling_hash); + when INLINE_ADLER { z.rolling_hash = hash.adler32(buf, z.rolling_hash); } _, e := z.output->impl_write(buf); if e != .None { @@ -161,7 +171,7 @@ repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io. cb.last[z.bytes_written & cb.window_mask] = c; z.bytes_written += 1; } - z.rolling_hash = hash.adler32(buf, z.rolling_hash); + when INLINE_ADLER { z.rolling_hash = hash.adler32(buf, z.rolling_hash); } _, e := z.output->impl_write(buf); if e != .None { @@ -188,7 +198,7 @@ repl_bytes :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, distance: u16) -> buf[i] = c; z.bytes_written += 1; offset += 1; } - z.rolling_hash = hash.adler32(buf, z.rolling_hash); + when INLINE_ADLER { z.rolling_hash = hash.adler32(buf, z.rolling_hash); } _, e := z.output->impl_write(buf); if e != .None { @@ -458,8 +468,13 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont if !raw { compress.discard_to_next_byte_lsb(cb); - adler32 := compress.read_bits_lsb(ctx, cb, 8) << 24 | compress.read_bits_lsb(ctx, cb, 8) << 16 | compress.read_bits_lsb(ctx, cb, 8) << 8 | compress.read_bits_lsb(ctx, cb, 8); + + when !INLINE_ADLER { + buf := (^bytes.Buffer)(ctx.output.stream_data).buf[:]; + ctx.rolling_hash = hash.adler32(buf); + } + if ctx.rolling_hash != u32(adler32) { return E_General.Checksum_Failed; } @@ -643,6 +658,7 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont break; } } + return nil; }