From 40a12cca53dcecd73740bb4cb704cee25189cfc1 Mon Sep 17 00:00:00 2001
From: Jeroen van Rijn <Kelimion@users.noreply.github.com>
Date: Sat, 26 Jun 2021 13:17:14 +0200
Subject: [PATCH 1/9] ZLIB: If output size is known, reserve that much.

---
 core/compress/common.odin       | 54 ++++++++++++++++++++--
 core/compress/gzip/example.odin |  2 +-
 core/compress/gzip/gzip.odin    | 80 ++++++++++++++++++++++++++++-----
 core/compress/zlib/example.odin |  6 ++-
 core/compress/zlib/zlib.odin    | 52 ++++++++++++++++-----
 5 files changed, 166 insertions(+), 28 deletions(-)

diff --git a/core/compress/common.odin b/core/compress/common.odin
index df798e751..a6ae230e5 100644
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -11,6 +11,39 @@ package compress
 import "core:io"
 import "core:image"
 
+/*
+	These settings bound how much the compression routines will allocate for their output buffer.
+	When output is streamed instead of buffered, they are unnecessary and are ignored.
+
+*/
+
+/*
+	When a decompression routine doesn't stream its output, but writes to a buffer,
+	we pre-allocate an output buffer to speed up decompression. The default is 1 MiB.
+*/
+COMPRESS_OUTPUT_ALLOCATE_MIN :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MIN, 1 << 20));
+
+/*
+	This bounds the maximum a buffer will resize to as needed, or the maximum we'll
+	pre-allocate if you inform the decompression routine you know the payload size.
+
+	For reference, the largest payload size of a GZIP file is 4 GiB.
+
+*/
+when size_of(uintptr) == 8 {
+	/*
+		For 64-bit platforms, we set the default max buffer size to 4 GiB,
+		which is GZIP and PKZIP's max payload size.
+	*/	
+	COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 32));
+} else {
+	/*
+		For 32-bit platforms, we set the default max buffer size to 512 MiB.
+	*/
+	COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 29));
+}
+
+
 // when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" }
 
 Error :: union {
@@ -46,6 +79,20 @@ GZIP_Error :: enum {
 	Comment_Too_Long,
 	Payload_Length_Invalid,
 	Payload_CRC_Invalid,
+
+	/*
+		GZIP's payload can be a maximum of max(u32le), or 4 GiB.
+		If you tell it you expect it to contain more, that's obviously an error.
+	*/
+	Payload_Size_Exceeds_Max_Payload,
+	/*
+		For buffered instead of streamed output, the payload size can't exceed
+		the max set by the `COMPRESS_OUTPUT_ALLOCATE_MAX` switch in compress/common.odin.
+
+		You can tweak this setting using `-define:COMPRESS_OUTPUT_ALLOCATE_MAX=size_in_bytes`
+	*/
+	Output_Exceeds_COMPRESS_OUTPUT_ALLOCATE_MAX,
+
 }
 
 ZIP_Error :: enum {
@@ -79,7 +126,7 @@ Context :: struct #packed {
 	input_data:        []u8,
 
 	output:            io.Stream,
-	output_buf:        [dynamic]u8,
+	output_buf:        ^[dynamic]u8,
 	bytes_written:     i64,
 
 	/*
@@ -103,9 +150,10 @@ Context :: struct #packed {
 	*/
 	input_fully_in_memory: b8,
 	input_refills_from_stream: b8,
-	reserved_flags: [2]b8,
+	output_to_stream: b8,
+	reserved_flag: b8,
 }
-#assert(size_of(Context) == 128);
+// #assert(size_of(Context) == 128);
 
 /*
 	Compression algorithm context
diff --git a/core/compress/gzip/example.odin b/core/compress/gzip/example.odin
index b4fc50ade..bfb4267b8 100644
--- a/core/compress/gzip/example.odin
+++ b/core/compress/gzip/example.odin
@@ -45,7 +45,7 @@ main :: proc() {
 
 	if len(args) < 2 {
 		stderr("No input file specified.\n");
-		err := load(TEST, &buf);
+		err := load(slice=TEST, buf=&buf, known_gzip_size=len(TEST));
 		if err == nil {
 			stdout("Displaying test vector: ");
 			stdout(bytes.buffer_to_string(&buf));
diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin
index a9f833ae4..4d185fa6e 100644
--- a/core/compress/gzip/gzip.odin
+++ b/core/compress/gzip/gzip.odin
@@ -21,6 +21,8 @@ import "core:io"
 import "core:bytes"
 import "core:hash"
 
+// import "core:fmt"
+
 Magic :: enum u16le {
 	GZIP = 0x8b << 8 | 0x1f,
 }
@@ -99,7 +101,9 @@ E_GZIP    :: compress.GZIP_Error;
 E_ZLIB    :: compress.ZLIB_Error;
 E_Deflate :: compress.Deflate_Error;
 
-load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+GZIP_MAX_PAYLOAD_SIZE :: int(max(u32le));
+
+load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
 
 	r := bytes.Reader{};
 	bytes.reader_init(&r, slice);
@@ -111,33 +115,47 @@ load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, allocator := context.al
 		input_fully_in_memory = true,
 		input_refills_from_stream = true,
 	};
-	err = load_from_stream(ctx, buf, allocator);
+
+	err = load_from_stream(ctx, buf, known_gzip_size, expected_output_size, allocator);
 
 	return err;
 }
 
-load_from_file :: proc(filename: string, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
 	data, ok := os.read_entire_file(filename, allocator);
 	defer delete(data);
 
 	err = E_General.File_Not_Found;
 	if ok {
-		err = load_from_slice(data, buf, allocator);
+		err = load_from_slice(data, buf, len(data), expected_output_size, allocator);
 	}
 	return;
 }
 
-load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
 	buf := buf;
+	expected_output_size := expected_output_size;
+
+	input_data_consumed := 0;
+
 	ws := bytes.buffer_to_stream(buf);
 	ctx.output = ws;
 
+	if expected_output_size > GZIP_MAX_PAYLOAD_SIZE {
+		return E_GZIP.Payload_Size_Exceeds_Max_Payload;
+	}
+
+	if expected_output_size > compress.COMPRESS_OUTPUT_ALLOCATE_MAX {
+		return E_GZIP.Output_Exceeds_COMPRESS_OUTPUT_ALLOCATE_MAX;
+	}
+
 	b: []u8;
 
 	header, e := compress.read_data(ctx, Header);
 	if e != .None {
 		return E_General.File_Too_Short;
 	}
+	input_data_consumed += size_of(Header);
 
 	if header.magic != .GZIP {
 		return E_GZIP.Invalid_GZIP_Signature;
@@ -163,6 +181,8 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
 
 	if .extra in header.flags {
 		xlen, e_extra := compress.read_data(ctx, u16le);
+		input_data_consumed += 2;
+
 		if e_extra != .None {
 			return E_General.Stream_Too_Short;
 		}
@@ -184,6 +204,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
 				return E_General.Stream_Too_Short;
 			}
 			xlen -= 2;
+			input_data_consumed += 2;
 
 			field_length, field_error = compress.read_data(ctx, u16le);
 			if field_error != .None {
@@ -191,6 +212,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
 				return E_General.Stream_Too_Short;
 			}
 			xlen -= 2;
+			input_data_consumed += 2;
 
 			if xlen <= 0 {
 				// We're not going to try and recover by scanning for a ZLIB header.
@@ -206,6 +228,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
 					return E_General.Stream_Too_Short;
 				}
 				xlen -= field_length;
+				input_data_consumed += int(field_length);
 
 				// printf("%v\n", string(field_data));
 			}
@@ -227,6 +250,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
 			if name_error != .None {
 				return E_General.Stream_Too_Short;
 			}
+			input_data_consumed += 1;
 			if b[0] == 0 {
 				break;
 			}
@@ -250,6 +274,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
 			if comment_error != .None {
 				return E_General.Stream_Too_Short;
 			}
+			input_data_consumed += 1;
 			if b[0] == 0 {
 				break;
 			}
@@ -265,6 +290,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
 	if .header_crc in header.flags {
 		crc_error: io.Error;
 		_, crc_error = compress.read_slice(ctx, 2);
+		input_data_consumed += 2;
 		if crc_error != .None {
 			return E_General.Stream_Too_Short;
 		}
@@ -280,7 +306,43 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
 	code_buffer := compress.Code_Buffer{};
 	cb := &code_buffer;
 
-	zlib_error := zlib.inflate_raw(ctx, &code_buffer);
+	payload_u32le: u32le;
+
+	// fmt.printf("known_gzip_size: %v | expected_output_size: %v\n", known_gzip_size, expected_output_size);
+
+	if expected_output_size > -1 {
+		/*
+			We already checked that it's not larger than the output buffer max,
+			or GZIP length field's max.
+
+			We'll just pass it on to `zlib.inflate_raw`.
+		*/
+	} else {
+		/*
+			If we know the size of the GZIP file *and* it is fully in memory,
+			then we can peek at the unpacked size at the end.
+
+			We'll still want to ensure there's capacity left in the output buffer when we write, of course.
+
+		*/
+		if ctx.input_fully_in_memory && known_gzip_size > -1 {
+			offset := known_gzip_size - input_data_consumed - 4;
+			if len(ctx.input_data) >= offset + 4 {
+				length_bytes         := ctx.input_data[offset:][:4];
+				payload_u32le         = (^u32le)(&length_bytes[0])^;
+				expected_output_size = int(payload_u32le);
+			}
+		} else {
+			/*
+				TODO(Jeroen): When reading a GZIP from a stream, check if impl_seek is present.
+				If so, we can seek to the end, grab the size from the footer, and seek back to payload start.
+			*/
+		}
+	}
+
+	// fmt.printf("GZIP: Expected Payload Size: %v\n", expected_output_size);
+
+	zlib_error := zlib.inflate_raw(z=ctx, cb=&code_buffer, expected_output_size=expected_output_size);
 	if zlib_error != nil {
 		return zlib_error;
 	}
@@ -300,9 +362,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
 		}
 	}
 	payload_crc := transmute(u32le)payload_crc_b;
-
-	payload_len: u32le;
-	payload_len, footer_error = compress.read_data(ctx, u32le);
+	payload_u32le, footer_error = compress.read_data(ctx, u32le);
 
 	payload := bytes.buffer_to_bytes(buf);
 	crc32 := u32le(hash.crc32(payload));
@@ -311,7 +371,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
 		return E_GZIP.Payload_CRC_Invalid;
 	}
 
-	if len(payload) != int(payload_len) {
+	if len(payload) != int(payload_u32le) {
 		return E_GZIP.Payload_Length_Invalid;
 	}
 	return nil;
diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin
index 4d951b2f4..cfbbcd717 100644
--- a/core/compress/zlib/example.odin
+++ b/core/compress/zlib/example.odin
@@ -35,11 +35,13 @@ main :: proc() {
 		171,  15,  18,  59, 138, 112,  63,  23, 205, 110, 254, 136, 109,  78, 231,
 		 63, 234, 138, 133, 204,
 	};
+	OUTPUT_SIZE :: 438;
+
 
 	buf: bytes.Buffer;
 
 	// We can pass ", true" to inflate a raw DEFLATE stream instead of a ZLIB wrapped one.
-	err := inflate(ODIN_DEMO, &buf);
+	err := inflate(input=ODIN_DEMO, buf=&buf, expected_output_size=OUTPUT_SIZE);
 	defer bytes.buffer_destroy(&buf);
 
 	if err != nil {
@@ -47,5 +49,5 @@ main :: proc() {
 	}
 	s := bytes.buffer_to_string(&buf);
 	fmt.printf("Input: %v bytes, output (%v bytes):\n%v\n", len(ODIN_DEMO), len(s), s);
-	assert(len(s) == 438);
+	assert(len(s) == OUTPUT_SIZE);
 }
diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin
index ce15ea147..b29e65007 100644
--- a/core/compress/zlib/zlib.odin
+++ b/core/compress/zlib/zlib.odin
@@ -16,6 +16,8 @@ import "core:io"
 import "core:bytes"
 import "core:hash"
 
+// import "core:fmt"
+
 // when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" }
 
 /*
@@ -397,7 +399,7 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
 }
 
 @(optimization_mode="speed")
-inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	/*
 		ctx.input must be an io.Stream backed by an implementation that supports:
 		- read
@@ -461,7 +463,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
 	}
 
 	// Parse ZLIB stream without header.
-	err = inflate_raw(ctx, cb);
+	err = inflate_raw(z=ctx, cb=cb, expected_output_size=expected_output_size);
 	if err != nil {
 		return err;
 	}
@@ -483,12 +485,29 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
 }
 
 @(optimization_mode="speed")
-inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Inflate Raw"); }
-	final := u32(0);
-	type := u32(0);
 
-	cb.num_bits = 0;
+	buf := (^bytes.Buffer)(z.output.stream_data);
+	z.output_buf = &buf.buf;
+
+	// fmt.printf("ZLIB: Expected Payload Size: %v\n", expected_output_size);
+
+	if expected_output_size > -1 && expected_output_size <= compress.COMPRESS_OUTPUT_ALLOCATE_MAX {
+		reserve(z.output_buf, expected_output_size);
+		// resize (z.output_buf, expected_output_size);
+	} else {
+		reserve(z.output_buf, compress.COMPRESS_OUTPUT_ALLOCATE_MIN);
+	}
+
+	// reserve(&z.output_buf, compress.COMPRESS_OUTPUT_ALLOCATE_MIN);
+	// resize (&z.output_buf, compress.COMPRESS_OUTPUT_ALLOCATE_MIN);
+	// fmt.printf("ZLIB: buf: %v\n", buf);
+	// fmt.printf("ZLIB: output_buf: %v\n", z.output_buf);
+	// fmt.printf("ZLIB: z.output: %v\n", z.output);
+
+
+	cb.num_bits    = 0;
 	cb.code_buffer = 0;
 
 	z_repeat:      ^Huffman_Table;
@@ -519,6 +538,10 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont
 	cb.last = mem.make_dynamic_array_len_cap([dynamic]u8, cb.window_mask + 1, cb.window_mask + 1, allocator);
 	defer delete(cb.last);
 
+
+	final := u32(0);
+	type  := u32(0);
+
 	for {
 		final = compress.read_bits_lsb(z, cb, 1);
 		type  = compress.read_bits_lsb(z, cb, 2);
@@ -659,10 +682,15 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont
 		}
 	}
 
+	// fmt.printf("ZLIB: Bytes written: %v\n", z.bytes_written);
+	if int(z.bytes_written) != len(buf.buf) {
+		resize(&buf.buf, int(z.bytes_written));
+	}
+
 	return nil;
 }
 
-inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false) -> (err: Error) {
+inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
 	ctx := Context{};
 
 	r := bytes.Reader{};
@@ -673,15 +701,15 @@ inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false) -
 	ctx.input_fully_in_memory = true;
 
 	buf := buf;
-	ws := bytes.buffer_to_stream(buf);
+	ws  := bytes.buffer_to_stream(buf);
 	ctx.output = ws;
 
-	err = inflate_from_stream(&ctx, raw);
+	err = inflate_from_stream(ctx=&ctx, raw=raw, expected_output_size=expected_output_size);
 
 	return err;
 }
 
-inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_Buffer, raw := false) -> (err: Error) {
+inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
 	ctx := Context{};
 
 	r := bytes.Reader{};
@@ -692,10 +720,10 @@ inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_B
 	ctx.input_fully_in_memory = true;
 
 	buf := buf;
-	ws := bytes.buffer_to_stream(buf);
+	ws  := bytes.buffer_to_stream(buf);
 	ctx.output = ws;
 
-	return inflate_from_stream_raw(&ctx, cb);
+	return inflate_from_stream_raw(z=&ctx, cb=cb, expected_output_size=expected_output_size);
 }
 
 inflate     :: proc{inflate_from_stream, inflate_from_byte_array};

From 65b78b1aa9e8b696007a6a35adb4115c45d5259b Mon Sep 17 00:00:00 2001
From: Jeroen van Rijn <Kelimion@users.noreply.github.com>
Date: Sat, 26 Jun 2021 15:11:15 +0200
Subject: [PATCH 2/9] ZLIB: Write to the output buffer directly and grow it as needed.

---
 core/compress/common.odin    |   7 ++
 core/compress/zlib/zlib.odin | 161 ++++++++++++++++++++++++++++-------
 2 files changed, 138 insertions(+), 30 deletions(-)

diff --git a/core/compress/common.odin b/core/compress/common.odin
index a6ae230e5..e21339839 100644
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -69,6 +69,13 @@ General_Error :: enum {
 	Checksum_Failed,
 	Incompatible_Options,
 	Unimplemented,
+
+
+	/*
+		Memory errors
+	*/
+	Allocation_Failed,
+	Resize_Failed,
 }
 
 GZIP_Error :: enum {
diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin
index b29e65007..25911ba88 100644
--- a/core/compress/zlib/zlib.odin
+++ b/core/compress/zlib/zlib.odin
@@ -142,43 +142,121 @@ z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
 	return;
 }
 
+
+@(optimization_mode="speed")
+grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
+	/*
+		That we get here at all means that we didn't pass an expected output size,
+		or that it was too small.
+
+		Double the length until we reach the maximum allowed. An empty buffer starts at
+		COMPRESS_OUTPUT_ALLOCATE_MIN, because doubling a length of zero never grows it.
+	*/
+	old_size := len(buf);
+	new_size := max(old_size << 1, compress.COMPRESS_OUTPUT_ALLOCATE_MIN);
+	new_size  = min(new_size, compress.COMPRESS_OUTPUT_ALLOCATE_MAX);
+
+	if new_size <= old_size {
+		// Already at the maximum: report failure instead of letting the caller write past the end.
+		return .Resize_Failed;
+	}
+
+	resize(buf, new_size);
+	if len(buf) != new_size { return .Resize_Failed; }
+	return nil;
+}
+
+/*
+	TODO: Make these return compress.Error.
+*/
+
 @(optimization_mode="speed")
 write_byte :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, c: u8) -> (err: io.Error) #no_bounds_check {
 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Write Byte"); }
 	c := c;
-	buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1};
-	when INLINE_ADLER { z.rolling_hash = hash.adler32(buf, z.rolling_hash); }
 
-	_, e := z.output->impl_write(buf);
-	if e != .None {
-		return e;
+	if !z.output_to_stream {
+		/*
+			Resize if needed.
+		*/
+		if int(z.bytes_written) + 1 >= len(z.output_buf) {
+			grow_buffer(z.output_buf);
+		}
+
+		#no_bounds_check {
+			z.output_buf[z.bytes_written] = c;
+			cb.last[z.bytes_written & cb.window_mask] = c;
+		}
+		z.bytes_written += 1;
+
+		when INLINE_ADLER {
+			buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1};
+			z.rolling_hash = hash.adler32(buf, z.rolling_hash);
+		}
+	} else {
+		buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1};
+		when INLINE_ADLER {
+			z.rolling_hash = hash.adler32(buf, z.rolling_hash);
+		}
+
+		_, e := z.output->impl_write(buf);
+		if e != .None {
+			return e;
+		}
+
+		#no_bounds_check cb.last[z.bytes_written & cb.window_mask] = c;
+		z.bytes_written += 1;
 	}
-	cb.last[z.bytes_written & cb.window_mask] = c;
 
-	z.bytes_written += 1;
 	return .None;
 }
 
 @(optimization_mode="speed")
-repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.Error) {
+repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.Error) 	#no_bounds_check {
 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Byte"); }
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		without having to worry about wrapping, so no need for a temp allocation to give to
 		the output stream, just give it _that_ slice.
 	*/
-	buf := make([]u8, count, context.temp_allocator);
-	#no_bounds_check for i in 0..<count {
-		buf[i] = c;
-		cb.last[z.bytes_written & cb.window_mask] = c;
-		z.bytes_written += 1;
-	}
-	when INLINE_ADLER { z.rolling_hash = hash.adler32(buf, z.rolling_hash); }
 
-	_, e := z.output->impl_write(buf);
-	if e != .None {
-		return e;
+	if !z.output_to_stream {
+		/*
+			Resize if needed.
+		*/
+		if int(z.bytes_written) + int(count) >= len(z.output_buf) {
+			grow_buffer(z.output_buf);
+		}
+
+		#no_bounds_check {
+			for _ in 0..<count {
+				z.output_buf[z.bytes_written] = c;
+				cb.last[z.bytes_written & cb.window_mask] = c;
+				z.bytes_written += 1;
+			}
+		}
+
+		when INLINE_ADLER {
+			/* TODO(Jeroen): Test */
+			buf := z.output_buf[bytes_written:][:count];
+			z.rolling_hash = hash.adler32(buf, z.rolling_hash);
+		}
+
+	} else {
+		buf := make([]u8, count, context.temp_allocator);
+		#no_bounds_check for i in 0..<count {
+			buf[i] = c;
+			cb.last[z.bytes_written & cb.window_mask] = c;
+			z.bytes_written += 1;
+		}
+		when INLINE_ADLER { z.rolling_hash = hash.adler32(buf, z.rolling_hash); }
+
+		_, e := z.output->impl_write(buf);
+		if e != .None {
+			return e;
+		}		
 	}
+
 	return .None;
 }
 
@@ -190,22 +268,45 @@ repl_bytes :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, distance: u16) ->
 		without having to worry about wrapping, so no need for a temp allocation to give to
 		the output stream, just give it _that_ slice.
 	*/
-	buf := make([]u8, count, context.temp_allocator);
 
 	offset := z.bytes_written - i64(distance);
-	#no_bounds_check for i in 0..<count {
-		c := cb.last[offset & cb.window_mask];
 
-		cb.last[z.bytes_written & cb.window_mask] = c;
-		buf[i] = c;
-		z.bytes_written += 1; offset += 1;
-	}
-	when INLINE_ADLER { z.rolling_hash = hash.adler32(buf, z.rolling_hash); }
+	if !z.output_to_stream {
+		if int(z.bytes_written) + int(count) >= len(z.output_buf) {
+			grow_buffer(z.output_buf);
+		}
 
-	_, e := z.output->impl_write(buf);
-	if e != .None {
-		return e;
+		#no_bounds_check {
+			for _ in 0..<count {
+				c := cb.last[offset & cb.window_mask];
+				z.output_buf[z.bytes_written] = c;
+				cb.last[z.bytes_written & cb.window_mask] = c;
+				z.bytes_written += 1; offset += 1;
+			}
+		}
+
+		when INLINE_ADLER {
+			/* TODO(Jeroen): Test */
+			buf := z.output_buf[bytes_written:][:count];
+			z.rolling_hash = hash.adler32(buf, z.rolling_hash);
+		}
+	} else {
+		buf := make([]u8, count, context.temp_allocator);
+		#no_bounds_check for i in 0..<count {
+			c := cb.last[offset & cb.window_mask];
+
+			cb.last[z.bytes_written & cb.window_mask] = c;
+			buf[i] = c;
+			z.bytes_written += 1; offset += 1;
+		}
+		when INLINE_ADLER { z.rolling_hash = hash.adler32(buf, z.rolling_hash); }
+
+		_, e := z.output->impl_write(buf);
+		if e != .None {
+			return e;
+		}
 	}
+
 	return .None;
 }
 
@@ -495,7 +596,7 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 
 	if expected_output_size > -1 && expected_output_size <= compress.COMPRESS_OUTPUT_ALLOCATE_MAX {
 		reserve(z.output_buf, expected_output_size);
-		// resize (z.output_buf, expected_output_size);
+		resize (z.output_buf, expected_output_size);
 	} else {
 		reserve(z.output_buf, compress.COMPRESS_OUTPUT_ALLOCATE_MIN);
 	}

From 8ba1c9a6cd1f5f05d6b40b68576858c50ccb9a7b Mon Sep 17 00:00:00 2001
From: Jeroen van Rijn <Kelimion@users.noreply.github.com>
Date: Sat, 26 Jun 2021 17:44:12 +0200
Subject: [PATCH 3/9] ZLIB: Remove superfluous code.

---
 core/compress/common.odin       |  20 +---
 core/compress/gzip/gzip.odin    |   9 +-
 core/compress/zlib/example.odin |   2 +
 core/compress/zlib/zlib.odin    | 188 ++++++++++----------------------
 4 files changed, 69 insertions(+), 150 deletions(-)

diff --git a/core/compress/common.odin b/core/compress/common.odin
index e21339839..2b7c1afe1 100644
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -10,6 +10,7 @@ package compress
 
 import "core:io"
 import "core:image"
+import "core:bytes"
 
 /*
 	These settings bound how much compression algorithms will allocate for their output buffer.
@@ -44,8 +45,6 @@ when size_of(uintptr) == 8 {
 }
 
 
-// when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" }
-
 Error :: union {
 	General_Error,
 	Deflate_Error,
@@ -132,8 +131,7 @@ Context :: struct #packed {
 	input:             io.Stream,
 	input_data:        []u8,
 
-	output:            io.Stream,
-	output_buf:        ^[dynamic]u8,
+	output:            ^bytes.Buffer,
 	bytes_written:     i64,
 
 	/*
@@ -159,6 +157,10 @@ Context :: struct #packed {
 	input_refills_from_stream: b8,
 	output_to_stream: b8,
 	reserved_flag: b8,
+
+	bit_buffer_stuff: [3]u64,
+
+
 }
 // #assert(size_of(Context) == 128);
 
@@ -188,8 +190,6 @@ Code_Buffer :: struct #packed {
 
 @(optimization_mode="speed")
 read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.Error) {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read Slice"); }
-
 	#no_bounds_check {
 		if len(z.input_data) >= size {
 			res = z.input_data[:size];
@@ -220,8 +220,6 @@ read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.
 
 @(optimization_mode="speed")
 read_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Error) {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read Data"); }
-
 	b, e := read_slice(z, size_of(T));
 	if e == .None {
 		return (^T)(&b[0])^, .None;
@@ -232,8 +230,6 @@ read_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
 
 @(optimization_mode="speed")
 read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read u8"); }
-
 	#no_bounds_check {
 		if len(z.input_data) >= 1 {
 			res = z.input_data[0];
@@ -252,8 +248,6 @@ read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
 
 @(optimization_mode="speed")
 peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Error) {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Peek Data"); }
-
 	size :: size_of(T);
 
 	#no_bounds_check {
@@ -304,8 +298,6 @@ peek_back_byte :: #force_inline proc(cb: ^Code_Buffer, offset: i64) -> (res: u8,
 // Generalized bit reader LSB
 @(optimization_mode="speed")
 refill_lsb :: proc(z: ^Context, cb: ^Code_Buffer, width := i8(24)) {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Refill LSB"); }
-
 	refill := u64(width);
 
 	for {
diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin
index 4d185fa6e..3d3c28447 100644
--- a/core/compress/gzip/gzip.odin
+++ b/core/compress/gzip/gzip.odin
@@ -110,10 +110,11 @@ load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1,
 	stream := bytes.reader_to_stream(&r);
 
 	ctx := &compress.Context{
-		input  = stream,
+		input = stream,
 		input_data = slice,
 		input_fully_in_memory = true,
 		input_refills_from_stream = true,
+		output = buf,
 	};
 
 	err = load_from_stream(ctx, buf, known_gzip_size, expected_output_size, allocator);
@@ -138,8 +139,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 	input_data_consumed := 0;
 
-	ws := bytes.buffer_to_stream(buf);
-	ctx.output = ws;
+	ctx.output = buf;
 
 	if expected_output_size > GZIP_MAX_PAYLOAD_SIZE {
 		return E_GZIP.Payload_Size_Exceeds_Max_Payload;
@@ -365,6 +365,9 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 	payload_u32le, footer_error = compress.read_data(ctx, u32le);
 
 	payload := bytes.buffer_to_bytes(buf);
+
+	// fmt.printf("GZIP payload: %v\n", string(payload));
+
 	crc32 := u32le(hash.crc32(payload));
 
 	if crc32 != payload_crc {
diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin
index cfbbcd717..2aabd7b6c 100644
--- a/core/compress/zlib/example.odin
+++ b/core/compress/zlib/example.odin
@@ -13,6 +13,7 @@ package zlib
 
 import "core:bytes"
 import "core:fmt"
+import "core:compress"
 
 main :: proc() {
 
@@ -37,6 +38,7 @@ main :: proc() {
 	};
 	OUTPUT_SIZE :: 438;
 
+	fmt.printf("size_of(Context): %v\n", size_of(compress.Context));
 
 	buf: bytes.Buffer;
 
diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin
index 25911ba88..032c9c8e0 100644
--- a/core/compress/zlib/zlib.odin
+++ b/core/compress/zlib/zlib.odin
@@ -13,13 +13,10 @@ import "core:compress"
 
 import "core:mem"
 import "core:io"
-import "core:bytes"
 import "core:hash"
-
+import "core:bytes"
 // import "core:fmt"
 
-// when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" }
-
 /*
 	zlib.inflate decompresses a ZLIB stream passed in as a []u8 or io.Stream.
 	Returns: Error.
@@ -33,8 +30,6 @@ import "core:hash"
 	`Context.rolling_hash` if not inlining it is still faster.
 
 */
-INLINE_ADLER :: false;
-
 Context     :: compress.Context;
 Code_Buffer :: compress.Code_Buffer;
 
@@ -172,89 +167,48 @@ grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
 
 @(optimization_mode="speed")
 write_byte :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, c: u8) -> (err: io.Error) #no_bounds_check {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Write Byte"); }
-	c := c;
-
-	if !z.output_to_stream {
-		/*
-			Resize if needed.
-		*/
-		if int(z.bytes_written) + 1 >= len(z.output_buf) {
-			grow_buffer(z.output_buf);
+	/*
+		Resize if needed.
+	*/
+	if int(z.bytes_written) + 1 >= len(z.output.buf) {
+		e := grow_buffer(&z.output.buf);
+		if e != nil {
+			return .Short_Write;
 		}
-
-		#no_bounds_check {
-			z.output_buf[z.bytes_written] = c;
-			cb.last[z.bytes_written & cb.window_mask] = c;
-		}
-		z.bytes_written += 1;
-
-		when INLINE_ADLER {
-			buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1};
-			z.rolling_hash = hash.adler32(buf, z.rolling_hash);
-		}
-	} else {
-		buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1};
-		when INLINE_ADLER {
-			z.rolling_hash = hash.adler32(buf, z.rolling_hash);
-		}
-
-		_, e := z.output->impl_write(buf);
-		if e != .None {
-			return e;
-		}
-
-		#no_bounds_check cb.last[z.bytes_written & cb.window_mask] = c;
-		z.bytes_written += 1;
 	}
 
+	#no_bounds_check {
+		z.output.buf[z.bytes_written] = c;
+		cb.last[z.bytes_written & cb.window_mask] = c;
+	}
+	z.bytes_written += 1;
 	return .None;
 }
 
 @(optimization_mode="speed")
 repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.Error) 	#no_bounds_check {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Byte"); }
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		without having to worry about wrapping, so no need for a temp allocation to give to
 		the output stream, just give it _that_ slice.
 	*/
 
-	if !z.output_to_stream {
-		/*
-			Resize if needed.
-		*/
-		if int(z.bytes_written) + int(count) >= len(z.output_buf) {
-			grow_buffer(z.output_buf);
+	/*
+	Resize if needed.
+	*/
+	if int(z.bytes_written) + int(count) >= len(z.output.buf) {
+		e := grow_buffer(&z.output.buf);
+		if e != nil {
+			return .Short_Write;
 		}
+	}
 
-		#no_bounds_check {
-			for _ in 0..<count {
-				z.output_buf[z.bytes_written] = c;
-				cb.last[z.bytes_written & cb.window_mask] = c;
-				z.bytes_written += 1;
-			}
-		}
-
-		when INLINE_ADLER {
-			/* TODO(Jeroen): Test */
-			buf := z.output_buf[bytes_written:][:count];
-			z.rolling_hash = hash.adler32(buf, z.rolling_hash);
-		}
-
-	} else {
-		buf := make([]u8, count, context.temp_allocator);
-		#no_bounds_check for i in 0..<count {
-			buf[i] = c;
+	#no_bounds_check {
+		for _ in 0..<count {
+			z.output.buf[z.bytes_written] = c;
 			cb.last[z.bytes_written & cb.window_mask] = c;
 			z.bytes_written += 1;
 		}
-		when INLINE_ADLER { z.rolling_hash = hash.adler32(buf, z.rolling_hash); }
-
-		_, e := z.output->impl_write(buf);
-		if e != .None {
-			return e;
-		}		
 	}
 
 	return .None;
@@ -262,7 +216,6 @@ repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.
 
 @(optimization_mode="speed")
 repl_bytes :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, distance: u16) -> (err: io.Error) {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Bytes"); }
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		without having to worry about wrapping, so no need for a temp allocation to give to
@@ -271,40 +224,20 @@ repl_bytes :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, distance: u16) ->
 
 	offset := z.bytes_written - i64(distance);
 
-	if !z.output_to_stream {
-		if int(z.bytes_written) + int(count) >= len(z.output_buf) {
-			grow_buffer(z.output_buf);
+	if int(z.bytes_written) + int(count) >= len(z.output.buf) {
+		e := grow_buffer(&z.output.buf);
+		if e != nil {
+			return .Short_Write;
 		}
+	}
 
-		#no_bounds_check {
-			for _ in 0..<count {
-				c := cb.last[offset & cb.window_mask];
-				z.output_buf[z.bytes_written] = c;
-				cb.last[z.bytes_written & cb.window_mask] = c;
-				z.bytes_written += 1; offset += 1;
-			}
-		}
-
-		when INLINE_ADLER {
-			/* TODO(Jeroen): Test */
-			buf := z.output_buf[bytes_written:][:count];
-			z.rolling_hash = hash.adler32(buf, z.rolling_hash);
-		}
-	} else {
-		buf := make([]u8, count, context.temp_allocator);
-		#no_bounds_check for i in 0..<count {
+	#no_bounds_check {
+		for _ in 0..<count {
 			c := cb.last[offset & cb.window_mask];
-
+			z.output.buf[z.bytes_written] = c;
 			cb.last[z.bytes_written & cb.window_mask] = c;
-			buf[i] = c;
 			z.bytes_written += 1; offset += 1;
 		}
-		when INLINE_ADLER { z.rolling_hash = hash.adler32(buf, z.rolling_hash); }
-
-		_, e := z.output->impl_write(buf);
-		if e != .None {
-			return e;
-		}
 	}
 
 	return .None;
@@ -317,7 +250,6 @@ allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_T
 
 @(optimization_mode="speed")
 build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Build Huffman Table"); }
 	sizes:     [HUFFMAN_MAX_BITS+1]int;
 	next_code: [HUFFMAN_MAX_BITS]int;
 
@@ -377,7 +309,6 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
 
 @(optimization_mode="speed")
 decode_huffman_slowpath :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman Slow"); }
 	code := u16(compress.peek_bits_lsb(z, cb, 16));
 
 	k := int(z_bit_reverse(code, 16));
@@ -409,7 +340,6 @@ decode_huffman_slowpath :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table
 
 @(optimization_mode="speed")
 decode_huffman :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman"); }
 	if cb.num_bits < 16 {
 		if cb.num_bits > 63 {
 			return 0, E_ZLIB.Code_Buffer_Malformed;
@@ -430,7 +360,6 @@ decode_huffman :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r:
 
 @(optimization_mode="speed")
 parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Parse Huffman Block"); }
 	#no_bounds_check for {
 		value, e := decode_huffman(z, cb, z_repeat);
 		if e != nil {
@@ -573,10 +502,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 		compress.discard_to_next_byte_lsb(cb);
 		adler32 := compress.read_bits_lsb(ctx, cb, 8) << 24 | compress.read_bits_lsb(ctx, cb, 8) << 16 | compress.read_bits_lsb(ctx, cb, 8) << 8 | compress.read_bits_lsb(ctx, cb, 8);
 
-		when !INLINE_ADLER {
-			buf := (^bytes.Buffer)(ctx.output.stream_data).buf[:];
-			ctx.rolling_hash = hash.adler32(buf);
-		}
+		ctx.rolling_hash = hash.adler32(ctx.output.buf[:]);
 
 		if ctx.rolling_hash != u32(adler32) {
 			return E_General.Checksum_Failed;
@@ -587,26 +513,28 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 
 @(optimization_mode="speed")
 inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Inflate Raw"); }
+	expected_output_size := expected_output_size;
 
-	buf := (^bytes.Buffer)(z.output.stream_data);
-	z.output_buf = &buf.buf;
-
-	// fmt.printf("ZLIB: Expected Payload Size: %v\n", expected_output_size);
-
-	if expected_output_size > -1 && expected_output_size <= compress.COMPRESS_OUTPUT_ALLOCATE_MAX {
-		reserve(z.output_buf, expected_output_size);
-		resize (z.output_buf, expected_output_size);
-	} else {
-		reserve(z.output_buf, compress.COMPRESS_OUTPUT_ALLOCATE_MIN);
+	if expected_output_size <= 0 {
+		/*
+			Always set up a minimum allocation size.
+		*/
+		expected_output_size = compress.COMPRESS_OUTPUT_ALLOCATE_MIN;
 	}
 
-	// reserve(&z.output_buf, compress.COMPRESS_OUTPUT_ALLOCATE_MIN);
-	// resize (&z.output_buf, compress.COMPRESS_OUTPUT_ALLOCATE_MIN);
-	// fmt.printf("ZLIB: buf: %v\n", buf);
-	// fmt.printf("ZLIB: output_buf: %v\n", z.output_buf);
-	// fmt.printf("ZLIB: z.output: %v\n", z.output);
+	// fmt.printf("\nZLIB: Expected Payload Size: %v\n\n", expected_output_size);
 
+	if expected_output_size > 0 && expected_output_size <= compress.COMPRESS_OUTPUT_ALLOCATE_MAX {
+		/*
+			Try to pre-allocate the output buffer.
+		*/
+		reserve(&z.output.buf, expected_output_size);
+		resize (&z.output.buf, expected_output_size);
+	};
+
+	if len(z.output.buf) != expected_output_size {
+		return .Resize_Failed;
+	}
 
 	cb.num_bits    = 0;
 	cb.code_buffer = 0;
@@ -651,7 +579,6 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 
 		switch type {
 		case 0:
-			when #config(TRACY_ENABLE, false) { tracy.ZoneN("Literal Block"); }
 			// Uncompressed block
 
 			// Discard bits until next byte boundary
@@ -680,7 +607,6 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 		case 3:
 			return E_Deflate.BType_3;
 		case:
-			when #config(TRACY_ENABLE, false) { tracy.ZoneN("Huffman Block"); }
 			// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
 			if type == 1 {
 				// Use fixed code lengths.
@@ -784,8 +710,8 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 	}
 
 	// fmt.printf("ZLIB: Bytes written: %v\n", z.bytes_written);
-	if int(z.bytes_written) != len(buf.buf) {
-		resize(&buf.buf, int(z.bytes_written));
+	if int(z.bytes_written) != len(z.output.buf) {
+		resize(&z.output.buf, int(z.bytes_written));
 	}
 
 	return nil;
@@ -801,9 +727,7 @@ inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, e
 	ctx.input_data = input;
 	ctx.input_fully_in_memory = true;
 
-	buf := buf;
-	ws  := bytes.buffer_to_stream(buf);
-	ctx.output = ws;
+	ctx.output = buf;
 
 	err = inflate_from_stream(ctx=&ctx, raw=raw, expected_output_size=expected_output_size);
 
@@ -820,9 +744,7 @@ inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_B
 	ctx.input_data = input;
 	ctx.input_fully_in_memory = true;
 
-	buf := buf;
-	ws  := bytes.buffer_to_stream(buf);
-	ctx.output = ws;
+	ctx.output = buf;
 
 	return inflate_from_stream_raw(z=&ctx, cb=cb, expected_output_size=expected_output_size);
 }

From 30a580846038ae0e5a314102a1134688b15adfb8 Mon Sep 17 00:00:00 2001
From: Jeroen van Rijn <Kelimion@users.noreply.github.com>
Date: Sat, 26 Jun 2021 20:40:39 +0200
Subject: [PATCH 4/9] ZLIB: Moar faster.

---
 core/compress/common.odin    |  94 +++++++++----------------
 core/compress/gzip/gzip.odin |  41 +++++------
 core/compress/zlib/zlib.odin | 133 +++++++++++++++--------------------
 3 files changed, 109 insertions(+), 159 deletions(-)

diff --git a/core/compress/common.odin b/core/compress/common.odin
index 2b7c1afe1..47b80a995 100644
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -127,10 +127,9 @@ Deflate_Error :: enum {
 
 
 // General I/O context for ZLIB, LZW, etc.
-Context :: struct #packed {
-	input:             io.Stream,
+Context :: struct {
 	input_data:        []u8,
-
+	input:             io.Stream,
 	output:            ^bytes.Buffer,
 	bytes_written:     i64,
 
@@ -140,14 +139,9 @@ Context :: struct #packed {
 	size_packed:   i64,
 	size_unpacked: i64,
 
-	/*
-		Used to update hash as we write instead of all at once.
-	*/
-	rolling_hash:  u32,
-	/*
-		Reserved
-	*/
-	reserved:      [2]u32,
+	code_buffer: u64,
+	num_bits:    u64,
+
 	/*
 		Flags:
 			`input_fully_in_memory` tells us whether we're EOF when `input_data` is empty.
@@ -155,28 +149,8 @@ Context :: struct #packed {
 	*/
 	input_fully_in_memory: b8,
 	input_refills_from_stream: b8,
-	output_to_stream: b8,
-	reserved_flag: b8,
-
-	bit_buffer_stuff: [3]u64,
-
-
 }
-// #assert(size_of(Context) == 128);
 
-/*
-	Compression algorithm context
-*/
-Code_Buffer :: struct #packed {
-	code_buffer: u64,
-	num_bits:    u64,
-	/*
-		Sliding window buffer. Size must be a power of two.
-	*/
-	window_mask: i64,
-	last:        [dynamic]u8,
-}
-#assert(size_of(Code_Buffer) == 64);
 
 // Stream helpers
 /*
@@ -290,26 +264,26 @@ peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
 
 // Sliding window read back
 @(optimization_mode="speed")
-peek_back_byte :: #force_inline proc(cb: ^Code_Buffer, offset: i64) -> (res: u8, err: io.Error) {
+peek_back_byte :: #force_inline proc(z: ^Context, offset: i64) -> (res: u8, err: io.Error) {
 	// Look back into the sliding window.
-	return cb.last[offset & cb.window_mask], .None;
+	return z.output.buf[z.bytes_written - offset], .None;
 }
 
 // Generalized bit reader LSB
 @(optimization_mode="speed")
-refill_lsb :: proc(z: ^Context, cb: ^Code_Buffer, width := i8(24)) {
+refill_lsb :: proc(z: ^Context, width := i8(24)) {
 	refill := u64(width);
 
 	for {
-		if cb.num_bits > refill {
+		if z.num_bits > refill {
 			break;
 		}
-		if cb.code_buffer == 0 && cb.num_bits > 63 {
-			cb.num_bits = 0;
+		if z.code_buffer == 0 && z.num_bits > 63 {
+			z.num_bits = 0;
 		}
-		if cb.code_buffer >= 1 << uint(cb.num_bits) {
+		if z.code_buffer >= 1 << uint(z.num_bits) {
 			// Code buffer is malformed.
-			cb.num_bits = max(u64);
+			z.num_bits = max(u64);
 			return;
 		}
 		b, err := read_u8(z);
@@ -317,48 +291,48 @@ refill_lsb :: proc(z: ^Context, cb: ^Code_Buffer, width := i8(24)) {
 			// This is fine at the end of the file.
 			return;
 		}
-		cb.code_buffer |= (u64(b) << u8(cb.num_bits));
-		cb.num_bits += 8;
+		z.code_buffer |= (u64(b) << u8(z.num_bits));
+		z.num_bits += 8;
 	}
 }
 
 @(optimization_mode="speed")
-consume_bits_lsb :: #force_inline proc(cb: ^Code_Buffer, width: u8) {
-	cb.code_buffer >>= width;
-	cb.num_bits -= u64(width);
+consume_bits_lsb :: #force_inline proc(z: ^Context, width: u8) {
+	z.code_buffer >>= width;
+	z.num_bits -= u64(width);
 }
 
 @(optimization_mode="speed")
-peek_bits_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
-	if cb.num_bits < u64(width) {
-		refill_lsb(z, cb);
+peek_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	if z.num_bits < u64(width) {
+		refill_lsb(z);
 	}
 	// assert(z.num_bits >= i8(width));
-	return u32(cb.code_buffer & ~(~u64(0) << width));
+	return u32(z.code_buffer & ~(~u64(0) << width));
 }
 
 @(optimization_mode="speed")
-peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
-	assert(cb.num_bits >= u64(width));
-	return u32(cb.code_buffer & ~(~u64(0) << width));
+peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	assert(z.num_bits >= u64(width));
+	return u32(z.code_buffer & ~(~u64(0) << width));
 }
 
 @(optimization_mode="speed")
-read_bits_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
-	k := peek_bits_lsb(z, cb, width);
-	consume_bits_lsb(cb, width);
+read_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	k := peek_bits_lsb(z, width);
+	consume_bits_lsb(z, width);
 	return k;
 }
 
 @(optimization_mode="speed")
-read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
-	k := peek_bits_no_refill_lsb(z, cb, width);
-	consume_bits_lsb(cb, width);
+read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	k := peek_bits_no_refill_lsb(z, width);
+	consume_bits_lsb(z, width);
 	return k;
 }
 
 @(optimization_mode="speed")
-discard_to_next_byte_lsb :: proc(cb: ^Code_Buffer) {
-	discard := u8(cb.num_bits & 7);
-	consume_bits_lsb(cb, discard);
+discard_to_next_byte_lsb :: proc(z: ^Context) {
+	discard := u8(z.num_bits & 7);
+	consume_bits_lsb(z, discard);
 }
diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin
index 3d3c28447..38cb77b20 100644
--- a/core/compress/gzip/gzip.odin
+++ b/core/compress/gzip/gzip.odin
@@ -133,13 +133,13 @@ load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_siz
 	return;
 }
 
-load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
+load_from_stream :: proc(z: ^compress.Context, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
 	buf := buf;
 	expected_output_size := expected_output_size;
 
 	input_data_consumed := 0;
 
-	ctx.output = buf;
+	z.output = buf;
 
 	if expected_output_size > GZIP_MAX_PAYLOAD_SIZE {
 		return E_GZIP.Payload_Size_Exceeds_Max_Payload;
@@ -151,7 +151,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 	b: []u8;
 
-	header, e := compress.read_data(ctx, Header);
+	header, e := compress.read_data(z, Header);
 	if e != .None {
 		return E_General.File_Too_Short;
 	}
@@ -180,7 +180,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 	// printf("os: %v\n", OS_Name[header.os]);
 
 	if .extra in header.flags {
-		xlen, e_extra := compress.read_data(ctx, u16le);
+		xlen, e_extra := compress.read_data(z, u16le);
 		input_data_consumed += 2;
 
 		if e_extra != .None {
@@ -198,7 +198,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 		for xlen >= 4 {
 			// println("Parsing Extra field(s).");
-			field_id, field_error = compress.read_data(ctx, [2]u8);
+			field_id, field_error = compress.read_data(z, [2]u8);
 			if field_error != .None {
 				// printf("Parsing Extra returned: %v\n", field_error);
 				return E_General.Stream_Too_Short;
@@ -206,7 +206,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 			xlen -= 2;
 			input_data_consumed += 2;
 
-			field_length, field_error = compress.read_data(ctx, u16le);
+			field_length, field_error = compress.read_data(z, u16le);
 			if field_error != .None {
 				// printf("Parsing Extra returned: %v\n", field_error);
 				return E_General.Stream_Too_Short;
@@ -222,7 +222,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 			// printf("    Field \"%v\" of length %v found: ", string(field_id[:]), field_length);
 			if field_length > 0 {
-				b, field_error = compress.read_slice(ctx, int(field_length));
+				b, field_error = compress.read_slice(z, int(field_length));
 				if field_error != .None {
 					// printf("Parsing Extra returned: %v\n", field_error);
 					return E_General.Stream_Too_Short;
@@ -246,7 +246,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 		name_error: io.Error;
 
 		for i < len(name) {
-			b, name_error = compress.read_slice(ctx, 1);
+			b, name_error = compress.read_slice(z, 1);
 			if name_error != .None {
 				return E_General.Stream_Too_Short;
 			}
@@ -270,7 +270,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 		comment_error: io.Error;
 
 		for i < len(comment) {
-			b, comment_error = compress.read_slice(ctx, 1);
+			b, comment_error = compress.read_slice(z, 1);
 			if comment_error != .None {
 				return E_General.Stream_Too_Short;
 			}
@@ -289,7 +289,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 	if .header_crc in header.flags {
 		crc_error: io.Error;
-		_, crc_error = compress.read_slice(ctx, 2);
+		_, crc_error = compress.read_slice(z, 2);
 		input_data_consumed += 2;
 		if crc_error != .None {
 			return E_General.Stream_Too_Short;
@@ -303,9 +303,6 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 	/*
 		We should have arrived at the ZLIB payload.
 	*/
-	code_buffer := compress.Code_Buffer{};
-	cb := &code_buffer;
-
 	payload_u32le: u32le;
 
 	// fmt.printf("known_gzip_size: %v | expected_output_size: %v\n", known_gzip_size, expected_output_size);
@@ -325,10 +322,10 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 			We'll still want to ensure there's capacity left in the output buffer when we write, of course.
 
 		*/
-		if ctx.input_fully_in_memory && known_gzip_size > -1 {
+		if z.input_fully_in_memory && known_gzip_size > -1 {
 			offset := known_gzip_size - input_data_consumed - 4;
-			if len(ctx.input_data) >= offset + 4 {
-				length_bytes         := ctx.input_data[offset:][:4];
+			if len(z.input_data) >= offset + 4 {
+				length_bytes         := z.input_data[offset:][:4];
 				payload_u32le         = (^u32le)(&length_bytes[0])^;
 				expected_output_size = int(payload_u32le);
 			}
@@ -342,27 +339,27 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_
 
 	// fmt.printf("GZIP: Expected Payload Size: %v\n", expected_output_size);
 
-	zlib_error := zlib.inflate_raw(z=ctx, cb=&code_buffer, expected_output_size=expected_output_size);
+	zlib_error := zlib.inflate_raw(z=z, expected_output_size=expected_output_size);
 	if zlib_error != nil {
 		return zlib_error;
 	}
 	/*
 		Read CRC32 using the ctx bit reader because zlib may leave bytes in there.
 	*/
-	compress.discard_to_next_byte_lsb(cb);
+	compress.discard_to_next_byte_lsb(z);
 
 	footer_error: io.Error;
 
 	payload_crc_b: [4]u8;
 	for _, i in payload_crc_b {
-		if cb.num_bits >= 8 {
-			payload_crc_b[i] = u8(compress.read_bits_lsb(ctx, cb, 8));
+		if z.num_bits >= 8 {
+			payload_crc_b[i] = u8(compress.read_bits_lsb(z, 8));
 		} else {
-			payload_crc_b[i], footer_error = compress.read_u8(ctx);
+			payload_crc_b[i], footer_error = compress.read_u8(z);
 		}
 	}
 	payload_crc := transmute(u32le)payload_crc_b;
-	payload_u32le, footer_error = compress.read_data(ctx, u32le);
+	payload_u32le, footer_error = compress.read_data(z, u32le);
 
 	payload := bytes.buffer_to_bytes(buf);
 
diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin
index 032c9c8e0..aaa549e7b 100644
--- a/core/compress/zlib/zlib.odin
+++ b/core/compress/zlib/zlib.odin
@@ -30,8 +30,7 @@ import "core:bytes"
 	`Context.rolling_hash` if not inlining it is still faster.
 
 */
-Context     :: compress.Context;
-Code_Buffer :: compress.Code_Buffer;
+Context :: compress.Context;
 
 Compression_Method :: enum u8 {
 	DEFLATE  = 8,
@@ -166,7 +165,7 @@ grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
 */
 
 @(optimization_mode="speed")
-write_byte :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, c: u8) -> (err: io.Error) #no_bounds_check {
+write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check {
 	/*
 		Resize if needed.
 	*/
@@ -179,14 +178,13 @@ write_byte :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, c: u8) -> (err:
 
 	#no_bounds_check {
 		z.output.buf[z.bytes_written] = c;
-		cb.last[z.bytes_written & cb.window_mask] = c;
 	}
 	z.bytes_written += 1;
 	return .None;
 }
 
 @(optimization_mode="speed")
-repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.Error) 	#no_bounds_check {
+repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) 	#no_bounds_check {
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		without having to worry about wrapping, so no need for a temp allocation to give to
@@ -206,7 +204,6 @@ repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.
 	#no_bounds_check {
 		for _ in 0..<count {
 			z.output.buf[z.bytes_written] = c;
-			cb.last[z.bytes_written & cb.window_mask] = c;
 			z.bytes_written += 1;
 		}
 	}
@@ -215,14 +212,14 @@ repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.
 }
 
 @(optimization_mode="speed")
-repl_bytes :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, distance: u16) -> (err: io.Error) {
+repl_bytes :: proc(z: ^Context, count: u16, distance: u16) -> (err: io.Error) {
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		without having to worry about wrapping, so no need for a temp allocation to give to
 		the output stream, just give it _that_ slice.
 	*/
 
-	offset := z.bytes_written - i64(distance);
+	offset := i64(distance);
 
 	if int(z.bytes_written) + int(count) >= len(z.output.buf) {
 		e := grow_buffer(&z.output.buf);
@@ -233,10 +230,9 @@ repl_bytes :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, distance: u16) ->
 
 	#no_bounds_check {
 		for _ in 0..<count {
-			c := cb.last[offset & cb.window_mask];
+			c := z.output.buf[z.bytes_written - offset];
 			z.output.buf[z.bytes_written] = c;
-			cb.last[z.bytes_written & cb.window_mask] = c;
-			z.bytes_written += 1; offset += 1;
+			z.bytes_written += 1;
 		}
 	}
 
@@ -308,8 +304,8 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
 }
 
 @(optimization_mode="speed")
-decode_huffman_slowpath :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
-	code := u16(compress.peek_bits_lsb(z, cb, 16));
+decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+	code := u16(compress.peek_bits_lsb(z,16));
 
 	k := int(z_bit_reverse(code, 16));
 	s: u8;
@@ -332,41 +328,41 @@ decode_huffman_slowpath :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table
 		return 0, E_Deflate.Bad_Huffman_Code;
 	}
 
-	compress.consume_bits_lsb(cb, s);
+	compress.consume_bits_lsb(z, s);
 
 	r = t.value[b];
 	return r, nil;
 }
 
 @(optimization_mode="speed")
-decode_huffman :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
-	if cb.num_bits < 16 {
-		if cb.num_bits > 63 {
+decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+	if z.num_bits < 16 {
+		if z.num_bits > 63 {
 			return 0, E_ZLIB.Code_Buffer_Malformed;
 		}
-		compress.refill_lsb(z, cb);
-		if cb.num_bits > 63 {
+		compress.refill_lsb(z);
+		if z.num_bits > 63 {
 			return 0, E_General.Stream_Too_Short;
 		}
 	}
-	#no_bounds_check b := t.fast[cb.code_buffer & ZFAST_MASK];
+	#no_bounds_check b := t.fast[z.code_buffer & ZFAST_MASK];
 	if b != 0 {
 		s := u8(b >> ZFAST_BITS);
-		compress.consume_bits_lsb(cb, s);
+		compress.consume_bits_lsb(z, s);
 		return b & 511, nil;
 	}
-	return decode_huffman_slowpath(z, cb, t);
+	return decode_huffman_slowpath(z, t);
 }
 
 @(optimization_mode="speed")
-parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
+parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
 	#no_bounds_check for {
-		value, e := decode_huffman(z, cb, z_repeat);
+		value, e := decode_huffman(z, z_repeat);
 		if e != nil {
 			return err;
 		}
 		if value < 256 {
-			e := write_byte(z, cb, u8(value));
+			e := write_byte(z, u8(value));
 			if e != .None {
 				return E_General.Output_Too_Short;
 			}
@@ -379,17 +375,17 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
 			value -= 257;
 			length := Z_LENGTH_BASE[value];
 			if Z_LENGTH_EXTRA[value] > 0 {
-				length += u16(compress.read_bits_lsb(z, cb, Z_LENGTH_EXTRA[value]));
+				length += u16(compress.read_bits_lsb(z, Z_LENGTH_EXTRA[value]));
 			}
 
-			value, e = decode_huffman(z, cb, z_offset);
+			value, e = decode_huffman(z, z_offset);
 			if e != nil {
 				return E_Deflate.Bad_Huffman_Code;
 			}
 
 			distance := Z_DIST_BASE[value];
 			if Z_DIST_EXTRA[value] > 0 {
-				distance += u16(compress.read_bits_lsb(z, cb, Z_DIST_EXTRA[value]));
+				distance += u16(compress.read_bits_lsb(z, Z_DIST_EXTRA[value]));
 			}
 
 			if z.bytes_written < i64(distance) {
@@ -397,7 +393,6 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
 				return E_Deflate.Bad_Distance;
 			}
 
-			offset := i64(z.bytes_written - i64(distance));
 			/*
 				These might be sped up with a repl_byte call that copies
 				from the already written output more directly, and that
@@ -410,15 +405,15 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
 					Replicate the last outputted byte, length times.
 				*/
 				if length > 0 {
-					c := cb.last[offset & cb.window_mask];
-					e := repl_byte(z, cb, length, c);
+					c := z.output.buf[z.bytes_written - i64(distance)];
+					e := repl_byte(z, length, c);
 					if e != .None {
 						return E_General.Output_Too_Short;
 					}
 				}
 			} else {
 				if length > 0 {
-					e := repl_bytes(z, cb, length, distance);
+					e := repl_bytes(z, length, distance);
 					if e != .None {
 						return E_General.Output_Too_Short;
 					}
@@ -442,9 +437,6 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 		DEFLATE stream.
 	*/
 
-	code_buffer := Code_Buffer{};
-	cb := &code_buffer;
-
 	if !raw {
 		data_size := io.size(ctx.input);
 		if data_size < 6 {
@@ -462,8 +454,6 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 		if cinfo > 7 {
 			return E_ZLIB.Unsupported_Window_Size;
 		}
-		cb.window_mask = i64((1 << (cinfo + 8) - 1));
-
 		flg, _ := compress.read_u8(ctx);
 
 		fcheck  := flg & 0x1f;
@@ -488,23 +478,21 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 			at the end to compare checksums.
 		*/
 
-		// Seed the Adler32 rolling checksum.
-		ctx.rolling_hash = 1;
 	}
 
 	// Parse ZLIB stream without header.
-	err = inflate_raw(z=ctx, cb=cb, expected_output_size=expected_output_size);
+	err = inflate_raw(z=ctx, expected_output_size=expected_output_size);
 	if err != nil {
 		return err;
 	}
 
 	if !raw {
-		compress.discard_to_next_byte_lsb(cb);
-		adler32 := compress.read_bits_lsb(ctx, cb, 8) << 24 | compress.read_bits_lsb(ctx, cb, 8) << 16 | compress.read_bits_lsb(ctx, cb, 8) << 8 | compress.read_bits_lsb(ctx, cb, 8);
+		compress.discard_to_next_byte_lsb(ctx);
+		adler32 := compress.read_bits_lsb(ctx, 8) << 24 | compress.read_bits_lsb(ctx, 8) << 16 | compress.read_bits_lsb(ctx, 8) << 8 | compress.read_bits_lsb(ctx, 8);
 
-		ctx.rolling_hash = hash.adler32(ctx.output.buf[:]);
+		output_hash := hash.adler32(ctx.output.buf[:]);
 
-		if ctx.rolling_hash != u32(adler32) {
+		if output_hash != u32(adler32) {
 			return E_General.Checksum_Failed;
 		}
 	}
@@ -512,7 +500,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 }
 
 @(optimization_mode="speed")
-inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+inflate_from_stream_raw :: proc(z: ^Context, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	expected_output_size := expected_output_size;
 
 	if expected_output_size <= 0 {
@@ -536,8 +524,8 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 		return .Resize_Failed;
 	}
 
-	cb.num_bits    = 0;
-	cb.code_buffer = 0;
+	z.num_bits    = 0;
+	z.code_buffer = 0;
 
 	z_repeat:      ^Huffman_Table;
 	z_offset:      ^Huffman_Table;
@@ -559,21 +547,12 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 	defer free(z_offset);
 	defer free(codelength_ht);
 
-	if cb.window_mask == 0 {
-		cb.window_mask = DEFLATE_MAX_DISTANCE - 1;
-	}
-
-	// Allocate rolling window buffer.
-	cb.last = mem.make_dynamic_array_len_cap([dynamic]u8, cb.window_mask + 1, cb.window_mask + 1, allocator);
-	defer delete(cb.last);
-
-
 	final := u32(0);
 	type  := u32(0);
 
 	for {
-		final = compress.read_bits_lsb(z, cb, 1);
-		type  = compress.read_bits_lsb(z, cb, 2);
+		final = compress.read_bits_lsb(z, 1);
+		type  = compress.read_bits_lsb(z, 2);
 
 		// fmt.printf("Final: %v | Type: %v\n", final, type);
 
@@ -582,10 +561,10 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 			// Uncompressed block
 
 			// Discard bits until next byte boundary
-			compress.discard_to_next_byte_lsb(cb);
+			compress.discard_to_next_byte_lsb(z);
 
-			uncompressed_len  := i16(compress.read_bits_lsb(z, cb, 16));
-			length_check      := i16(compress.read_bits_lsb(z, cb, 16));
+			uncompressed_len  := i16(compress.read_bits_lsb(z, 16));
+			length_check      := i16(compress.read_bits_lsb(z, 16));
 
 			// fmt.printf("LEN: %v, ~LEN: %v, NLEN: %v, ~NLEN: %v\n", uncompressed_len, ~uncompressed_len, length_check, ~length_check);
 
@@ -599,9 +578,9 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 				and a single Adler32 update after.
 			*/
 			#no_bounds_check for uncompressed_len > 0 {
-				compress.refill_lsb(z, cb);
-				lit := compress.read_bits_lsb(z, cb, 8);
-				write_byte(z, cb, u8(lit));
+				compress.refill_lsb(z);
+				lit := compress.read_bits_lsb(z, 8);
+				write_byte(z, u8(lit));
 				uncompressed_len -= 1;
 			}
 		case 3:
@@ -625,14 +604,14 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 				//i: u32;
 				n: u32;
 
-				compress.refill_lsb(z, cb, 14);
-				hlit  := compress.read_bits_no_refill_lsb(z, cb, 5) + 257;
-				hdist := compress.read_bits_no_refill_lsb(z, cb, 5) + 1;
-				hclen := compress.read_bits_no_refill_lsb(z, cb, 4) + 4;
+				compress.refill_lsb(z, 14);
+				hlit  := compress.read_bits_no_refill_lsb(z, 5) + 257;
+				hdist := compress.read_bits_no_refill_lsb(z, 5) + 1;
+				hclen := compress.read_bits_no_refill_lsb(z, 4) + 4;
 				ntot  := hlit + hdist;
 
 				#no_bounds_check for i in 0..<hclen {
-					s := compress.read_bits_lsb(z, cb, 3);
+					s := compress.read_bits_lsb(z, 3);
 					codelength_sizes[Z_LENGTH_DEZIGZAG[i]] = u8(s);
 				}
 				err = build_huffman(codelength_ht, codelength_sizes[:]);
@@ -644,7 +623,7 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 				c: u16;
 
 				for n < ntot {
-					c, err = decode_huffman(z, cb, codelength_ht);
+					c, err = decode_huffman(z, codelength_ht);
 					if err != nil {
 						return err;
 					}
@@ -657,18 +636,18 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 						n += 1;
 					} else {
 						fill := u8(0);
-						compress.refill_lsb(z, cb, 7);
+						compress.refill_lsb(z, 7);
 						switch c {
 						case 16:
-							c = u16(compress.read_bits_no_refill_lsb(z, cb, 2) + 3);
+							c = u16(compress.read_bits_no_refill_lsb(z, 2) + 3);
 							if n == 0 {
 								return E_Deflate.Huffman_Bad_Code_Lengths;
 							}
 							fill = lencodes[n - 1];
 						case 17:
-							c = u16(compress.read_bits_no_refill_lsb(z, cb, 3) + 3);
+							c = u16(compress.read_bits_no_refill_lsb(z, 3) + 3);
 						case 18:
-							c = u16(compress.read_bits_no_refill_lsb(z, cb, 7) + 11);
+							c = u16(compress.read_bits_no_refill_lsb(z, 7) + 11);
 						case:
 								return E_Deflate.Huffman_Bad_Code_Lengths;
 						}
@@ -698,7 +677,7 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_s
 					return err;
 				}
 			}
-			err = parse_huffman_block(z, cb, z_repeat, z_offset);
+			err = parse_huffman_block(z, z_repeat, z_offset);
 			// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
 			if err != nil {
 				return err;
@@ -734,7 +713,7 @@ inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, e
 	return err;
 }
 
-inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
+inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
 	ctx := Context{};
 
 	r := bytes.Reader{};
@@ -746,7 +725,7 @@ inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_B
 
 	ctx.output = buf;
 
-	return inflate_from_stream_raw(z=&ctx, cb=cb, expected_output_size=expected_output_size);
+	return inflate_from_stream_raw(z=&ctx, expected_output_size=expected_output_size);
 }
 
 inflate     :: proc{inflate_from_stream, inflate_from_byte_array};

From 4689a6b341843e18f712025aac049c2938a0ed21 Mon Sep 17 00:00:00 2001
From: Jeroen van Rijn <Kelimion@users.noreply.github.com>
Date: Sat, 26 Jun 2021 22:25:55 +0200
Subject: [PATCH 5/9] Refactor compress.Context struct.

---
 core/compress/common.odin    | 24 ++++++++++++++----------
 core/compress/gzip/gzip.odin |  1 -
 core/compress/zlib/zlib.odin |  5 +++--
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/core/compress/common.odin b/core/compress/common.odin
index 47b80a995..d086c71f8 100644
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -127,28 +127,30 @@ Deflate_Error :: enum {
 
 
 // General I/O context for ZLIB, LZW, etc.
-Context :: struct {
+Context :: struct #packed {
 	input_data:        []u8,
 	input:             io.Stream,
 	output:            ^bytes.Buffer,
 	bytes_written:     i64,
 
-	/*
-		If we know the data size, we can optimize the reads and writes.
-	*/    
-	size_packed:   i64,
-	size_unpacked: i64,
-
 	code_buffer: u64,
 	num_bits:    u64,
 
+	/*
+		If we know the data size, we can optimize the reads and writes.
+	*/
+	size_packed:   i64,
+	size_unpacked: i64,
+
 	/*
 		Flags:
-			`input_fully_in_memory` tells us whether we're EOF when `input_data` is empty.
-			`input_refills_from_stream` tells us we can then possibly refill from the stream.
+			`input_fully_in_memory`
+				true  = This tells us we read input from `input_data` exclusively. [] = EOF.
+				false = Try to refill `input_data` from the `input` stream.
 	*/
 	input_fully_in_memory: b8,
-	input_refills_from_stream: b8,
+
+	padding: [1]u8,
 }
 
 
@@ -162,6 +164,8 @@ Context :: struct {
 	This simplifies end-of-stream handling where bits may be left in the bit buffer.
 */
 
+// TODO: Make these return compress.Error errors.
+
 @(optimization_mode="speed")
 read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.Error) {
 	#no_bounds_check {
diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin
index 38cb77b20..ac3940763 100644
--- a/core/compress/gzip/gzip.odin
+++ b/core/compress/gzip/gzip.odin
@@ -113,7 +113,6 @@ load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1,
 		input = stream,
 		input_data = slice,
 		input_fully_in_memory = true,
-		input_refills_from_stream = true,
 		output = buf,
 	};
 
diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin
index aaa549e7b..fcdb42c44 100644
--- a/core/compress/zlib/zlib.odin
+++ b/core/compress/zlib/zlib.odin
@@ -430,8 +430,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 		- read
 		- size
 
-		ctx.output must be an io.Stream backed by an implementation that supports:
-		- write
+		ctx.output must be a bytes.Buffer for now. We'll add a separate implementation that writes to a stream.
 
 		raw determines whether the ZLIB header is processed, or we're inflating a raw
 		DEFLATE stream.
@@ -499,6 +498,8 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 	return nil;
 }
 
+// TODO: Check alignment of reserve/resize.
+
 @(optimization_mode="speed")
 inflate_from_stream_raw :: proc(z: ^Context, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	expected_output_size := expected_output_size;

From 02f96681856fa4123746bda0f44324a7bdb98528 Mon Sep 17 00:00:00 2001
From: Jeroen van Rijn <Kelimion@users.noreply.github.com>
Date: Sun, 27 Jun 2021 13:19:24 +0200
Subject: [PATCH 6/9] ZLIB: Split up input from stream and memory into own code
 paths.

---
 core/compress/common.odin       | 164 ++++++++++++++++---
 core/compress/gzip/example.odin |   2 +-
 core/compress/gzip/gzip.odin    | 270 +++++++++++++++++++++++++++++---
 core/compress/zlib/example.odin |   2 +-
 core/compress/zlib/zlib.odin    | 111 +++++++++----
 5 files changed, 475 insertions(+), 74 deletions(-)

diff --git a/core/compress/common.odin b/core/compress/common.odin
index d086c71f8..1aff6db6e 100644
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -127,7 +127,24 @@ Deflate_Error :: enum {
 
 
 // General I/O context for ZLIB, LZW, etc.
-Context :: struct #packed {
+Context_Memory_Input :: struct #packed {
+	input_data:        []u8,
+	output:            ^bytes.Buffer,
+	bytes_written:     i64,
+
+	code_buffer: u64,
+	num_bits:    u64,
+
+	/*
+		If we know the data size, we can optimize the reads and writes.
+	*/
+	size_packed:   i64,
+	size_unpacked: i64,
+
+	padding: [1]u8,
+}
+
+Context_Stream_Input :: struct #packed {
 	input_data:        []u8,
 	input:             io.Stream,
 	output:            ^bytes.Buffer,
@@ -153,7 +170,6 @@ Context :: struct #packed {
 	padding: [1]u8,
 }
 
-
 // Stream helpers
 /*
 	TODO: These need to be optimized.
@@ -167,7 +183,7 @@ Context :: struct #packed {
 // TODO: Make these return compress.Error errors.
 
 @(optimization_mode="speed")
-read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.Error) {
+read_slice_from_memory :: #force_inline proc(z: ^Context_Memory_Input, size: int) -> (res: []u8, err: io.Error) {
 	#no_bounds_check {
 		if len(z.input_data) >= size {
 			res = z.input_data[:size];
@@ -176,17 +192,15 @@ read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.
 		}
 	}
 
-	if z.input_fully_in_memory {
-		if len(z.input_data) == 0 {
-			return []u8{}, .EOF;
-		} else {
-			return []u8{}, .Short_Buffer;
-		}
+	if len(z.input_data) == 0 {
+		return []u8{}, .EOF;
+	} else {
+		return []u8{}, .Short_Buffer;
 	}
+}
 
-	/*
-		TODO: Try to refill z.input_data from stream, using packed_data as a guide.
-	*/
+@(optimization_mode="speed")
+read_slice_from_stream :: #force_inline proc(z: ^Context_Stream_Input, size: int) -> (res: []u8, err: io.Error) {
 	b := make([]u8, size, context.temp_allocator);
 	_, e := z.input->impl_read(b[:]);
 	if e == .None {
@@ -196,8 +210,10 @@ read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.
 	return []u8{}, e;
 }
 
+read_slice :: proc{read_slice_from_memory, read_slice_from_stream};
+
 @(optimization_mode="speed")
-read_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+read_data :: #force_inline proc(z: ^$C, $T: typeid) -> (res: T, err: io.Error) {
 	b, e := read_slice(z, size_of(T));
 	if e == .None {
 		return (^T)(&b[0])^, .None;
@@ -207,7 +223,7 @@ read_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
 }
 
 @(optimization_mode="speed")
-read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
+read_u8_from_memory :: #force_inline proc(z: ^Context_Memory_Input) -> (res: u8, err: io.Error) {
 	#no_bounds_check {
 		if len(z.input_data) >= 1 {
 			res = z.input_data[0];
@@ -215,8 +231,12 @@ read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
 			return res, .None;
 		}
 	}
+	return 0, .EOF;
+}
 
-	b, e := read_slice(z, 1);
+@(optimization_mode="speed")
+read_u8_from_stream :: #force_inline proc(z: ^Context_Stream_Input) -> (res: u8, err: io.Error) {
+	b, e := read_slice_from_stream(z, 1);
 	if e == .None {
 		return b[0], .None;
 	}
@@ -224,8 +244,10 @@ read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
 	return 0, e;
 }
 
+read_u8 :: proc{read_u8_from_memory, read_u8_from_stream};
+
 @(optimization_mode="speed")
-peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+peek_data_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid) -> (res: T, err: io.Error) {
 	size :: size_of(T);
 
 	#no_bounds_check {
@@ -242,6 +264,11 @@ peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
 			return T{}, .Short_Buffer;
 		}
 	}
+}
+
+@(optimization_mode="speed")
+peek_data_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid) -> (res: T, err: io.Error) {
+	size :: size_of(T);
 
 	// Get current position to read from.
 	curr, e1 := z.input->impl_seek(0, .Current);
@@ -266,16 +293,20 @@ peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
 	return res, .None;
 }
 
+peek_data :: proc{peek_data_from_memory, peek_data_from_stream};
+
+
+
 // Sliding window read back
 @(optimization_mode="speed")
-peek_back_byte :: #force_inline proc(z: ^Context, offset: i64) -> (res: u8, err: io.Error) {
+peek_back_byte :: #force_inline proc(z: ^$C, offset: i64) -> (res: u8, err: io.Error) {
 	// Look back into the sliding window.
 	return z.output.buf[z.bytes_written - offset], .None;
 }
 
 // Generalized bit reader LSB
 @(optimization_mode="speed")
-refill_lsb :: proc(z: ^Context, width := i8(24)) {
+refill_lsb_from_memory :: proc(z: ^Context_Memory_Input, width := i8(24)) {
 	refill := u64(width);
 
 	for {
@@ -300,43 +331,126 @@ refill_lsb :: proc(z: ^Context, width := i8(24)) {
 	}
 }
 
+// Generalized bit reader LSB
 @(optimization_mode="speed")
-consume_bits_lsb :: #force_inline proc(z: ^Context, width: u8) {
+refill_lsb_from_stream :: proc(z: ^Context_Stream_Input, width := i8(24)) {
+	refill := u64(width);
+
+	for {
+		if z.num_bits > refill {
+			break;
+		}
+		if z.code_buffer == 0 && z.num_bits > 63 {
+			z.num_bits = 0;
+		}
+		if z.code_buffer >= 1 << uint(z.num_bits) {
+			// Code buffer is malformed.
+			z.num_bits = max(u64);
+			return;
+		}
+		b, err := read_u8(z);
+		if err != .None {
+			// This is fine at the end of the file.
+			return;
+		}
+		z.code_buffer |= (u64(b) << u8(z.num_bits));
+		z.num_bits += 8;
+	}
+}
+
+refill_lsb :: proc{refill_lsb_from_memory, refill_lsb_from_stream};
+
+
+@(optimization_mode="speed")
+consume_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) {
 	z.code_buffer >>= width;
 	z.num_bits -= u64(width);
 }
 
 @(optimization_mode="speed")
-peek_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+consume_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) {
+	z.code_buffer >>= width;
+	z.num_bits -= u64(width);
+}
+
+consume_bits_lsb :: proc{consume_bits_lsb_from_memory, consume_bits_lsb_from_stream};
+
+@(optimization_mode="speed")
+peek_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
 	if z.num_bits < u64(width) {
 		refill_lsb(z);
 	}
-	// assert(z.num_bits >= i8(width));
 	return u32(z.code_buffer & ~(~u64(0) << width));
 }
 
 @(optimization_mode="speed")
-peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+peek_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
+	if z.num_bits < u64(width) {
+		refill_lsb(z);
+	}
+	return u32(z.code_buffer & ~(~u64(0) << width));
+}
+
+peek_bits_lsb :: proc{peek_bits_lsb_from_memory, peek_bits_lsb_from_stream};
+
+@(optimization_mode="speed")
+peek_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
 	assert(z.num_bits >= u64(width));
 	return u32(z.code_buffer & ~(~u64(0) << width));
 }
 
 @(optimization_mode="speed")
-read_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+peek_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
+	assert(z.num_bits >= u64(width));
+	return u32(z.code_buffer & ~(~u64(0) << width));
+}
+
+peek_bits_no_refill_lsb :: proc{peek_bits_no_refill_lsb_from_memory, peek_bits_no_refill_lsb_from_stream};
+
+@(optimization_mode="speed")
+read_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
 	k := peek_bits_lsb(z, width);
 	consume_bits_lsb(z, width);
 	return k;
 }
 
 @(optimization_mode="speed")
-read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+read_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
+	k := peek_bits_lsb(z, width);
+	consume_bits_lsb(z, width);
+	return k;
+}
+
+read_bits_lsb :: proc{read_bits_lsb_from_memory, read_bits_lsb_from_stream};
+
+@(optimization_mode="speed")
+read_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
 	k := peek_bits_no_refill_lsb(z, width);
 	consume_bits_lsb(z, width);
 	return k;
 }
 
 @(optimization_mode="speed")
-discard_to_next_byte_lsb :: proc(z: ^Context) {
+read_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
+	k := peek_bits_no_refill_lsb(z, width);
+	consume_bits_lsb(z, width);
+	return k;
+}
+
+read_bits_no_refill_lsb :: proc{read_bits_no_refill_lsb_from_memory, read_bits_no_refill_lsb_from_stream};
+
+
+@(optimization_mode="speed")
+discard_to_next_byte_lsb_from_memory :: proc(z: ^Context_Memory_Input) {
 	discard := u8(z.num_bits & 7);
 	consume_bits_lsb(z, discard);
 }
+
+
+@(optimization_mode="speed")
+discard_to_next_byte_lsb_from_stream :: proc(z: ^Context_Stream_Input) {
+	discard := u8(z.num_bits & 7);
+	consume_bits_lsb(z, discard);
+}
+
+discard_to_next_byte_lsb :: proc{discard_to_next_byte_lsb_from_memory, discard_to_next_byte_lsb_from_stream};
\ No newline at end of file
diff --git a/core/compress/gzip/example.odin b/core/compress/gzip/example.odin
index bfb4267b8..9dfd68f23 100644
--- a/core/compress/gzip/example.odin
+++ b/core/compress/gzip/example.odin
@@ -65,7 +65,7 @@ main :: proc() {
 		if file == "-" {
 			// Read from stdin
 			s := os.stream_from_handle(os.stdin);
-			ctx := &compress.Context{
+			ctx := &compress.Context_Stream_Input{
 				input = s,
 			};
 			err = load(ctx, &buf);
diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin
index ac3940763..23446d80b 100644
--- a/core/compress/gzip/gzip.odin
+++ b/core/compress/gzip/gzip.odin
@@ -103,23 +103,7 @@ E_Deflate :: compress.Deflate_Error;
 
 GZIP_MAX_PAYLOAD_SIZE :: int(max(u32le));
 
-load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
-
-	r := bytes.Reader{};
-	bytes.reader_init(&r, slice);
-	stream := bytes.reader_to_stream(&r);
-
-	ctx := &compress.Context{
-		input = stream,
-		input_data = slice,
-		input_fully_in_memory = true,
-		output = buf,
-	};
-
-	err = load_from_stream(ctx, buf, known_gzip_size, expected_output_size, allocator);
-
-	return err;
-}
+load :: proc{load_from_slice, load_from_stream, load_from_file};
 
 load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
 	data, ok := os.read_entire_file(filename, allocator);
@@ -132,7 +116,255 @@ load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_siz
 	return;
 }
 
-load_from_stream :: proc(z: ^compress.Context, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
+load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
+	buf := buf;
+
+	z := &compress.Context_Memory_Input{
+		input_data = slice,
+		output = buf,
+	};
+
+	expected_output_size := expected_output_size;
+	input_data_consumed := 0;
+	z.output = buf;
+
+	if expected_output_size > GZIP_MAX_PAYLOAD_SIZE {
+		return E_GZIP.Payload_Size_Exceeds_Max_Payload;
+	}
+
+	if expected_output_size > compress.COMPRESS_OUTPUT_ALLOCATE_MAX {
+		return E_GZIP.Output_Exceeds_COMPRESS_OUTPUT_ALLOCATE_MAX;
+	}
+
+	b: []u8;
+
+	header, e := compress.read_data(z, Header);
+	if e != .None {
+		return E_General.File_Too_Short;
+	}
+	input_data_consumed += size_of(Header);
+
+	if header.magic != .GZIP {
+		return E_GZIP.Invalid_GZIP_Signature;
+	}
+	if header.compression_method != .DEFLATE {
+		return E_General.Unknown_Compression_Method;
+	}
+
+	if header.os >= ._Unknown {
+		header.os = .Unknown;
+	}
+
+	if .reserved_1 in header.flags || .reserved_2 in header.flags || .reserved_3 in header.flags {
+		return E_GZIP.Reserved_Flag_Set;
+	}
+
+	// printf("signature: %v\n", header.magic);
+	// printf("compression: %v\n", header.compression_method);
+	// printf("flags: %v\n", header.flags);
+	// printf("modification time: %v\n", time.unix(i64(header.modification_time), 0));
+	// printf("xfl: %v (%v)\n", header.xfl, int(header.xfl));
+	// printf("os: %v\n", OS_Name[header.os]);
+
+	if .extra in header.flags {
+		xlen, e_extra := compress.read_data(z, u16le);
+		input_data_consumed += 2;
+
+		if e_extra != .None {
+			return E_General.Stream_Too_Short;
+		}
+		// printf("Extra data present (%v bytes)\n", xlen);
+		if xlen < 4 {
+			// Minimum length is 2 for ID + 2 for a field length, if set to zero.
+			return E_GZIP.Invalid_Extra_Data;
+		}
+
+		field_id:     [2]u8;
+		field_length: u16le;
+		field_error: io.Error;
+
+		for xlen >= 4 {
+			// println("Parsing Extra field(s).");
+			field_id, field_error = compress.read_data(z, [2]u8);
+			if field_error != .None {
+				// printf("Parsing Extra returned: %v\n", field_error);
+				return E_General.Stream_Too_Short;
+			}
+			xlen -= 2;
+			input_data_consumed += 2;
+
+			field_length, field_error = compress.read_data(z, u16le);
+			if field_error != .None {
+				// printf("Parsing Extra returned: %v\n", field_error);
+				return E_General.Stream_Too_Short;
+			}
+			xlen -= 2;
+			input_data_consumed += 2;
+
+			if xlen <= 0 {
+				// We're not going to try and recover by scanning for a ZLIB header.
+				// Who knows what else is wrong with this file.
+				return E_GZIP.Invalid_Extra_Data;
+			}
+
+			// printf("    Field \"%v\" of length %v found: ", string(field_id[:]), field_length);
+			if field_length > 0 {
+				b, field_error = compress.read_slice(z, int(field_length));
+				if field_error != .None {
+					// printf("Parsing Extra returned: %v\n", field_error);
+					return E_General.Stream_Too_Short;
+				}
+				xlen -= field_length;
+				input_data_consumed += int(field_length);
+
+				// printf("%v\n", string(field_data));
+			}
+
+			if xlen != 0 {
+				return E_GZIP.Invalid_Extra_Data;
+			}
+		}
+	}
+
+	if .name in header.flags {
+		// Should be enough.
+		name: [1024]u8;
+		i := 0;
+		name_error: io.Error;
+
+		for i < len(name) {
+			b, name_error = compress.read_slice(z, 1);
+			if name_error != .None {
+				return E_General.Stream_Too_Short;
+			}
+			input_data_consumed += 1;
+			if b[0] == 0 {
+				break;
+			}
+			name[i] = b[0];
+			i += 1;
+			if i >= len(name) {
+				return E_GZIP.Original_Name_Too_Long;
+			}
+		}
+		// printf("Original filename: %v\n", string(name[:i]));
+	}
+
+	if .comment in header.flags {
+		// Should be enough.
+		comment: [1024]u8;
+		i := 0;
+		comment_error: io.Error;
+
+		for i < len(comment) {
+			b, comment_error = compress.read_slice(z, 1);
+			if comment_error != .None {
+				return E_General.Stream_Too_Short;
+			}
+			input_data_consumed += 1;
+			if b[0] == 0 {
+				break;
+			}
+			comment[i] = b[0];
+			i += 1;
+			if i >= len(comment) {
+				return E_GZIP.Comment_Too_Long;
+			}
+		}
+		// printf("Comment: %v\n", string(comment[:i]));
+	}
+
+	if .header_crc in header.flags {
+		crc_error: io.Error;
+		_, crc_error = compress.read_slice(z, 2);
+		input_data_consumed += 2;
+		if crc_error != .None {
+			return E_General.Stream_Too_Short;
+		}
+		/*
+			We don't actually check the CRC16 (lower 2 bytes of CRC32 of header data until the CRC field).
+			If we find a gzip file in the wild that sets this field, we can add proper support for it.
+		*/
+	}
+
+	/*
+		We should have arrived at the ZLIB payload.
+	*/
+	payload_u32le: u32le;
+
+	// fmt.printf("known_gzip_size: %v | expected_output_size: %v\n", known_gzip_size, expected_output_size);
+
+	if expected_output_size > -1 {
+		/*
+			We already checked that it's not larger than the output buffer max,
+			or GZIP length field's max.
+
+			We'll just pass it on to `zlib.inflate_raw`;
+		*/
+	} else {
+		/*
+			If we know the size of the GZIP file *and* it is fully in memory,
+			then we can peek at the unpacked size at the end.
+
+			We'll still want to ensure there's capacity left in the output buffer when we write, of course.
+
+		*/
+		if known_gzip_size > -1 {
+			offset := known_gzip_size - input_data_consumed - 4;
+			if len(z.input_data) >= offset + 4 {
+				length_bytes         := z.input_data[offset:][:4];
+				payload_u32le         = (^u32le)(&length_bytes[0])^;
+				expected_output_size = int(payload_u32le);
+			}
+		} else {
+			/*
+				TODO(Jeroen): When reading a GZIP from a stream, check if impl_seek is present.
+				If so, we can seek to the end, grab the size from the footer, and seek back to payload start.
+			*/
+		}
+	}
+
+	// fmt.printf("GZIP: Expected Payload Size: %v\n", expected_output_size);
+
+	zlib_error := zlib.inflate_raw(z=z, expected_output_size=expected_output_size);
+	if zlib_error != nil {
+		return zlib_error;
+	}
+	/*
+		Read CRC32 using the ctx bit reader because zlib may leave bytes in there.
+	*/
+	compress.discard_to_next_byte_lsb(z);
+
+	footer_error: io.Error;
+
+	payload_crc_b: [4]u8;
+	for _, i in payload_crc_b {
+		if z.num_bits >= 8 {
+			payload_crc_b[i] = u8(compress.read_bits_lsb(z, 8));
+		} else {
+			payload_crc_b[i], footer_error = compress.read_u8(z);
+		}
+	}
+	payload_crc := transmute(u32le)payload_crc_b;
+	payload_u32le, footer_error = compress.read_data(z, u32le);
+
+	payload := bytes.buffer_to_bytes(buf);
+
+	// fmt.printf("GZIP payload: %v\n", string(payload));
+
+	crc32 := u32le(hash.crc32(payload));
+
+	if crc32 != payload_crc {
+		return E_GZIP.Payload_CRC_Invalid;
+	}
+
+	if len(payload) != int(payload_u32le) {
+		return E_GZIP.Payload_Length_Invalid;
+	}
+	return nil;
+}
+
+load_from_stream :: proc(z: ^compress.Context_Stream_Input, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
 	buf := buf;
 	expected_output_size := expected_output_size;
 
@@ -375,5 +607,3 @@ load_from_stream :: proc(z: ^compress.Context, buf: ^bytes.Buffer, known_gzip_si
 	}
 	return nil;
 }
-
-load :: proc{load_from_file, load_from_slice, load_from_stream};
diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin
index 2aabd7b6c..cbc638b73 100644
--- a/core/compress/zlib/example.odin
+++ b/core/compress/zlib/example.odin
@@ -38,7 +38,7 @@ main :: proc() {
 	};
 	OUTPUT_SIZE :: 438;
 
-	fmt.printf("size_of(Context): %v\n", size_of(compress.Context));
+	fmt.printf("size_of(Context): %v\n", size_of(compress.Context_Memory_Input));
 
 	buf: bytes.Buffer;
 
diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin
index fcdb42c44..a527d4f5f 100644
--- a/core/compress/zlib/zlib.odin
+++ b/core/compress/zlib/zlib.odin
@@ -30,7 +30,6 @@ import "core:bytes"
 	`Context.rolling_hash` if not inlining it is still faster.
 
 */
-Context :: compress.Context;
 
 Compression_Method :: enum u8 {
 	DEFLATE  = 8,
@@ -165,7 +164,7 @@ grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
 */
 
 @(optimization_mode="speed")
-write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check {
+write_byte :: #force_inline proc(z: ^$C, c: u8) -> (err: io.Error) #no_bounds_check {
 	/*
 		Resize if needed.
 	*/
@@ -184,7 +183,7 @@ write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_boun
 }
 
 @(optimization_mode="speed")
-repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) 	#no_bounds_check {
+repl_byte :: proc(z: ^$C, count: u16, c: u8) -> (err: io.Error) 	#no_bounds_check {
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		without having to worry about wrapping, so no need for a temp allocation to give to
@@ -212,7 +211,7 @@ repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) 	#no_bounds
 }
 
 @(optimization_mode="speed")
-repl_bytes :: proc(z: ^Context, count: u16, distance: u16) -> (err: io.Error) {
+repl_bytes :: proc(z: ^$C, count: u16, distance: u16) -> (err: io.Error) {
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		without having to worry about wrapping, so no need for a temp allocation to give to
@@ -304,7 +303,7 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
 }
 
 @(optimization_mode="speed")
-decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+decode_huffman_slowpath :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
 	code := u16(compress.peek_bits_lsb(z,16));
 
 	k := int(z_bit_reverse(code, 16));
@@ -335,7 +334,7 @@ decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err:
 }
 
 @(optimization_mode="speed")
-decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+decode_huffman :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
 	if z.num_bits < 16 {
 		if z.num_bits > 63 {
 			return 0, E_ZLIB.Code_Buffer_Malformed;
@@ -355,7 +354,7 @@ decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #
 }
 
 @(optimization_mode="speed")
-parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
+parse_huffman_block :: proc(z: ^$C, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
 	#no_bounds_check for {
 		value, e := decode_huffman(z, z_repeat);
 		if e != nil {
@@ -424,7 +423,78 @@ parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) ->
 }
 
 @(optimization_mode="speed")
-inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+__inflate_from_memory :: proc(using ctx: ^compress.Context_Memory_Input, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+	/*
+		ctx.output must be a bytes.Buffer for now. We'll add a separate implementation that writes to a stream.
+
+		raw determines whether the ZLIB header is processed, or we're inflating a raw
+		DEFLATE stream.
+	*/
+
+	if !raw {
+		if len(ctx.input_data) < 6 {
+			return E_General.Stream_Too_Short;
+		}
+
+		cmf, _ := compress.read_u8(ctx);
+
+		method := Compression_Method(cmf & 0xf);
+		if method != .DEFLATE {
+			return E_General.Unknown_Compression_Method;
+		}
+
+		cinfo  := (cmf >> 4) & 0xf;
+		if cinfo > 7 {
+			return E_ZLIB.Unsupported_Window_Size;
+		}
+		flg, _ := compress.read_u8(ctx);
+
+		fcheck  := flg & 0x1f;
+		fcheck_computed := (cmf << 8 | flg) & 0x1f;
+		if fcheck != fcheck_computed {
+			return E_General.Checksum_Failed;
+		}
+
+		fdict   := (flg >> 5) & 1;
+		/*
+			We don't handle built-in dictionaries for now.
+			They're application specific and PNG doesn't use them.
+		*/
+		if fdict != 0 {
+			return E_ZLIB.FDICT_Unsupported;
+		}
+
+		// flevel  := Compression_Level((flg >> 6) & 3);
+		/*
+			Inflate can consume bits belonging to the Adler checksum.
+			We pass the entire stream to Inflate and will unget bytes if we need to
+			at the end to compare checksums.
+		*/
+
+	}
+
+	// Parse ZLIB stream without header.
+	err = inflate_raw(z=ctx, expected_output_size=expected_output_size);
+	if err != nil {
+		return err;
+	}
+
+	if !raw {
+		compress.discard_to_next_byte_lsb(ctx);
+		adler32 := compress.read_bits_lsb(ctx, 8) << 24 | compress.read_bits_lsb(ctx, 8) << 16 | compress.read_bits_lsb(ctx, 8) << 8 | compress.read_bits_lsb(ctx, 8);
+
+		output_hash := hash.adler32(ctx.output.buf[:]);
+
+		if output_hash != u32(adler32) {
+			return E_General.Checksum_Failed;
+		}
+	}
+	return nil;
+}
+
+
+@(optimization_mode="speed")
+__inflate_from_stream :: proc(using ctx: ^$C, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	/*
 		ctx.input must be an io.Stream backed by an implementation that supports:
 		- read
@@ -501,7 +571,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 // TODO: Check alignment of reserve/resize.
 
 @(optimization_mode="speed")
-inflate_from_stream_raw :: proc(z: ^Context, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+inflate_raw :: proc(z: ^$C, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	expected_output_size := expected_output_size;
 
 	if expected_output_size <= 0 {
@@ -698,36 +768,23 @@ inflate_from_stream_raw :: proc(z: ^Context, expected_output_size := -1, allocat
 }
 
 inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
-	ctx := Context{};
+	ctx := compress.Context_Memory_Input{};
 
-	r := bytes.Reader{};
-	bytes.reader_init(&r, input);
-	rs := bytes.reader_to_stream(&r);
-	ctx.input = rs;
 	ctx.input_data = input;
-	ctx.input_fully_in_memory = true;
-
 	ctx.output = buf;
 
-	err = inflate_from_stream(ctx=&ctx, raw=raw, expected_output_size=expected_output_size);
+	err = __inflate_from_memory(ctx=&ctx, raw=raw, expected_output_size=expected_output_size);
 
 	return err;
 }
 
 inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
-	ctx := Context{};
+	ctx := compress.Context_Memory_Input{};
 
-	r := bytes.Reader{};
-	bytes.reader_init(&r, input);
-	rs := bytes.reader_to_stream(&r);
-	ctx.input = rs;
 	ctx.input_data = input;
-	ctx.input_fully_in_memory = true;
-
 	ctx.output = buf;
 
-	return inflate_from_stream_raw(z=&ctx, expected_output_size=expected_output_size);
+	return inflate_raw(z=&ctx, expected_output_size=expected_output_size);
 }
 
-inflate     :: proc{inflate_from_stream, inflate_from_byte_array};
-inflate_raw :: proc{inflate_from_stream_raw, inflate_from_byte_array_raw};
+inflate     :: proc{__inflate_from_stream, inflate_from_byte_array};
\ No newline at end of file

From eaf88bcc4d2a1058d7987654e0adf6b81f30e078 Mon Sep 17 00:00:00 2001
From: Jeroen van Rijn <Kelimion@users.noreply.github.com>
Date: Sun, 27 Jun 2021 13:51:52 +0200
Subject: [PATCH 7/9] PNG: Let PNG use the new compress I/O routines.

---
 core/compress/common.odin    |  20 ++-
 core/compress/gzip/gzip.odin | 257 ++---------------------------------
 core/compress/zlib/zlib.odin |  85 +-----------
 core/image/png/png.odin      |  18 +--
 4 files changed, 35 insertions(+), 345 deletions(-)

diff --git a/core/compress/common.odin b/core/compress/common.odin
index 1aff6db6e..a1ed32aaf 100644
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -182,6 +182,16 @@ Context_Stream_Input :: struct #packed {
 
 // TODO: Make these return compress.Error errors.
 
+input_size_from_memory :: proc(z: ^Context_Memory_Input) -> (res: i64, err: Error) {
+	return i64(len(z.input_data)), nil;
+}
+
+input_size_from_stream :: proc(z: ^Context_Stream_Input) -> (res: i64, err: Error) {
+	return io.size(z.input), nil;
+}
+
+input_size :: proc{input_size_from_memory, input_size_from_stream};
+
 @(optimization_mode="speed")
 read_slice_from_memory :: #force_inline proc(z: ^Context_Memory_Input, size: int) -> (res: []u8, err: io.Error) {
 	#no_bounds_check {
@@ -257,12 +267,10 @@ peek_data_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid
 		}
 	}
 
-	if z.input_fully_in_memory {
-		if len(z.input_data) < size {
-			return T{}, .EOF;
-		} else {
-			return T{}, .Short_Buffer;
-		}
+	if len(z.input_data) == 0 {
+		return T{}, .EOF;
+	} else {
+		return T{}, .Short_Buffer;
 	}
 }
 
diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin
index 23446d80b..31037da67 100644
--- a/core/compress/gzip/gzip.odin
+++ b/core/compress/gzip/gzip.odin
@@ -103,7 +103,7 @@ E_Deflate :: compress.Deflate_Error;
 
 GZIP_MAX_PAYLOAD_SIZE :: int(max(u32le));
 
-load :: proc{load_from_slice, load_from_stream, load_from_file};
+load :: proc{load_from_slice, load_from_file, load_from_context};
 
 load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
 	data, ok := os.read_entire_file(filename, allocator);
@@ -123,9 +123,15 @@ load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1,
 		input_data = slice,
 		output = buf,
 	};
+	return load_from_context(z, buf, known_gzip_size, expected_output_size, allocator);
+}
 
+load_from_context :: proc(z: ^$C, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
+	buf := buf;
 	expected_output_size := expected_output_size;
+
 	input_data_consumed := 0;
+
 	z.output = buf;
 
 	if expected_output_size > GZIP_MAX_PAYLOAD_SIZE {
@@ -310,252 +316,9 @@ load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1,
 
 		*/
 		if known_gzip_size > -1 {
-			offset := known_gzip_size - input_data_consumed - 4;
-			if len(z.input_data) >= offset + 4 {
-				length_bytes         := z.input_data[offset:][:4];
-				payload_u32le         = (^u32le)(&length_bytes[0])^;
-				expected_output_size = int(payload_u32le);
-			}
-		} else {
-			/*
-				TODO(Jeroen): When reading a GZIP from a stream, check if impl_seek is present.
-				If so, we can seek to the end, grab the size from the footer, and seek back to payload start.
-			*/
-		}
-	}
-
-	// fmt.printf("GZIP: Expected Payload Size: %v\n", expected_output_size);
-
-	zlib_error := zlib.inflate_raw(z=z, expected_output_size=expected_output_size);
-	if zlib_error != nil {
-		return zlib_error;
-	}
-	/*
-		Read CRC32 using the ctx bit reader because zlib may leave bytes in there.
-	*/
-	compress.discard_to_next_byte_lsb(z);
-
-	footer_error: io.Error;
-
-	payload_crc_b: [4]u8;
-	for _, i in payload_crc_b {
-		if z.num_bits >= 8 {
-			payload_crc_b[i] = u8(compress.read_bits_lsb(z, 8));
-		} else {
-			payload_crc_b[i], footer_error = compress.read_u8(z);
-		}
-	}
-	payload_crc := transmute(u32le)payload_crc_b;
-	payload_u32le, footer_error = compress.read_data(z, u32le);
-
-	payload := bytes.buffer_to_bytes(buf);
-
-	// fmt.printf("GZIP payload: %v\n", string(payload));
-
-	crc32 := u32le(hash.crc32(payload));
-
-	if crc32 != payload_crc {
-		return E_GZIP.Payload_CRC_Invalid;
-	}
-
-	if len(payload) != int(payload_u32le) {
-		return E_GZIP.Payload_Length_Invalid;
-	}
-	return nil;
-}
-
-load_from_stream :: proc(z: ^compress.Context_Stream_Input, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
-	buf := buf;
-	expected_output_size := expected_output_size;
-
-	input_data_consumed := 0;
-
-	z.output = buf;
-
-	if expected_output_size > GZIP_MAX_PAYLOAD_SIZE {
-		return E_GZIP.Payload_Size_Exceeds_Max_Payload;
-	}
-
-	if expected_output_size > compress.COMPRESS_OUTPUT_ALLOCATE_MAX {
-		return E_GZIP.Output_Exceeds_COMPRESS_OUTPUT_ALLOCATE_MAX;
-	}
-
-	b: []u8;
-
-	header, e := compress.read_data(z, Header);
-	if e != .None {
-		return E_General.File_Too_Short;
-	}
-	input_data_consumed += size_of(Header);
-
-	if header.magic != .GZIP {
-		return E_GZIP.Invalid_GZIP_Signature;
-	}
-	if header.compression_method != .DEFLATE {
-		return E_General.Unknown_Compression_Method;
-	}
-
-	if header.os >= ._Unknown {
-		header.os = .Unknown;
-	}
-
-	if .reserved_1 in header.flags || .reserved_2 in header.flags || .reserved_3 in header.flags {
-		return E_GZIP.Reserved_Flag_Set;
-	}
-
-	// printf("signature: %v\n", header.magic);
-	// printf("compression: %v\n", header.compression_method);
-	// printf("flags: %v\n", header.flags);
-	// printf("modification time: %v\n", time.unix(i64(header.modification_time), 0));
-	// printf("xfl: %v (%v)\n", header.xfl, int(header.xfl));
-	// printf("os: %v\n", OS_Name[header.os]);
-
-	if .extra in header.flags {
-		xlen, e_extra := compress.read_data(z, u16le);
-		input_data_consumed += 2;
-
-		if e_extra != .None {
-			return E_General.Stream_Too_Short;
-		}
-		// printf("Extra data present (%v bytes)\n", xlen);
-		if xlen < 4 {
-			// Minimum length is 2 for ID + 2 for a field length, if set to zero.
-			return E_GZIP.Invalid_Extra_Data;
-		}
-
-		field_id:     [2]u8;
-		field_length: u16le;
-		field_error: io.Error;
-
-		for xlen >= 4 {
-			// println("Parsing Extra field(s).");
-			field_id, field_error = compress.read_data(z, [2]u8);
-			if field_error != .None {
-				// printf("Parsing Extra returned: %v\n", field_error);
-				return E_General.Stream_Too_Short;
-			}
-			xlen -= 2;
-			input_data_consumed += 2;
-
-			field_length, field_error = compress.read_data(z, u16le);
-			if field_error != .None {
-				// printf("Parsing Extra returned: %v\n", field_error);
-				return E_General.Stream_Too_Short;
-			}
-			xlen -= 2;
-			input_data_consumed += 2;
-
-			if xlen <= 0 {
-				// We're not going to try and recover by scanning for a ZLIB header.
-				// Who knows what else is wrong with this file.
-				return E_GZIP.Invalid_Extra_Data;
-			}
-
-			// printf("    Field \"%v\" of length %v found: ", string(field_id[:]), field_length);
-			if field_length > 0 {
-				b, field_error = compress.read_slice(z, int(field_length));
-				if field_error != .None {
-					// printf("Parsing Extra returned: %v\n", field_error);
-					return E_General.Stream_Too_Short;
-				}
-				xlen -= field_length;
-				input_data_consumed += int(field_length);
-
-				// printf("%v\n", string(field_data));
-			}
-
-			if xlen != 0 {
-				return E_GZIP.Invalid_Extra_Data;
-			}
-		}
-	}
-
-	if .name in header.flags {
-		// Should be enough.
-		name: [1024]u8;
-		i := 0;
-		name_error: io.Error;
-
-		for i < len(name) {
-			b, name_error = compress.read_slice(z, 1);
-			if name_error != .None {
-				return E_General.Stream_Too_Short;
-			}
-			input_data_consumed += 1;
-			if b[0] == 0 {
-				break;
-			}
-			name[i] = b[0];
-			i += 1;
-			if i >= len(name) {
-				return E_GZIP.Original_Name_Too_Long;
-			}
-		}
-		// printf("Original filename: %v\n", string(name[:i]));
-	}
-
-	if .comment in header.flags {
-		// Should be enough.
-		comment: [1024]u8;
-		i := 0;
-		comment_error: io.Error;
-
-		for i < len(comment) {
-			b, comment_error = compress.read_slice(z, 1);
-			if comment_error != .None {
-				return E_General.Stream_Too_Short;
-			}
-			input_data_consumed += 1;
-			if b[0] == 0 {
-				break;
-			}
-			comment[i] = b[0];
-			i += 1;
-			if i >= len(comment) {
-				return E_GZIP.Comment_Too_Long;
-			}
-		}
-		// printf("Comment: %v\n", string(comment[:i]));
-	}
-
-	if .header_crc in header.flags {
-		crc_error: io.Error;
-		_, crc_error = compress.read_slice(z, 2);
-		input_data_consumed += 2;
-		if crc_error != .None {
-			return E_General.Stream_Too_Short;
-		}
-		/*
-			We don't actually check the CRC16 (lower 2 bytes of CRC32 of header data until the CRC field).
-			If we find a gzip file in the wild that sets this field, we can add proper support for it.
-		*/
-	}
-
-	/*
-		We should have arrived at the ZLIB payload.
-	*/
-	payload_u32le: u32le;
-
-	// fmt.printf("known_gzip_size: %v | expected_output_size: %v\n", known_gzip_size, expected_output_size);
-
-	if expected_output_size > -1 {
-		/*
-			We already checked that it's not larger than the output buffer max,
-			or GZIP length field's max.
-
-			We'll just pass it on to `zlib.inflate_raw`;
-		*/
-	} else {
-		/*
-			If we know the size of the GZIP file *and* it is fully in memory,
-			then we can peek at the unpacked size at the end.
-
-			We'll still want to ensure there's capacity left in the output buffer when we write, of course.
-
-		*/
-		if z.input_fully_in_memory && known_gzip_size > -1 {
-			offset := known_gzip_size - input_data_consumed - 4;
-			if len(z.input_data) >= offset + 4 {
+			offset := i64(known_gzip_size - input_data_consumed - 4);
+			size, _ := compress.input_size(z);
+			if size >= offset + 4 {
 				length_bytes         := z.input_data[offset:][:4];
 				payload_u32le         = (^u32le)(&length_bytes[0])^;
 				expected_output_size = int(payload_u32le);
diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin
index a527d4f5f..363b7c001 100644
--- a/core/compress/zlib/zlib.odin
+++ b/core/compress/zlib/zlib.odin
@@ -423,7 +423,7 @@ parse_huffman_block :: proc(z: ^$C, z_repeat, z_offset: ^Huffman_Table) -> (err:
 }
 
 @(optimization_mode="speed")
-__inflate_from_memory :: proc(using ctx: ^compress.Context_Memory_Input, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+inflate_from_context :: proc(using ctx: ^compress.Context_Memory_Input, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	/*
 		ctx.output must be a bytes.Buffer for now. We'll add a separate implementation that writes to a stream.
 
@@ -432,83 +432,8 @@ __inflate_from_memory :: proc(using ctx: ^compress.Context_Memory_Input, raw :=
 	*/
 
 	if !raw {
-		if len(ctx.input_data) < 6 {
-			return E_General.Stream_Too_Short;
-		}
-
-		cmf, _ := compress.read_u8(ctx);
-
-		method := Compression_Method(cmf & 0xf);
-		if method != .DEFLATE {
-			return E_General.Unknown_Compression_Method;
-		}
-
-		cinfo  := (cmf >> 4) & 0xf;
-		if cinfo > 7 {
-			return E_ZLIB.Unsupported_Window_Size;
-		}
-		flg, _ := compress.read_u8(ctx);
-
-		fcheck  := flg & 0x1f;
-		fcheck_computed := (cmf << 8 | flg) & 0x1f;
-		if fcheck != fcheck_computed {
-			return E_General.Checksum_Failed;
-		}
-
-		fdict   := (flg >> 5) & 1;
-		/*
-			We don't handle built-in dictionaries for now.
-			They're application specific and PNG doesn't use them.
-		*/
-		if fdict != 0 {
-			return E_ZLIB.FDICT_Unsupported;
-		}
-
-		// flevel  := Compression_Level((flg >> 6) & 3);
-		/*
-			Inflate can consume bits belonging to the Adler checksum.
-			We pass the entire stream to Inflate and will unget bytes if we need to
-			at the end to compare checksums.
-		*/
-
-	}
-
-	// Parse ZLIB stream without header.
-	err = inflate_raw(z=ctx, expected_output_size=expected_output_size);
-	if err != nil {
-		return err;
-	}
-
-	if !raw {
-		compress.discard_to_next_byte_lsb(ctx);
-		adler32 := compress.read_bits_lsb(ctx, 8) << 24 | compress.read_bits_lsb(ctx, 8) << 16 | compress.read_bits_lsb(ctx, 8) << 8 | compress.read_bits_lsb(ctx, 8);
-
-		output_hash := hash.adler32(ctx.output.buf[:]);
-
-		if output_hash != u32(adler32) {
-			return E_General.Checksum_Failed;
-		}
-	}
-	return nil;
-}
-
-
-@(optimization_mode="speed")
-__inflate_from_stream :: proc(using ctx: ^$C, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
-	/*
-		ctx.input must be an io.Stream backed by an implementation that supports:
-		- read
-		- size
-
-		ctx.output must be a bytes.Buffer for now. We'll add a separate implementation that writes to a stream.
-
-		raw determines whether the ZLIB header is processed, or we're inflating a raw
-		DEFLATE stream.
-	*/
-
-	if !raw {
-		data_size := io.size(ctx.input);
-		if data_size < 6 {
+		size, size_err := compress.input_size(ctx);
+		if size < 6 || size_err != nil {
 			return E_General.Stream_Too_Short;
 		}
 
@@ -773,7 +698,7 @@ inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, e
 	ctx.input_data = input;
 	ctx.output = buf;
 
-	err = __inflate_from_memory(ctx=&ctx, raw=raw, expected_output_size=expected_output_size);
+	err = inflate_from_context(ctx=&ctx, raw=raw, expected_output_size=expected_output_size);
 
 	return err;
 }
@@ -787,4 +712,4 @@ inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, raw := fals
 	return inflate_raw(z=&ctx, expected_output_size=expected_output_size);
 }
 
-inflate     :: proc{__inflate_from_stream, inflate_from_byte_array};
\ No newline at end of file
+inflate     :: proc{inflate_from_context, inflate_from_byte_array};
\ No newline at end of file
diff --git a/core/image/png/png.odin b/core/image/png/png.odin
index afb71ca54..40f111895 100644
--- a/core/image/png/png.odin
+++ b/core/image/png/png.odin
@@ -245,7 +245,7 @@ ADAM7_Y_SPACING := []int{ 8,8,8,4,4,2,2 };
 
 // Implementation starts here
 
-read_chunk :: proc(ctx: ^compress.Context) -> (chunk: Chunk, err: Error) {
+read_chunk :: proc(ctx: ^$C) -> (chunk: Chunk, err: Error) {
 	ch, e := compress.read_data(ctx, Chunk_Header);
 	if e != .None {
 		return {}, E_General.Stream_Too_Short;
@@ -274,7 +274,7 @@ read_chunk :: proc(ctx: ^compress.Context) -> (chunk: Chunk, err: Error) {
 	return chunk, nil;
 }
 
-read_header :: proc(ctx: ^compress.Context) -> (IHDR, Error) {
+read_header :: proc(ctx: ^$C) -> (IHDR, Error) {
 	c, e := read_chunk(ctx);
 	if e != nil {
 		return {}, e;
@@ -353,14 +353,8 @@ chunk_type_to_name :: proc(type: ^Chunk_Type) -> string {
 }
 
 load_from_slice :: proc(slice: []u8, options := Options{}, allocator := context.allocator) -> (img: ^Image, err: Error) {
-	r := bytes.Reader{};
-	bytes.reader_init(&r, slice);
-	stream := bytes.reader_to_stream(&r);
-
-	ctx := &compress.Context{
-		input = stream,
+	ctx := &compress.Context_Memory_Input{
 		input_data = slice,
-		input_fully_in_memory = true,
 	};
 
 	/*
@@ -368,7 +362,7 @@ load_from_slice :: proc(slice: []u8, options := Options{}, allocator := context.
 		This way the stream reader could avoid the copy into the temp memory returned by it,
 		and instead return a slice into the original memory that's already owned by the caller.
 	*/
-	img, err = load_from_stream(ctx, options, allocator);
+	img, err = load_from_context(ctx, options, allocator);
 
 	return img, err;
 }
@@ -386,7 +380,7 @@ load_from_file :: proc(filename: string, options := Options{}, allocator := cont
 	}
 }
 
-load_from_stream :: proc(ctx: ^compress.Context, options := Options{}, allocator := context.allocator) -> (img: ^Image, err: Error) {
+load_from_context :: proc(ctx: ^$C, options := Options{}, allocator := context.allocator) -> (img: ^Image, err: Error) {
 	options := options;
 	if .info in options {
 		options |= {.return_metadata, .do_not_decompress_image};
@@ -1657,4 +1651,4 @@ defilter :: proc(img: ^Image, filter_bytes: ^bytes.Buffer, header: ^IHDR, option
 	return nil;
 }
 
-load :: proc{load_from_file, load_from_slice, load_from_stream};
+load :: proc{load_from_file, load_from_slice, load_from_context};

From 064516bf0b088ed0a3194a6134ba33600d8c2af3 Mon Sep 17 00:00:00 2001
From: Jeroen van Rijn <Kelimion@users.noreply.github.com>
Date: Sun, 27 Jun 2021 13:57:12 +0200
Subject: [PATCH 8/9] PNG: Inform `inflate` about expected output size for
 extra speed.

---
 core/image/png/png.odin | 56 +++++++++++++++++++++--------------------
 1 file changed, 29 insertions(+), 27 deletions(-)

diff --git a/core/image/png/png.odin b/core/image/png/png.odin
index 40f111895..e3a36c2fe 100644
--- a/core/image/png/png.odin
+++ b/core/image/png/png.odin
@@ -668,39 +668,41 @@ load_from_context :: proc(ctx: ^$C, options := Options{}, allocator := context.a
 		return img, E_PNG.IDAT_Missing;
 	}
 
+	/*
+		Calculate the expected output size, to help `inflate` make better decisions about the output buffer.
+		We'll also use it to check the returned buffer size is what we expected it to be.
+
+		Let's calculate the expected size of the IDAT based on its dimensions, and whether or not it's interlaced.
+	*/
+	expected_size: int;
+
+	if header.interlace_method != .Adam7 {
+		expected_size = compute_buffer_size(int(header.width), int(header.height), int(img.channels), int(header.bit_depth), 1);
+	} else {
+		/*
+			Because Adam7 divides the image up into sub-images, and each scanline must start
+			with a filter byte, Adam7 interlaced images can have a larger raw size.
+		*/
+		for p := 0; p < 7; p += 1 {
+			x := (int(header.width)  - ADAM7_X_ORIG[p] + ADAM7_X_SPACING[p] - 1) / ADAM7_X_SPACING[p];
+			y := (int(header.height) - ADAM7_Y_ORIG[p] + ADAM7_Y_SPACING[p] - 1) / ADAM7_Y_SPACING[p];
+			if x > 0 && y > 0 {
+				expected_size += compute_buffer_size(int(x), int(y), int(img.channels), int(header.bit_depth), 1);
+			}
+		}
+	}
+
 	buf: bytes.Buffer;
-	zlib_error := zlib.inflate(idat, &buf);
+	zlib_error := zlib.inflate(idat, &buf, false, expected_size);
 	defer bytes.buffer_destroy(&buf);
 
 	if zlib_error != nil {
 		return {}, zlib_error;
-	} else {
-		/*
-			Let's calcalate the expected size of the IDAT based on its dimensions,
-			and whether or not it's interlaced
-		*/
-		expected_size: int;
-		buf_len := len(buf.buf);
+	}
 
-		if header.interlace_method != .Adam7 {
-			expected_size = compute_buffer_size(int(header.width), int(header.height), int(img.channels), int(header.bit_depth), 1);
-		} else {
-			/*
-				Because Adam7 divides the image up into sub-images, and each scanline must start
-				with a filter byte, Adam7 interlaced images can have a larger raw size.
-			*/
-			for p := 0; p < 7; p += 1 {
-				x := (int(header.width)  - ADAM7_X_ORIG[p] + ADAM7_X_SPACING[p] - 1) / ADAM7_X_SPACING[p];
-				y := (int(header.height) - ADAM7_Y_ORIG[p] + ADAM7_Y_SPACING[p] - 1) / ADAM7_Y_SPACING[p];
-				if x > 0 && y > 0 {
-					expected_size += compute_buffer_size(int(x), int(y), int(img.channels), int(header.bit_depth), 1);
-				}
-			}
-		}
-
-		if expected_size != buf_len {
-			return {}, E_PNG.IDAT_Corrupt;
-		}
+	buf_len := len(buf.buf);
+	if expected_size != buf_len {
+		return {}, E_PNG.IDAT_Corrupt;
 	}
 
 	/*

From d949d5a046297b3aa08da03bb65cc13c2d5528ac Mon Sep 17 00:00:00 2001
From: Jeroen van Rijn <Kelimion@users.noreply.github.com>
Date: Sun, 27 Jun 2021 16:48:12 +0200
Subject: [PATCH 9/9] ZLIB: cleanup.

---
 core/compress/common.odin       | 84 +++++++++++++++++++--------------
 core/compress/gzip/gzip.odin    | 23 ++++-----
 core/compress/zlib/example.odin |  3 --
 core/compress/zlib/zlib.odin    | 12 +++--
 core/image/png/example.odin     |  4 +-
 5 files changed, 67 insertions(+), 59 deletions(-)

diff --git a/core/compress/common.odin b/core/compress/common.odin
index a1ed32aaf..ca63168a9 100644
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -132,17 +132,16 @@ Context_Memory_Input :: struct #packed {
 	output:            ^bytes.Buffer,
 	bytes_written:     i64,
 
-	code_buffer: u64,
-	num_bits:    u64,
+	code_buffer:       u64,
+	num_bits:          u64,
 
 	/*
 		If we know the data size, we can optimize the reads and writes.
 	*/
-	size_packed:   i64,
-	size_unpacked: i64,
-
-	padding: [1]u8,
+	size_packed:       i64,
+	size_unpacked:     i64,
 }
+#assert(size_of(Context_Memory_Input) == 64);
 
 Context_Stream_Input :: struct #packed {
 	input_data:        []u8,
@@ -150,14 +149,14 @@ Context_Stream_Input :: struct #packed {
 	output:            ^bytes.Buffer,
 	bytes_written:     i64,
 
-	code_buffer: u64,
-	num_bits:    u64,
+	code_buffer:       u64,
+	num_bits:          u64,
 
 	/*
 		If we know the data size, we can optimize the reads and writes.
 	*/
-	size_packed:   i64,
-	size_unpacked: i64,
+	size_packed:       i64,
+	size_unpacked:     i64,
 
 	/*
 		Flags:
@@ -170,11 +169,8 @@ Context_Stream_Input :: struct #packed {
 	padding: [1]u8,
 }
 
-// Stream helpers
 /*
-	TODO: These need to be optimized.
-
-	Streams should really only check if a certain method is available once, perhaps even during setup.
+	TODO: The stream versions should really only check if a certain method is available once, perhaps even during setup.
 
 	Bit and byte readers may be merged so that reading bytes will grab them from the bit buffer first.
 	This simplifies end-of-stream handling where bits may be left in the bit buffer.
@@ -256,6 +252,25 @@ read_u8_from_stream :: #force_inline proc(z: ^Context_Stream_Input) -> (res: u8,
 
 read_u8 :: proc{read_u8_from_memory, read_u8_from_stream};
 
+/*
+	You would typically only use this at the end of Inflate, to drain bits from the code buffer
+	preferentially. Takes a byte from the code buffer when 8 or more bits are buffered, else reads one from the input.
+*/
+@(optimization_mode="speed")
+read_u8_prefer_code_buffer_lsb :: #force_inline proc(z: ^$C) -> (res: u8, err: io.Error) {
+	if z.num_bits >= 8 { // A whole byte is still buffered: take it from the code buffer without refilling.
+		res = u8(read_bits_no_refill_lsb(z, 8));
+	} else {
+		size, _ := input_size(z); // The error returned by input_size is discarded here.
+		if size > 0 {
+			res, err = read_u8(z); // Fall back to reading the byte straight from the input.
+		} else {
+			err = .EOF; // Fewer than 8 bits buffered and the reported input size is not positive.
+		}
+	}
+	return;
+}
+
 @(optimization_mode="speed")
 peek_data_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid) -> (res: T, err: io.Error) {
 	size :: size_of(T);
@@ -314,28 +329,27 @@ peek_back_byte :: #force_inline proc(z: ^$C, offset: i64) -> (res: u8, err: io.E
 
 // Generalized bit reader LSB
 @(optimization_mode="speed")
-refill_lsb_from_memory :: proc(z: ^Context_Memory_Input, width := i8(24)) {
+refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width := i8(48)) { // Tops up the code buffer from z.input_data until it holds more than `width` bits.
 	refill := u64(width);
+	b      := u64(0);
+
+	if z.num_bits > refill { // Already holds more than `width` bits: nothing to do.
+		return;
+	}
 
 	for {
+		if len(z.input_data) != 0 {
+			b = u64(z.input_data[0]);
+			z.input_data = z.input_data[1:]; // Consume the byte by advancing the input slice.
+		} else {
+			b = 0; // Input exhausted: feed a zero byte; num_bits still grows by 8 below.
+		}
+
+		z.code_buffer |= b << u8(z.num_bits); // Place the new byte above the bits already buffered.
+		z.num_bits += 8;
 		if z.num_bits > refill {
 			break;
 		}
-		if z.code_buffer == 0 && z.num_bits > 63 {
-			z.num_bits = 0;
-		}
-		if z.code_buffer >= 1 << uint(z.num_bits) {
-			// Code buffer is malformed.
-			z.num_bits = max(u64);
-			return;
-		}
-		b, err := read_u8(z);
-		if err != .None {
-			// This is fine at the end of the file.
-			return;
-		}
-		z.code_buffer |= (u64(b) << u8(z.num_bits));
-		z.num_bits += 8;
 	}
 }
 
@@ -417,8 +431,8 @@ peek_bits_no_refill_lsb :: proc{peek_bits_no_refill_lsb_from_memory, peek_bits_n
 
 @(optimization_mode="speed")
 read_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
-	k := peek_bits_lsb(z, width);
-	consume_bits_lsb(z, width);
+	k := #force_inline peek_bits_lsb(z, width);
+	#force_inline consume_bits_lsb(z, width);
 	return k;
 }
 
@@ -433,8 +447,8 @@ read_bits_lsb :: proc{read_bits_lsb_from_memory, read_bits_lsb_from_stream};
 
 @(optimization_mode="speed")
 read_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
-	k := peek_bits_no_refill_lsb(z, width);
-	consume_bits_lsb(z, width);
+	k := #force_inline peek_bits_no_refill_lsb(z, width);
+	#force_inline consume_bits_lsb(z, width);
 	return k;
 }
 
@@ -451,7 +465,7 @@ read_bits_no_refill_lsb :: proc{read_bits_no_refill_lsb_from_memory, read_bits_n
 @(optimization_mode="speed")
 discard_to_next_byte_lsb_from_memory :: proc(z: ^Context_Memory_Input) {
 	discard := u8(z.num_bits & 7);
-	consume_bits_lsb(z, discard);
+	#force_inline consume_bits_lsb(z, discard);
 }
 
 
diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin
index 31037da67..6a17627bc 100644
--- a/core/compress/gzip/gzip.odin
+++ b/core/compress/gzip/gzip.odin
@@ -21,8 +21,6 @@ import "core:io"
 import "core:bytes"
 import "core:hash"
 
-// import "core:fmt"
-
 Magic :: enum u16le {
 	GZIP = 0x8b << 8 | 0x1f,
 }
@@ -346,26 +344,23 @@ load_from_context :: proc(z: ^$C, buf: ^bytes.Buffer, known_gzip_size := -1, exp
 
 	payload_crc_b: [4]u8;
 	for _, i in payload_crc_b {
-		if z.num_bits >= 8 {
-			payload_crc_b[i] = u8(compress.read_bits_lsb(z, 8));
-		} else {
-			payload_crc_b[i], footer_error = compress.read_u8(z);
-		}
+		payload_crc_b[i], footer_error = compress.read_u8_prefer_code_buffer_lsb(z);
 	}
 	payload_crc := transmute(u32le)payload_crc_b;
-	payload_u32le, footer_error = compress.read_data(z, u32le);
 
 	payload := bytes.buffer_to_bytes(buf);
-
-	// fmt.printf("GZIP payload: %v\n", string(payload));
-
-	crc32 := u32le(hash.crc32(payload));
-
+	crc32   := u32le(hash.crc32(payload));
 	if crc32 != payload_crc {
 		return E_GZIP.Payload_CRC_Invalid;
 	}
 
-	if len(payload) != int(payload_u32le) {
+	payload_len_b: [4]u8;
+	for _, i in payload_len_b {
+		payload_len_b[i], footer_error = compress.read_u8_prefer_code_buffer_lsb(z);
+	}
+	payload_len := transmute(u32le)payload_len_b;
+
+	if len(payload) != int(payload_len) {
 		return E_GZIP.Payload_Length_Invalid;
 	}
 	return nil;
diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin
index cbc638b73..aab074fb4 100644
--- a/core/compress/zlib/example.odin
+++ b/core/compress/zlib/example.odin
@@ -13,7 +13,6 @@ package zlib
 
 import "core:bytes"
 import "core:fmt"
-import "core:compress"
 
 main :: proc() {
 
@@ -38,8 +37,6 @@ main :: proc() {
 	};
 	OUTPUT_SIZE :: 438;
 
-	fmt.printf("size_of(Context): %v\n", size_of(compress.Context_Memory_Input));
-
 	buf: bytes.Buffer;
 
 	// We can pass ", true" to inflate a raw DEFLATE stream instead of a ZLIB wrapped one.
diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin
index 363b7c001..c9439b285 100644
--- a/core/compress/zlib/zlib.odin
+++ b/core/compress/zlib/zlib.odin
@@ -15,7 +15,6 @@ import "core:mem"
 import "core:io"
 import "core:hash"
 import "core:bytes"
-// import "core:fmt"
 
 /*
 	zlib.inflate decompresses a ZLIB stream passed in as a []u8 or io.Stream.
@@ -147,7 +146,6 @@ grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
 		Double until we reach the maximum allowed.
 	*/
 	new_size := min(len(buf) << 1, compress.COMPRESS_OUTPUT_ALLOCATE_MAX);
-
 	resize(buf, new_size);
 	if len(buf) != new_size {
 		/*
@@ -482,11 +480,16 @@ inflate_from_context :: proc(using ctx: ^compress.Context_Memory_Input, raw := f
 
 	if !raw {
 		compress.discard_to_next_byte_lsb(ctx);
-		adler32 := compress.read_bits_lsb(ctx, 8) << 24 | compress.read_bits_lsb(ctx, 8) << 16 | compress.read_bits_lsb(ctx, 8) << 8 | compress.read_bits_lsb(ctx, 8);
+
+		adler_b: [4]u8;
+		for _, i in adler_b {
+			adler_b[i], _ = compress.read_u8_prefer_code_buffer_lsb(ctx);
+		}
+		adler := transmute(u32be)adler_b;
 
 		output_hash := hash.adler32(ctx.output.buf[:]);
 
-		if output_hash != u32(adler32) {
+		if output_hash != u32(adler) {
 			return E_General.Checksum_Failed;
 		}
 	}
@@ -684,7 +687,6 @@ inflate_raw :: proc(z: ^$C, expected_output_size := -1, allocator := context.all
 		}
 	}
 
-	// fmt.printf("ZLIB: Bytes written: %v\n", z.bytes_written);
 	if int(z.bytes_written) != len(z.output.buf) {
 		resize(&z.output.buf, int(z.bytes_written));
 	}
diff --git a/core/image/png/example.odin b/core/image/png/example.odin
index 8fca684ab..b84876ac8 100644
--- a/core/image/png/example.odin
+++ b/core/image/png/example.odin
@@ -41,7 +41,7 @@ main :: proc() {
 demo :: proc() {
 	file: string;
 
-	options := image.Options{.return_metadata};
+	options := image.Options{}; // {.return_metadata};
 	err:       compress.Error;
 	img:      ^image.Image;
 
@@ -56,9 +56,9 @@ demo :: proc() {
 		v: ^Info;
 
 		fmt.printf("Image: %vx%vx%v, %v-bit.\n", img.width, img.height, img.channels, img.depth);
-
 		if img.metadata_ptr != nil && img.metadata_type == Info {
 			v = (^Info)(img.metadata_ptr);
+
 			// Handle ancillary chunks as you wish.
 			// We provide helper functions for a few types.
 			for c in v.chunks {