Merge pull request #1035 from Kelimion/zlib_optimize

Zlib optimize
2026-02-15 23:54:07 +00:00 · 2021-06-23 20:15:32 +02:00
parent bb3ffdbdfb 342adb627d
commit ea0ce7bd2c
7 changed files with 296 additions and 188 deletions
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -72,34 +72,54 @@ Deflate_Error :: enum {
 	BType_3,
 }

-// General context for ZLIB, LZW, etc.
-Context :: struct {
-	code_buffer: u32,
-	num_bits: i8,
-	/*
-		num_bits will be set to -100 if the buffer is malformed
-	*/
-	eof: b8,

-	input:  io.Stream,
-	output: io.Stream,
-	bytes_written: i64,
+// General I/O context for ZLIB, LZW, etc.
+Context :: struct #packed {
+	input:             io.Stream,
+	input_data:        []u8,
+
+	output:            io.Stream,
+	output_buf:        [dynamic]u8,
+	bytes_written:     i64,
+
+	/*
+		If we know the packed and unpacked data sizes, we can optimize the reads and writes.
+	*/
+	size_packed:   i64,
+	size_unpacked: i64,
+
 	/*
 		Used to update hash as we write instead of all at once.
 	*/
-	rolling_hash: u32,
-
-	// Sliding window buffer. Size must be a power of two.
-	window_size: i64,
-	window_mask: i64,
-	last: ^[dynamic]byte,
-
+	rolling_hash:  u32,
 	/*
-		If we know the raw data size, we can optimize the reads.
+		Reserved
 	*/
-	uncompressed_size: i64,
-	input_data: []u8,
+	reserved:      [2]u32,
+	/*
+		Flags:
+			`input_fully_in_memory` tells us whether we're EOF when `input_data` is empty.
+			`input_refills_from_stream` tells us we can then possibly refill from the stream.
+	*/
+	input_fully_in_memory: b8,
+	input_refills_from_stream: b8,
+	reserved_flags: [2]b8,
 }
+#assert(size_of(Context) == 128);
+
+/*
+	Compression algorithm context
+*/
+Code_Buffer :: struct #packed {
+	code_buffer: u64,
+	num_bits:    u64,
+	/*
+		Sliding window buffer. Size must be a power of two.
+	*/
+	window_mask: i64,
+	last:        [dynamic]u8,
+}
+#assert(size_of(Code_Buffer) == 64);

 // Stream helpers
 /*
@@ -111,109 +131,160 @@ Context :: struct {
 	This simplifies end-of-stream handling where bits may be left in the bit buffer.
 */

-read_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
-	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read Data"); }
-	b := make([]u8, size_of(T), context.temp_allocator);
-	r, e1 := io.to_reader(c.input);
-	_, e2 := io.read(r, b);
-	if !e1 || e2 != .None {
-		return T{}, e2;
+read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.Error) {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read Slice"); }
+	// Returns the next `size` input bytes. Fast path: hand out a view into `input_data` and advance past it.
+	if len(z.input_data) >= size {
+		res = z.input_data[:size];
+		z.input_data = z.input_data[size:];
+		return res, .None;
 	}

-	res = (^T)(raw_data(b))^;
-	return res, .None;
+	if z.input_fully_in_memory {
+		if len(z.input_data) == 0 {
+			return []u8{}, .EOF;
+		} else {
+			return []u8{}, .Short_Buffer;
+		}
+	}
+
+	/*
+		TODO: Try to refill z.input_data from stream, using size_packed as a guide.
+	*/
+	b := make([]u8, size, context.temp_allocator);
+	n, e := z.input->impl_read(b[:]);
+	if e == .None && n == size {
+		return b, .None;
+	}
+	if e == .None { e = .Short_Buffer; } // Short read: never pass the zero-filled tail of `b` off as data.
+	return []u8{}, e;
+}
+
+read_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read Data"); }
+	// Fetch size_of(T) raw bytes via read_slice, then reinterpret them as a T.
+	raw, read_err := read_slice(z, size_of(T));
+	if read_err != .None {
+		return T{}, read_err;
+	}
+
+	return (^T)(&raw[0])^, .None;
 }

 read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read u8"); }
-	return read_data(z, u8);
+	// Single-byte read served directly by read_slice; any error is passed through with a zero result.
+	one, read_err := read_slice(z, 1);
+	if read_err != .None {
+		return 0, read_err;
+	}
+
+	return one[0], .None;
 }

-peek_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Error) {
 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Peek Data"); }
+	// Returns the next size_of(T) input bytes reinterpreted as a T, leaving `input_data` untouched.
+	size :: size_of(T);
+
+	if len(z.input_data) >= size {
+		buf := z.input_data[:size];
+		return (^T)(&buf[0])^, .None;
+	}
+
+	if z.input_fully_in_memory {
+		if len(z.input_data) == 0 { // Fewer than `size` bytes remain at this point; only an empty buffer is a real EOF.
+			return T{}, .EOF;
+		} else {
+			return T{}, .Short_Buffer;
+		}
+	}
+
 	// Get current position to read from.
-	curr, e1 := c.input->impl_seek(0, .Current);
+	curr, e1 := z.input->impl_seek(0, .Current);
 	if e1 != .None {
 		return T{}, e1;
 	}
-	r, e2 := io.to_reader_at(c.input);
+	r, e2 := io.to_reader_at(z.input);
 	if !e2 {
 		return T{}, .Empty;
 	}
-	b := make([]u8, size_of(T), context.temp_allocator);
-	_, e3 := io.read_at(r, b, curr);
+	when size <= 128 {
+		b: [size]u8;
+	} else {
+		b := make([]u8, size, context.temp_allocator);
+	}
+	_, e3 := io.read_at(r, b[:], curr);
 	if e3 != .None {
 		return T{}, .Empty;
 	}

-	res = (^T)(raw_data(b))^;
+	res = (^T)(&b[0])^;
 	return res, .None;
 }

 // Sliding window read back
-peek_back_byte :: proc(c: ^Context, offset: i64) -> (res: u8, err: io.Error) {
+peek_back_byte :: #force_inline proc(cb: ^Code_Buffer, offset: i64) -> (res: u8, err: io.Error) {
 	// Look back into the sliding window.
-	return c.last[offset % c.window_size], .None;
+	return cb.last[offset & cb.window_mask], .None;
 }

 // Generalized bit reader LSB
-refill_lsb :: proc(z: ^Context, width := i8(24)) {
+refill_lsb :: proc(z: ^Context, cb: ^Code_Buffer, width := i8(24)) {
 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Refill LSB"); }
 	for {
-		if z.num_bits > width {
+		if cb.num_bits > u64(width) {
 			break;
 		}
-		if z.code_buffer == 0 && z.num_bits == -1 {
-			z.num_bits = 0;
+		if cb.code_buffer == 0 && cb.num_bits > 63 {
+			cb.num_bits = 0;
 		}
-		if z.code_buffer >= 1 << uint(z.num_bits) {
+		if cb.code_buffer >= 1 << uint(cb.num_bits) {
 			// Code buffer is malformed.
-			z.num_bits = -100;
+			cb.num_bits = max(u64);
 			return;
 		}
-		c, err := read_u8(z);
+		b, err := read_u8(z);
 		if err != .None {
 			// This is fine at the end of the file.
-			z.num_bits = -42;
-			z.eof = true;
 			return;
 		}
-		z.code_buffer |= (u32(c) << u8(z.num_bits));
-		z.num_bits += 8;
+		cb.code_buffer |= (u64(b) << u8(cb.num_bits));
+		cb.num_bits += 8;
 	}
 }

-consume_bits_lsb :: #force_inline proc(z: ^Context, width: u8) {
-	z.code_buffer >>= width;
-	z.num_bits -= i8(width);
+consume_bits_lsb :: #force_inline proc(cb: ^Code_Buffer, width: u8) {
+	cb.code_buffer >>= width;
+	cb.num_bits -= u64(width);
 }

-peek_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
-	if z.num_bits < i8(width) {
-		refill_lsb(z);
+peek_bits_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
+	if cb.num_bits < u64(width) {
+		refill_lsb(z, cb);
 	}
 	// assert(z.num_bits >= i8(width));
-	return z.code_buffer & ~(~u32(0) << width);
+	return u32(cb.code_buffer & ~(~u64(0) << width));
 }

-peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
-	assert(z.num_bits >= i8(width));
-	return z.code_buffer & ~(~u32(0) << width);
+peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
+	assert(cb.num_bits >= u64(width));
+	return u32(cb.code_buffer & ~(~u64(0) << width));
 }

-read_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
-	k := peek_bits_lsb(z, width);
-	consume_bits_lsb(z, width);
+read_bits_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
+	k := peek_bits_lsb(z, cb, width);
+	consume_bits_lsb(cb, width);
 	return k;
 }

-read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
-	k := peek_bits_no_refill_lsb(z, width);
-	consume_bits_lsb(z, width);
+read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
+	k := peek_bits_no_refill_lsb(z, cb, width);
+	consume_bits_lsb(cb, width);
 	return k;
 }

-discard_to_next_byte_lsb :: proc(z: ^Context) {
-	discard := u8(z.num_bits & 7);
-	consume_bits_lsb(z, discard);
+discard_to_next_byte_lsb :: proc(cb: ^Code_Buffer) {
+	discard := u8(cb.num_bits & 7);
+	consume_bits_lsb(cb, discard);
 }
--- a/core/compress/gzip/example.odin
+++ b/core/compress/gzip/example.odin
@@ -12,9 +12,10 @@ package gzip
 	A small GZIP implementation as an example.
 */

-import "core:compress/gzip"
 import "core:bytes"
 import "core:os"
+import "core:compress"
+import "core:fmt"

 // Small GZIP file with fextra, fname and fcomment present.
@private
@@ -31,7 +32,7 @@ TEST: []u8 = {

 main :: proc() {
 	// Set up output buffer.
-	buf: bytes.Buffer;
+	buf := bytes.Buffer{};

 	stdout :: proc(s: string) {
 		os.write_string(os.stdout, s);
@@ -44,26 +45,32 @@ main :: proc() {

 	if len(args) < 2 {
 		stderr("No input file specified.\n");
-		err := gzip.load(TEST, &buf);
-		if err != nil {
+		err := load(TEST, &buf);
+		if err == nil {
 			stdout("Displaying test vector: ");
 			stdout(bytes.buffer_to_string(&buf));
 			stdout("\n");
+		} else {
+			fmt.printf("gzip.load returned %v\n", err);
 		}
 		bytes.buffer_destroy(&buf);
+		os.exit(0);
 	}

 	// The rest are all files.
 	args = args[1:];
-	err: gzip.Error;
+	err: Error;

 	for file in args {
 		if file == "-" {
 			// Read from stdin
 			s := os.stream_from_handle(os.stdin);
-			err = gzip.load(s, &buf);
+			ctx := &compress.Context{
+				input = s,
+			};
+			err = load(ctx, &buf);
 		} else {
-			err = gzip.load(file, &buf);
+			err = load(file, &buf);
 		}
 		if err != nil {
 			if err != E_General.File_Not_Found {
--- a/core/compress/gzip/gzip.odin
+++ b/core/compress/gzip/gzip.odin
@@ -21,11 +21,6 @@ import "core:io"
 import "core:bytes"
 import "core:hash"

-/*
-
-
-*/
-
 Magic :: enum u16le {
 	GZIP = 0x8b << 8 | 0x1f,
 }
@@ -110,7 +105,13 @@ load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, allocator := context.al
 	bytes.reader_init(&r, slice);
 	stream := bytes.reader_to_stream(&r);

-	err = load_from_stream(stream, buf, allocator);
+	ctx := &compress.Context{
+		input  = stream,
+		input_data = slice,
+		input_fully_in_memory = true,
+		input_refills_from_stream = true,
+	};
+	err = load_from_stream(ctx, buf, allocator);

 	return err;
 }
@@ -126,15 +127,14 @@ load_from_file :: proc(filename: string, buf: ^bytes.Buffer, allocator := contex
 	return;
 }

-load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
-	ctx := compress.Context{
-		input  = stream,
-	};
+load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
 	buf := buf;
 	ws := bytes.buffer_to_stream(buf);
 	ctx.output = ws;

-	header, e := compress.read_data(&ctx, Header);
+	b: []u8;
+
+	header, e := compress.read_data(ctx, Header);
 	if e != .None {
 		return E_General.File_Too_Short;
 	}
@@ -162,7 +162,7 @@ load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := con
 	// printf("os: %v\n", OS_Name[header.os]);

 	if .extra in header.flags {
-		xlen, e_extra := compress.read_data(&ctx, u16le);
+		xlen, e_extra := compress.read_data(ctx, u16le);
 		if e_extra != .None {
 			return E_General.Stream_Too_Short;
 		}
@@ -178,14 +178,14 @@ load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := con

 		for xlen >= 4 {
 			// println("Parsing Extra field(s).");
-			field_id, field_error = compress.read_data(&ctx, [2]u8);
+			field_id, field_error = compress.read_data(ctx, [2]u8);
 			if field_error != .None {
 				// printf("Parsing Extra returned: %v\n", field_error);
 				return E_General.Stream_Too_Short;
 			}
 			xlen -= 2;

-			field_length, field_error = compress.read_data(&ctx, u16le);
+			field_length, field_error = compress.read_data(ctx, u16le);
 			if field_error != .None {
 				// printf("Parsing Extra returned: %v\n", field_error);
 				return E_General.Stream_Too_Short;
@@ -200,8 +200,7 @@ load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := con

 			// printf("    Field \"%v\" of length %v found: ", string(field_id[:]), field_length);
 			if field_length > 0 {
-				field_data := make([]u8, field_length, context.temp_allocator);
-				_, field_error = ctx.input->impl_read(field_data);
+				b, field_error = compress.read_slice(ctx, int(field_length));
 				if field_error != .None {
 					// printf("Parsing Extra returned: %v\n", field_error);
 					return E_General.Stream_Too_Short;
@@ -220,16 +219,15 @@ load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := con
 	if .name in header.flags {
 		// Should be enough.
 		name: [1024]u8;
-		b: [1]u8;
 		i := 0;
 		name_error: io.Error;

 		for i < len(name) {
-			_, name_error = ctx.input->impl_read(b[:]);
+			b, name_error = compress.read_slice(ctx, 1);
 			if name_error != .None {
 				return E_General.Stream_Too_Short;
 			}
-			if b == 0 {
+			if b[0] == 0 {
 				break;
 			}
 			name[i] = b[0];
@@ -244,16 +242,15 @@ load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := con
 	if .comment in header.flags {
 		// Should be enough.
 		comment: [1024]u8;
-		b: [1]u8;
 		i := 0;
 		comment_error: io.Error;

 		for i < len(comment) {
-			_, comment_error = ctx.input->impl_read(b[:]);
+			b, comment_error = compress.read_slice(ctx, 1);
 			if comment_error != .None {
 				return E_General.Stream_Too_Short;
 			}
-			if b == 0 {
+			if b[0] == 0 {
 				break;
 			}
 			comment[i] = b[0];
@@ -266,9 +263,8 @@ load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := con
 	}

 	if .header_crc in header.flags {
-		crc16: [2]u8;
 		crc_error: io.Error;
-		_, crc_error = ctx.input->impl_read(crc16[:]);
+		_, crc_error = compress.read_slice(ctx, 2);
 		if crc_error != .None {
 			return E_General.Stream_Too_Short;
 		}
@@ -281,30 +277,32 @@ load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := con
 	/*
 		We should have arrived at the ZLIB payload.
 	*/
+	code_buffer := compress.Code_Buffer{};
+	cb := &code_buffer;

-	zlib_error := zlib.inflate_raw(&ctx);
-
-	// fmt.printf("ZLIB returned: %v\n", zlib_error);
-
+	zlib_error := zlib.inflate_raw(ctx, &code_buffer);
 	if zlib_error != nil {
 		return zlib_error;
 	}
-
 	/*
 		Read CRC32 using the ctx bit reader because zlib may leave bytes in there.
 	*/
-	compress.discard_to_next_byte_lsb(&ctx);
+	compress.discard_to_next_byte_lsb(cb);
+
+	footer_error: io.Error;

 	payload_crc_b: [4]u8;
-	payload_len_b: [4]u8;
 	for _, i in payload_crc_b {
-		payload_crc_b[i] = u8(compress.read_bits_lsb(&ctx, 8));
+		if cb.num_bits >= 8 {
+			payload_crc_b[i] = u8(compress.read_bits_lsb(ctx, cb, 8));
+		} else {
+			payload_crc_b[i], footer_error = compress.read_u8(ctx);
+		}
 	}
 	payload_crc := transmute(u32le)payload_crc_b;
-	for _, i in payload_len_b {
-		payload_len_b[i] = u8(compress.read_bits_lsb(&ctx, 8));
-	}
-	payload_len := int(transmute(u32le)payload_len_b);
+
+	payload_len: u32le;
+	payload_len, footer_error = compress.read_data(ctx, u32le);

 	payload := bytes.buffer_to_bytes(buf);
 	crc32 := u32le(hash.crc32(payload));
@@ -313,7 +311,7 @@ load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := con
 		return E_GZIP.Payload_CRC_Invalid;
 	}

-	if len(payload) != payload_len {
+	if len(payload) != int(payload_len) {
 		return E_GZIP.Payload_Length_Invalid;
 	}
 	return nil;
--- a/core/compress/zlib/example.odin
+++ b/core/compress/zlib/example.odin
@@ -11,7 +11,6 @@ package zlib
 	An example of how to use `zlib.inflate`.
 */

-import "core:compress/zlib"
 import "core:bytes"
 import "core:fmt"

@@ -40,7 +39,7 @@ main :: proc() {
 	buf: bytes.Buffer;

 	// We can pass ", true" to inflate a raw DEFLATE stream instead of a ZLIB wrapped one.
-	err := zlib.inflate(ODIN_DEMO, &buf);
+	err := inflate(ODIN_DEMO, &buf);
 	defer bytes.buffer_destroy(&buf);

 	if err != nil {
--- a/core/compress/zlib/zlib.odin
+++ b/core/compress/zlib/zlib.odin
@@ -23,7 +23,8 @@ import "core:hash"
 	Returns: Error.
 */

-Context :: compress.Context;
+Context     :: compress.Context;
+Code_Buffer :: compress.Code_Buffer;

 Compression_Method :: enum u8 {
 	DEFLATE  = 8,
@@ -129,7 +130,7 @@ z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
 	return;
 }

-write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check {
+write_byte :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, c: u8) -> (err: io.Error) #no_bounds_check {
 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Write Byte"); }
 	c := c;
 	buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1};
@@ -139,13 +140,13 @@ write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_boun
 	if e != .None {
 		return e;
 	}
-	z.last[z.bytes_written & z.window_mask] = c;
+	cb.last[z.bytes_written & cb.window_mask] = c;

 	z.bytes_written += 1;
 	return .None;
 }

-repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) {
+repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.Error) {
 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Byte"); }
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
@@ -155,7 +156,7 @@ repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) {
 	buf := make([]u8, count, context.temp_allocator);
 	#no_bounds_check for i in 0..<count {
 		buf[i] = c;
-		z.last[z.bytes_written & z.window_mask] = c;
+		cb.last[z.bytes_written & cb.window_mask] = c;
 		z.bytes_written += 1;
 	}
 	z.rolling_hash = hash.adler32(buf, z.rolling_hash);
@@ -167,7 +168,7 @@ repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) {
 	return .None;
 }

-repl_bytes :: proc(z: ^Context, count: u16, distance: u16) -> (err: io.Error) {
+repl_bytes :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, distance: u16) -> (err: io.Error) {
 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Bytes"); }
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
@@ -178,9 +179,9 @@ repl_bytes :: proc(z: ^Context, count: u16, distance: u16) -> (err: io.Error) {

 	offset := z.bytes_written - i64(distance);
 	#no_bounds_check for i in 0..<count {
-		c := z.last[offset & z.window_mask];
+		c := cb.last[offset & cb.window_mask];

-		z.last[z.bytes_written & z.window_mask] = c;
+		cb.last[z.bytes_written & cb.window_mask] = c;
 		buf[i] = c;
 		z.bytes_written += 1; offset += 1;
 	}
@@ -257,9 +258,9 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
 	return nil;
 }

-decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+decode_huffman_slowpath :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman Slow"); }
-	code := u16(compress.peek_bits_lsb(z, 16));
+	code := u16(compress.peek_bits_lsb(z, cb, 16));

 	k := int(z_bit_reverse(code, 16));
 	s: u8;
@@ -282,41 +283,41 @@ decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err:
 		return 0, E_Deflate.Bad_Huffman_Code;
 	}

-	compress.consume_bits_lsb(z, s);
+	compress.consume_bits_lsb(cb, s);

 	r = t.value[b];
 	return r, nil;
 }

-decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+decode_huffman :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman"); }
-	if z.num_bits < 16 {
-		if z.num_bits == -100 {
+	if cb.num_bits < 16 {
+		if cb.num_bits > 63 {
 			return 0, E_ZLIB.Code_Buffer_Malformed;
 		}
-		compress.refill_lsb(z);
-		if z.eof {
+		compress.refill_lsb(z, cb);
+		if cb.num_bits > 63 {
 			return 0, E_General.Stream_Too_Short;
 		}
 	}
-	#no_bounds_check b := t.fast[z.code_buffer & ZFAST_MASK];
+	#no_bounds_check b := t.fast[cb.code_buffer & ZFAST_MASK];
 	if b != 0 {
 		s := u8(b >> ZFAST_BITS);
-		compress.consume_bits_lsb(z, s);
+		compress.consume_bits_lsb(cb, s);
 		return b & 511, nil;
 	}
-	return decode_huffman_slowpath(z, t);
+	return decode_huffman_slowpath(z, cb, t);
 }

-parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
+parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Parse Huffman Block"); }
 	#no_bounds_check for {
-		value, e := decode_huffman(z, z_repeat);
+		value, e := decode_huffman(z, cb, z_repeat);
 		if e != nil {
 			return err;
 		}
 		if value < 256 {
-			e := write_byte(z, u8(value));
+			e := write_byte(z, cb, u8(value));
 			if e != .None {
 				return E_General.Output_Too_Short;
 			}
@@ -329,17 +330,17 @@ parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) ->
 			value -= 257;
 			length := Z_LENGTH_BASE[value];
 			if Z_LENGTH_EXTRA[value] > 0 {
-				length += u16(compress.read_bits_lsb(z, Z_LENGTH_EXTRA[value]));
+				length += u16(compress.read_bits_lsb(z, cb, Z_LENGTH_EXTRA[value]));
 			}

-			value, e = decode_huffman(z, z_offset);
+			value, e = decode_huffman(z, cb, z_offset);
 			if e != nil {
 				return E_Deflate.Bad_Huffman_Code;
 			}

 			distance := Z_DIST_BASE[value];
 			if Z_DIST_EXTRA[value] > 0 {
-				distance += u16(compress.read_bits_lsb(z, Z_DIST_EXTRA[value]));
+				distance += u16(compress.read_bits_lsb(z, cb, Z_DIST_EXTRA[value]));
 			}

 			if z.bytes_written < i64(distance) {
@@ -360,15 +361,15 @@ parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) ->
 					Replicate the last outputted byte, length times.
 				*/
 				if length > 0 {
-					c := z.last[offset & z.window_mask];
-					e := repl_byte(z, length, c);
+					c := cb.last[offset & cb.window_mask];
+					e := repl_byte(z, cb, length, c);
 					if e != .None {
 						return E_General.Output_Too_Short;
 					}
 				}
 			} else {
 				if length > 0 {
-					e := repl_bytes(z, length, distance);
+					e := repl_bytes(z, cb, length, distance);
 					if e != .None {
 						return E_General.Output_Too_Short;
 					}
@@ -391,6 +392,9 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
 		DEFLATE stream.
 	*/

+	code_buffer := Code_Buffer{};
+	cb := &code_buffer;
+
 	if !raw {
 		data_size := io.size(ctx.input);
 		if data_size < 6 {
@@ -408,7 +412,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
 		if cinfo > 7 {
 			return E_ZLIB.Unsupported_Window_Size;
 		}
-		ctx.window_size = 1 << (cinfo + 8);
+		cb.window_mask = i64((1 << (cinfo + 8) - 1));

 		flg, _ := compress.read_u8(ctx);

@@ -439,15 +443,15 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
 	}

 	// Parse ZLIB stream without header.
-	err = inflate_raw(ctx);
+	err = inflate_raw(ctx, cb);
 	if err != nil {
 		return err;
 	}

 	if !raw {
-		compress.discard_to_next_byte_lsb(ctx);
+		compress.discard_to_next_byte_lsb(cb);

-		adler32 := compress.read_bits_lsb(ctx, 8) << 24 | compress.read_bits_lsb(ctx, 8) << 16 | compress.read_bits_lsb(ctx, 8) << 8 | compress.read_bits_lsb(ctx, 8);
+		adler32 := compress.read_bits_lsb(ctx, cb, 8) << 24 | compress.read_bits_lsb(ctx, cb, 8) << 16 | compress.read_bits_lsb(ctx, cb, 8) << 8 | compress.read_bits_lsb(ctx, cb, 8);
 		if ctx.rolling_hash != u32(adler32) {
 			return E_General.Checksum_Failed;
 		}
@@ -456,13 +460,13 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
 }

 // @(optimization_mode="speed")
-inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Inflate Raw"); }
 	final := u32(0);
 	type := u32(0);

-	z.num_bits = 0;
-	z.code_buffer = 0;
+	cb.num_bits = 0;
+	cb.code_buffer = 0;

 	z_repeat:      ^Huffman_Table;
 	z_offset:      ^Huffman_Table;
@@ -484,19 +488,17 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
 	defer free(z_offset);
 	defer free(codelength_ht);

-	if z.window_size == 0 {
-		z.window_size = DEFLATE_MAX_DISTANCE;
+	if cb.window_mask == 0 {
+		cb.window_mask = DEFLATE_MAX_DISTANCE - 1;
 	}
-	z.window_mask = z.window_size - 1;

 	// Allocate rolling window buffer.
-	last_b := mem.make_dynamic_array_len_cap([dynamic]u8, z.window_size, z.window_size, allocator);
-	z.last = &last_b;
-	defer delete(last_b);
+	cb.last = mem.make_dynamic_array_len_cap([dynamic]u8, cb.window_mask + 1, cb.window_mask + 1, allocator);
+	defer delete(cb.last);

 	for {
-		final = compress.read_bits_lsb(z, 1);
-		type  = compress.read_bits_lsb(z, 2);
+		final = compress.read_bits_lsb(z, cb, 1);
+		type  = compress.read_bits_lsb(z, cb, 2);

 		// fmt.printf("Final: %v | Type: %v\n", final, type);

@@ -506,10 +508,10 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
 			// Uncompressed block

 			// Discard bits until next byte boundary
-			compress.discard_to_next_byte_lsb(z);
+			compress.discard_to_next_byte_lsb(cb);

-			uncompressed_len  := i16(compress.read_bits_lsb(z, 16));
-			length_check      := i16(compress.read_bits_lsb(z, 16));
+			uncompressed_len  := i16(compress.read_bits_lsb(z, cb, 16));
+			length_check      := i16(compress.read_bits_lsb(z, cb, 16));

 			// fmt.printf("LEN: %v, ~LEN: %v, NLEN: %v, ~NLEN: %v\n", uncompressed_len, ~uncompressed_len, length_check, ~length_check);

@@ -523,9 +525,9 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
 				and a single Adler32 update after.
 			*/
 			#no_bounds_check for uncompressed_len > 0 {
-				compress.refill_lsb(z);
-				lit := compress.read_bits_lsb(z, 8);
-				write_byte(z, u8(lit));
+				compress.refill_lsb(z, cb);
+				lit := compress.read_bits_lsb(z, cb, 8);
+				write_byte(z, cb, u8(lit));
 				uncompressed_len -= 1;
 			}
 		case 3:
@@ -550,14 +552,14 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
 				//i: u32;
 				n: u32;

-				compress.refill_lsb(z, 14);
-				hlit  := compress.read_bits_no_refill_lsb(z, 5) + 257;
-				hdist := compress.read_bits_no_refill_lsb(z, 5) + 1;
-				hclen := compress.read_bits_no_refill_lsb(z, 4) + 4;
+				compress.refill_lsb(z, cb, 14);
+				hlit  := compress.read_bits_no_refill_lsb(z, cb, 5) + 257;
+				hdist := compress.read_bits_no_refill_lsb(z, cb, 5) + 1;
+				hclen := compress.read_bits_no_refill_lsb(z, cb, 4) + 4;
 				ntot  := hlit + hdist;

 				#no_bounds_check for i in 0..<hclen {
-					s := compress.read_bits_lsb(z, 3);
+					s := compress.read_bits_lsb(z, cb, 3);
 					codelength_sizes[Z_LENGTH_DEZIGZAG[i]] = u8(s);
 				}
 				err = build_huffman(codelength_ht, codelength_sizes[:]);
@@ -569,7 +571,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
 				c: u16;

 				for n < ntot {
-					c, err = decode_huffman(z, codelength_ht);
+					c, err = decode_huffman(z, cb, codelength_ht);
 					if err != nil {
 						return err;
 					}
@@ -582,18 +584,18 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
 						n += 1;
 					} else {
 						fill := u8(0);
-						compress.refill_lsb(z, 7);
+						compress.refill_lsb(z, cb, 7);
 						switch c {
 						case 16:
-							c = u16(compress.read_bits_no_refill_lsb(z, 2) + 3);
+							c = u16(compress.read_bits_no_refill_lsb(z, cb, 2) + 3);
 							if n == 0 {
 								return E_Deflate.Huffman_Bad_Code_Lengths;
 							}
 							fill = lencodes[n - 1];
 						case 17:
-							c = u16(compress.read_bits_no_refill_lsb(z, 3) + 3);
+							c = u16(compress.read_bits_no_refill_lsb(z, cb, 3) + 3);
 						case 18:
-							c = u16(compress.read_bits_no_refill_lsb(z, 7) + 11);
+							c = u16(compress.read_bits_no_refill_lsb(z, cb, 7) + 11);
 						case:
 								return E_Deflate.Huffman_Bad_Code_Lengths;
 						}
@@ -623,7 +625,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
 					return err;
 				}
 			}
-			err = parse_huffman_block(z, z_repeat, z_offset);
+			err = parse_huffman_block(z, cb, z_repeat, z_offset);
 			// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
 			if err != nil {
 				return err;
@@ -643,6 +645,8 @@ inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false) -
 	bytes.reader_init(&r, input);
 	rs := bytes.reader_to_stream(&r);
 	ctx.input = rs;
+	ctx.input_data = input;
+	ctx.input_fully_in_memory = true;

 	buf := buf;
 	ws := bytes.buffer_to_stream(buf);
@@ -653,8 +657,21 @@ inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false) -
 	return err;
 }

-inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, raw := false) -> (err: Error) {
-	return inflate_from_byte_array(input, buf, true);
+inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_Buffer, raw := false) -> (err: Error) {
+	// Inflates a raw DEFLATE stream held in `input` into `buf`. `raw` is accepted but not read here.
+	reader := bytes.Reader{};
+	bytes.reader_init(&reader, input);
+
+	buf := buf;
+	// The whole input is already in memory, so expose it both as a stream and as a slice.
+	ctx := Context{
+		input                 = bytes.reader_to_stream(&reader),
+		input_data            = input,
+		input_fully_in_memory = true,
+		output                = bytes.buffer_to_stream(buf),
+	};
+
+	return inflate_from_stream_raw(&ctx, cb);
 }

 inflate     :: proc{inflate_from_stream, inflate_from_byte_array};
--- a/core/image/png/example.odin
+++ b/core/image/png/example.odin
@@ -23,6 +23,22 @@ import "core:mem"
 import "core:os"

 main :: proc() {
+	// Run the demo under a tracking allocator and report whatever is still in its allocation map afterwards.
+	tracker := mem.Tracking_Allocator{};
+	mem.tracking_allocator_init(&tracker, context.allocator);
+	context.allocator = mem.tracking_allocator(&tracker);
+	demo();
+
+	if len(tracker.allocation_map) == 0 {
+		return;
+	}
+	fmt.println("Leaks:");
+	for _, entry in tracker.allocation_map {
+		fmt.printf("\t%v\n\n", entry);
+	}
+}
+
+demo :: proc() {
 	file: string;

 	options := image.Options{.return_metadata};
--- a/core/image/png/png.odin
+++ b/core/image/png/png.odin
@@ -252,17 +252,15 @@ read_chunk :: proc(ctx: ^compress.Context) -> (chunk: Chunk, err: Error) {
 	}
 	chunk.header = ch;

-	data := make([]u8, ch.length, context.temp_allocator);
-	_, e2 := ctx.input->impl_read(data);
-	if e2 != .None {
+	chunk.data, e = compress.read_slice(ctx, int(ch.length));
+	if e != .None {
 		return {}, E_General.Stream_Too_Short;
 	}
-	chunk.data = data;

 	// Compute CRC over chunk type + data
 	type := (^[4]byte)(&ch.type)^;
 	computed_crc := hash.crc32(type[:]);
-	computed_crc =  hash.crc32(data, computed_crc);
+	computed_crc =  hash.crc32(chunk.data, computed_crc);

 	crc, e3 := compress.read_data(ctx, u32be);
 	if e3 != .None {
@@ -359,12 +357,18 @@ load_from_slice :: proc(slice: []u8, options := Options{}, allocator := context.
 	bytes.reader_init(&r, slice);
 	stream := bytes.reader_to_stream(&r);

+	ctx := &compress.Context{
+		input = stream,
+		input_data = slice,
+		input_fully_in_memory = true,
+	};
+
 	/*
 		TODO: Add a flag to tell the PNG loader that the stream is backed by a slice.
 		This way the stream reader could avoid the copy into the temp memory returned by it,
 		and instead return a slice into the original memory that's already owned by the caller.
 	*/
-	img, err = load_from_stream(stream, options, allocator);
+	img, err = load_from_stream(ctx, options, allocator);

 	return img, err;
 }
@@ -382,7 +386,7 @@ load_from_file :: proc(filename: string, options := Options{}, allocator := cont
 	}
 }

-load_from_stream :: proc(stream: io.Stream, options := Options{}, allocator := context.allocator) -> (img: ^Image, err: Error) {
+load_from_stream :: proc(ctx: ^compress.Context, options := Options{}, allocator := context.allocator) -> (img: ^Image, err: Error) {
 	options := options;
 	if .info in options {
 		options |= {.return_metadata, .do_not_decompress_image};
@@ -405,10 +409,6 @@ load_from_stream :: proc(stream: io.Stream, options := Options{}, allocator := c
 	img.metadata_ptr  = info;
 	img.metadata_type = typeid_of(Info);

-	ctx := &compress.Context{
-		input = stream,
-	};
-
 	signature, io_error := compress.read_data(ctx, Signature);
 	if io_error != .None || signature != .PNG {
 		return img, E_PNG.Invalid_PNG_Signature;