ZLIB: Split up input from stream and memory into own code paths.

2026-06-02 16:57:57 +00:00 · 2021-06-27 13:19:24 +02:00
parent 4689a6b341
commit 02f9668185
5 changed files with 475 additions and 74 deletions
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -127,7 +127,24 @@ Deflate_Error :: enum {


 // General I/O context for ZLIB, LZW, etc.
-Context :: struct #packed {
+Context_Memory_Input :: struct #packed {
+	input_data:        []u8,
+	output:            ^bytes.Buffer,
+	bytes_written:     i64,
+
+	code_buffer: u64,
+	num_bits:    u64,
+
+	/*
+		If we know the data size, we can optimize the reads and writes.
+	*/
+	size_packed:   i64,
+	size_unpacked: i64,
+
+	padding: [1]u8,
+}
+
+Context_Stream_Input :: struct #packed {
 	input_data:        []u8,
 	input:             io.Stream,
 	output:            ^bytes.Buffer,
@@ -153,7 +170,6 @@ Context :: struct #packed {
 	padding: [1]u8,
 }

-
 // Stream helpers
 /*
 	TODO: These need to be optimized.
@@ -167,7 +183,7 @@ Context :: struct #packed {
 // TODO: Make these return compress.Error errors.

@(optimization_mode="speed")
-read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.Error) {
+read_slice_from_memory :: #force_inline proc(z: ^Context_Memory_Input, size: int) -> (res: []u8, err: io.Error) {
 	#no_bounds_check {
 		if len(z.input_data) >= size {
 			res = z.input_data[:size];
@@ -176,17 +192,15 @@ read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.
 		}
 	}

-	if z.input_fully_in_memory {
-		if len(z.input_data) == 0 {
-			return []u8{}, .EOF;
-		} else {
-			return []u8{}, .Short_Buffer;
-		}
+	if len(z.input_data) == 0 {
+		return []u8{}, .EOF;
+	} else {
+		return []u8{}, .Short_Buffer;
 	}
+}

-	/*
-		TODO: Try to refill z.input_data from stream, using packed_data as a guide.
-	*/
+@(optimization_mode="speed")
+read_slice_from_stream :: #force_inline proc(z: ^Context_Stream_Input, size: int) -> (res: []u8, err: io.Error) {
 	b := make([]u8, size, context.temp_allocator);
 	_, e := z.input->impl_read(b[:]);
 	if e == .None {
@@ -196,8 +210,10 @@ read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.
 	return []u8{}, e;
 }

+read_slice :: proc{read_slice_from_memory, read_slice_from_stream};
+
@(optimization_mode="speed")
-read_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+read_data :: #force_inline proc(z: ^$C, $T: typeid) -> (res: T, err: io.Error) {
 	b, e := read_slice(z, size_of(T));
 	if e == .None {
 		return (^T)(&b[0])^, .None;
@@ -207,7 +223,7 @@ read_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
 }

@(optimization_mode="speed")
-read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
+read_u8_from_memory :: #force_inline proc(z: ^Context_Memory_Input) -> (res: u8, err: io.Error) {
 	#no_bounds_check {
 		if len(z.input_data) >= 1 {
 			res = z.input_data[0];
@@ -215,8 +231,12 @@ read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
 			return res, .None;
 		}
 	}
+	return 0, .EOF;
+}

-	b, e := read_slice(z, 1);
+@(optimization_mode="speed")
+read_u8_from_stream :: #force_inline proc(z: ^Context_Stream_Input) -> (res: u8, err: io.Error) {
+	b, e := read_slice_from_stream(z, 1);
 	if e == .None {
 		return b[0], .None;
 	}
@@ -224,8 +244,10 @@ read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
 	return 0, e;
 }

+read_u8 :: proc{read_u8_from_memory, read_u8_from_stream};
+
@(optimization_mode="speed")
-peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+peek_data_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid) -> (res: T, err: io.Error) {
 	size :: size_of(T);

 	#no_bounds_check {
@@ -242,6 +264,11 @@ peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
 			return T{}, .Short_Buffer;
 		}
 	}
+}
+
+@(optimization_mode="speed")
+peek_data_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid) -> (res: T, err: io.Error) {
+	size :: size_of(T);

 	// Get current position to read from.
 	curr, e1 := z.input->impl_seek(0, .Current);
@@ -266,16 +293,20 @@ peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
 	return res, .None;
 }

+peek_data :: proc{peek_data_from_memory, peek_data_from_stream};
+
+
+
 // Sliding window read back
@(optimization_mode="speed")
-peek_back_byte :: #force_inline proc(z: ^Context, offset: i64) -> (res: u8, err: io.Error) {
+peek_back_byte :: #force_inline proc(z: ^$C, offset: i64) -> (res: u8, err: io.Error) {
 	// Look back into the sliding window.
 	return z.output.buf[z.bytes_written - offset], .None;
 }

 // Generalized bit reader LSB
@(optimization_mode="speed")
-refill_lsb :: proc(z: ^Context, width := i8(24)) {
+refill_lsb_from_memory :: proc(z: ^Context_Memory_Input, width := i8(24)) {
 	refill := u64(width);

 	for {
@@ -300,43 +331,126 @@ refill_lsb :: proc(z: ^Context, width := i8(24)) {
 	}
 }

+// Generalized bit reader LSB
@(optimization_mode="speed")
-consume_bits_lsb :: #force_inline proc(z: ^Context, width: u8) {
+refill_lsb_from_stream :: proc(z: ^Context_Stream_Input, width := i8(24)) {
+	refill := u64(width);
+
+	for {
+		if z.num_bits > refill {
+			break;
+		}
+		if z.code_buffer == 0 && z.num_bits > 63 {
+			z.num_bits = 0;
+		}
+		if z.code_buffer >= 1 << uint(z.num_bits) {
+			// Code buffer is malformed.
+			z.num_bits = max(u64);
+			return;
+		}
+		b, err := read_u8(z);
+		if err != .None {
+			// This is fine at the end of the file.
+			return;
+		}
+		z.code_buffer |= (u64(b) << u8(z.num_bits));
+		z.num_bits += 8;
+	}
+}
+
+refill_lsb :: proc{refill_lsb_from_memory, refill_lsb_from_stream};
+
+
+@(optimization_mode="speed")
+consume_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) {
 	z.code_buffer >>= width;
 	z.num_bits -= u64(width);
 }

@(optimization_mode="speed")
-peek_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+consume_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) {
+	z.code_buffer >>= width;
+	z.num_bits -= u64(width);
+}
+
+consume_bits_lsb :: proc{consume_bits_lsb_from_memory, consume_bits_lsb_from_stream};
+
+@(optimization_mode="speed")
+peek_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
 	if z.num_bits < u64(width) {
 		refill_lsb(z);
 	}
-	// assert(z.num_bits >= i8(width));
 	return u32(z.code_buffer & ~(~u64(0) << width));
 }

@(optimization_mode="speed")
-peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+peek_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
+	if z.num_bits < u64(width) {
+		refill_lsb(z);
+	}
+	return u32(z.code_buffer & ~(~u64(0) << width));
+}
+
+peek_bits_lsb :: proc{peek_bits_lsb_from_memory, peek_bits_lsb_from_stream};
+
+@(optimization_mode="speed")
+peek_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
 	assert(z.num_bits >= u64(width));
 	return u32(z.code_buffer & ~(~u64(0) << width));
 }

@(optimization_mode="speed")
-read_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+peek_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
+	assert(z.num_bits >= u64(width));
+	return u32(z.code_buffer & ~(~u64(0) << width));
+}
+
+peek_bits_no_refill_lsb :: proc{peek_bits_no_refill_lsb_from_memory, peek_bits_no_refill_lsb_from_stream};
+
+@(optimization_mode="speed")
+read_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
 	k := peek_bits_lsb(z, width);
 	consume_bits_lsb(z, width);
 	return k;
 }

@(optimization_mode="speed")
-read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+read_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
+	k := peek_bits_lsb(z, width);
+	consume_bits_lsb(z, width);
+	return k;
+}
+
+read_bits_lsb :: proc{read_bits_lsb_from_memory, read_bits_lsb_from_stream};
+
+@(optimization_mode="speed")
+read_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
 	k := peek_bits_no_refill_lsb(z, width);
 	consume_bits_lsb(z, width);
 	return k;
 }

@(optimization_mode="speed")
-discard_to_next_byte_lsb :: proc(z: ^Context) {
+read_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
+	k := peek_bits_no_refill_lsb(z, width);
+	consume_bits_lsb(z, width);
+	return k;
+}
+
+read_bits_no_refill_lsb :: proc{read_bits_no_refill_lsb_from_memory, read_bits_no_refill_lsb_from_stream};
+
+
+@(optimization_mode="speed")
+discard_to_next_byte_lsb_from_memory :: proc(z: ^Context_Memory_Input) {
 	discard := u8(z.num_bits & 7);
 	consume_bits_lsb(z, discard);
 }
+
+
+@(optimization_mode="speed")
+discard_to_next_byte_lsb_from_stream :: proc(z: ^Context_Stream_Input) {
+	discard := u8(z.num_bits & 7);
+	consume_bits_lsb(z, discard);
+}
+
+discard_to_next_byte_lsb :: proc{discard_to_next_byte_lsb_from_memory, discard_to_next_byte_lsb_from_stream};
--- a/core/compress/gzip/example.odin
+++ b/core/compress/gzip/example.odin
@@ -65,7 +65,7 @@ main :: proc() {
 		if file == "-" {
 			// Read from stdin
 			s := os.stream_from_handle(os.stdin);
-			ctx := &compress.Context{
+			ctx := &compress.Context_Stream_Input{
 				input = s,
 			};
 			err = load(ctx, &buf);
--- a/core/compress/gzip/gzip.odin
+++ b/core/compress/gzip/gzip.odin
@@ -103,23 +103,7 @@ E_Deflate :: compress.Deflate_Error;

 GZIP_MAX_PAYLOAD_SIZE :: int(max(u32le));

-load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
-
-	r := bytes.Reader{};
-	bytes.reader_init(&r, slice);
-	stream := bytes.reader_to_stream(&r);
-
-	ctx := &compress.Context{
-		input = stream,
-		input_data = slice,
-		input_fully_in_memory = true,
-		output = buf,
-	};
-
-	err = load_from_stream(ctx, buf, known_gzip_size, expected_output_size, allocator);
-
-	return err;
-}
+load :: proc{load_from_slice, load_from_stream, load_from_file};

 load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
 	data, ok := os.read_entire_file(filename, allocator);
@@ -132,7 +116,255 @@ load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_siz
 	return;
 }

-load_from_stream :: proc(z: ^compress.Context, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
+load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
+	buf := buf;
+
+	z := &compress.Context_Memory_Input{
+		input_data = slice,
+		output = buf,
+	};
+
+	expected_output_size := expected_output_size;
+	input_data_consumed := 0;
+	z.output = buf;
+
+	if expected_output_size > GZIP_MAX_PAYLOAD_SIZE {
+		return E_GZIP.Payload_Size_Exceeds_Max_Payload;
+	}
+
+	if expected_output_size > compress.COMPRESS_OUTPUT_ALLOCATE_MAX {
+		return E_GZIP.Output_Exceeds_COMPRESS_OUTPUT_ALLOCATE_MAX;
+	}
+
+	b: []u8;
+
+	header, e := compress.read_data(z, Header);
+	if e != .None {
+		return E_General.File_Too_Short;
+	}
+	input_data_consumed += size_of(Header);
+
+	if header.magic != .GZIP {
+		return E_GZIP.Invalid_GZIP_Signature;
+	}
+	if header.compression_method != .DEFLATE {
+		return E_General.Unknown_Compression_Method;
+	}
+
+	if header.os >= ._Unknown {
+		header.os = .Unknown;
+	}
+
+	if .reserved_1 in header.flags || .reserved_2 in header.flags || .reserved_3 in header.flags {
+		return E_GZIP.Reserved_Flag_Set;
+	}
+
+	// printf("signature: %v\n", header.magic);
+	// printf("compression: %v\n", header.compression_method);
+	// printf("flags: %v\n", header.flags);
+	// printf("modification time: %v\n", time.unix(i64(header.modification_time), 0));
+	// printf("xfl: %v (%v)\n", header.xfl, int(header.xfl));
+	// printf("os: %v\n", OS_Name[header.os]);
+
+	if .extra in header.flags {
+		xlen, e_extra := compress.read_data(z, u16le);
+		input_data_consumed += 2;
+
+		if e_extra != .None {
+			return E_General.Stream_Too_Short;
+		}
+		// printf("Extra data present (%v bytes)\n", xlen);
+		if xlen < 4 {
+			// Minimum length is 2 for ID + 2 for a field length, if set to zero.
+			return E_GZIP.Invalid_Extra_Data;
+		}
+
+		field_id:     [2]u8;
+		field_length: u16le;
+		field_error: io.Error;
+
+		for xlen >= 4 {
+			// println("Parsing Extra field(s).");
+			field_id, field_error = compress.read_data(z, [2]u8);
+			if field_error != .None {
+				// printf("Parsing Extra returned: %v\n", field_error);
+				return E_General.Stream_Too_Short;
+			}
+			xlen -= 2;
+			input_data_consumed += 2;
+
+			field_length, field_error = compress.read_data(z, u16le);
+			if field_error != .None {
+				// printf("Parsing Extra returned: %v\n", field_error);
+				return E_General.Stream_Too_Short;
+			}
+			xlen -= 2;
+			input_data_consumed += 2;
+
+			if xlen <= 0 {
+				// We're not going to try and recover by scanning for a ZLIB header.
+				// Who knows what else is wrong with this file.
+				return E_GZIP.Invalid_Extra_Data;
+			}
+
+			// printf("    Field \"%v\" of length %v found: ", string(field_id[:]), field_length);
+			if field_length > 0 {
+				b, field_error = compress.read_slice(z, int(field_length));
+				if field_error != .None {
+					// printf("Parsing Extra returned: %v\n", field_error);
+					return E_General.Stream_Too_Short;
+				}
+				xlen -= field_length;
+				input_data_consumed += int(field_length);
+
+				// printf("%v\n", string(field_data));
+			}
+
+			if xlen != 0 {
+				return E_GZIP.Invalid_Extra_Data;
+			}
+		}
+	}
+
+	if .name in header.flags {
+		// Should be enough.
+		name: [1024]u8;
+		i := 0;
+		name_error: io.Error;
+
+		for i < len(name) {
+			b, name_error = compress.read_slice(z, 1);
+			if name_error != .None {
+				return E_General.Stream_Too_Short;
+			}
+			input_data_consumed += 1;
+			if b[0] == 0 {
+				break;
+			}
+			name[i] = b[0];
+			i += 1;
+			if i >= len(name) {
+				return E_GZIP.Original_Name_Too_Long;
+			}
+		}
+		// printf("Original filename: %v\n", string(name[:i]));
+	}
+
+	if .comment in header.flags {
+		// Should be enough.
+		comment: [1024]u8;
+		i := 0;
+		comment_error: io.Error;
+
+		for i < len(comment) {
+			b, comment_error = compress.read_slice(z, 1);
+			if comment_error != .None {
+				return E_General.Stream_Too_Short;
+			}
+			input_data_consumed += 1;
+			if b[0] == 0 {
+				break;
+			}
+			comment[i] = b[0];
+			i += 1;
+			if i >= len(comment) {
+				return E_GZIP.Comment_Too_Long;
+			}
+		}
+		// printf("Comment: %v\n", string(comment[:i]));
+	}
+
+	if .header_crc in header.flags {
+		crc_error: io.Error;
+		_, crc_error = compress.read_slice(z, 2);
+		input_data_consumed += 2;
+		if crc_error != .None {
+			return E_General.Stream_Too_Short;
+		}
+		/*
+			We don't actually check the CRC16 (lower 2 bytes of CRC32 of header data until the CRC field).
+			If we find a gzip file in the wild that sets this field, we can add proper support for it.
+		*/
+	}
+
+	/*
+		We should have arrived at the ZLIB payload.
+	*/
+	payload_u32le: u32le;
+
+	// fmt.printf("known_gzip_size: %v | expected_output_size: %v\n", known_gzip_size, expected_output_size);
+
+	if expected_output_size > -1 {
+		/*
+			We already checked that it's not larger than the output buffer max,
+			or GZIP length field's max.
+
+			We'll just pass it on to `zlib.inflate_raw`;
+		*/
+	} else {
+		/*
+			If we know the size of the GZIP file *and* it is fully in memory,
+			then we can peek at the unpacked size at the end.
+
+			We'll still want to ensure there's capacity left in the output buffer when we write, of course.
+
+		*/
+		if known_gzip_size > -1 {
+			offset := known_gzip_size - input_data_consumed - 4;
+			if len(z.input_data) >= offset + 4 {
+				length_bytes         := z.input_data[offset:][:4];
+				payload_u32le         = (^u32le)(&length_bytes[0])^;
+				expected_output_size = int(payload_u32le);
+			}
+		} else {
+			/*
+				TODO(Jeroen): When reading a GZIP from a stream, check if impl_seek is present.
+				If so, we can seek to the end, grab the size from the footer, and seek back to payload start.
+			*/
+		}
+	}
+
+	// fmt.printf("GZIP: Expected Payload Size: %v\n", expected_output_size);
+
+	zlib_error := zlib.inflate_raw(z=z, expected_output_size=expected_output_size);
+	if zlib_error != nil {
+		return zlib_error;
+	}
+	/*
+		Read CRC32 using the ctx bit reader because zlib may leave bytes in there.
+	*/
+	compress.discard_to_next_byte_lsb(z);
+
+	footer_error: io.Error;
+
+	payload_crc_b: [4]u8;
+	for _, i in payload_crc_b {
+		if z.num_bits >= 8 {
+			payload_crc_b[i] = u8(compress.read_bits_lsb(z, 8));
+		} else {
+			payload_crc_b[i], footer_error = compress.read_u8(z);
+		}
+	}
+	payload_crc := transmute(u32le)payload_crc_b;
+	payload_u32le, footer_error = compress.read_data(z, u32le);
+
+	payload := bytes.buffer_to_bytes(buf);
+
+	// fmt.printf("GZIP payload: %v\n", string(payload));
+
+	crc32 := u32le(hash.crc32(payload));
+
+	if crc32 != payload_crc {
+		return E_GZIP.Payload_CRC_Invalid;
+	}
+
+	if len(payload) != int(payload_u32le) {
+		return E_GZIP.Payload_Length_Invalid;
+	}
+	return nil;
+}
+
+load_from_stream :: proc(z: ^compress.Context_Stream_Input, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
 	buf := buf;
 	expected_output_size := expected_output_size;

@@ -375,5 +607,3 @@ load_from_stream :: proc(z: ^compress.Context, buf: ^bytes.Buffer, known_gzip_si
 	}
 	return nil;
 }
-
-load :: proc{load_from_file, load_from_slice, load_from_stream};
--- a/core/compress/zlib/example.odin
+++ b/core/compress/zlib/example.odin
@@ -38,7 +38,7 @@ main :: proc() {
 	};
 	OUTPUT_SIZE :: 438;

-	fmt.printf("size_of(Context): %v\n", size_of(compress.Context));
+	fmt.printf("size_of(Context): %v\n", size_of(compress.Context_Memory_Input));

 	buf: bytes.Buffer;

--- a/core/compress/zlib/zlib.odin
+++ b/core/compress/zlib/zlib.odin
@@ -30,7 +30,6 @@ import "core:bytes"
 	`Context.rolling_hash` if not inlining it is still faster.

 */
-Context :: compress.Context;

 Compression_Method :: enum u8 {
 	DEFLATE  = 8,
@@ -165,7 +164,7 @@ grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
 */

@(optimization_mode="speed")
-write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check {
+write_byte :: #force_inline proc(z: ^$C, c: u8) -> (err: io.Error) #no_bounds_check {
 	/*
 		Resize if needed.
 	*/
@@ -184,7 +183,7 @@ write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_boun
 }

@(optimization_mode="speed")
-repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) 	#no_bounds_check {
+repl_byte :: proc(z: ^$C, count: u16, c: u8) -> (err: io.Error) 	#no_bounds_check {
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		without having to worry about wrapping, so no need for a temp allocation to give to
@@ -212,7 +211,7 @@ repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) 	#no_bounds
 }

@(optimization_mode="speed")
-repl_bytes :: proc(z: ^Context, count: u16, distance: u16) -> (err: io.Error) {
+repl_bytes :: proc(z: ^$C, count: u16, distance: u16) -> (err: io.Error) {
 	/*
 		TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
 		without having to worry about wrapping, so no need for a temp allocation to give to
@@ -304,7 +303,7 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
 }

@(optimization_mode="speed")
-decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+decode_huffman_slowpath :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
 	code := u16(compress.peek_bits_lsb(z,16));

 	k := int(z_bit_reverse(code, 16));
@@ -335,7 +334,7 @@ decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err:
 }

@(optimization_mode="speed")
-decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+decode_huffman :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
 	if z.num_bits < 16 {
 		if z.num_bits > 63 {
 			return 0, E_ZLIB.Code_Buffer_Malformed;
@@ -355,7 +354,7 @@ decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #
 }

@(optimization_mode="speed")
-parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
+parse_huffman_block :: proc(z: ^$C, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
 	#no_bounds_check for {
 		value, e := decode_huffman(z, z_repeat);
 		if e != nil {
@@ -424,7 +423,78 @@ parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) ->
 }

@(optimization_mode="speed")
-inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+__inflate_from_memory :: proc(using ctx: ^compress.Context_Memory_Input, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+	/*
+		ctx.output must be a bytes.Buffer for now. We'll add a separate implementation that writes to a stream.
+
+		raw determines whether the ZLIB header is processed, or we're inflating a raw
+		DEFLATE stream.
+	*/
+
+	if !raw {
+		if len(ctx.input_data) < 6 {
+			return E_General.Stream_Too_Short;
+		}
+
+		cmf, _ := compress.read_u8(ctx);
+
+		method := Compression_Method(cmf & 0xf);
+		if method != .DEFLATE {
+			return E_General.Unknown_Compression_Method;
+		}
+
+		cinfo  := (cmf >> 4) & 0xf;
+		if cinfo > 7 {
+			return E_ZLIB.Unsupported_Window_Size;
+		}
+		flg, _ := compress.read_u8(ctx);
+
+		fcheck  := flg & 0x1f;
+		fcheck_computed := (cmf << 8 | flg) & 0x1f;
+		if fcheck != fcheck_computed {
+			return E_General.Checksum_Failed;
+		}
+
+		fdict   := (flg >> 5) & 1;
+		/*
+			We don't handle built-in dictionaries for now.
+			They're application specific and PNG doesn't use them.
+		*/
+		if fdict != 0 {
+			return E_ZLIB.FDICT_Unsupported;
+		}
+
+		// flevel  := Compression_Level((flg >> 6) & 3);
+		/*
+			Inflate can consume bits belonging to the Adler checksum.
+			We pass the entire stream to Inflate and will unget bytes if we need to
+			at the end to compare checksums.
+		*/
+
+	}
+
+	// Parse ZLIB stream without header.
+	err = inflate_raw(z=ctx, expected_output_size=expected_output_size);
+	if err != nil {
+		return err;
+	}
+
+	if !raw {
+		compress.discard_to_next_byte_lsb(ctx);
+		adler32 := compress.read_bits_lsb(ctx, 8) << 24 | compress.read_bits_lsb(ctx, 8) << 16 | compress.read_bits_lsb(ctx, 8) << 8 | compress.read_bits_lsb(ctx, 8);
+
+		output_hash := hash.adler32(ctx.output.buf[:]);
+
+		if output_hash != u32(adler32) {
+			return E_General.Checksum_Failed;
+		}
+	}
+	return nil;
+}
+
+
+@(optimization_mode="speed")
+__inflate_from_stream :: proc(using ctx: ^$C, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	/*
 		ctx.input must be an io.Stream backed by an implementation that supports:
 		- read
@@ -501,7 +571,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_s
 // TODO: Check alignment of reserve/resize.

@(optimization_mode="speed")
-inflate_from_stream_raw :: proc(z: ^Context, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+inflate_raw :: proc(z: ^$C, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
 	expected_output_size := expected_output_size;

 	if expected_output_size <= 0 {
@@ -698,36 +768,23 @@ inflate_from_stream_raw :: proc(z: ^Context, expected_output_size := -1, allocat
 }

 inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
-	ctx := Context{};
+	ctx := compress.Context_Memory_Input{};

-	r := bytes.Reader{};
-	bytes.reader_init(&r, input);
-	rs := bytes.reader_to_stream(&r);
-	ctx.input = rs;
 	ctx.input_data = input;
-	ctx.input_fully_in_memory = true;
-
 	ctx.output = buf;

-	err = inflate_from_stream(ctx=&ctx, raw=raw, expected_output_size=expected_output_size);
+	err = __inflate_from_memory(ctx=&ctx, raw=raw, expected_output_size=expected_output_size);

 	return err;
 }

 inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
-	ctx := Context{};
+	ctx := compress.Context_Memory_Input{};

-	r := bytes.Reader{};
-	bytes.reader_init(&r, input);
-	rs := bytes.reader_to_stream(&r);
-	ctx.input = rs;
 	ctx.input_data = input;
-	ctx.input_fully_in_memory = true;
-
 	ctx.output = buf;

-	return inflate_from_stream_raw(z=&ctx, expected_output_size=expected_output_size);
+	return inflate_raw(z=&ctx, expected_output_size=expected_output_size);
 }

-inflate     :: proc{inflate_from_stream, inflate_from_byte_array};
-inflate_raw :: proc{inflate_from_stream_raw, inflate_from_byte_array_raw};
+inflate     :: proc{__inflate_from_stream, inflate_from_byte_array};