mirror of
https://github.com/odin-lang/Odin.git
synced 2026-02-24 20:04:57 +00:00
ZLIB: If output size is known, reserve that much.
This commit is contained in:
@@ -11,6 +11,39 @@ package compress
|
||||
import "core:io"
|
||||
import "core:image"
|
||||
|
||||
/*
|
||||
These settings bound how much compression algorithms will allocate for their output buffer.
|
||||
If streaming their output, these are unnecessary and will be ignored.
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
When a decompression routine doesn't stream its output, but writes to a buffer,
|
||||
we pre-allocate an output buffer to speed up decompression. The default is 1 MiB.
|
||||
*/
|
||||
COMPRESS_OUTPUT_ALLOCATE_MIN :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MIN, 1 << 20));
|
||||
|
||||
/*
|
||||
This bounds the maximum a buffer will resize to as needed, or the maximum we'll
|
||||
pre-allocate if you inform the decompression routine you know the payload size.
|
||||
|
||||
For reference, the largest payload size of a GZIP file is 4 GiB.
|
||||
|
||||
*/
|
||||
when size_of(uintptr) == 8 {
|
||||
/*
|
||||
For 64-bit platforms, we set the default max buffer size to 4 GiB,
|
||||
which is GZIP and PKZIP's max payload size.
|
||||
*/
|
||||
COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 32));
|
||||
} else {
|
||||
/*
|
||||
For 32-bit platforms, we set the default max buffer size to 512 MiB.
|
||||
*/
|
||||
COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 29));
|
||||
}
|
||||
|
||||
|
||||
// when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" }
|
||||
|
||||
Error :: union {
|
||||
@@ -46,6 +79,20 @@ GZIP_Error :: enum {
|
||||
Comment_Too_Long,
|
||||
Payload_Length_Invalid,
|
||||
Payload_CRC_Invalid,
|
||||
|
||||
/*
|
||||
GZIP's payload can be at most max(u32le) bytes (4 GiB - 1), the largest value its 32-bit length field can hold.
|
||||
If you tell it you expect it to contain more, that's obviously an error.
|
||||
*/
|
||||
Payload_Size_Exceeds_Max_Payload,
|
||||
/*
|
||||
For buffered instead of streamed output, the payload size can't exceed
|
||||
the max set by the `COMPRESS_OUTPUT_ALLOCATE_MAX` switch in compress/common.odin.
|
||||
|
||||
You can tweak this setting using `-define:COMPRESS_OUTPUT_ALLOCATE_MAX=size_in_bytes`
|
||||
*/
|
||||
Output_Exceeds_COMPRESS_OUTPUT_ALLOCATE_MAX,
|
||||
|
||||
}
|
||||
|
||||
ZIP_Error :: enum {
|
||||
@@ -79,7 +126,7 @@ Context :: struct #packed {
|
||||
input_data: []u8,
|
||||
|
||||
output: io.Stream,
|
||||
output_buf: [dynamic]u8,
|
||||
output_buf: ^[dynamic]u8,
|
||||
bytes_written: i64,
|
||||
|
||||
/*
|
||||
@@ -103,9 +150,10 @@ Context :: struct #packed {
|
||||
*/
|
||||
input_fully_in_memory: b8,
|
||||
input_refills_from_stream: b8,
|
||||
reserved_flags: [2]b8,
|
||||
output_to_stream: b8,
|
||||
reserved_flag: b8,
|
||||
}
|
||||
#assert(size_of(Context) == 128);
|
||||
// #assert(size_of(Context) == 128);
|
||||
|
||||
/*
|
||||
Compression algorithm context
|
||||
|
||||
@@ -45,7 +45,7 @@ main :: proc() {
|
||||
|
||||
if len(args) < 2 {
|
||||
stderr("No input file specified.\n");
|
||||
err := load(TEST, &buf);
|
||||
err := load(slice=TEST, buf=&buf, known_gzip_size=len(TEST));
|
||||
if err == nil {
|
||||
stdout("Displaying test vector: ");
|
||||
stdout(bytes.buffer_to_string(&buf));
|
||||
|
||||
@@ -21,6 +21,8 @@ import "core:io"
|
||||
import "core:bytes"
|
||||
import "core:hash"
|
||||
|
||||
// import "core:fmt"
|
||||
|
||||
Magic :: enum u16le {
|
||||
GZIP = 0x8b << 8 | 0x1f,
|
||||
}
|
||||
@@ -99,7 +101,9 @@ E_GZIP :: compress.GZIP_Error;
|
||||
E_ZLIB :: compress.ZLIB_Error;
|
||||
E_Deflate :: compress.Deflate_Error;
|
||||
|
||||
load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
|
||||
GZIP_MAX_PAYLOAD_SIZE :: int(max(u32le));
|
||||
|
||||
load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
|
||||
|
||||
r := bytes.Reader{};
|
||||
bytes.reader_init(&r, slice);
|
||||
@@ -111,33 +115,47 @@ load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, allocator := context.al
|
||||
input_fully_in_memory = true,
|
||||
input_refills_from_stream = true,
|
||||
};
|
||||
err = load_from_stream(ctx, buf, allocator);
|
||||
|
||||
err = load_from_stream(ctx, buf, known_gzip_size, expected_output_size, allocator);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
load_from_file :: proc(filename: string, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
|
||||
load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
|
||||
data, ok := os.read_entire_file(filename, allocator);
|
||||
defer delete(data);
|
||||
|
||||
err = E_General.File_Not_Found;
|
||||
if ok {
|
||||
err = load_from_slice(data, buf, allocator);
|
||||
err = load_from_slice(data, buf, len(data), expected_output_size, allocator);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
|
||||
load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
|
||||
buf := buf;
|
||||
expected_output_size := expected_output_size;
|
||||
|
||||
input_data_consumed := 0;
|
||||
|
||||
ws := bytes.buffer_to_stream(buf);
|
||||
ctx.output = ws;
|
||||
|
||||
if expected_output_size > GZIP_MAX_PAYLOAD_SIZE {
|
||||
return E_GZIP.Payload_Size_Exceeds_Max_Payload;
|
||||
}
|
||||
|
||||
if expected_output_size > compress.COMPRESS_OUTPUT_ALLOCATE_MAX {
|
||||
return E_GZIP.Output_Exceeds_COMPRESS_OUTPUT_ALLOCATE_MAX;
|
||||
}
|
||||
|
||||
b: []u8;
|
||||
|
||||
header, e := compress.read_data(ctx, Header);
|
||||
if e != .None {
|
||||
return E_General.File_Too_Short;
|
||||
}
|
||||
input_data_consumed += size_of(Header);
|
||||
|
||||
if header.magic != .GZIP {
|
||||
return E_GZIP.Invalid_GZIP_Signature;
|
||||
@@ -163,6 +181,8 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
|
||||
|
||||
if .extra in header.flags {
|
||||
xlen, e_extra := compress.read_data(ctx, u16le);
|
||||
input_data_consumed += 2;
|
||||
|
||||
if e_extra != .None {
|
||||
return E_General.Stream_Too_Short;
|
||||
}
|
||||
@@ -184,6 +204,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
|
||||
return E_General.Stream_Too_Short;
|
||||
}
|
||||
xlen -= 2;
|
||||
input_data_consumed += 2;
|
||||
|
||||
field_length, field_error = compress.read_data(ctx, u16le);
|
||||
if field_error != .None {
|
||||
@@ -191,6 +212,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
|
||||
return E_General.Stream_Too_Short;
|
||||
}
|
||||
xlen -= 2;
|
||||
input_data_consumed += 2;
|
||||
|
||||
if xlen <= 0 {
|
||||
// We're not going to try and recover by scanning for a ZLIB header.
|
||||
@@ -206,6 +228,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
|
||||
return E_General.Stream_Too_Short;
|
||||
}
|
||||
xlen -= field_length;
|
||||
input_data_consumed += int(field_length);
|
||||
|
||||
// printf("%v\n", string(field_data));
|
||||
}
|
||||
@@ -227,6 +250,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
|
||||
if name_error != .None {
|
||||
return E_General.Stream_Too_Short;
|
||||
}
|
||||
input_data_consumed += 1;
|
||||
if b[0] == 0 {
|
||||
break;
|
||||
}
|
||||
@@ -250,6 +274,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
|
||||
if comment_error != .None {
|
||||
return E_General.Stream_Too_Short;
|
||||
}
|
||||
input_data_consumed += 1;
|
||||
if b[0] == 0 {
|
||||
break;
|
||||
}
|
||||
@@ -265,6 +290,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
|
||||
if .header_crc in header.flags {
|
||||
crc_error: io.Error;
|
||||
_, crc_error = compress.read_slice(ctx, 2);
|
||||
input_data_consumed += 2;
|
||||
if crc_error != .None {
|
||||
return E_General.Stream_Too_Short;
|
||||
}
|
||||
@@ -280,7 +306,43 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
|
||||
code_buffer := compress.Code_Buffer{};
|
||||
cb := &code_buffer;
|
||||
|
||||
zlib_error := zlib.inflate_raw(ctx, &code_buffer);
|
||||
payload_u32le: u32le;
|
||||
|
||||
// fmt.printf("known_gzip_size: %v | expected_output_size: %v\n", known_gzip_size, expected_output_size);
|
||||
|
||||
if expected_output_size > -1 {
|
||||
/*
|
||||
We already checked that it's not larger than the output buffer max,
|
||||
or GZIP length field's max.
|
||||
|
||||
We'll just pass it on to `zlib.inflate_raw`.
|
||||
*/
|
||||
} else {
|
||||
/*
|
||||
If we know the size of the GZIP file *and* it is fully in memory,
|
||||
then we can peek at the unpacked size at the end.
|
||||
|
||||
We'll still want to ensure there's capacity left in the output buffer when we write, of course.
|
||||
|
||||
*/
|
||||
if ctx.input_fully_in_memory && known_gzip_size > -1 {
|
||||
offset := known_gzip_size - input_data_consumed - 4;
|
||||
if len(ctx.input_data) >= offset + 4 {
|
||||
length_bytes := ctx.input_data[offset:][:4];
|
||||
payload_u32le = (^u32le)(&length_bytes[0])^;
|
||||
expected_output_size = int(payload_u32le);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
TODO(Jeroen): When reading a GZIP from a stream, check if impl_seek is present.
|
||||
If so, we can seek to the end, grab the size from the footer, and seek back to payload start.
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
// fmt.printf("GZIP: Expected Payload Size: %v\n", expected_output_size);
|
||||
|
||||
zlib_error := zlib.inflate_raw(z=ctx, cb=&code_buffer, expected_output_size=expected_output_size);
|
||||
if zlib_error != nil {
|
||||
return zlib_error;
|
||||
}
|
||||
@@ -300,9 +362,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
|
||||
}
|
||||
}
|
||||
payload_crc := transmute(u32le)payload_crc_b;
|
||||
|
||||
payload_len: u32le;
|
||||
payload_len, footer_error = compress.read_data(ctx, u32le);
|
||||
payload_u32le, footer_error = compress.read_data(ctx, u32le);
|
||||
|
||||
payload := bytes.buffer_to_bytes(buf);
|
||||
crc32 := u32le(hash.crc32(payload));
|
||||
@@ -311,7 +371,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
|
||||
return E_GZIP.Payload_CRC_Invalid;
|
||||
}
|
||||
|
||||
if len(payload) != int(payload_len) {
|
||||
if len(payload) != int(payload_u32le) {
|
||||
return E_GZIP.Payload_Length_Invalid;
|
||||
}
|
||||
return nil;
|
||||
|
||||
@@ -35,11 +35,13 @@ main :: proc() {
|
||||
171, 15, 18, 59, 138, 112, 63, 23, 205, 110, 254, 136, 109, 78, 231,
|
||||
63, 234, 138, 133, 204,
|
||||
};
|
||||
OUTPUT_SIZE :: 438;
|
||||
|
||||
|
||||
buf: bytes.Buffer;
|
||||
|
||||
// We can pass `raw=true` to inflate a raw DEFLATE stream instead of a ZLIB wrapped one.
|
||||
err := inflate(ODIN_DEMO, &buf);
|
||||
err := inflate(input=ODIN_DEMO, buf=&buf, expected_output_size=OUTPUT_SIZE);
|
||||
defer bytes.buffer_destroy(&buf);
|
||||
|
||||
if err != nil {
|
||||
@@ -47,5 +49,5 @@ main :: proc() {
|
||||
}
|
||||
s := bytes.buffer_to_string(&buf);
|
||||
fmt.printf("Input: %v bytes, output (%v bytes):\n%v\n", len(ODIN_DEMO), len(s), s);
|
||||
assert(len(s) == 438);
|
||||
assert(len(s) == OUTPUT_SIZE);
|
||||
}
|
||||
|
||||
@@ -16,6 +16,8 @@ import "core:io"
|
||||
import "core:bytes"
|
||||
import "core:hash"
|
||||
|
||||
// import "core:fmt"
|
||||
|
||||
// when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" }
|
||||
|
||||
/*
|
||||
@@ -397,7 +399,7 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := context.allocator) -> (err: Error) #no_bounds_check {
|
||||
inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
|
||||
/*
|
||||
ctx.input must be an io.Stream backed by an implementation that supports:
|
||||
- read
|
||||
@@ -461,7 +463,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
|
||||
}
|
||||
|
||||
// Parse ZLIB stream without header.
|
||||
err = inflate_raw(ctx, cb);
|
||||
err = inflate_raw(z=ctx, cb=cb, expected_output_size=expected_output_size);
|
||||
if err != nil {
|
||||
return err;
|
||||
}
|
||||
@@ -483,12 +485,29 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := context.allocator) -> (err: Error) #no_bounds_check {
|
||||
inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
|
||||
when #config(TRACY_ENABLE, false) { tracy.ZoneN("Inflate Raw"); }
|
||||
final := u32(0);
|
||||
type := u32(0);
|
||||
|
||||
cb.num_bits = 0;
|
||||
buf := (^bytes.Buffer)(z.output.stream_data);
|
||||
z.output_buf = &buf.buf;
|
||||
|
||||
// fmt.printf("ZLIB: Expected Payload Size: %v\n", expected_output_size);
|
||||
|
||||
if expected_output_size > -1 && expected_output_size <= compress.COMPRESS_OUTPUT_ALLOCATE_MAX {
|
||||
reserve(z.output_buf, expected_output_size);
|
||||
// resize (z.output_buf, expected_output_size);
|
||||
} else {
|
||||
reserve(z.output_buf, compress.COMPRESS_OUTPUT_ALLOCATE_MIN);
|
||||
}
|
||||
|
||||
// reserve(&z.output_buf, compress.COMPRESS_OUTPUT_ALLOCATE_MIN);
|
||||
// resize (&z.output_buf, compress.COMPRESS_OUTPUT_ALLOCATE_MIN);
|
||||
// fmt.printf("ZLIB: buf: %v\n", buf);
|
||||
// fmt.printf("ZLIB: output_buf: %v\n", z.output_buf);
|
||||
// fmt.printf("ZLIB: z.output: %v\n", z.output);
|
||||
|
||||
|
||||
cb.num_bits = 0;
|
||||
cb.code_buffer = 0;
|
||||
|
||||
z_repeat: ^Huffman_Table;
|
||||
@@ -519,6 +538,10 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont
|
||||
cb.last = mem.make_dynamic_array_len_cap([dynamic]u8, cb.window_mask + 1, cb.window_mask + 1, allocator);
|
||||
defer delete(cb.last);
|
||||
|
||||
|
||||
final := u32(0);
|
||||
type := u32(0);
|
||||
|
||||
for {
|
||||
final = compress.read_bits_lsb(z, cb, 1);
|
||||
type = compress.read_bits_lsb(z, cb, 2);
|
||||
@@ -659,10 +682,15 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont
|
||||
}
|
||||
}
|
||||
|
||||
// fmt.printf("ZLIB: Bytes written: %v\n", z.bytes_written);
|
||||
if int(z.bytes_written) != len(buf.buf) {
|
||||
resize(&buf.buf, int(z.bytes_written));
|
||||
}
|
||||
|
||||
return nil;
|
||||
}
|
||||
|
||||
inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false) -> (err: Error) {
|
||||
inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
|
||||
ctx := Context{};
|
||||
|
||||
r := bytes.Reader{};
|
||||
@@ -673,15 +701,15 @@ inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false) -
|
||||
ctx.input_fully_in_memory = true;
|
||||
|
||||
buf := buf;
|
||||
ws := bytes.buffer_to_stream(buf);
|
||||
ws := bytes.buffer_to_stream(buf);
|
||||
ctx.output = ws;
|
||||
|
||||
err = inflate_from_stream(&ctx, raw);
|
||||
err = inflate_from_stream(ctx=&ctx, raw=raw, expected_output_size=expected_output_size);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_Buffer, raw := false) -> (err: Error) {
|
||||
inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
|
||||
ctx := Context{};
|
||||
|
||||
r := bytes.Reader{};
|
||||
@@ -692,10 +720,10 @@ inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_B
|
||||
ctx.input_fully_in_memory = true;
|
||||
|
||||
buf := buf;
|
||||
ws := bytes.buffer_to_stream(buf);
|
||||
ws := bytes.buffer_to_stream(buf);
|
||||
ctx.output = ws;
|
||||
|
||||
return inflate_from_stream_raw(&ctx, cb);
|
||||
return inflate_from_stream_raw(z=&ctx, cb=cb, expected_output_size=expected_output_size);
|
||||
}
|
||||
|
||||
inflate :: proc{inflate_from_stream, inflate_from_byte_array};
|
||||
|
||||
Reference in New Issue
Block a user