From 352494cbb4ad2ddb650b59ce8102da3ea0942e79 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Mon, 21 Jun 2021 21:05:52 +0200 Subject: [PATCH] ZLIB: Start optimization. --- core/bytes/util.odin | 14 ++-- core/compress/common.odin | 31 ++++++++- core/compress/gzip/example.odin | 11 ++++ core/compress/gzip/gzip.odin | 21 ++++-- core/compress/zlib/example.odin | 10 +++ core/compress/zlib/zlib.odin | 110 +++++++++++++++++++++++++++----- core/image/common.odin | 17 +++-- core/image/png/example.odin | 11 ++++ core/image/png/helpers.odin | 15 +++-- core/image/png/png.odin | 9 +++ 10 files changed, 211 insertions(+), 38 deletions(-) diff --git a/core/bytes/util.odin b/core/bytes/util.odin index 1749230db..a93e3e479 100644 --- a/core/bytes/util.odin +++ b/core/bytes/util.odin @@ -1,12 +1,18 @@ package bytes +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + List of contributors: + Jeroen van Rijn: Initial implementation. + + `bytes.Buffer` type conversion helpers. +*/ + import "core:intrinsics" import "core:mem" -/* - Buffer type helpers -*/ - need_endian_conversion :: proc($FT: typeid, $TT: typeid) -> (res: bool) { // true if platform endian diff --git a/core/compress/common.odin b/core/compress/common.odin index a0e092643..5a5b9edc6 100644 --- a/core/compress/common.odin +++ b/core/compress/common.odin @@ -1,8 +1,20 @@ package compress +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + List of contributors: + Jeroen van Rijn: Initial implementation, optimization. +*/ + import "core:io" import "core:image" +when #config(TRACY_ENABLE, false) { + import tracy "shared:odin-tracy" +} + Error :: union { General_Error, Deflate_Error, @@ -71,15 +83,24 @@ Context :: struct { */ eof: b8, - input: io.Stream, + input: io.Stream, output: io.Stream, bytes_written: i64, - // Used to update hash as we write instead of all at once + /* + Used to update hash as we write instead of all at once. + */ rolling_hash: u32, // Sliding window buffer. Size must be a power of two. window_size: i64, + window_mask: i64, last: ^[dynamic]byte, + + /* + If we know the raw data size, we can optimize the reads. + */ + uncompressed_size: i64, + input_data: []u8, } // Stream helpers @@ -93,6 +114,7 @@ Context :: struct { */ read_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) { + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read Data"); } b := make([]u8, size_of(T), context.temp_allocator); r, e1 := io.to_reader(c.input); _, e2 := io.read(r, b); @@ -105,10 +127,12 @@ read_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Err } read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) { + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read u8"); } return read_data(z, u8); } peek_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) { + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Peek Data"); } // Get current position to read from. curr, e1 := c.input->impl_seek(0, .Current); if e1 != .None { @@ -136,6 +160,7 @@ peek_back_byte :: proc(c: ^Context, offset: i64) -> (res: u8, err: io.Error) { // Generalized bit reader LSB refill_lsb :: proc(z: ^Context, width := i8(24)) { + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Refill LSB"); } for { if z.num_bits > width { break; @@ -146,7 +171,7 @@ refill_lsb :: proc(z: ^Context, width := i8(24)) { if z.code_buffer >= 1 << uint(z.num_bits) { // Code buffer is malformed. z.num_bits = -100; - return; + return; } c, err := read_u8(z); if err != .None { diff --git a/core/compress/gzip/example.odin b/core/compress/gzip/example.odin index 1ab899e00..81935a43a 100644 --- a/core/compress/gzip/example.odin +++ b/core/compress/gzip/example.odin @@ -1,6 +1,17 @@ //+ignore package gzip +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + List of contributors: + Jeroen van Rijn: Initial implementation. + Ginger Bill: Cosmetic changes. + + A small GZIP implementation as an example. +*/ + import "core:compress/gzip" import "core:bytes" import "core:os" diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin index 82488a5a8..55e00198a 100644 --- a/core/compress/gzip/gzip.odin +++ b/core/compress/gzip/gzip.odin @@ -1,5 +1,19 @@ package gzip +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + List of contributors: + Jeroen van Rijn: Initial implementation. + + This package implements support for the GZIP file format v4.3, + as specified in RFC 1952. + + It is implemented in such a way that it lends itself naturally + to be the input to a complementary TAR implementation. +*/ + import "core:compress/zlib" import "core:compress" import "core:os" @@ -9,11 +23,6 @@ import "core:hash" /* - This package implements support for the GZIP file format v4.3, - as specified in RFC 1952. - - It is implemented in such a way that it lends itself naturally - to be the input to a complementary TAR implementation. */ @@ -200,7 +209,7 @@ load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := con xlen -= field_length; // printf("%v\n", string(field_data)); - } + } if xlen != 0 { return E_GZIP.Invalid_Extra_Data; diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin index 9af61e4b3..7c538b7af 100644 --- a/core/compress/zlib/example.odin +++ b/core/compress/zlib/example.odin @@ -1,6 +1,16 @@ //+ignore package zlib +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + List of contributors: + Jeroen van Rijn: Initial implementation. + + An example of how to use `zlib.inflate`. +*/ + import "core:compress/zlib" import "core:bytes" import "core:fmt" diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin index d0e99d820..956ddaca1 100644 --- a/core/compress/zlib/zlib.odin +++ b/core/compress/zlib/zlib.odin @@ -1,11 +1,23 @@ package zlib +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + List of contributors: + Jeroen van Rijn: Initial implementation, optimization. + Ginger Bill: Cosmetic changes. +*/ + import "core:compress" import "core:mem" import "core:io" import "core:bytes" import "core:hash" + +when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" } + /* zlib.inflate decompresses a ZLIB stream passed in as a []u8 or io.Stream. Returns: Error. @@ -118,6 +130,7 @@ z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) { } write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check { + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Write Byte"); } c := c; buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1}; z.rolling_hash = hash.adler32(buf, z.rolling_hash); @@ -126,17 +139,67 @@ write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_boun if e != .None { return e; } - z.last[z.bytes_written % z.window_size] = c; + z.last[z.bytes_written & z.window_mask] = c; z.bytes_written += 1; return .None; } +repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) { + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Byte"); } + /* + TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it + without having to worry about wrapping, so no need for a temp allocation to give to + the output stream, just give it _that_ slice. + */ + buf := make([]u8, count, context.temp_allocator); + #no_bounds_check for i in 0..impl_write(buf); + if e != .None { + return e; + } + return .None; +} + +repl_bytes :: proc(z: ^Context, count: u16, distance: u16) -> (err: io.Error) { + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Bytes"); } + /* + TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it + without having to worry about wrapping, so no need for a temp allocation to give to + the output stream, just give it _that_ slice. + */ + buf := make([]u8, count, context.temp_allocator); + + offset := z.bytes_written - i64(distance); + #no_bounds_check for i in 0..impl_write(buf); + if e != .None { + return e; + } + return .None; +} + + allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_Table, err: Error) { return new(Huffman_Table, allocator), nil; } build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) { + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Build Huffman Table"); } sizes: [HUFFMAN_MAX_BITS+1]int; next_code: [HUFFMAN_MAX_BITS]int; @@ -195,6 +258,7 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) { } decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check { + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman Slow"); } code := u16(compress.peek_bits_lsb(z, 16)); k := int(z_bit_reverse(code, 16)); @@ -225,6 +289,7 @@ decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: } decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check { + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman"); } if z.num_bits < 16 { if z.num_bits == -100 { return 0, E_ZLIB.Code_Buffer_Malformed; @@ -244,6 +309,7 @@ decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) # } parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check { + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Parse Huffman Block"); } #no_bounds_check for { value, e := decode_huffman(z, z_repeat); if e != nil { @@ -256,8 +322,8 @@ parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> } } else { if value == 256 { - // End of block - return nil; + // End of block + return nil; } value -= 257; @@ -294,24 +360,30 @@ parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> Replicate the last outputted byte, length times. */ if length > 0 { - b, e := compress.peek_back_byte(z, offset); - if e != .None { + if offset >= 0 && offset < z.window_size { + c := z.last[offset]; + e := repl_byte(z, length, c); + if e != .None { + return E_General.Output_Too_Short; + } + } else { return E_General.Output_Too_Short; } - #no_bounds_check for _ in 0.. 0 { - #no_bounds_check for _ in 0.. (err: Error) #no_bounds_check { + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Inflate Raw"); } final := u32(0); type := u32(0); @@ -426,6 +499,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) -> if z.window_size == 0 { z.window_size = DEFLATE_MAX_DISTANCE; } + z.window_mask = z.window_size - 1; // Allocate rolling window buffer. last_b := mem.make_dynamic_array_len_cap([dynamic]u8, z.window_size, z.window_size, allocator); @@ -440,6 +514,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) -> switch type { case 0: + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Literal Block"); } // Uncompressed block // Discard bits until next byte boundary @@ -468,6 +543,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) -> case 3: return E_Deflate.BType_3; case: + when #config(TRACY_ENABLE, false) { tracy.ZoneN("Huffman Block"); } // log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type); if type == 1 { // Use fixed code lengths. @@ -531,7 +607,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) -> case 18: c = u16(compress.read_bits_no_refill_lsb(z, 7) + 11); case: - return E_Deflate.Huffman_Bad_Code_Lengths; + return E_Deflate.Huffman_Bad_Code_Lengths; } if ntot - n < u32(c) { diff --git a/core/image/common.odin b/core/image/common.odin index 8443a2d22..7a678f5b0 100644 --- a/core/image/common.odin +++ b/core/image/common.odin @@ -1,5 +1,14 @@ package image +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + List of contributors: + Jeroen van Rijn: Initial implementation, optimization. + Ginger Bill: Cosmetic changes. +*/ + import "core:bytes" import "core:mem" @@ -66,10 +75,10 @@ Image_Option: If the image has an alpha channel, drop it. You may want to use `.alpha_premultiply` in this case. - NOTE: For PNG, this also skips handling of the tRNS chunk, if present, - unless you select `alpha_premultiply`. - In this case it'll premultiply the specified pixels in question only, - as the others are implicitly fully opaque. + NOTE: For PNG, this also skips handling of the tRNS chunk, if present, + unless you select `alpha_premultiply`. + In this case it'll premultiply the specified pixels in question only, + as the others are implicitly fully opaque. `.alpha_premultiply` If the image has an alpha channel, returns image data as follows: diff --git a/core/image/png/example.odin b/core/image/png/example.odin index 3dd4af2ff..3891a88e5 100644 --- a/core/image/png/example.odin +++ b/core/image/png/example.odin @@ -1,6 +1,17 @@ //+ignore package png +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + List of contributors: + Jeroen van Rijn: Initial implementation. + Ginger Bill: Cosmetic changes. + + An example of how to use `png.load`. +*/ + import "core:compress" import "core:image" import "core:image/png" diff --git a/core/image/png/helpers.odin b/core/image/png/helpers.odin index 3a811f5c9..b28e4aead 100644 --- a/core/image/png/helpers.odin +++ b/core/image/png/helpers.odin @@ -1,5 +1,16 @@ package png +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + List of contributors: + Jeroen van Rijn: Initial implementation. + Ginger Bill: Cosmetic changes. + + These are a few useful utility functions to work with PNG images. +*/ + import "core:image" import "core:compress/zlib" import coretime "core:time" @@ -7,10 +18,6 @@ import "core:strings" import "core:bytes" import "core:mem" -/* - These are a few useful utility functions to work with PNG images. -*/ - /* Cleanup of image-specific data. There are other helpers for cleanup of PNG-specific data. diff --git a/core/image/png/png.odin b/core/image/png/png.odin index 18295793d..b4f25201f 100644 --- a/core/image/png/png.odin +++ b/core/image/png/png.odin @@ -1,5 +1,14 @@ package png +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + List of contributors: + Jeroen van Rijn: Initial implementation. + Ginger Bill: Cosmetic changes. +*/ + import "core:compress" import "core:compress/zlib" import "core:image"