ZLIB: Start optimization.

This commit is contained in:
Jeroen van Rijn
2021-06-21 21:05:52 +02:00
parent 797c41950a
commit 352494cbb4
10 changed files with 211 additions and 38 deletions

View File

@@ -1,12 +1,18 @@
package bytes
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-2 license.
List of contributors:
Jeroen van Rijn: Initial implementation.
`bytes.Buffer` type conversion helpers.
*/
import "core:intrinsics"
import "core:mem"
/*
Buffer type helpers
*/
need_endian_conversion :: proc($FT: typeid, $TT: typeid) -> (res: bool) {
// true if platform endian

View File

@@ -1,8 +1,20 @@
package compress
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-2 license.
List of contributors:
Jeroen van Rijn: Initial implementation, optimization.
*/
import "core:io"
import "core:image"
when #config(TRACY_ENABLE, false) {
import tracy "shared:odin-tracy"
}
Error :: union {
General_Error,
Deflate_Error,
@@ -71,15 +83,24 @@ Context :: struct {
*/
eof: b8,
input: io.Stream,
input: io.Stream,
output: io.Stream,
bytes_written: i64,
// Used to update hash as we write instead of all at once
/*
Used to update hash as we write instead of all at once.
*/
rolling_hash: u32,
// Sliding window buffer. Size must be a power of two.
window_size: i64,
window_mask: i64,
last: ^[dynamic]byte,
/*
If we know the raw data size, we can optimize the reads.
*/
uncompressed_size: i64,
input_data: []u8,
}
// Stream helpers
@@ -93,6 +114,7 @@ Context :: struct {
*/
read_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read Data"); }
b := make([]u8, size_of(T), context.temp_allocator);
r, e1 := io.to_reader(c.input);
_, e2 := io.read(r, b);
@@ -105,10 +127,12 @@ read_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Err
}
/*
	Reads a single byte from the context's input by delegating to `read_data`
	with `u8` as the requested type. Both the byte and the error are passed
	through unchanged.
*/
read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read u8"); }

	res, err = read_data(z, u8);
	return;
}
peek_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
when #config(TRACY_ENABLE, false) { tracy.ZoneN("Peek Data"); }
// Get current position to read from.
curr, e1 := c.input->impl_seek(0, .Current);
if e1 != .None {
@@ -136,6 +160,7 @@ peek_back_byte :: proc(c: ^Context, offset: i64) -> (res: u8, err: io.Error) {
// Generalized bit reader LSB
refill_lsb :: proc(z: ^Context, width := i8(24)) {
when #config(TRACY_ENABLE, false) { tracy.ZoneN("Refill LSB"); }
for {
if z.num_bits > width {
break;
@@ -146,7 +171,7 @@ refill_lsb :: proc(z: ^Context, width := i8(24)) {
if z.code_buffer >= 1 << uint(z.num_bits) {
// Code buffer is malformed.
z.num_bits = -100;
return;
return;
}
c, err := read_u8(z);
if err != .None {

View File

@@ -1,6 +1,17 @@
//+ignore
package gzip
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-2 license.
List of contributors:
Jeroen van Rijn: Initial implementation.
Ginger Bill: Cosmetic changes.
A small GZIP implementation as an example.
*/
import "core:compress/gzip"
import "core:bytes"
import "core:os"

View File

@@ -1,5 +1,19 @@
package gzip
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-2 license.
List of contributors:
Jeroen van Rijn: Initial implementation.
This package implements support for the GZIP file format v4.3,
as specified in RFC 1952.
It is implemented in such a way that it lends itself naturally
to be the input to a complementary TAR implementation.
*/
import "core:compress/zlib"
import "core:compress"
import "core:os"
@@ -9,11 +23,6 @@ import "core:hash"
/*
This package implements support for the GZIP file format v4.3,
as specified in RFC 1952.
It is implemented in such a way that it lends itself naturally
to be the input to a complementary TAR implementation.
*/
@@ -200,7 +209,7 @@ load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := con
xlen -= field_length;
// printf("%v\n", string(field_data));
}
}
if xlen != 0 {
return E_GZIP.Invalid_Extra_Data;

View File

@@ -1,6 +1,16 @@
//+ignore
package zlib
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-2 license.
List of contributors:
Jeroen van Rijn: Initial implementation.
An example of how to use `zlib.inflate`.
*/
import "core:compress/zlib"
import "core:bytes"
import "core:fmt"

View File

@@ -1,11 +1,23 @@
package zlib
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-2 license.
List of contributors:
Jeroen van Rijn: Initial implementation, optimization.
Ginger Bill: Cosmetic changes.
*/
import "core:compress"
import "core:mem"
import "core:io"
import "core:bytes"
import "core:hash"
when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" }
/*
zlib.inflate decompresses a ZLIB stream passed in as a []u8 or io.Stream.
Returns: Error.
@@ -118,6 +130,7 @@ z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
}
write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check {
when #config(TRACY_ENABLE, false) { tracy.ZoneN("Write Byte"); }
c := c;
buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1};
z.rolling_hash = hash.adler32(buf, z.rolling_hash);
@@ -126,17 +139,67 @@ write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_boun
if e != .None {
return e;
}
z.last[z.bytes_written % z.window_size] = c;
z.last[z.bytes_written & z.window_mask] = c;
z.bytes_written += 1;
return .None;
}
/*
	Emits `count` copies of the byte `c`.

	Every copy is stored in the sliding window `z.last` at
	`z.bytes_written & z.window_mask`, and `z.bytes_written` is advanced by one.
	The whole run is then folded into `z.rolling_hash` with Adler-32 and handed
	to `z.output` in a single write.

	Returns the error reported by the output stream's write. Note that the window
	and `bytes_written` have already been updated by the time the write happens.
*/
repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) {
	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Byte"); }

	/*
		TODO(Jeroen): With a magic ring buffer we could peek/write into the window
		directly without caring about wrap-around, and pass _that_ slice to the
		output stream instead of making a temp allocation here.
	*/
	run := make([]u8, count, context.temp_allocator);

	#no_bounds_check for i := 0; i < len(run); i += 1 {
		run[i] = c;
		z.last[z.bytes_written & z.window_mask] = c;
		z.bytes_written += 1;
	}

	z.rolling_hash = hash.adler32(run, z.rolling_hash);

	// The write's error is the procedure's result, whether it is `.None` or not.
	_, err = z.output->impl_write(run);
	return;
}
/*
	Copies `count` bytes that were written `distance` bytes ago to the output.

	The source position starts at `z.bytes_written - distance`. Both the source
	and destination indices are wrapped with `z.window_mask`. Bytes are moved one
	at a time, so a byte stored earlier in this same call can be read back later
	in the loop when `distance` is smaller than `count`.

	Each byte is stored in the sliding window `z.last` and `z.bytes_written` is
	advanced. The collected run is then folded into `z.rolling_hash` with
	Adler-32 and handed to `z.output` in a single write.

	Returns the error reported by the output stream's write.
*/
repl_bytes :: proc(z: ^Context, count: u16, distance: u16) -> (err: io.Error) {
	when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Bytes"); }

	/*
		TODO(Jeroen): With a magic ring buffer we could peek/write into the window
		directly without caring about wrap-around, and pass _that_ slice to the
		output stream instead of making a temp allocation here.
	*/
	run := make([]u8, count, context.temp_allocator);
	src := z.bytes_written - i64(distance);

	#no_bounds_check for i := 0; i < len(run); i += 1 {
		b := z.last[src & z.window_mask];
		z.last[z.bytes_written & z.window_mask] = b;
		run[i] = b;

		src += 1;
		z.bytes_written += 1;
	}

	z.rolling_hash = hash.adler32(run, z.rolling_hash);

	// The write's error is the procedure's result, whether it is `.None` or not.
	_, err = z.output->impl_write(run);
	return;
}
/*
	Allocates a new `Huffman_Table` with the given allocator and returns a
	pointer to it. The returned error is always `nil`.
*/
allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_Table, err: Error) {
	z   = new(Huffman_Table, allocator);
	err = nil;
	return;
}
build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
when #config(TRACY_ENABLE, false) { tracy.ZoneN("Build Huffman Table"); }
sizes: [HUFFMAN_MAX_BITS+1]int;
next_code: [HUFFMAN_MAX_BITS]int;
@@ -195,6 +258,7 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
}
decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman Slow"); }
code := u16(compress.peek_bits_lsb(z, 16));
k := int(z_bit_reverse(code, 16));
@@ -225,6 +289,7 @@ decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err:
}
decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman"); }
if z.num_bits < 16 {
if z.num_bits == -100 {
return 0, E_ZLIB.Code_Buffer_Malformed;
@@ -244,6 +309,7 @@ decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #
}
parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
when #config(TRACY_ENABLE, false) { tracy.ZoneN("Parse Huffman Block"); }
#no_bounds_check for {
value, e := decode_huffman(z, z_repeat);
if e != nil {
@@ -256,8 +322,8 @@ parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) ->
}
} else {
if value == 256 {
// End of block
return nil;
// End of block
return nil;
}
value -= 257;
@@ -294,24 +360,30 @@ parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) ->
Replicate the last outputted byte, length times.
*/
if length > 0 {
b, e := compress.peek_back_byte(z, offset);
if e != .None {
if offset >= 0 && offset < z.window_size {
c := z.last[offset];
e := repl_byte(z, length, c);
if e != .None {
return E_General.Output_Too_Short;
}
} else {
return E_General.Output_Too_Short;
}
#no_bounds_check for _ in 0..<length {
write_byte(z, b);
}
}
} else {
if length > 0 {
#no_bounds_check for _ in 0..<length {
b, e := compress.peek_back_byte(z, offset);
if e != .None {
return E_General.Output_Too_Short;
}
write_byte(z, b);
offset += 1;
e := repl_bytes(z, length, distance);
if e != .None {
return E_General.Output_Too_Short;
}
// #no_bounds_check for _ in 0..<length {
// b, e := compress.peek_back_byte(z, offset);
// if e != .None {
// return E_General.Output_Too_Short;
// }
// write_byte(z, b);
// offset += 1;
// }
}
}
}
@@ -378,7 +450,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
ctx.rolling_hash = 1;
}
// Parse ZLIB stream without header.
// Parse ZLIB stream without header.
err = inflate_raw(ctx);
if err != nil {
return err;
@@ -397,6 +469,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
// @(optimization_mode="speed")
inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) -> (err: Error) #no_bounds_check {
when #config(TRACY_ENABLE, false) { tracy.ZoneN("Inflate Raw"); }
final := u32(0);
type := u32(0);
@@ -426,6 +499,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
if z.window_size == 0 {
z.window_size = DEFLATE_MAX_DISTANCE;
}
z.window_mask = z.window_size - 1;
// Allocate rolling window buffer.
last_b := mem.make_dynamic_array_len_cap([dynamic]u8, z.window_size, z.window_size, allocator);
@@ -440,6 +514,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
switch type {
case 0:
when #config(TRACY_ENABLE, false) { tracy.ZoneN("Literal Block"); }
// Uncompressed block
// Discard bits until next byte boundary
@@ -468,6 +543,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
case 3:
return E_Deflate.BType_3;
case:
when #config(TRACY_ENABLE, false) { tracy.ZoneN("Huffman Block"); }
// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
if type == 1 {
// Use fixed code lengths.
@@ -531,7 +607,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
case 18:
c = u16(compress.read_bits_no_refill_lsb(z, 7) + 11);
case:
return E_Deflate.Huffman_Bad_Code_Lengths;
return E_Deflate.Huffman_Bad_Code_Lengths;
}
if ntot - n < u32(c) {

View File

@@ -1,5 +1,14 @@
package image
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-2 license.
List of contributors:
Jeroen van Rijn: Initial implementation, optimization.
Ginger Bill: Cosmetic changes.
*/
import "core:bytes"
import "core:mem"
@@ -66,10 +75,10 @@ Image_Option:
If the image has an alpha channel, drop it.
You may want to use `.alpha_premultiply` in this case.
NOTE: For PNG, this also skips handling of the tRNS chunk, if present,
unless you select `alpha_premultiply`.
In this case it'll premultiply the specified pixels in question only,
as the others are implicitly fully opaque.
NOTE: For PNG, this also skips handling of the tRNS chunk, if present,
unless you select `alpha_premultiply`.
In this case it'll premultiply the specified pixels in question only,
as the others are implicitly fully opaque.
`.alpha_premultiply`
If the image has an alpha channel, returns image data as follows:

View File

@@ -1,6 +1,17 @@
//+ignore
package png
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-2 license.
List of contributors:
Jeroen van Rijn: Initial implementation.
Ginger Bill: Cosmetic changes.
An example of how to use `png.load`.
*/
import "core:compress"
import "core:image"
import "core:image/png"

View File

@@ -1,5 +1,16 @@
package png
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-2 license.
List of contributors:
Jeroen van Rijn: Initial implementation.
Ginger Bill: Cosmetic changes.
These are a few useful utility functions to work with PNG images.
*/
import "core:image"
import "core:compress/zlib"
import coretime "core:time"
@@ -7,10 +18,6 @@ import "core:strings"
import "core:bytes"
import "core:mem"
/*
These are a few useful utility functions to work with PNG images.
*/
/*
Cleanup of image-specific data.
There are other helpers for cleanup of PNG-specific data.

View File

@@ -1,5 +1,14 @@
package png
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-2 license.
List of contributors:
Jeroen van Rijn: Initial implementation.
Ginger Bill: Cosmetic changes.
*/
import "core:compress"
import "core:compress/zlib"
import "core:image"