mirror of
https://github.com/odin-lang/Odin.git
synced 2026-02-16 08:04:07 +00:00
[varint] Add signed LEB128 encoding.
This commit is contained in:
@@ -6,21 +6,31 @@
|
||||
Jeroen van Rijn: Initial implementation.
|
||||
*/
|
||||
|
||||
// package varint implements variable length integer encoding and decoding
|
||||
// using the LEB128 format as used by DWARF debug and other file formats
|
||||
// package varint implements variable length integer encoding and decoding using
|
||||
// the LEB128 format as used by DWARF debug info, Android .dex and other file formats.
|
||||
package varint
|
||||
|
||||
// Decode a slice of bytes encoding an unsigned LEB128 integer into value and number of bytes used.
|
||||
// Returns `size` == 0 for an invalid value, empty slice, or a varint > 16 bytes.
|
||||
// In theory we should use the bigint package. In practice, varints bigger than this indicate a corrupted file.
|
||||
decode_uleb128 :: proc(buf: []u8) -> (val: u128, size: int) {
|
||||
// Instead we'll set limits on the values we'll encode/decode
|
||||
// 18 * 7 bits = 126, which means that a possible 19th byte may at most be `0b0000_0011`.
|
||||
LEB128_MAX_BYTES :: 19
|
||||
|
||||
// Error describes why a LEB128 encode or decode call did not produce a result.
Error :: enum {
	// The call succeeded.
	None             = 0,

	// Decoding: the input slice was empty or ended before the final byte of the varint.
	// Encoding: the output buffer has no room for the next byte.
	Buffer_Too_Small = 1,

	// Decoding: byte number `LEB128_MAX_BYTES` carries more than the two
	// remaining payload bits, so the value cannot fit in 128 bits.
	Value_Too_Large  = 2,
}
|
||||
|
||||
// Decode a slice of bytes encoding an unsigned LEB128 integer into value and number of bytes used.
|
||||
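// Returns `size` == 0 and a non-`.None` error for an empty or truncated slice (`.Buffer_Too_Small`), or when the value does not fit in 128 bits / `LEB128_MAX_BYTES` bytes (`.Value_Too_Large`).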
|
||||
decode_uleb128 :: proc(buf: []u8) -> (val: u128, size: int, err: Error) {
|
||||
more := true
|
||||
|
||||
for v, i in buf {
|
||||
size = i + 1
|
||||
|
||||
if size > size_of(u128) {
|
||||
return
|
||||
if size == LEB128_MAX_BYTES && v > 0b0000_0011 {
|
||||
return 0, 0, .Value_Too_Large
|
||||
}
|
||||
|
||||
val |= u128(v & 0x7f) << uint(i * 7)
|
||||
@@ -33,25 +43,26 @@ decode_uleb128 :: proc(buf: []u8) -> (val: u128, size: int) {
|
||||
|
||||
// If the buffer runs out before the number ends, return an error.
|
||||
if more {
|
||||
return 0, 0
|
||||
return 0, 0, .Buffer_Too_Small
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Decode a slice of bytes encoding a signed LEB128 integer into value and number of bytes used.
|
||||
// Returns `size` == 0 for an invalid value, empty slice, or a varint > 16 bytes.
|
||||
// In theory we should use the bigint package. In practice, varints bigger than this indicate a corrupted file.
|
||||
decode_ileb128 :: proc(buf: []u8) -> (val: i128, size: int) {
|
||||
// Returns `size` == 0 for an invalid value, empty slice, or a varint > 18 bytes.
|
||||
decode_ileb128 :: proc(buf: []u8) -> (val: i128, size: int, err: Error) {
|
||||
shift: uint
|
||||
|
||||
if len(buf) == 0 {
|
||||
return
|
||||
return 0, 0, .Buffer_Too_Small
|
||||
}
|
||||
|
||||
for v in buf {
|
||||
size += 1
|
||||
if size > size_of(i128) {
|
||||
return
|
||||
|
||||
// 18 * 7 bits = 126, which means that a possible 19th byte may at most be 0b0000_0011.
|
||||
if size == LEB128_MAX_BYTES && v > 0b0000_0011 {
|
||||
return 0, 0, .Value_Too_Large
|
||||
}
|
||||
|
||||
val |= i128(v & 0x7f) << shift
|
||||
@@ -64,4 +75,61 @@ decode_ileb128 :: proc(buf: []u8) -> (val: i128, size: int) {
|
||||
val |= max(i128) << shift
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Encode `val` into `buf` as an unsigned LEB128 encoded series of bytes.
//
// The value is emitted least-significant 7-bit group first; every byte except
// the last has its high bit (0x80) set to signal that more bytes follow.
//
// Returns the number of bytes written and `.None` on success.
// Returns `0, .Buffer_Too_Small` if `buf` cannot hold the whole encoding; in that
// case the leading bytes of `buf` may already have been overwritten.
// A buffer of `LEB128_MAX_BYTES` bytes is large enough for any `u128`.
encode_uleb128 :: proc(buf: []u8, val: u128) -> (size: int, err: Error) {
	remaining := val

	for size < len(buf) {
		group := u8(remaining & 0x7f)
		remaining >>= 7

		if remaining != 0 {
			group |= 0x80 // more bytes to follow
		}

		buf[size] = group
		size += 1

		if remaining == 0 {
			return size, .None
		}
	}

	// Ran out of room before the final group could be written.
	return 0, .Buffer_Too_Small
}
|
||||
|
||||
// Sign extend mask. `encode_ileb128` does not need it (see the note in its body);
// the constant is retained so any other package-private user keeps compiling.
@(private)
SIGN_MASK :: (i128(1) << 121)

// Encode `val` into `buf` as a signed LEB128 encoded series of bytes.
//
// The value is emitted least-significant 7-bit group first. Encoding stops at the
// first group after which the remaining value is pure sign extension (all zeroes
// or all ones) AND bit 6 (0x40) of the group already matches that sign; every
// earlier byte has its high bit (0x80) set to signal that more bytes follow.
//
// Returns the number of bytes written and `.None` on success.
// Returns `0, .Buffer_Too_Small` if `buf` cannot hold the whole encoding; in that
// case the leading bytes of `buf` may already have been overwritten.
// A buffer of `LEB128_MAX_BYTES` bytes is large enough for any `i128`.
encode_ileb128 :: proc(buf: []u8, val: i128) -> (size: int, err: Error) {
	val  := val
	more := true

	for more {
		size += 1

		if size > len(buf) {
			return 0, .Buffer_Too_Small
		}

		// `low` is the 7-bit payload, always in the range 0..=127.
		// NOTE: a former `(low ~ SIGN_MASK) - SIGN_MASK` step was removed here. With
		// `low` already masked to 7 bits it set and then cleared bit 121, i.e. it
		// was a no-op. The `val == -1` test below relies on `>>=` keeping the sign
		// of `val`, so no manual sign extension is required.
		low := val & 0x7f
		val >>= 7

		sign_bit_set := (low & 0x40) == 0x40

		if (val == 0 && !sign_bit_set) || (val == -1 && sign_bit_set) {
			// Everything left is sign extension that the decoder rebuilds from bit 6.
			more = false
		} else {
			low |= 0x80 // more bytes to follow
		}

		buf[size - 1] = u8(low)
	}
	return
}
|
||||
@@ -4,6 +4,7 @@ import "core:encoding/varint"
|
||||
import "core:testing"
|
||||
import "core:fmt"
|
||||
import "core:os"
|
||||
import "core:slice"
|
||||
|
||||
TEST_count := 0
|
||||
TEST_fail := 0
|
||||
@@ -39,18 +40,40 @@ main :: proc() {
|
||||
|
||||
// Decodes every ULEB128 / ILEB128 test vector and checks value, consumed size and
// error code. Vectors that decode without error are then re-encoded and compared
// byte-for-byte against the original encoding.
@(test)
test_dwarf :: proc(t: ^testing.T) {
	// Scratch buffer for the roundtrip.
	buf: [varint.LEB128_MAX_BYTES]u8

	for vector in ULEB_Vectors {
		val, size, err := varint.decode_uleb128(vector.encoded)

		msg := fmt.tprintf("Expected %02x to decode to %v consuming %v bytes, got %v and %v", vector.encoded, vector.value, vector.size, val, size)
		expect(t, size == vector.size && val == vector.value, msg)

		msg = fmt.tprintf("Expected decoder to return error %v, got %v", vector.error, err)
		expect(t, err == vector.error, msg)

		if err == .None { // Try to roundtrip
			size, err = varint.encode_uleb128(buf[:], vector.value)

			msg = fmt.tprintf("Expected %v to encode to %02x, got %02x", vector.value, vector.encoded, buf[:size])
			// The encoder must succeed as well as reproduce the exact bytes.
			expect(t, err == .None && size == vector.size && slice.simple_equal(vector.encoded, buf[:size]), msg)
		}
	}

	for vector in ILEB_Vectors {
		val, size, err := varint.decode_ileb128(vector.encoded)

		msg := fmt.tprintf("Expected %02x to decode to %v consuming %v bytes, got %v and %v", vector.encoded, vector.value, vector.size, val, size)
		expect(t, size == vector.size && val == vector.value, msg)

		msg = fmt.tprintf("Expected decoder to return error %v, got %v", vector.error, err)
		expect(t, err == vector.error, msg)

		if err == .None { // Try to roundtrip
			size, err = varint.encode_ileb128(buf[:], vector.value)

			msg = fmt.tprintf("Expected %v to encode to %02x, got %02x", vector.value, vector.encoded, buf[:size])
			// The encoder must succeed as well as reproduce the exact bytes.
			expect(t, err == .None && size == vector.size && slice.simple_equal(vector.encoded, buf[:size]), msg)
		}
	}
}
|
||||
|
||||
@@ -58,24 +81,28 @@ ULEB_Test_Vector :: struct {
|
||||
encoded: []u8,
|
||||
value: u128,
|
||||
size: int,
|
||||
error: varint.Error,
|
||||
}
|
||||
|
||||
// Unsigned LEB128 test vectors: { encoded bytes, expected value, expected size, expected error }.
// Vectors expected to fail carry value 0 and size 0.
ULEB_Vectors :: []ULEB_Test_Vector{
	{ []u8{0x00},             0,      1, .None },
	{ []u8{0x7f},             127,    1, .None },
	// First value that needs a continuation byte.
	{ []u8{0x80, 0x01},       128,    2, .None },
	{ []u8{0xE5, 0x8E, 0x26}, 624485, 3, .None },
	// Largest representable value: 18 full groups plus the two remaining bits.
	{ []u8{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03}, max(u128), 19, .None },
	// The 19th byte carries more than two payload bits.
	{ []u8{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x04}, 0, 0, .Value_Too_Large },
	// Continuation bit set on the last available byte.
	{ []u8{0x80},             0,      0, .Buffer_Too_Small },
	{ []u8{},                 0,      0, .Buffer_Too_Small },
}
|
||||
|
||||
// One signed LEB128 test case.
ILEB_Test_Vector :: struct {
	encoded: []u8,         // bytes handed to the decoder
	value:   i128,         // expected decoded value
	size:    int,          // expected number of bytes consumed
	error:   varint.Error, // expected decoder error
}

// Signed LEB128 test vectors: { encoded bytes, expected value, expected size, expected error }.
// Vectors expected to fail carry value 0 and size 0.
ILEB_Vectors :: []ILEB_Test_Vector{
	{ []u8{0x00},             0,       1, .None },
	// Largest positive and smallest negative value that fit in one byte.
	{ []u8{0x3f},             63,      1, .None },
	{ []u8{0x40},             -64,     1, .None },
	{ []u8{0x7f},             -1,      1, .None },
	// 64 has bit 6 set, so it needs a second byte to stay positive.
	{ []u8{0xC0, 0x00},       64,      2, .None },
	{ []u8{0xC0, 0xBB, 0x78}, -123456, 3, .None },
	{ []u8{},                 0,       0, .Buffer_Too_Small },
}
|
||||
Reference in New Issue
Block a user