diff --git a/core/encoding/base64/base64.odin b/core/encoding/base64/base64.odin index cf2ea1c12..793f22c57 100644 --- a/core/encoding/base64/base64.odin +++ b/core/encoding/base64/base64.odin @@ -1,5 +1,9 @@ package base64 +import "core:io" +import "core:mem" +import "core:strings" + // @note(zh): Encoding utility for Base64 // A secondary param can be used to supply a custom alphabet to // @link(encode) and a matching decoding table to @link(decode). @@ -39,59 +43,85 @@ DEC_TABLE := [128]int { 49, 50, 51, -1, -1, -1, -1, -1, } -encode :: proc(data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) -> string #no_bounds_check { - length := len(data) - if length == 0 { - return "" - } +encode :: proc(data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) -> (encoded: string, err: mem.Allocator_Error) #optional_allocator_error { + out_length := encoded_length(data) + if out_length == 0 { + return + } - out_length := ((4 * length / 3) + 3) &~ 3 - out := make([]byte, out_length, allocator) + out: strings.Builder + strings.builder_init(&out, 0, out_length, allocator) or_return - c0, c1, c2, block: int + ioerr := encode_into(strings.to_stream(&out), data, ENC_TBL) + assert(ioerr == nil) - for i, d := 0, 0; i < length; i, d = i + 3, d + 4 { - c0, c1, c2 = int(data[i]), -1, -1 - - if i + 1 < length { c1 = int(data[i + 1]) } - if i + 2 < length { c2 = int(data[i + 2]) } - - block = (c0 << 16) | (max(c1, 0) << 8) | max(c2, 0) - - out[d] = ENC_TBL[block >> 18 & 63] - out[d + 1] = ENC_TBL[block >> 12 & 63] - out[d + 2] = c1 == -1 ? PADDING : ENC_TBL[block >> 6 & 63] - out[d + 3] = c2 == -1 ? 
PADDING : ENC_TBL[block & 63] - } - return string(out) + return strings.to_string(out), nil } -decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocator) -> []byte #no_bounds_check { - length := len(data) - if length == 0 { - return nil - } +encoded_length :: #force_inline proc(data: []byte) -> int { + length := len(data) + if length == 0 { + return 0 + } - pad_count := data[length - 1] == PADDING ? (data[length - 2] == PADDING ? 2 : 1) : 0 - out_length := ((length * 6) >> 3) - pad_count - out := make([]byte, out_length, allocator) - - c0, c1, c2, c3: int - b0, b1, b2: int - - for i, j := 0, 0; i < length; i, j = i + 4, j + 3 { - c0 = DEC_TBL[data[i]] - c1 = DEC_TBL[data[i + 1]] - c2 = DEC_TBL[data[i + 2]] - c3 = DEC_TBL[data[i + 3]] - - b0 = (c0 << 2) | (c1 >> 4) - b1 = (c1 << 4) | (c2 >> 2) - b2 = (c2 << 6) | c3 - - out[j] = byte(b0) - out[j + 1] = byte(b1) - out[j + 2] = byte(b2) - } - return out + return ((4 * length / 3) + 3) &~ 3 +} + +encode_into :: proc(w: io.Writer, data: []byte, ENC_TBL := ENC_TABLE) -> (err: io.Error) #no_bounds_check { + length := len(data) + if length == 0 { + return + } + + c0, c1, c2, block: int + + for i, d := 0, 0; i < length; i, d = i + 3, d + 4 { + c0, c1, c2 = int(data[i]), -1, -1 + + if i + 1 < length { c1 = int(data[i + 1]) } + if i + 2 < length { c2 = int(data[i + 2]) } + + block = (c0 << 16) | (max(c1, 0) << 8) | max(c2, 0) + + out: [4]byte + out[0] = ENC_TBL[block >> 18 & 63] + out[1] = ENC_TBL[block >> 12 & 63] + out[2] = c1 == -1 ? PADDING : ENC_TBL[block >> 6 & 63] + out[3] = c2 == -1 ? PADDING : ENC_TBL[block & 63] + + #bounds_check { io.write_full(w, out[:]) or_return } + } + return +} + +decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocator) -> (out: []byte, err: mem.Allocator_Error) #optional_allocator_error { + #no_bounds_check { + length := len(data) + if length == 0 { + return + } + + pad_count := data[length - 1] == PADDING ? (data[length - 2] == PADDING ? 
2 : 1) : 0 + out_length := ((length * 6) >> 3) - pad_count + out = make([]byte, out_length, allocator) or_return + + c0, c1, c2, c3: int + b0, b1, b2: int + + for i, j := 0, 0; i < length; i, j = i + 4, j + 3 { + c0 = DEC_TBL[data[i]] + c1 = DEC_TBL[data[i + 1]] + c2 = DEC_TBL[data[i + 2]] + c3 = DEC_TBL[data[i + 3]] + + b0 = (c0 << 2) | (c1 >> 4) + b1 = (c1 << 4) | (c2 >> 2) + b2 = (c2 << 6) | c3 + + out[j] = byte(b0) + out[j + 1] = byte(b1) + out[j + 2] = byte(b2) + } + return + } } diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin new file mode 100644 index 000000000..e91c53f3c --- /dev/null +++ b/core/encoding/cbor/cbor.odin @@ -0,0 +1,680 @@ +package cbor + +import "core:encoding/json" +import "core:intrinsics" +import "core:io" +import "core:mem" +import "core:runtime" +import "core:strconv" +import "core:strings" + +// If we are decoding a stream of either a map or list, the initial capacity will be this value. +INITIAL_STREAMED_CONTAINER_CAPACITY :: 8 +// If we are decoding a stream of either text or bytes, the initial capacity will be this value. +INITIAL_STREAMED_BYTES_CAPACITY :: 16 + +// Known/common headers are defined, undefined headers can still be valid. +// Higher 3 bits is for the major type and lower 5 bits for the additional information. 
+Header :: enum u8 { + U8 = (u8(Major.Unsigned) << 5) | u8(Add.One_Byte), + U16 = (u8(Major.Unsigned) << 5) | u8(Add.Two_Bytes), + U32 = (u8(Major.Unsigned) << 5) | u8(Add.Four_Bytes), + U64 = (u8(Major.Unsigned) << 5) | u8(Add.Eight_Bytes), + + Neg_U8 = (u8(Major.Negative) << 5) | u8(Add.One_Byte), + Neg_U16 = (u8(Major.Negative) << 5) | u8(Add.Two_Bytes), + Neg_U32 = (u8(Major.Negative) << 5) | u8(Add.Four_Bytes), + Neg_U64 = (u8(Major.Negative) << 5) | u8(Add.Eight_Bytes), + + False = (u8(Major.Other) << 5) | u8(Add.False), + True = (u8(Major.Other) << 5) | u8(Add.True), + + Nil = (u8(Major.Other) << 5) | u8(Add.Nil), + Undefined = (u8(Major.Other) << 5) | u8(Add.Undefined), + + Simple = (u8(Major.Other) << 5) | u8(Add.One_Byte), + + F16 = (u8(Major.Other) << 5) | u8(Add.Two_Bytes), + F32 = (u8(Major.Other) << 5) | u8(Add.Four_Bytes), + F64 = (u8(Major.Other) << 5) | u8(Add.Eight_Bytes), + + Break = (u8(Major.Other) << 5) | u8(Add.Break), +} + +// The higher 3 bits of the header, which denote what type of value it is. +Major :: enum u8 { + Unsigned, + Negative, + Bytes, + Text, + Array, + Map, + Tag, + Other, +} + +// The lower 5 bits of the header, which denote additional information for the type of value. +Add :: enum u8 { + False = 20, + True = 21, + Nil = 22, + Undefined = 23, + + One_Byte = 24, + Two_Bytes = 25, + Four_Bytes = 26, + Eight_Bytes = 27, + + Length_Unknown = 31, + Break = Length_Unknown, +} + +Value :: union { + u8, + u16, + u32, + u64, + + Negative_U8, + Negative_U16, + Negative_U32, + Negative_U64, + + // Pointers so the size of the Value union stays small. + ^Bytes, + ^Text, + ^Array, + ^Map, + ^Tag, + + Simple, + f16, + f32, + f64, + bool, + Undefined, + Nil, +} + +Bytes :: []byte +Text :: string + +Array :: []Value + +Map :: []Map_Entry +Map_Entry :: struct { + key: Value, // Can be any unsigned, negative, float, Simple, bool, Text. + value: Value, +} + +Tag :: struct { + number: Tag_Number, + value: Value, // Value based on the number. 
+} + +Tag_Number :: u64 + +Nil :: distinct rawptr +Undefined :: distinct rawptr + +// A distinct atom-like number, range from `0..=19` and `32..=max(u8)`. +Simple :: distinct u8 +Atom :: Simple + +Unmarshal_Error :: union #shared_nil { + io.Error, + mem.Allocator_Error, + Decode_Data_Error, + Unmarshal_Data_Error, + Maybe(Unsupported_Type_Error), +} + +Marshal_Error :: union #shared_nil { + io.Error, + mem.Allocator_Error, + Encode_Data_Error, + Marshal_Data_Error, + Maybe(Unsupported_Type_Error), +} + +Decode_Error :: union #shared_nil { + io.Error, + mem.Allocator_Error, + Decode_Data_Error, +} + +Encode_Error :: union #shared_nil { + io.Error, + mem.Allocator_Error, + Encode_Data_Error, +} + +Decode_Data_Error :: enum { + None, + Bad_Major, // An invalid major type was encountered. + Bad_Argument, // A general unexpected value (most likely invalid additional info in header). + Bad_Tag_Value, // When the type of value for the given tag is not valid. + Nested_Indefinite_Length, // When an streamed/indefinite length container nests another, this is not allowed. + Nested_Tag, // When a tag's value is another tag, this is not allowed. + Length_Too_Big, // When the length of a container (map, array, bytes, string) is more than `max(int)`. + Break, +} + +Encode_Data_Error :: enum { + None, + Invalid_Simple, // When a simple is being encoded that is out of the range `0..=19` and `32..=max(u8)`. + Int_Too_Big, // When an int is being encoded that is larger than `max(u64)` or smaller than `min(u64)`. + Bad_Tag_Value, // When the type of value is not supported by the tag implementation. +} + +Unmarshal_Data_Error :: enum { + None, + Invalid_Parameter, // When the given `any` can not be unmarshalled into. + Non_Pointer_Parameter, // When the given `any` is not a pointer. +} + +Marshal_Data_Error :: enum { + None, + Invalid_CBOR_Tag, // When the struct tag `cbor_tag:""` is not a registered name or number. 
+} + +// Error that is returned when a type couldn't be marshalled into or out of, as much information +// as possible/available is added. +Unsupported_Type_Error :: struct { + id: typeid, + hdr: Header, + add: Add, +} + +_unsupported :: proc(v: any, hdr: Header, add: Add = nil) -> Maybe(Unsupported_Type_Error) { + return Unsupported_Type_Error{ + id = v.id, + hdr = hdr, + add = add, + } +} + +// Actual value is `-1 - x` (be careful of overflows). + +Negative_U8 :: distinct u8 +Negative_U16 :: distinct u16 +Negative_U32 :: distinct u32 +Negative_U64 :: distinct u64 + +// Turns the CBOR negative unsigned int type into a signed integer type. +negative_to_int :: proc { + negative_u8_to_int, + negative_u16_to_int, + negative_u32_to_int, + negative_u64_to_int, +} + +negative_u8_to_int :: #force_inline proc(u: Negative_U8) -> i16 { + return -1 - i16(u) +} + +negative_u16_to_int :: #force_inline proc(u: Negative_U16) -> i32 { + return -1 - i32(u) +} + +negative_u32_to_int :: #force_inline proc(u: Negative_U32) -> i64 { + return -1 - i64(u) +} + +negative_u64_to_int :: #force_inline proc(u: Negative_U64) -> i128 { + return -1 - i128(u) +} + +// Utility for converting between the different errors when they are subsets of the other. 
+err_conv :: proc { + encode_to_marshal_err, + decode_to_unmarshal_err, + decode_to_unmarshal_err_p, + decode_to_unmarshal_err_p2, +} + +encode_to_marshal_err :: #force_inline proc(err: Encode_Error) -> Marshal_Error { + switch e in err { + case nil: return nil + case io.Error: return e + case mem.Allocator_Error: return e + case Encode_Data_Error: return e + case: return nil + } +} + +decode_to_unmarshal_err :: #force_inline proc(err: Decode_Error) -> Unmarshal_Error { + switch e in err { + case nil: return nil + case io.Error: return e + case mem.Allocator_Error: return e + case Decode_Data_Error: return e + case: return nil + } +} + +decode_to_unmarshal_err_p :: #force_inline proc(v: $T, err: Decode_Error) -> (T, Unmarshal_Error) { + return v, err_conv(err) +} + +decode_to_unmarshal_err_p2 :: #force_inline proc(v: $T, v2: $T2, err: Decode_Error) -> (T, T2, Unmarshal_Error) { + return v, v2, err_conv(err) +} + +// Recursively frees all memory allocated when decoding the passed value. +destroy :: proc(val: Value, allocator := context.allocator) { + context.allocator = allocator + #partial switch v in val { + case ^Map: + if v == nil { return } + for entry in v { + destroy(entry.key) + destroy(entry.value) + } + delete(v^) + free(v) + case ^Array: + if v == nil { return } + for entry in v { + destroy(entry) + } + delete(v^) + free(v) + case ^Text: + if v == nil { return } + delete(v^) + free(v) + case ^Bytes: + if v == nil { return } + delete(v^) + free(v) + case ^Tag: + if v == nil { return } + destroy(v.value) + free(v) + } +} + +/* +diagnose either writes or returns a human-readable representation of the value, +optionally formatted, defined as the diagnostic format in section 8 of RFC 8949. + +Incidentally, if the CBOR does not contain any of the additional types defined on top of JSON +this will also be valid JSON. +*/ +diagnose :: proc { + diagnostic_string, + diagnose_to_writer, +} + +// Turns the given CBOR value into a human-readable string. 
+// See docs on the proc group `diagnose` for more info. +diagnostic_string :: proc(val: Value, padding := 0, allocator := context.allocator) -> (string, mem.Allocator_Error) #optional_allocator_error { + b := strings.builder_make(allocator) + w := strings.to_stream(&b) + err := diagnose_to_writer(w, val, padding) + if err == .EOF { + // The string builder stream only returns .EOF, and only if it can't write (out of memory). + return "", .Out_Of_Memory + } + assert(err == nil) + + return strings.to_string(b), nil +} + +// Writes the given CBOR value into the writer as human-readable text. +// See docs on the proc group `diagnose` for more info. +diagnose_to_writer :: proc(w: io.Writer, val: Value, padding := 0) -> io.Error { + @(require_results) + indent :: proc(padding: int) -> int { + padding := padding + if padding != -1 { + padding += 1 + } + return padding + } + + @(require_results) + dedent :: proc(padding: int) -> int { + padding := padding + if padding != -1 { + padding -= 1 + } + return padding + } + + comma :: proc(w: io.Writer, padding: int) -> io.Error { + _ = io.write_string(w, ", " if padding == -1 else ",") or_return + return nil + } + + newline :: proc(w: io.Writer, padding: int) -> io.Error { + if padding != -1 { + io.write_string(w, "\n") or_return + for _ in 0.. (Value, mem.Allocator_Error) #optional_allocator_error { + internal :: proc(val: json.Value) -> (ret: Value, err: mem.Allocator_Error) { + switch v in val { + case json.Null: return Nil{}, nil + case json.Integer: + i, major := _int_to_uint(v) + #partial switch major { + case .Unsigned: return i, nil + case .Negative: return Negative_U64(i), nil + case: unreachable() + } + case json.Float: return v, nil + case json.Boolean: return v, nil + case json.String: + container := new(Text) or_return + + // We need the string to have a nil byte at the end so we clone to cstring. 
+ container^ = string(strings.clone_to_cstring(v) or_return) + return container, nil + case json.Array: + arr := new(Array) or_return + arr^ = make([]Value, len(v)) or_return + for _, i in arr { + arr[i] = internal(v[i]) or_return + } + return arr, nil + case json.Object: + m := new(Map) or_return + dm := make([dynamic]Map_Entry, 0, len(v)) or_return + for mkey, mval in v { + append(&dm, Map_Entry{from_json(mkey) or_return, from_json(mval) or_return}) + } + m^ = dm[:] + return m, nil + } + return nil, nil + } + + context.allocator = allocator + return internal(val) +} + +/* +Converts from CBOR to JSON. + +NOTE: overflow on integers or floats is not handled. + +Everything is copied to the given allocator, the passed in CBOR value can be `destroy`'ed after. + +If a CBOR map with non-string keys is encountered it is turned into an array of tuples. +*/ +to_json :: proc(val: Value, allocator := context.allocator) -> (json.Value, mem.Allocator_Error) #optional_allocator_error { + internal :: proc(val: Value) -> (ret: json.Value, err: mem.Allocator_Error) { + switch v in val { + case Simple: return json.Integer(v), nil + + case u8: return json.Integer(v), nil + case u16: return json.Integer(v), nil + case u32: return json.Integer(v), nil + case u64: return json.Integer(v), nil + + case Negative_U8: return json.Integer(negative_to_int(v)), nil + case Negative_U16: return json.Integer(negative_to_int(v)), nil + case Negative_U32: return json.Integer(negative_to_int(v)), nil + case Negative_U64: return json.Integer(negative_to_int(v)), nil + + case f16: return json.Float(v), nil + case f32: return json.Float(v), nil + case f64: return json.Float(v), nil + + case bool: return json.Boolean(v), nil + + case Undefined: return json.Null{}, nil + case Nil: return json.Null{}, nil + + case ^Bytes: return json.String(strings.clone(string(v^)) or_return), nil + case ^Text: return json.String(strings.clone(v^) or_return), nil + + case ^Map: + keys_all_strings :: proc(m: ^Map) -> bool 
{ + for entry in m { + #partial switch kv in entry.key { + case ^Bytes: + case ^Text: + case: return false + } + } + return true // Every key was ^Bytes or ^Text, so the map can become a JSON object. + } + + if keys_all_strings(v) { + obj := make(json.Object, len(v)) or_return + for entry in v { + k: string // Cloned so the object owns its keys and the CBOR value can be destroyed afterwards. + #partial switch kv in entry.key { + case ^Bytes: k = strings.clone(string(kv^)) or_return + case ^Text: k = strings.clone(kv^) or_return + case: unreachable() + } + + v := internal(entry.value) or_return + obj[k] = v + } + return obj, nil + } else { + // Resort to an array of tuples if keys aren't all strings. + arr := make(json.Array, 0, len(v)) or_return + for entry in v { + entry_arr := make(json.Array, 0, 2) or_return + append(&entry_arr, internal(entry.key) or_return) or_return + append(&entry_arr, internal(entry.value) or_return) or_return + append(&arr, entry_arr) or_return + } + return arr, nil + } + + case ^Array: + arr := make(json.Array, 0, len(v)) or_return + for entry in v { + append(&arr, internal(entry) or_return) or_return + } + return arr, nil + + case ^Tag: + obj := make(json.Object, 2) or_return + obj[strings.clone("number") or_return] = internal(v.number) or_return + obj[strings.clone("value") or_return] = internal(v.value) or_return + return obj, nil + + case: return json.Null{}, nil + } + } + + context.allocator = allocator + return internal(val) +} + +_int_to_uint :: proc { + _i8_to_uint, + _i16_to_uint, + _i32_to_uint, + _i64_to_uint, + _i128_to_uint, +} + +_u128_to_u64 :: #force_inline proc(v: u128) -> (u64, Encode_Data_Error) { + if v > u128(max(u64)) { + return 0, .Int_Too_Big + } + + return u64(v), nil +} + +_i8_to_uint :: #force_inline proc(v: i8) -> (u: u8, m: Major) { + if v < 0 { + return u8(abs(v)-1), .Negative + } + + return u8(v), .Unsigned +} + +_i16_to_uint :: #force_inline proc(v: i16) -> (u: u16, m: Major) { + if v < 0 { + return u16(abs(v)-1), .Negative + } + + return u16(v), .Unsigned +} + +_i32_to_uint :: #force_inline proc(v: i32) -> (u: u32, m: Major) { + if v < 0 { + return u32(abs(v)-1), .Negative + } + + return u32(v), .Unsigned +} 
+ +_i64_to_uint :: #force_inline proc(v: i64) -> (u: u64, m: Major) { + if v < 0 { + return u64(abs(v)-1), .Negative + } + + return u64(v), .Unsigned +} + +_i128_to_uint :: proc(v: i128) -> (u: u64, m: Major, err: Encode_Data_Error) { + if v < 0 { + m = .Negative + u, err = _u128_to_u64(u128(abs(v) - 1)) + return + } + + m = .Unsigned + u, err = _u128_to_u64(u128(v)) + return +} + +@(private) +is_bit_set_different_endian_to_platform :: proc(ti: ^runtime.Type_Info) -> bool { + if ti == nil { + return false + } + t := runtime.type_info_base(ti) + #partial switch info in t.variant { + case runtime.Type_Info_Integer: + switch info.endianness { + case .Platform: return false + case .Little: return ODIN_ENDIAN != .Little + case .Big: return ODIN_ENDIAN != .Big + } + } + return false +} + diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin new file mode 100644 index 000000000..5c14d8f87 --- /dev/null +++ b/core/encoding/cbor/coding.odin @@ -0,0 +1,825 @@ +package cbor + +import "core:bytes" +import "core:encoding/endian" +import "core:intrinsics" +import "core:io" +import "core:slice" +import "core:strings" + +Encoder_Flag :: enum { + // CBOR defines a tag header that also acts as a file/binary header, + // this way decoders can check the first header of the binary and see if it is CBOR. + Self_Described_CBOR, + + // Integers are stored in the smallest integer type it fits. + // This involves checking each int against the max of all its smaller types. + Deterministic_Int_Size, + + // Floats are stored in the smallest size float type without losing precision. + // This involves casting each float down to its smaller types and checking if it changed. + Deterministic_Float_Size, + + // Sort maps by their keys in bytewise lexicographic order of their deterministic encoding. 
+ // NOTE: In order to do this, all keys of a map have to be pre-computed, sorted, and + // then written, this involves temporary allocations for the keys and a copy of the map itself. + Deterministic_Map_Sorting, + + // Internal flag to do initialization. + _In_Progress, +} + +Encoder_Flags :: bit_set[Encoder_Flag] + +// Flags for fully deterministic output (if you are not using streaming/indeterminate length). +ENCODE_FULLY_DETERMINISTIC :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size, .Deterministic_Map_Sorting} +// Flags for the smallest encoding output. +ENCODE_SMALL :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size} +// Flags for the fastest encoding output. +ENCODE_FAST :: Encoder_Flags{} + +Encoder :: struct { + flags: Encoder_Flags, + writer: io.Writer, +} + +/* +Decodes both deterministic and non-deterministic CBOR into a `Value` variant. + +`Text` and `Bytes` can safely be cast to cstrings because of an added 0 byte. + +Allocations are done using the given allocator, +*no* allocations are done on the `context.temp_allocator`. + +A value can be (fully and recursively) deallocated using the `destroy` proc in this package. +*/ +decode :: proc { + decode_string, + decode_reader, +} + +// Decodes the given string as CBOR. +// See docs on the proc group `decode` for more information. +decode_string :: proc(s: string, allocator := context.allocator) -> (v: Value, err: Decode_Error) { + context.allocator = allocator + + r: strings.Reader + strings.reader_init(&r, s) + return decode(strings.reader_to_stream(&r), allocator=allocator) +} + +// Reads a CBOR value from the given reader. +// See docs on the proc group `decode` for more information. 
+decode_reader :: proc(r: io.Reader, hdr: Header = Header(0), allocator := context.allocator) -> (v: Value, err: Decode_Error) { + context.allocator = allocator + + hdr := hdr + if hdr == Header(0) { hdr = _decode_header(r) or_return } + switch hdr { + case .U8: return _decode_u8 (r) + case .U16: return _decode_u16(r) + case .U32: return _decode_u32(r) + case .U64: return _decode_u64(r) + + case .Neg_U8: return Negative_U8 (_decode_u8 (r) or_return), nil + case .Neg_U16: return Negative_U16(_decode_u16(r) or_return), nil + case .Neg_U32: return Negative_U32(_decode_u32(r) or_return), nil + case .Neg_U64: return Negative_U64(_decode_u64(r) or_return), nil + + case .Simple: return _decode_simple(r) + + case .F16: return _decode_f16(r) + case .F32: return _decode_f32(r) + case .F64: return _decode_f64(r) + + case .True: return true, nil + case .False: return false, nil + + case .Nil: return Nil{}, nil + case .Undefined: return Undefined{}, nil + + case .Break: return nil, .Break + } + + maj, add := _header_split(hdr) + switch maj { + case .Unsigned: return _decode_tiny_u8(add) + case .Negative: return Negative_U8(_decode_tiny_u8(add) or_return), nil + case .Bytes: return _decode_bytes_ptr(r, add) + case .Text: return _decode_text_ptr(r, add) + case .Array: return _decode_array_ptr(r, add) + case .Map: return _decode_map_ptr(r, add) + case .Tag: return _decode_tag_ptr(r, add) + case .Other: return _decode_tiny_simple(add) + case: return nil, .Bad_Major + } +} + +/* +Encodes the CBOR value into a binary CBOR. + +Flags can be used to control the output (mainly determinism, which coincidently affects size). + +The default flags `ENCODE_SMALL` (`.Deterministic_Int_Size`, `.Deterministic_Float_Size`) will try +to put ints and floats into their smallest possible byte size without losing equality. + +Adding the `.Self_Described_CBOR` flag will wrap the value in a tag that lets generic decoders know +the contents are CBOR from just reading the first byte. 
+ +Adding the `.Deterministic_Map_Sorting` flag will sort the encoded maps by the byte content of the +encoded key. This flag has a cost on performance and memory efficiency because all keys in a map +have to be precomputed, sorted and only then written to the output. + +Empty flags will do nothing extra to the value. + +The allocations for the `.Deterministic_Map_Sorting` flag are done using the `context.temp_allocator` +but are followed by the necessary `delete` and `free` calls if the allocator supports them. +This is helpful when the CBOR size is so big that you don't want to collect all the temporary +allocations until the end. +*/ +encode_into :: proc { + encode_into_bytes, + encode_into_builder, + encode_into_writer, + encode_into_encoder, +} +encode :: encode_into + +// Encodes the CBOR value into binary CBOR allocated on the given allocator. +// See the docs on the proc group `encode_into` for more info. +encode_into_bytes :: proc(v: Value, flags := ENCODE_SMALL, allocator := context.allocator) -> (data: []byte, err: Encode_Error) { + b := strings.builder_make(allocator) or_return + encode_into_builder(&b, v, flags) or_return + return b.buf[:], nil +} + +// Encodes the CBOR value into binary CBOR written to the given builder. +// See the docs on the proc group `encode_into` for more info. +encode_into_builder :: proc(b: ^strings.Builder, v: Value, flags := ENCODE_SMALL) -> Encode_Error { + return encode_into_writer(strings.to_stream(b), v, flags) +} + +// Encodes the CBOR value into binary CBOR written to the given writer. +// See the docs on the proc group `encode_into` for more info. +encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL) -> Encode_Error { + return encode_into_encoder(Encoder{flags, w}, v) +} + +// Encodes the CBOR value into binary CBOR written to the given encoder. +// See the docs on the proc group `encode_into` for more info. 
+encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { + e := e + + outer: bool + defer if outer { + e.flags &~= {._In_Progress} + } + + if ._In_Progress not_in e.flags { + outer = true + e.flags |= {._In_Progress} + + if .Self_Described_CBOR in e.flags { + _encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag) or_return + } + } + + switch v_spec in v { + case u8: return _encode_u8(e.writer, v_spec, .Unsigned) + case u16: return _encode_u16(e, v_spec, .Unsigned) + case u32: return _encode_u32(e, v_spec, .Unsigned) + case u64: return _encode_u64(e, v_spec, .Unsigned) + case Negative_U8: return _encode_u8(e.writer, u8(v_spec), .Negative) + case Negative_U16: return _encode_u16(e, u16(v_spec), .Negative) + case Negative_U32: return _encode_u32(e, u32(v_spec), .Negative) + case Negative_U64: return _encode_u64(e, u64(v_spec), .Negative) + case ^Bytes: return _encode_bytes(e, v_spec^) + case ^Text: return _encode_text(e, v_spec^) + case ^Array: return _encode_array(e, v_spec^) + case ^Map: return _encode_map(e, v_spec^) + case ^Tag: return _encode_tag(e, v_spec^) + case Simple: return _encode_simple(e.writer, v_spec) + case f16: return _encode_f16(e.writer, v_spec) + case f32: return _encode_f32(e, v_spec) + case f64: return _encode_f64(e, v_spec) + case bool: return _encode_bool(e.writer, v_spec) + case Nil: return _encode_nil(e.writer) + case Undefined: return _encode_undefined(e.writer) + case: return nil + } +} + +_decode_header :: proc(r: io.Reader) -> (hdr: Header, err: io.Error) { + buf: [1]byte + io.read_full(r, buf[:]) or_return + return Header(buf[0]), nil +} + +_header_split :: proc(hdr: Header) -> (Major, Add) { + return Major(u8(hdr) >> 5), Add(u8(hdr) & 0x1f) +} + +_decode_u8 :: proc(r: io.Reader) -> (v: u8, err: io.Error) { + byte: [1]byte + io.read_full(r, byte[:]) or_return + return byte[0], nil +} + +_encode_uint :: proc { + _encode_u8, + _encode_u16, + _encode_u32, + _encode_u64, +} + +_encode_u8 :: proc(w: io.Writer, v: u8, major: Major = 
.Unsigned) -> (err: io.Error) { + header := u8(major) << 5 + if v < u8(Add.One_Byte) { + header |= v + _, err = io.write_full(w, {header}) + return + } + + header |= u8(Add.One_Byte) + _, err = io.write_full(w, {header, v}) + return +} + +_decode_tiny_u8 :: proc(additional: Add) -> (u8, Decode_Data_Error) { + if intrinsics.expect(additional < .One_Byte, true) { + return u8(additional), nil + } + + return 0, .Bad_Argument +} + +_decode_u16 :: proc(r: io.Reader) -> (v: u16, err: io.Error) { + bytes: [2]byte + io.read_full(r, bytes[:]) or_return + return endian.unchecked_get_u16be(bytes[:]), nil +} + +_encode_u16 :: proc(e: Encoder, v: u16, major: Major = .Unsigned) -> Encode_Error { + if .Deterministic_Int_Size in e.flags { + return _encode_deterministic_uint(e.writer, v, major) + } + return _encode_u16_exact(e.writer, v, major) +} + +_encode_u16_exact :: proc(w: io.Writer, v: u16, major: Major = .Unsigned) -> (err: io.Error) { + bytes: [3]byte + bytes[0] = (u8(major) << 5) | u8(Add.Two_Bytes) + endian.unchecked_put_u16be(bytes[1:], v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_u32 :: proc(r: io.Reader) -> (v: u32, err: io.Error) { + bytes: [4]byte + io.read_full(r, bytes[:]) or_return + return endian.unchecked_get_u32be(bytes[:]), nil +} + +_encode_u32 :: proc(e: Encoder, v: u32, major: Major = .Unsigned) -> Encode_Error { + if .Deterministic_Int_Size in e.flags { + return _encode_deterministic_uint(e.writer, v, major) + } + return _encode_u32_exact(e.writer, v, major) +} + +_encode_u32_exact :: proc(w: io.Writer, v: u32, major: Major = .Unsigned) -> (err: io.Error) { + bytes: [5]byte + bytes[0] = (u8(major) << 5) | u8(Add.Four_Bytes) + endian.unchecked_put_u32be(bytes[1:], v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_u64 :: proc(r: io.Reader) -> (v: u64, err: io.Error) { + bytes: [8]byte + io.read_full(r, bytes[:]) or_return + return endian.unchecked_get_u64be(bytes[:]), nil +} + +_encode_u64 :: proc(e: Encoder, v: u64, major: 
Major = .Unsigned) -> Encode_Error { + if .Deterministic_Int_Size in e.flags { + return _encode_deterministic_uint(e.writer, v, major) + } + return _encode_u64_exact(e.writer, v, major) +} + +_encode_u64_exact :: proc(w: io.Writer, v: u64, major: Major = .Unsigned) -> (err: io.Error) { + bytes: [9]byte + bytes[0] = (u8(major) << 5) | u8(Add.Eight_Bytes) + endian.unchecked_put_u64be(bytes[1:], v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_bytes_ptr :: proc(r: io.Reader, add: Add, type: Major = .Bytes) -> (v: ^Bytes, err: Decode_Error) { + v = new(Bytes) or_return + defer if err != nil { free(v) } + + v^ = _decode_bytes(r, add, type) or_return + return +} + +_decode_bytes :: proc(r: io.Reader, add: Add, type: Major = .Bytes) -> (v: Bytes, err: Decode_Error) { + _n_items, length_is_unknown := _decode_container_length(r, add) or_return + + n_items := _n_items.? or_else INITIAL_STREAMED_BYTES_CAPACITY + + if length_is_unknown { + buf: strings.Builder + buf.buf = make([dynamic]byte, 0, n_items) or_return + defer if err != nil { strings.builder_destroy(&buf) } + + buf_stream := strings.to_stream(&buf) + + for { + header := _decode_header(r) or_return + maj, add := _header_split(header) + + #partial switch maj { + case type: + _n_items, length_is_unknown := _decode_container_length(r, add) or_return + if length_is_unknown { + return nil, .Nested_Indefinite_Length + } + n_items := i64(_n_items.?) + + copied := io.copy_n(buf_stream, r, n_items) or_return + assert(copied == n_items) + + case .Other: + if add != .Break { return nil, .Bad_Argument } + + v = buf.buf[:] + + // Write zero byte so this can be converted to cstring. + io.write_full(buf_stream, {0}) or_return + shrink(&buf.buf) // Ignoring error, this is not critical to succeed. + return + + case: + return nil, .Bad_Major + } + } + } else { + v = make([]byte, n_items + 1) or_return // Space for the bytes and a zero byte. 
+ defer if err != nil { delete(v) } + + io.read_full(r, v[:n_items]) or_return + + v = v[:n_items] // Take off zero byte. + return + } +} + +_encode_bytes :: proc(e: Encoder, val: Bytes, major: Major = .Bytes) -> (err: Encode_Error) { + assert(len(val) >= 0) + _encode_u64(e, u64(len(val)), major) or_return + _, err = io.write_full(e.writer, val[:]) + return +} + +_decode_text_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Text, err: Decode_Error) { + v = new(Text) or_return + defer if err != nil { free(v) } + + v^ = _decode_text(r, add) or_return + return +} + +_decode_text :: proc(r: io.Reader, add: Add) -> (v: Text, err: Decode_Error) { + return (Text)(_decode_bytes(r, add, .Text) or_return), nil +} + +_encode_text :: proc(e: Encoder, val: Text) -> Encode_Error { + return _encode_bytes(e, transmute([]byte)val, .Text) +} + +_decode_array_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Array, err: Decode_Error) { + v = new(Array) or_return + defer if err != nil { free(v) } + + v^ = _decode_array(r, add) or_return + return +} + +_decode_array :: proc(r: io.Reader, add: Add) -> (v: Array, err: Decode_Error) { + _n_items, length_is_unknown := _decode_container_length(r, add) or_return + n_items := _n_items.? 
or_else INITIAL_STREAMED_CONTAINER_CAPACITY + + array := make([dynamic]Value, 0, n_items) or_return + defer if err != nil { + for entry in array { destroy(entry) } + delete(array) + } + + for i := 0; length_is_unknown || i < n_items; i += 1 { + val, verr := decode(r) + if length_is_unknown && verr == .Break { + break + } else if verr != nil { + err = verr + return + } + + append(&array, val) or_return + } + + shrink(&array) + v = array[:] + return +} + +_encode_array :: proc(e: Encoder, arr: Array) -> Encode_Error { + assert(len(arr) >= 0) + _encode_u64(e, u64(len(arr)), .Array) + for val in arr { + encode(e, val) or_return + } + return nil +} + +_decode_map_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Map, err: Decode_Error) { + v = new(Map) or_return + defer if err != nil { free(v) } + + v^ = _decode_map(r, add) or_return + return +} + +_decode_map :: proc(r: io.Reader, add: Add) -> (v: Map, err: Decode_Error) { + _n_items, length_is_unknown := _decode_container_length(r, add) or_return + n_items := _n_items.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY + + items := make([dynamic]Map_Entry, 0, n_items) or_return + defer if err != nil { + for entry in items { + destroy(entry.key) + destroy(entry.value) + } + delete(items) + } + + for i := 0; length_is_unknown || i < n_items; i += 1 { + key, kerr := decode(r) + if length_is_unknown && kerr == .Break { + break + } else if kerr != nil { + return nil, kerr + } + + value := decode(r) or_return + + append(&items, Map_Entry{ + key = key, + value = value, + }) or_return + } + + shrink(&items) + v = items[:] + return +} + +_encode_map :: proc(e: Encoder, m: Map) -> (err: Encode_Error) { + assert(len(m) >= 0) + _encode_u64(e, u64(len(m)), .Map) or_return + + if .Deterministic_Map_Sorting not_in e.flags { + for entry in m { + encode(e, entry.key) or_return + encode(e, entry.value) or_return + } + return + } + + // Deterministic_Map_Sorting needs us to sort the entries by the byte contents of the + // encoded key. 
+ // + // This means we have to store and sort them before writing incurring extra (temporary) allocations. + + Map_Entry_With_Key :: struct { + encoded_key: []byte, + entry: Map_Entry, + } + + entries := make([]Map_Entry_With_Key, len(m), context.temp_allocator) or_return + defer delete(entries, context.temp_allocator) + + for &entry, i in entries { + entry.entry = m[i] + + buf := strings.builder_make(0, 8, context.temp_allocator) or_return + + ke := e + ke.writer = strings.to_stream(&buf) + + encode(ke, entry.entry.key) or_return + entry.encoded_key = buf.buf[:] + } + + // Sort lexicographic on the bytes of the key. + slice.sort_by_cmp(entries, proc(a, b: Map_Entry_With_Key) -> slice.Ordering { + return slice.Ordering(bytes.compare(a.encoded_key, b.encoded_key)) + }) + + for entry in entries { + io.write_full(e.writer, entry.encoded_key) or_return + delete(entry.encoded_key, context.temp_allocator) + + encode(e, entry.entry.value) or_return + } + + return nil +} + +_decode_tag_ptr :: proc(r: io.Reader, add: Add) -> (v: Value, err: Decode_Error) { + tag := _decode_tag(r, add) or_return + if t, ok := tag.?; ok { + defer if err != nil { destroy(t.value) } + tp := new(Tag) or_return + tp^ = t + return tp, nil + } + + // no error, no tag, this was the self described CBOR tag, skip it. + return decode(r) +} + +_decode_tag :: proc(r: io.Reader, add: Add) -> (v: Maybe(Tag), err: Decode_Error) { + num := _decode_tag_nr(r, add) or_return + + // CBOR can be wrapped in a tag that decoders can use to see/check if the binary data is CBOR. + // We can ignore it here. 
+ if num == TAG_SELF_DESCRIBED_CBOR { + return + } + + t := Tag{ + number = num, + value = decode(r) or_return, + } + + if nested, ok := t.value.(^Tag); ok { + destroy(nested) + return nil, .Nested_Tag + } + + return t, nil +} + +_decode_tag_nr :: proc(r: io.Reader, add: Add) -> (nr: Tag_Number, err: Decode_Error) { + #partial switch add { + case .One_Byte: return u64(_decode_u8(r) or_return), nil + case .Two_Bytes: return u64(_decode_u16(r) or_return), nil + case .Four_Bytes: return u64(_decode_u32(r) or_return), nil + case .Eight_Bytes: return u64(_decode_u64(r) or_return), nil + case: return u64(_decode_tiny_u8(add) or_return), nil + } +} + +_encode_tag :: proc(e: Encoder, val: Tag) -> Encode_Error { + _encode_u64(e, val.number, .Tag) or_return + return encode(e, val.value) +} + +_decode_simple :: proc(r: io.Reader) -> (v: Simple, err: io.Error) { + buf: [1]byte + io.read_full(r, buf[:]) or_return + return Simple(buf[0]), nil +} + +_encode_simple :: proc(w: io.Writer, v: Simple) -> (err: Encode_Error) { + header := u8(Major.Other) << 5 + + if v < Simple(Add.False) { + header |= u8(v) + _, err = io.write_full(w, {header}) + return + } else if v <= Simple(Add.Break) { + return .Invalid_Simple + } + + header |= u8(Add.One_Byte) + _, err = io.write_full(w, {header, u8(v)}) + return +} + +_decode_tiny_simple :: proc(add: Add) -> (Simple, Decode_Data_Error) { + if add < Add.False { + return Simple(add), nil + } + + return 0, .Bad_Argument +} + +_decode_f16 :: proc(r: io.Reader) -> (v: f16, err: io.Error) { + bytes: [2]byte + io.read_full(r, bytes[:]) or_return + n := endian.unchecked_get_u16be(bytes[:]) + return transmute(f16)n, nil +} + +_encode_f16 :: proc(w: io.Writer, v: f16) -> (err: io.Error) { + bytes: [3]byte + bytes[0] = u8(Header.F16) + endian.unchecked_put_u16be(bytes[1:], transmute(u16)v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_f32 :: proc(r: io.Reader) -> (v: f32, err: io.Error) { + bytes: [4]byte + io.read_full(r, bytes[:]) or_return 
+ n := endian.unchecked_get_u32be(bytes[:]) + return transmute(f32)n, nil +} + +_encode_f32 :: proc(e: Encoder, v: f32) -> io.Error { + if .Deterministic_Float_Size in e.flags { + return _encode_deterministic_float(e.writer, v) + } + return _encode_f32_exact(e.writer, v) +} + +_encode_f32_exact :: proc(w: io.Writer, v: f32) -> (err: io.Error) { + bytes: [5]byte + bytes[0] = u8(Header.F32) + endian.unchecked_put_u32be(bytes[1:], transmute(u32)v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_f64 :: proc(r: io.Reader) -> (v: f64, err: io.Error) { + bytes: [8]byte + io.read_full(r, bytes[:]) or_return + n := endian.unchecked_get_u64be(bytes[:]) + return transmute(f64)n, nil +} + +_encode_f64 :: proc(e: Encoder, v: f64) -> io.Error { + if .Deterministic_Float_Size in e.flags { + return _encode_deterministic_float(e.writer, v) + } + return _encode_f64_exact(e.writer, v) +} + +_encode_f64_exact :: proc(w: io.Writer, v: f64) -> (err: io.Error) { + bytes: [9]byte + bytes[0] = u8(Header.F64) + endian.unchecked_put_u64be(bytes[1:], transmute(u64)v) + _, err = io.write_full(w, bytes[:]) + return +} + +_encode_bool :: proc(w: io.Writer, v: bool) -> (err: io.Error) { + switch v { + case true: _, err = io.write_full(w, {u8(Header.True )}); return + case false: _, err = io.write_full(w, {u8(Header.False)}); return + case: unreachable() + } +} + +_encode_undefined :: proc(w: io.Writer) -> io.Error { + _, err := io.write_full(w, {u8(Header.Undefined)}) + return err +} + +_encode_nil :: proc(w: io.Writer) -> io.Error { + _, err := io.write_full(w, {u8(Header.Nil)}) + return err +} + +// Streaming + +encode_stream_begin :: proc(w: io.Writer, major: Major) -> (err: io.Error) { + assert(major >= Major(.Bytes) && major <= Major(.Map), "illegal stream type") + + header := (u8(major) << 5) | u8(Add.Length_Unknown) + _, err = io.write_full(w, {header}) + return +} + +encode_stream_end :: proc(w: io.Writer) -> io.Error { + header := (u8(Major.Other) << 5) | u8(Add.Break) + _, 
err := io.write_full(w, {header}) + return err +} + +encode_stream_bytes :: _encode_bytes +encode_stream_text :: _encode_text +encode_stream_array_item :: encode + +encode_stream_map_entry :: proc(e: Encoder, key: Value, val: Value) -> Encode_Error { + encode(e, key) or_return + return encode(e, val) +} + +// + +_decode_container_length :: proc(r: io.Reader, add: Add) -> (length: Maybe(int), is_unknown: bool, err: Decode_Error) { + if add == Add.Length_Unknown { return nil, true, nil } + #partial switch add { + case .One_Byte: length = int(_decode_u8(r) or_return) + case .Two_Bytes: length = int(_decode_u16(r) or_return) + case .Four_Bytes: + big_length := _decode_u32(r) or_return + if u64(big_length) > u64(max(int)) { + err = .Length_Too_Big + return + } + length = int(big_length) + case .Eight_Bytes: + big_length := _decode_u64(r) or_return + if big_length > u64(max(int)) { + err = .Length_Too_Big + return + } + length = int(big_length) + case: + length = int(_decode_tiny_u8(add) or_return) + } + return +} + +// Deterministic encoding is (among other things) encoding all values into their smallest +// possible representation. +// See section 4 of RFC 8949. 

// Overload set for deterministic unsigned integer encoding: the call is dispatched on the
// static type of the value. `u8` values are handed directly to `_encode_u8`; the wider types
// go through a helper that first tries every narrower width.
_encode_deterministic_uint :: proc {
	_encode_u8,
	_encode_deterministic_u16,
	_encode_deterministic_u32,
	_encode_deterministic_u64,
	_encode_deterministic_u128,
}

// Encodes `v` under `major`, using the one byte form when the value fits in a `u8` and the
// two byte form otherwise.
_encode_deterministic_u16 :: proc(w: io.Writer, v: u16, major: Major = .Unsigned) -> Encode_Error {
	if v <= u16(max(u8)) {
		return _encode_u8(w, u8(v), major)
	}
	return _encode_u16_exact(w, v, major)
}

// Encodes `v` under `major` in the narrowest of the u8, u16 or u32 forms that can hold it.
_encode_deterministic_u32 :: proc(w: io.Writer, v: u32, major: Major = .Unsigned) -> Encode_Error {
	if v <= u32(max(u8)) {
		return _encode_u8(w, u8(v), major)
	}
	if v <= u32(max(u16)) {
		return _encode_u16_exact(w, u16(v), major)
	}
	return _encode_u32_exact(w, v, major)
}

// Encodes `v` under `major` in the narrowest of the u8, u16, u32 or u64 forms that can hold it.
_encode_deterministic_u64 :: proc(w: io.Writer, v: u64, major: Major = .Unsigned) -> Encode_Error {
	if v <= u64(max(u8)) {
		return _encode_u8(w, u8(v), major)
	}
	if v <= u64(max(u16)) {
		return _encode_u16_exact(w, u16(v), major)
	}
	if v <= u64(max(u32)) {
		return _encode_u32_exact(w, u32(v), major)
	}
	return _encode_u64_exact(w, v, major)
}

// Same ladder as the u64 variant, for 128 bit input.
// Nothing is written and `.Int_Too_Big` is returned when `v` is larger than `max(u64)`.
_encode_deterministic_u128 :: proc(w: io.Writer, v: u128, major: Major = .Unsigned) -> Encode_Error {
	if v <= u128(max(u8)) {
		return _encode_u8(w, u8(v), major)
	}
	if v <= u128(max(u16)) {
		return _encode_u16_exact(w, u16(v), major)
	}
	if v <= u128(max(u32)) {
		return _encode_u32_exact(w, u32(v), major)
	}
	if v <= u128(max(u64)) {
		return _encode_u64_exact(w, u64(v), major)
	}
	return .Int_Too_Big
}

// Deterministic encoding for the `Negative_U*` types: forwards the value unchanged to the
// unsigned overload set, with the major type set to `.Negative`.
_encode_deterministic_negative :: #force_inline proc(w: io.Writer, v: $T) -> Encode_Error
	where T == Negative_U8 || T == Negative_U16 || T == Negative_U32 || T == Negative_U64 {
	return _encode_deterministic_uint(w, v, .Negative)
}

// A Deterministic float is a float in the smallest type that stays the same after down casting.
+_encode_deterministic_float :: proc { + _encode_f16, + _encode_deterministic_f32, + _encode_deterministic_f64, +} + +_encode_deterministic_f32 :: proc(w: io.Writer, v: f32) -> io.Error { + if (f32(f16(v)) == v) { + return _encode_f16(w, f16(v)) + } + + return _encode_f32_exact(w, v) +} + +_encode_deterministic_f64 :: proc(w: io.Writer, v: f64) -> io.Error { + if (f64(f16(v)) == v) { + return _encode_f16(w, f16(v)) + } + + if (f64(f32(v)) == v) { + return _encode_f32_exact(w, f32(v)) + } + + return _encode_f64_exact(w, v) +} diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin new file mode 100644 index 000000000..aab2defb2 --- /dev/null +++ b/core/encoding/cbor/marshal.odin @@ -0,0 +1,541 @@ +package cbor + +import "core:bytes" +import "core:intrinsics" +import "core:io" +import "core:mem" +import "core:reflect" +import "core:runtime" +import "core:slice" +import "core:strconv" +import "core:strings" +import "core:unicode/utf8" + +/* +Marshal a value into binary CBOR. + +Flags can be used to control the output (mainly determinism, which coincidently affects size). + +The default flags `ENCODE_SMALL` (`.Deterministic_Int_Size`, `.Deterministic_Float_Size`) will try +to put ints and floats into their smallest possible byte size without losing equality. + +Adding the `.Self_Described_CBOR` flag will wrap the value in a tag that lets generic decoders know +the contents are CBOR from just reading the first byte. + +Adding the `.Deterministic_Map_Sorting` flag will sort the encoded maps by the byte content of the +encoded key. This flag has a cost on performance and memory efficiency because all keys in a map +have to be precomputed, sorted and only then written to the output. + +Empty flags will do nothing extra to the value. + +The allocations for the `.Deterministic_Map_Sorting` flag are done using the `context.temp_allocator` +but are followed by the necessary `delete` and `free` calls if the allocator supports them. 
+This is helpful when the CBOR size is so big that you don't want to collect all the temporary +allocations until the end. +*/ +marshal_into :: proc { + marshal_into_bytes, + marshal_into_builder, + marshal_into_writer, + marshal_into_encoder, +} + +marshal :: marshal_into + +// Marshals the given value into a CBOR byte stream (allocated using the given allocator). +// See docs on the `marshal_into` proc group for more info. +marshal_into_bytes :: proc(v: any, flags := ENCODE_SMALL, allocator := context.allocator) -> (bytes: []byte, err: Marshal_Error) { + b, alloc_err := strings.builder_make(allocator) + // The builder as a stream also returns .EOF if it ran out of memory so this is consistent. + if alloc_err != nil { + return nil, .EOF + } + + defer if err != nil { strings.builder_destroy(&b) } + + if err = marshal_into_builder(&b, v, flags); err != nil { + return + } + + return b.buf[:], nil +} + +// Marshals the given value into a CBOR byte stream written to the given builder. +// See docs on the `marshal_into` proc group for more info. +marshal_into_builder :: proc(b: ^strings.Builder, v: any, flags := ENCODE_SMALL) -> Marshal_Error { + return marshal_into_writer(strings.to_writer(b), v, flags) +} + +// Marshals the given value into a CBOR byte stream written to the given writer. +// See docs on the `marshal_into` proc group for more info. +marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL) -> Marshal_Error { + encoder := Encoder{flags, w} + return marshal_into_encoder(encoder, v) +} + +// Marshals the given value into a CBOR byte stream written to the given encoder. +// See docs on the `marshal_into` proc group for more info. +marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { + e := e + + init: bool + defer if init { + e.flags &~= {._In_Progress} + } + + // If not in progress we do initialization and set in progress. 
+ if ._In_Progress not_in e.flags { + init = true + e.flags |= {._In_Progress} + + if .Self_Described_CBOR in e.flags { + err_conv(_encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag)) or_return + } + } + + if v == nil { + return _encode_nil(e.writer) + } + + // Check if type has a tag implementation to use. + if impl, ok := _tag_implementations_type[v.id]; ok { + return impl->marshal(e, v) + } + + ti := runtime.type_info_base(type_info_of(v.id)) + a := any{v.data, ti.id} + + #partial switch info in ti.variant { + case runtime.Type_Info_Named: + unreachable() + + case runtime.Type_Info_Pointer: + switch vv in v { + case Undefined: return _encode_undefined(e.writer) + case Nil: return _encode_nil(e.writer) + } + + case runtime.Type_Info_Integer: + switch vv in v { + case Simple: return err_conv(_encode_simple(e.writer, vv)) + case Negative_U8: return _encode_u8(e.writer, u8(vv), .Negative) + case Negative_U16: return err_conv(_encode_u16(e, u16(vv), .Negative)) + case Negative_U32: return err_conv(_encode_u32(e, u32(vv), .Negative)) + case Negative_U64: return err_conv(_encode_u64(e, u64(vv), .Negative)) + } + + switch i in a { + case i8: return _encode_uint(e.writer, _int_to_uint(i)) + case i16: return err_conv(_encode_uint(e, _int_to_uint(i))) + case i32: return err_conv(_encode_uint(e, _int_to_uint(i))) + case i64: return err_conv(_encode_uint(e, _int_to_uint(i))) + case i128: return err_conv(_encode_uint(e, _int_to_uint(i128(i)) or_return)) + case int: return err_conv(_encode_uint(e, _int_to_uint(i64(i)))) + + case u8: return _encode_uint(e.writer, i) + case u16: return err_conv(_encode_uint(e, i)) + case u32: return err_conv(_encode_uint(e, i)) + case u64: return err_conv(_encode_uint(e, i)) + case u128: return err_conv(_encode_uint(e, _u128_to_u64(u128(i)) or_return)) + case uint: return err_conv(_encode_uint(e, u64(i))) + case uintptr: return err_conv(_encode_uint(e, u64(i))) + + case i16le: return err_conv(_encode_uint(e, _int_to_uint(i16(i)))) + case i32le: return 
err_conv(_encode_uint(e, _int_to_uint(i32(i)))) + case i64le: return err_conv(_encode_uint(e, _int_to_uint(i64(i)))) + case i128le: return err_conv(_encode_uint(e, _int_to_uint(i128(i)) or_return)) + + case u16le: return err_conv(_encode_uint(e, u16(i))) + case u32le: return err_conv(_encode_uint(e, u32(i))) + case u64le: return err_conv(_encode_uint(e, u64(i))) + case u128le: return err_conv(_encode_uint(e, _u128_to_u64(u128(i)) or_return)) + + case i16be: return err_conv(_encode_uint(e, _int_to_uint(i16(i)))) + case i32be: return err_conv(_encode_uint(e, _int_to_uint(i32(i)))) + case i64be: return err_conv(_encode_uint(e, _int_to_uint(i64(i)))) + case i128be: return err_conv(_encode_uint(e, _int_to_uint(i128(i)) or_return)) + + case u16be: return err_conv(_encode_uint(e, u16(i))) + case u32be: return err_conv(_encode_uint(e, u32(i))) + case u64be: return err_conv(_encode_uint(e, u64(i))) + case u128be: return err_conv(_encode_uint(e, _u128_to_u64(u128(i)) or_return)) + } + + case runtime.Type_Info_Rune: + buf, w := utf8.encode_rune(a.(rune)) + return err_conv(_encode_text(e, string(buf[:w]))) + + case runtime.Type_Info_Float: + switch f in a { + case f16: return _encode_f16(e.writer, f) + case f32: return _encode_f32(e, f) + case f64: return _encode_f64(e, f) + + case f16le: return _encode_f16(e.writer, f16(f)) + case f32le: return _encode_f32(e, f32(f)) + case f64le: return _encode_f64(e, f64(f)) + + case f16be: return _encode_f16(e.writer, f16(f)) + case f32be: return _encode_f32(e, f32(f)) + case f64be: return _encode_f64(e, f64(f)) + } + + case runtime.Type_Info_Complex: + switch z in a { + case complex32: + arr: [2]Value = {real(z), imag(z)} + return err_conv(_encode_array(e, arr[:])) + case complex64: + arr: [2]Value = {real(z), imag(z)} + return err_conv(_encode_array(e, arr[:])) + case complex128: + arr: [2]Value = {real(z), imag(z)} + return err_conv(_encode_array(e, arr[:])) + } + + case runtime.Type_Info_Quaternion: + switch q in a { + case 
quaternion64: + arr: [4]Value = {imag(q), jmag(q), kmag(q), real(q)} + return err_conv(_encode_array(e, arr[:])) + case quaternion128: + arr: [4]Value = {imag(q), jmag(q), kmag(q), real(q)} + return err_conv(_encode_array(e, arr[:])) + case quaternion256: + arr: [4]Value = {imag(q), jmag(q), kmag(q), real(q)} + return err_conv(_encode_array(e, arr[:])) + } + + case runtime.Type_Info_String: + switch s in a { + case string: return err_conv(_encode_text(e, s)) + case cstring: return err_conv(_encode_text(e, string(s))) + } + + case runtime.Type_Info_Boolean: + val: bool + switch b in a { + case bool: return _encode_bool(e.writer, b) + case b8: return _encode_bool(e.writer, bool(b)) + case b16: return _encode_bool(e.writer, bool(b)) + case b32: return _encode_bool(e.writer, bool(b)) + case b64: return _encode_bool(e.writer, bool(b)) + } + + case runtime.Type_Info_Array: + if info.elem.id == byte { + raw := ([^]byte)(v.data) + return err_conv(_encode_bytes(e, raw[:info.count])) + } + + err_conv(_encode_u64(e, u64(info.count), .Array)) or_return + for i in 0.. (res: [10]byte) { + e := e + builder := strings.builder_from_slice(res[:]) + e.writer = strings.to_stream(&builder) + + assert(_encode_u64(e, u64(len(str)), .Text) == nil) + res[9] = u8(len(builder.buf)) + assert(res[9] < 10) + return + } + + Encoded_Entry_Fast :: struct($T: typeid) { + pre_key: [10]byte, + key: T, + val_idx: uintptr, + } + + Encoded_Entry :: struct { + key: ^[dynamic]byte, + val_idx: uintptr, + } + + switch info.key.id { + case string: + entries := make([dynamic]Encoded_Entry_Fast(^[]byte), 0, map_cap, context.temp_allocator) or_return + defer delete(entries) + + for bucket_index in 0.. 
slice.Ordering { + a, b := a, b + pre_cmp := slice.Ordering(bytes.compare(a.pre_key[:a.pre_key[9]], b.pre_key[:b.pre_key[9]])) + if pre_cmp != .Equal { + return pre_cmp + } + + return slice.Ordering(bytes.compare(a.key^, b.key^)) + }) + + for &entry in entries { + io.write_full(e.writer, entry.pre_key[:entry.pre_key[9]]) or_return + io.write_full(e.writer, entry.key^) or_return + + value := rawptr(runtime.map_cell_index_dynamic(vs, info.map_info.vs, entry.val_idx)) + marshal_into(e, any{ value, info.value.id }) or_return + } + return + + case cstring: + entries := make([dynamic]Encoded_Entry_Fast(^cstring), 0, map_cap, context.temp_allocator) or_return + defer delete(entries) + + for bucket_index in 0.. slice.Ordering { + a, b := a, b + pre_cmp := slice.Ordering(bytes.compare(a.pre_key[:a.pre_key[9]], b.pre_key[:b.pre_key[9]])) + if pre_cmp != .Equal { + return pre_cmp + } + + ab := transmute([]byte)string(a.key^) + bb := transmute([]byte)string(b.key^) + return slice.Ordering(bytes.compare(ab, bb)) + }) + + for &entry in entries { + io.write_full(e.writer, entry.pre_key[:entry.pre_key[9]]) or_return + io.write_full(e.writer, transmute([]byte)string(entry.key^)) or_return + + value := rawptr(runtime.map_cell_index_dynamic(vs, info.map_info.vs, entry.val_idx)) + marshal_into(e, any{ value, info.value.id }) or_return + } + return + + case: + entries := make([dynamic]Encoded_Entry, 0, map_cap, context.temp_allocator) or_return + defer delete(entries) + + for bucket_index in 0.. 
slice.Ordering { + return slice.Ordering(bytes.compare(a.key[:], b.key[:])) + }) + + for entry in entries { + io.write_full(e.writer, entry.key[:]) or_return + delete(entry.key^) + + value := rawptr(runtime.map_cell_index_dynamic(vs, info.map_info.vs, entry.val_idx)) + marshal_into(e, any{ value, info.value.id }) or_return + } + return + } + } + + case runtime.Type_Info_Struct: + switch vv in v { + case Tag: return err_conv(_encode_tag(e, vv)) + } + + err_conv(_encode_u16(e, u16(len(info.names)), .Map)) or_return + + marshal_entry :: #force_inline proc(e: Encoder, info: runtime.Type_Info_Struct, v: any, name: string, i: int) -> Marshal_Error { + err_conv(_encode_text(e, name)) or_return + + id := info.types[i].id + data := rawptr(uintptr(v.data) + info.offsets[i]) + field_any := any{data, id} + + if tag := string(reflect.struct_tag_get(reflect.Struct_Tag(info.tags[i]), "cbor_tag")); tag != "" { + if impl, ok := _tag_implementations_id[tag]; ok { + return impl->marshal(e, field_any) + } + + nr, ok := strconv.parse_u64_of_base(tag, 10) + if !ok { return .Invalid_CBOR_Tag } + + if impl, nok := _tag_implementations_nr[nr]; nok { + return impl->marshal(e, field_any) + } + + err_conv(_encode_u64(e, nr, .Tag)) or_return + } + + return marshal_into(e, field_any) + } + + field_name :: #force_inline proc(info: runtime.Type_Info_Struct, i: int) -> string { + if cbor_name := string(reflect.struct_tag_get(reflect.Struct_Tag(info.tags[i]), "cbor")); cbor_name != "" { + return cbor_name + } else { + return info.names[i] + } + } + + if .Deterministic_Map_Sorting in e.flags { + Name :: struct { + name: string, + field: int, + } + entries := make([dynamic]Name, 0, len(info.names), context.temp_allocator) or_return + defer delete(entries) + + for name, i in info.names { + append(&entries, Name{field_name(info, i), i}) or_return + } + + // Sort lexicographic on the bytes of the key. 
+ slice.sort_by_cmp(entries[:], proc(a, b: Name) -> slice.Ordering { + return slice.Ordering(bytes.compare(transmute([]byte)a.name, transmute([]byte)b.name)) + }) + + for entry in entries { + marshal_entry(e, info, v, entry.name, entry.field) or_return + } + } else { + for name, i in info.names { + marshal_entry(e, info, v, field_name(info, i), i) or_return + } + } + return + + case runtime.Type_Info_Union: + switch vv in v { + case Value: return err_conv(encode(e, vv)) + } + + tag := reflect.get_union_variant_raw_tag(v) + if v.data == nil || tag <= 0 { + return _encode_nil(e.writer) + } + id := info.variants[tag-1].id + return marshal_into(e, any{v.data, id}) + + case runtime.Type_Info_Enum: + return marshal_into(e, any{v.data, info.base.id}) + + case runtime.Type_Info_Bit_Set: + do_byte_swap := is_bit_set_different_endian_to_platform(info.underlying) + switch ti.size * 8 { + case 0: + return _encode_u8(e.writer, 0) + case 8: + x := (^u8)(v.data)^ + return _encode_u8(e.writer, x) + case 16: + x := (^u16)(v.data)^ + if do_byte_swap { x = intrinsics.byte_swap(x) } + return err_conv(_encode_u16(e, x)) + case 32: + x := (^u32)(v.data)^ + if do_byte_swap { x = intrinsics.byte_swap(x) } + return err_conv(_encode_u32(e, x)) + case 64: + x := (^u64)(v.data)^ + if do_byte_swap { x = intrinsics.byte_swap(x) } + return err_conv(_encode_u64(e, x)) + case: + panic("unknown bit_size size") + } + } + + return _unsupported(v.id, nil) +} diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin new file mode 100644 index 000000000..54bc7dd15 --- /dev/null +++ b/core/encoding/cbor/tags.odin @@ -0,0 +1,361 @@ +package cbor + +import "core:encoding/base64" +import "core:io" +import "core:math" +import "core:math/big" +import "core:mem" +import "core:reflect" +import "core:runtime" +import "core:strings" +import "core:time" + +// Tags defined in RFC 7049 that we provide implementations for. 

// UTC time in seconds, unmarshalled into a `core:time` `time.Time` or integer.
// The `_NR` constant is the CBOR tag number, the `_ID` constant is the name the implementation
// is registered under so it can be used in a `cbor_tag` struct tag.
TAG_EPOCH_TIME_NR :: 1
TAG_EPOCH_TIME_ID :: "epoch"

// Using `core:math/big`, big integers are properly encoded and decoded during marshal and unmarshal.
// Tag 2 is written for non-negative values.
TAG_UNSIGNED_BIG_NR :: 2
// Using `core:math/big`, big integers are properly encoded and decoded during marshal and unmarshal.
// Tag 3 is written for negative values; on unmarshal it sets the sign of the result to negative.
TAG_NEGATIVE_BIG_NR :: 3

// TAG_DECIMAL_FRACTION :: 4 // NOTE: We could probably implement this with `math/fixed`.

// Sometimes it is beneficial to carry an embedded CBOR data item that is not meant to be decoded
// immediately at the time the enclosing data item is being decoded. Tag number 24 (CBOR data item)
// can be used to tag the embedded byte string as a single data item encoded in CBOR format.
TAG_CBOR_NR :: 24
TAG_CBOR_ID :: "cbor"

// The contents of this tag are base64 encoded during marshal and decoded during unmarshal.
TAG_BASE64_NR :: 34
TAG_BASE64_ID :: "base64"

// A tag that is used to detect the contents of a binary buffer (like a file) are CBOR.
// This tag would wrap everything else, decoders can then check for this header and see if the
// given content is definitely CBOR.
TAG_SELF_DESCRIBED_CBOR :: 55799

// A tag implementation that handles marshals and unmarshals for the tag it is registered on.
//
// The procedures are invoked as `impl->marshal(...)`, so the implementation itself arrives as
// their `self` argument.
Tag_Implementation :: struct {
	// Opaque pointer stored with the implementation. The implementations registered by this
	// package leave it nil; presumably meant for state of custom implementations (TODO confirm).
	data:      rawptr,
	// Called to read the tagged value out of a reader.
	unmarshal: Tag_Unmarshal_Proc,
	// Called to write a value, tag header included, into an encoder.
	marshal:   Tag_Marshal_Proc,
}

// Procedure responsible for unmarshalling the tag out of the reader into the given `any`.
// `tag_nr` is the tag number being handled, which lets one implementation serve several numbers
// (the big integer implementation uses it to tell tag 2 from tag 3).
Tag_Unmarshal_Proc :: #type proc(self: ^Tag_Implementation, r: io.Reader, tag_nr: Tag_Number, v: any) -> Unmarshal_Error

// Procedure responsible for marshalling the tag in the given `any` into the given encoder.
Tag_Marshal_Proc :: #type proc(self: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error

// When encountering a tag in the CBOR being unmarshalled, the implementation is used to unmarshal it.
// When encountering a struct tag like `cbor_tag:"Tag_Number"`, the implementation is used to marshal it.
_tag_implementations_nr: map[Tag_Number]Tag_Implementation

// Same as the number implementations but friendlier to use as a struct tag.
// Instead of `cbor_tag:"34"` you can use `cbor_tag:"base64"`.
_tag_implementations_id: map[string]Tag_Implementation

// Tag implementations that are always used by a type, if that type is encountered in marshal it
// will rely on the implementation to marshal it.
//
// This is good for types that don't make sense or can't marshal in its default form.
_tag_implementations_type: map[typeid]Tag_Implementation

// Register a custom tag implementation to be used when marshalling that type and unmarshalling that tag number.
//
// Stores `impl` in `_tag_implementations_nr` under `nr` and in `_tag_implementations_type` under
// `type`. An entry already present for either key is replaced.
tag_register_type :: proc(impl: Tag_Implementation, nr: Tag_Number, type: typeid) {
	_tag_implementations_nr[nr] = impl
	_tag_implementations_type[type] = impl
}

// Register a custom tag implementation to be used when unmarshalling that tag number or marshalling
// a field with the struct tag `cbor_tag:"nr"` or `cbor_tag:"id"`.
//
// Stores `impl` in `_tag_implementations_nr` under `nr` and in `_tag_implementations_id` under
// `id`. An entry already present for either key is replaced.
tag_register_number :: proc(impl: Tag_Implementation, nr: Tag_Number, id: string) {
	_tag_implementations_nr[nr] = impl
	_tag_implementations_id[id] = impl
}

// Controls initialization of default tag implementations.
// JS and WASI default to a panic allocator so we don't want to do it on those.
// Can be overridden with the `CBOR_INITIALIZE_DEFAULT_TAGS` config value.
INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, ODIN_OS != .JS && ODIN_OS != .WASI)

// Registers the default tag implementations through the `init` attribute.
// The procedure is disabled when `INITIALIZE_DEFAULT_TAGS` is false.
@(private, init, disabled=!INITIALIZE_DEFAULT_TAGS)
tags_initialize_defaults :: proc() {
	tags_register_defaults()
}

// Registers tags that have implementations provided by this package.
// This is done by default and can be controlled with the `CBOR_INITIALIZE_DEFAULT_TAGS` define.
tags_register_defaults :: proc() {
	// Implementations that a struct field has to opt in to with a `cbor_tag` struct tag.

	// NOTE: Not registering this the other way around, user can opt-in using the `cbor_tag:"1"` struct
	// tag instead, it would lose precision and marshalling the `time.Time` struct normally is valid.
	epoch_impl := Tag_Implementation{
		data      = nil,
		unmarshal = tag_time_unmarshal,
		marshal   = tag_time_marshal,
	}
	tag_register_number(epoch_impl, TAG_EPOCH_TIME_NR, TAG_EPOCH_TIME_ID)

	// Use the struct tag `cbor_tag:"34"` to have your field encoded in a base64.
	base64_impl := Tag_Implementation{
		data      = nil,
		unmarshal = tag_base64_unmarshal,
		marshal   = tag_base64_marshal,
	}
	tag_register_number(base64_impl, TAG_BASE64_NR, TAG_BASE64_ID)

	// Use the struct tag `cbor_tag:"24"` to keep a non-decoded field of raw CBOR.
	raw_cbor_impl := Tag_Implementation{
		data      = nil,
		unmarshal = tag_cbor_unmarshal,
		marshal   = tag_cbor_marshal,
	}
	tag_register_number(raw_cbor_impl, TAG_CBOR_NR, TAG_CBOR_ID)

	// These following tags are registered at the type level and don't require an opt-in struct tag.
	// Encoding these types on its own make no sense or no data is lost to encode it.

	// One implementation serves both big integer tag numbers; both registrations attach it to
	// the `big.Int` type.
	big_impl := Tag_Implementation{
		data      = nil,
		unmarshal = tag_big_unmarshal,
		marshal   = tag_big_marshal,
	}
	tag_register_type(big_impl, TAG_UNSIGNED_BIG_NR, big.Int)
	tag_register_type(big_impl, TAG_NEGATIVE_BIG_NR, big.Int)
}

// Tag number 1 contains a numerical value counting the number of seconds from 1970-01-01T00:00Z
// in UTC time to the represented point in civil time.
//
// See RFC 8949 section 3.4.2.
+@(private)
+tag_time_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> (err: Unmarshal_Error) {
+	hdr := _decode_header(r) or_return
+	#partial switch hdr {
+	case .U8, .U16, .U32, .U64, .Neg_U8, .Neg_U16, .Neg_U32, .Neg_U64:
+		switch &dst in v {
+		case time.Time:
+			i: i64
+			_unmarshal_any_ptr(r, &i, hdr) or_return
+			dst = time.unix(i64(i), 0)
+			return
+		case:
+			return _unmarshal_value(r, v, hdr)
+		}
+
+	case .F16, .F32, .F64:
+		switch &dst in v {
+		case time.Time:
+			f: f64
+			_unmarshal_any_ptr(r, &f, hdr) or_return
+			whole, fract := math.modf(f)
+			dst = time.unix(i64(whole), i64(fract * 1e9))
+			return
+		case:
+			return _unmarshal_value(r, v, hdr)
+		}
+
+	case:
+		// Integers small enough to be stored in the header itself are not matched by the cases
+		// above; they are recognised by their major type and decoded the same way as the sized
+		// integers. Simple values (major `.Other`) are not numbers and stay rejected.
+		maj, _ := _header_split(hdr)
+		#partial switch maj {
+		case .Unsigned, .Negative:
+			switch &dst in v {
+			case time.Time:
+				i: i64
+				_unmarshal_any_ptr(r, &i, hdr) or_return
+				dst = time.unix(i, 0)
+				return
+			case:
+				return _unmarshal_value(r, v, hdr)
+			}
+		}
+
+		// Only numbers and floats are allowed in this tag.
+		return .Bad_Tag_Value
+	}
+
+	return _unsupported(v, hdr)
+}
+
+// Marshals a `time.Time` as tag 1 followed by its whole unix seconds.
+@(private)
+tag_time_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error {
+	switch vv in v {
+	case time.Time:
+		// NOTE: we lose precision here, which is one of the reasons for this tag being opt-in.
+		i := time.time_to_unix(vv)
+
+		_encode_u8(e.writer, TAG_EPOCH_TIME_NR, .Tag) or_return
+		return err_conv(_encode_uint(e, _int_to_uint(i)))
+	case:
+		unreachable()
+	}
+}
+
+// Unmarshals a big number tag (a byte string holding the big-endian magnitude) into a `big.Int`.
+// NOTE(review): RFC 8949 section 3.4.3 defines the value of a negative bignum as `-1 - n`; this
+// implementation (and `tag_big_marshal`) only flips the sign. Confirm whether that is intended.
+@(private)
+tag_big_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, tnr: Tag_Number, v: any) -> (err: Unmarshal_Error) {
+	hdr := _decode_header(r) or_return
+	maj, add := _header_split(hdr)
+	if maj != .Bytes {
+		// Only bytes are supported in this tag.
+		return .Bad_Tag_Value
+	}
+
+	switch &dst in v {
+	case big.Int:
+		bytes := err_conv(_decode_bytes(r, add)) or_return
+		defer delete(bytes)
+
+		if err := big.int_from_bytes_big(&dst, bytes); err != nil {
+			return .Bad_Tag_Value
+		}
+
+		if tnr == TAG_NEGATIVE_BIG_NR {
+			dst.sign = .Negative
+		}
+
+		return
+	}
+
+	return _unsupported(v, hdr)
+}
+
+// Marshals a `big.Int` as the unsigned or negative big number tag followed by its magnitude as a
+// big-endian byte string. An uninitialized `big.Int` is written as an empty unsigned big number.
+@(private)
+tag_big_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error {
+	switch &vv in v {
+	case big.Int:
+		if !big.int_is_initialized(&vv) {
+			_encode_u8(e.writer, TAG_UNSIGNED_BIG_NR, .Tag) or_return
+			return _encode_u8(e.writer, 0, .Bytes)
+		}
+
+		// NOTE: using the panic_allocator because all procedures should only allocate if the Int
+		// is uninitialized (which we checked).
+
+		is_neg, err := big.is_negative(&vv, mem.panic_allocator())
+		assert(err == nil, "only errors if not initialized, which has been checked")
+
+		tnr: u8 = TAG_NEGATIVE_BIG_NR if is_neg else TAG_UNSIGNED_BIG_NR
+		_encode_u8(e.writer, tnr, .Tag) or_return
+
+		size_in_bytes, berr := big.int_to_bytes_size(&vv, false, mem.panic_allocator())
+		assert(berr == nil, "only errors if not initialized, which has been checked")
+		assert(size_in_bytes >= 0)
+
+		err_conv(_encode_u64(e, u64(size_in_bytes), .Bytes)) or_return
+
+		// Write the most significant byte first.
+		for offset := (size_in_bytes*8)-8; offset >= 0; offset -= 8 {
+			bits, derr := big.int_bitfield_extract(&vv, offset, 8, mem.panic_allocator())
+			assert(derr == nil, "only errors if not initialized or invalid argument (offset and count), which won't happen")
+
+			io.write_full(e.writer, {u8(bits & 255)}) or_return
+		}
+		return nil
+
+	case: unreachable()
+	}
+}
+
+// Unmarshals the byte string of an embedded CBOR tag as raw bytes, without decoding its contents.
+@(private)
+tag_cbor_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> Unmarshal_Error {
+	hdr := _decode_header(r) or_return
+	major, add := _header_split(hdr)
+	#partial switch major {
+	case .Bytes:
+		ti := reflect.type_info_base(type_info_of(v.id))
+		return _unmarshal_bytes(r, v, ti, hdr, add)
+
+	case: return .Bad_Tag_Value
+	}
+}
+
+// Marshals a string or byte array/slice/dynamic array behind the embedded CBOR tag.
+@(private)
+tag_cbor_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error {
+	_encode_u8(e.writer, TAG_CBOR_NR, .Tag) or_return
+	ti := runtime.type_info_base(type_info_of(v.id))
+	#partial switch t in ti.variant {
+	case runtime.Type_Info_String:
+		return marshal_into(e, v)
+	case runtime.Type_Info_Array:
+		elem_base := reflect.type_info_base(t.elem)
+		if elem_base.id != byte { return .Bad_Tag_Value }
+		return marshal_into(e, v)
+	case runtime.Type_Info_Slice:
+		elem_base := reflect.type_info_base(t.elem)
+		if elem_base.id != byte { return .Bad_Tag_Value }
+		return marshal_into(e, v)
+	case runtime.Type_Info_Dynamic_Array:
+		elem_base := reflect.type_info_base(t.elem)
+		if elem_base.id != byte { return .Bad_Tag_Value }
+		return marshal_into(e, v)
+	case:
+		return .Bad_Tag_Value
+	}
+}
+
+// Unmarshals a base64 text string into a string or byte container: the text is first stored in
+// `v` as-is and then replaced by its decoded form.
+//
+// NOTE: this could probably be more efficient by decoding bytes from CBOR and then from base64 at the same time.
+@(private)
+tag_base64_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> (err: Unmarshal_Error) {
+	hdr := _decode_header(r) or_return
+	major, add := _header_split(hdr)
+	#partial switch major {
+	case .Text:
+		ti := reflect.type_info_base(type_info_of(v.id))
+		_unmarshal_bytes(r, v, ti, hdr, add) or_return
+		#partial switch t in ti.variant {
+		case runtime.Type_Info_String:
+			switch t.is_cstring {
+			case true:
+				str := string((^cstring)(v.data)^)
+				decoded := base64.decode(str) or_return
+				// Deferred so the buffer is also released when the clone below fails.
+				defer delete(decoded)
+				(^cstring)(v.data)^ = strings.clone_to_cstring(string(decoded)) or_return
+				delete(str)
+			case false:
+				str := (^string)(v.data)^
+				decoded := base64.decode(str) or_return
+				(^string)(v.data)^ = string(decoded)
+				delete(str)
+			}
+			return
+
+		case runtime.Type_Info_Array:
+			// NOTE(review): the whole array is decoded, including bytes past the text that was
+			// actually read into it - confirm this is intended for texts shorter than the array.
+			raw := ([^]byte)(v.data)
+			decoded := base64.decode(string(raw[:t.count])) or_return
+			copy(raw[:t.count], decoded)
+			delete(decoded)
+			return
+
+		case runtime.Type_Info_Slice:
+			raw := (^[]byte)(v.data)
+			decoded := base64.decode(string(raw^)) or_return
+			delete(raw^)
+			raw^ = decoded
+			return
+
+		case runtime.Type_Info_Dynamic_Array:
+			raw := (^mem.Raw_Dynamic_Array)(v.data)
+			str := string(((^[dynamic]byte)(v.data)^)[:])
+
+			decoded := base64.decode(str) or_return
+			delete(str)
+
+			raw.data = raw_data(decoded)
+			raw.len = len(decoded)
+			raw.cap = len(decoded)
+			return
+
+		case: unreachable()
+		}
+
+	case: return .Bad_Tag_Value
+	}
+}
+
+// Marshals a string or byte container as the base64 tag followed by a text string holding the
+// base64 encoding of its bytes.
+@(private)
+tag_base64_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error {
+	_encode_u8(e.writer, TAG_BASE64_NR, .Tag) or_return
+
+	ti := runtime.type_info_base(type_info_of(v.id))
+	a := any{v.data, ti.id}
+
+	bytes: []byte
+	switch val in a {
+	case string:        bytes = transmute([]byte)val
+	case cstring:       bytes = transmute([]byte)string(val)
+	case []byte:        bytes = val
+	case [dynamic]byte: bytes = val[:]
+	case:
+		#partial switch t in ti.variant {
+		case runtime.Type_Info_Array:
+			if t.elem.id != byte { return .Bad_Tag_Value }
+			bytes = ([^]byte)(v.data)[:t.count]
+		case:
+			return .Bad_Tag_Value
+		}
+	}
+
+	// The text length has to be written before the text, so it is computed up front.
+	out_len := base64.encoded_length(bytes)
+	err_conv(_encode_u64(e, u64(out_len), .Text)) or_return
+	return base64.encode_into(e.writer, bytes)
+}
diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin
new file mode 100644
index 000000000..0da8e3f2a
--- /dev/null
+++ b/core/encoding/cbor/unmarshal.odin
@@ -0,0 +1,832 @@
+package cbor
+
+import "core:intrinsics"
+import "core:io"
+import "core:mem"
+import "core:reflect"
+import "core:runtime"
+import "core:strings"
+import "core:unicode/utf8"
+
+// `strings` is only used in poly procs, but -vet thinks it is fully unused.
+_ :: strings
+
+/*
+Unmarshals the given CBOR into the given pointer using reflection.
+Types that require allocation are allocated using the given allocator.
+
+Some temporary allocations are done on the `context.temp_allocator`, but, if you want to,
+this can be set to a "normal" allocator, because the necessary `delete` and `free` calls are still made.
+This is helpful when the CBOR size is so big that you don't want to collect all the temporary allocations until the end.
+*/
+unmarshal :: proc {
+	unmarshal_from_reader,
+	unmarshal_from_string,
+}
+
+// Unmarshals from a reader, see docs on the proc group `unmarshal` for more info.
+unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, allocator := context.allocator) -> Unmarshal_Error {
+	return _unmarshal_any_ptr(r, ptr, allocator=allocator)
+}
+
+// Unmarshals from a string, see docs on the proc group `unmarshal` for more info.
+unmarshal_from_string :: proc(s: string, ptr: ^$T, allocator := context.allocator) -> Unmarshal_Error {
+	sr: strings.Reader
+	r := strings.to_reader(&sr, s)
+	return _unmarshal_any_ptr(r, ptr, allocator=allocator)
+}
+
+// Unmarshals into the value that `v` points to. `v` must hold a typed, non-raw pointer.
+// When `hdr` is given it is used as the already decoded header, otherwise one is read from `r`.
+_unmarshal_any_ptr :: proc(r: io.Reader, v: any, hdr: Maybe(Header) = nil, allocator := context.allocator) -> Unmarshal_Error {
+	context.allocator = allocator
+	v := v
+
+	if v == nil || v.id == nil {
+		return .Invalid_Parameter
+	}
+
+	v = reflect.any_base(v)
+	ti := type_info_of(v.id)
+	if !reflect.is_pointer(ti) || ti.id == rawptr {
+		return .Non_Pointer_Parameter
+	}
+
+	data := any{(^rawptr)(v.data)^, ti.variant.(reflect.Type_Info_Pointer).elem.id}
+	return _unmarshal_value(r, data, hdr.? or_else (_decode_header(r) or_return))
+}
+
+// Unmarshals the value introduced by the already decoded header `hdr` into `v`.
+_unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_Error) {
+	v := v
+	ti := reflect.type_info_base(type_info_of(v.id))
+
+	// If it's a union with only one variant, then treat it as that variant
+	if u, ok := ti.variant.(reflect.Type_Info_Union); ok && len(u.variants) == 1 {
+		#partial switch hdr {
+		case .Nil, .Undefined, nil: // no-op.
+		case:
+			variant := u.variants[0]
+			v.id = variant.id
+			ti = reflect.type_info_base(variant)
+			if !reflect.is_pointer_internally(variant) {
+				tag := any{rawptr(uintptr(v.data) + u.tag_offset), u.tag_type.id}
+				// The assignment must not live inside the `assert` call: that call is removed
+				// when assertions are disabled, and the union tag would then never be set.
+				assigned := _assign_int(tag, 1)
+				assert(assigned)
+			}
+		}
+	}
+
+	// Allow generic unmarshal by doing it into a `Value`.
+	switch &dst in v {
+	case Value:
+		dst = err_conv(decode(r, hdr)) or_return
+		return
+	}
+
+	switch hdr {
+	case .U8:
+		decoded := _decode_u8(r) or_return
+		if !_assign_int(v, decoded) { return _unsupported(v, hdr) }
+		return
+
+	case .U16:
+		decoded := _decode_u16(r) or_return
+		if !_assign_int(v, decoded) { return _unsupported(v, hdr) }
+		return
+
+	case .U32:
+		decoded := _decode_u32(r) or_return
+		if !_assign_int(v, decoded) { return _unsupported(v, hdr) }
+		return
+
+	case .U64:
+		decoded := _decode_u64(r) or_return
+		if !_assign_int(v, decoded) { return _unsupported(v, hdr) }
+		return
+
+	case .Neg_U8:
+		decoded := Negative_U8(_decode_u8(r) or_return)
+
+		switch &dst in v {
+		case Negative_U8:
+			dst = decoded
+			return
+		case Negative_U16:
+			dst = Negative_U16(decoded)
+			return
+		case Negative_U32:
+			dst = Negative_U32(decoded)
+			return
+		case Negative_U64:
+			dst = Negative_U64(decoded)
+			return
+		}
+
+		if reflect.is_unsigned(ti) { return _unsupported(v, hdr) }
+
+		if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr) }
+		return
+
+	case .Neg_U16:
+		decoded := Negative_U16(_decode_u16(r) or_return)
+
+		switch &dst in v {
+		case Negative_U16:
+			dst = decoded
+			return
+		case Negative_U32:
+			dst = Negative_U32(decoded)
+			return
+		case Negative_U64:
+			dst = Negative_U64(decoded)
+			return
+		}
+
+		if reflect.is_unsigned(ti) { return _unsupported(v, hdr) }
+
+		if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr) }
+		return
+
+	case .Neg_U32:
+		decoded := Negative_U32(_decode_u32(r) or_return)
+
+		switch &dst in v {
+		case Negative_U32:
+			dst = decoded
+			return
+		case Negative_U64:
+			dst = Negative_U64(decoded)
+			return
+		}
+
+		if reflect.is_unsigned(ti) { return _unsupported(v, hdr) }
+
+		if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr) }
+		return
+
+	case .Neg_U64:
+		decoded := Negative_U64(_decode_u64(r) or_return)
+
+		switch &dst in v {
+		case Negative_U64:
+			dst = decoded
+			return
+		}
+
+		if reflect.is_unsigned(ti) { return _unsupported(v, hdr) }
+
+		if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr) }
+		return
+
+	case .Simple:
+		decoded := _decode_simple(r) or_return
+
+		// NOTE: Because this is a special type and not to be treated as a general integer,
+		// We only put the value of it in fields that are explicitly of type `Simple`.
+		switch &dst in v {
+		case Simple:
+			dst = decoded
+			return
+		case:
+			return _unsupported(v, hdr)
+		}
+
+	case .F16:
+		decoded := _decode_f16(r) or_return
+		if !_assign_float(v, decoded) { return _unsupported(v, hdr) }
+		return
+
+	case .F32:
+		decoded := _decode_f32(r) or_return
+		if !_assign_float(v, decoded) { return _unsupported(v, hdr) }
+		return
+
+	case .F64:
+		decoded := _decode_f64(r) or_return
+		if !_assign_float(v, decoded) { return _unsupported(v, hdr) }
+		return
+
+	case .True:
+		if !_assign_bool(v, true) { return _unsupported(v, hdr) }
+		return
+
+	case .False:
+		if !_assign_bool(v, false) { return _unsupported(v, hdr) }
+		return
+
+	case .Nil, .Undefined:
+		mem.zero(v.data, ti.size)
+		return
+
+	case .Break:
+		return .Break
+	}
+
+	// Everything else is identified by its major type, with the additional info in `add`.
+	maj, add := _header_split(hdr)
+	switch maj {
+	case .Unsigned:
+		decoded := _decode_tiny_u8(add) or_return
+		if !_assign_int(v, decoded) { return _unsupported(v, hdr, add) }
+		return
+
+	case .Negative:
+		decoded := Negative_U8(_decode_tiny_u8(add) or_return)
+
+		switch &dst in v {
+		case Negative_U8:
+			dst = decoded
+			return
+		}
+
+		if reflect.is_unsigned(ti) { return _unsupported(v, hdr, add) }
+
+		if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr, add) }
+		return
+
+	case .Other:
+		decoded := _decode_tiny_simple(add) or_return
+
+		// NOTE: Because this is a special type and not to be treated as a general integer,
+		// We only put the value of it in fields that are explicitly of type `Simple`.
+		switch &dst in v {
+		case Simple:
+			dst = decoded
+			return
+		case:
+			return _unsupported(v, hdr, add)
+		}
+
+	case .Tag:
+		switch &dst in v {
+		case ^Tag:
+			tval := err_conv(_decode_tag_ptr(r, add)) or_return
+			if t, is_tag := tval.(^Tag); is_tag {
+				dst = t
+				return
+			}
+
+			destroy(tval)
+			return .Bad_Tag_Value
+		case Tag:
+			t := err_conv(_decode_tag(r, add)) or_return
+			if t, is_tag := t.?; is_tag {
+				dst = t
+				return
+			}
+
+			return .Bad_Tag_Value
+		}
+
+		nr := err_conv(_decode_tag_nr(r, add)) or_return
+
+		// Custom tag implementations.
+		if impl, ok := _tag_implementations_nr[nr]; ok {
+			return impl->unmarshal(r, nr, v)
+		} else {
+			// Discard the tag info and unmarshal as its value.
+			return _unmarshal_value(r, v, _decode_header(r) or_return)
+		}
+
+		return _unsupported(v, hdr, add)
+
+	case .Bytes: return _unmarshal_bytes(r, v, ti, hdr, add)
+	case .Text:  return _unmarshal_string(r, v, ti, hdr, add)
+	case .Array: return _unmarshal_array(r, v, ti, hdr, add)
+	case .Map:   return _unmarshal_map(r, v, ti, hdr, add)
+
+	case: return .Bad_Major
+	}
+}
+
+// Unmarshals a byte string into a string, cstring, or a slice, dynamic array or fixed array of bytes.
+_unmarshal_bytes :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) {
+	#partial switch t in ti.variant {
+	case reflect.Type_Info_String:
+		bytes := err_conv(_decode_bytes(r, add)) or_return
+
+		if t.is_cstring {
+			raw := (^cstring)(v.data)
+			assert_safe_for_cstring(string(bytes))
+			raw^ = cstring(raw_data(bytes))
+		} else {
+			// String has same memory layout as a slice, so we can directly use it as a slice.
+			raw := (^mem.Raw_String)(v.data)
+			raw^ = transmute(mem.Raw_String)bytes
+		}
+
+		return
+
+	case reflect.Type_Info_Slice:
+		elem_base := reflect.type_info_base(t.elem)
+
+		if elem_base.id != byte { return _unsupported(v, hdr) }
+
+		bytes := err_conv(_decode_bytes(r, add)) or_return
+		raw := (^mem.Raw_Slice)(v.data)
+		raw^ = transmute(mem.Raw_Slice)bytes
+		return
+
+	case reflect.Type_Info_Dynamic_Array:
+		elem_base := reflect.type_info_base(t.elem)
+
+		if elem_base.id != byte { return _unsupported(v, hdr) }
+
+		bytes := err_conv(_decode_bytes(r, add)) or_return
+		raw := (^mem.Raw_Dynamic_Array)(v.data)
+		raw.data = raw_data(bytes)
+		raw.len = len(bytes)
+		raw.cap = len(bytes)
+		raw.allocator = context.allocator
+		return
+
+	case reflect.Type_Info_Array:
+		elem_base := reflect.type_info_base(t.elem)
+
+		if elem_base.id != byte { return _unsupported(v, hdr) }
+
+		// The bytes only live until they are copied into the array, so decode them temporarily.
+		bytes: []byte; {
+			context.allocator = context.temp_allocator
+			bytes = err_conv(_decode_bytes(r, add)) or_return
+		}
+		defer delete(bytes, context.temp_allocator)
+
+		if len(bytes) > t.count { return _unsupported(v, hdr) }
+
+		// Copy into array type, delete original.
+		slice := ([^]byte)(v.data)[:len(bytes)]
+		n := copy(slice, bytes)
+		assert(n == len(bytes))
+		return
+	}
+
+	return _unsupported(v, hdr)
+}
+
+// Unmarshals a text string into a string, cstring, enum (by variant name) or rune.
+_unmarshal_string :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) {
+	#partial switch t in ti.variant {
+	case reflect.Type_Info_String:
+		text := err_conv(_decode_text(r, add)) or_return
+
+		if t.is_cstring {
+			raw := (^cstring)(v.data)
+
+			assert_safe_for_cstring(text)
+			raw^ = cstring(raw_data(text))
+		} else {
+			raw := (^string)(v.data)
+			raw^ = text
+		}
+		return
+
+	// Enum by its variant name.
+	case reflect.Type_Info_Enum:
+		context.allocator = context.temp_allocator
+		text := err_conv(_decode_text(r, add)) or_return
+		defer delete(text, context.temp_allocator)
+
+		for name, i in t.names {
+			if name == text {
+				if !_assign_int(any{v.data, ti.id}, t.values[i]) { return _unsupported(v, hdr) }
+				return
+			}
+		}
+		// No variant with that name: falls through to the unsupported error below.
+
+	case reflect.Type_Info_Rune:
+		context.allocator = context.temp_allocator
+		text := err_conv(_decode_text(r, add)) or_return
+		defer delete(text, context.temp_allocator)
+
+		// The text must be exactly one valid rune.
+		r := (^rune)(v.data)
+		dr, n := utf8.decode_rune(text)
+		if dr == utf8.RUNE_ERROR || n < len(text) {
+			return _unsupported(v, hdr)
+		}
+
+		r^ = dr
+		return
+	}
+
+	return _unsupported(v, hdr)
+}
+
+// Unmarshals a CBOR array into an `Array`, slice, dynamic array, (enumerated) fixed array, complex
+// number or quaternion.
+_unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) {
+
+	// Unmarshals elements into `da`, incrementing `da.len` for each one. With a `_length`, exactly
+	// that many elements are read; without one, elements are read until the break code.
+	// When `da` is full it is doubled if `growable`, otherwise `out_of_space` is reported.
+	assign_array :: proc(
+		r: io.Reader,
+		da: ^mem.Raw_Dynamic_Array,
+		elemt: ^reflect.Type_Info,
+		_length: Maybe(int),
+		growable := true,
+	) -> (out_of_space: bool, err: Unmarshal_Error) {
+		length, has_length := _length.?
+		for idx: uintptr = 0; !has_length || idx < uintptr(length); idx += 1 {
+			hdr := _decode_header(r) or_return
+
+			// A streamed array ends at the break code. Check for it before demanding capacity,
+			// otherwise a streamed array that exactly fills a fixed array is reported as too big.
+			if !has_length && hdr == .Break { break }
+
+			// Double size if out of capacity.
+			if da.cap <= da.len {
+				// Not growable, error out.
+				if !growable { return true, .Out_Of_Memory }
+
+				cap := 2 * da.cap
+				ok := runtime.__dynamic_array_reserve(da, elemt.size, elemt.align, cap)
+
+				// NOTE: Might be lying here, but it is at least an allocator error.
+				if !ok { return false, .Out_Of_Memory }
+			}
+
+			// Computed only after the reserve above, because growing may move `da.data`.
+			elem_ptr := rawptr(uintptr(da.data) + idx*uintptr(elemt.size))
+			elem := any{elem_ptr, elemt.id}
+
+			err = _unmarshal_value(r, elem, hdr)
+			if !has_length && err == .Break { break }
+			if err != nil { return }
+
+			da.len += 1
+		}
+
+		return false, nil
+	}
+
+	// Allow generically storing the values array.
+	switch &dst in v {
+	case ^Array:
+		dst = err_conv(_decode_array_ptr(r, add)) or_return
+		return
+	case Array:
+		dst = err_conv(_decode_array(r, add)) or_return
+		return
+	}
+
+	#partial switch t in ti.variant {
+	case reflect.Type_Info_Slice:
+		_length, unknown := err_conv(_decode_container_length(r, add)) or_return
+		length := _length.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY
+
+		// NOTE(review): if `assign_array` grew the buffer, `data` no longer refers to the live
+		// allocation - confirm this cleanup is still correct for streamed arrays.
+		data := mem.alloc_bytes_non_zeroed(t.elem.size * length, t.elem.align) or_return
+		defer if err != nil { mem.free_bytes(data) }
+
+		da := mem.Raw_Dynamic_Array{raw_data(data), 0, length, context.allocator }
+
+		assign_array(r, &da, t.elem, _length) or_return
+
+		if da.len < da.cap {
+			// Ignoring an error here, but this is not critical to succeed.
+			_ = runtime.__dynamic_array_shrink(&da, t.elem.size, t.elem.align, da.len)
+		}
+
+		raw := (^mem.Raw_Slice)(v.data)
+		raw.data = da.data
+		raw.len = da.len
+		return
+
+	case reflect.Type_Info_Dynamic_Array:
+		_length, unknown := err_conv(_decode_container_length(r, add)) or_return
+		length := _length.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY
+
+		data := mem.alloc_bytes_non_zeroed(t.elem.size * length, t.elem.align) or_return
+		defer if err != nil { mem.free_bytes(data) }
+
+		raw := (^mem.Raw_Dynamic_Array)(v.data)
+		raw.data = raw_data(data)
+		raw.len = 0
+		raw.cap = length
+		raw.allocator = context.allocator
+
+		_ = assign_array(r, raw, t.elem, _length) or_return
+		return
+
+	case reflect.Type_Info_Array:
+		_length, unknown := err_conv(_decode_container_length(r, add)) or_return
+		length := _length.? or_else t.count
+
+		if !unknown && length > t.count {
+			return _unsupported(v, hdr)
+		}
+
+		// View the fixed array as a non-growable dynamic array so `assign_array` can fill it.
+		da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, context.allocator }
+
+		out_of_space := assign_array(r, &da, t.elem, _length, growable=false) or_return
+		if out_of_space { return _unsupported(v, hdr) }
+		return
+
+	case reflect.Type_Info_Enumerated_Array:
+		_length, unknown := err_conv(_decode_container_length(r, add)) or_return
+		length := _length.? or_else t.count
+
+		if !unknown && length > t.count {
+			return _unsupported(v, hdr)
+		}
+
+		da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, context.allocator }
+
+		out_of_space := assign_array(r, &da, t.elem, _length, growable=false) or_return
+		if out_of_space { return _unsupported(v, hdr) }
+		return
+
+	case reflect.Type_Info_Complex:
+		_length, unknown := err_conv(_decode_container_length(r, add)) or_return
+		length := _length.? or_else 2
+
+		if !unknown && length > 2 {
+			return _unsupported(v, hdr)
+		}
+
+		da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, 2, context.allocator }
+
+		info: ^runtime.Type_Info
+		switch ti.id {
+		case complex32:  info = type_info_of(f16)
+		case complex64:  info = type_info_of(f32)
+		case complex128: info = type_info_of(f64)
+		case:            unreachable()
+		}
+
+		// Pass the decoded length so that only the elements that are actually in the array are
+		// read, and so that a streamed array is read up to and including its break code.
+		out_of_space := assign_array(r, &da, info, _length, growable=false) or_return
+		if out_of_space { return _unsupported(v, hdr) }
+		return
+
+	case reflect.Type_Info_Quaternion:
+		_length, unknown := err_conv(_decode_container_length(r, add)) or_return
+		length := _length.? or_else 4
+
+		if !unknown && length > 4 {
+			return _unsupported(v, hdr)
+		}
+
+		da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, 4, context.allocator }
+
+		info: ^runtime.Type_Info
+		switch ti.id {
+		case quaternion64:  info = type_info_of(f16)
+		case quaternion128: info = type_info_of(f32)
+		case quaternion256: info = type_info_of(f64)
+		case:               unreachable()
+		}
+
+		// See the complex case above for why the decoded length is passed.
+		out_of_space := assign_array(r, &da, info, _length, growable=false) or_return
+		if out_of_space { return _unsupported(v, hdr) }
+		return
+
+	case: return _unsupported(v, hdr)
+	}
+}
+
+// Unmarshals a CBOR map into a `Map`, a struct (entries matched to fields by `cbor` struct tag or
+// field name) or a map with string keys.
+_unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) {
+
+	// Decodes a map key, which has to be text or bytes. The break code that ends a streamed map
+	// is reported as `.Break`.
+	decode_key :: proc(r: io.Reader, v: any) -> (k: string, err: Unmarshal_Error) {
+		entry_hdr := _decode_header(r) or_return
+
+		// Without this check the break code would end up in the unsupported case below, and the
+		// callers' `kerr == .Break` checks could never end a streamed map.
+		if entry_hdr == .Break {
+			return "", .Break
+		}
+
+		entry_maj, entry_add := _header_split(entry_hdr)
+		#partial switch entry_maj {
+		case .Text:
+			k = err_conv(_decode_text(r, entry_add)) or_return
+			return
+		case .Bytes:
+			bytes := err_conv(_decode_bytes(r, entry_add)) or_return
+			k = string(bytes)
+			return
+		case:
+			err = _unsupported(v, entry_hdr)
+			return
+		}
+	}
+
+	// Allow generically storing the map array.
+	switch &dst in v {
+	case ^Map:
+		dst = err_conv(_decode_map_ptr(r, add)) or_return
+		return
+	case Map:
+		dst = err_conv(_decode_map(r, add)) or_return
+		return
+	}
+
+	#partial switch t in ti.variant {
+	case reflect.Type_Info_Struct:
+		if t.is_raw_union {
+			return _unsupported(v, hdr)
+		}
+
+		length, unknown := err_conv(_decode_container_length(r, add)) or_return
+		fields := reflect.struct_fields_zipped(ti.id)
+
+		for idx := 0; unknown || idx < length.?; idx += 1 {
+			// Decode key, keys can only be strings.
+			key: string; {
+				context.allocator = context.temp_allocator
+				if keyv, kerr := decode_key(r, v); unknown && kerr == .Break {
+					break
+				} else if kerr != nil {
+					err = kerr
+					return
+				} else {
+					key = keyv
+				}
+			}
+			defer delete(key, context.temp_allocator)
+
+			// Find matching field.
+			use_field_idx := -1
+			{
+				for field, field_idx in fields {
+					tag_value := string(reflect.struct_tag_get(field.tag, "cbor"))
+					if key == tag_value {
+						use_field_idx = field_idx
+						break
+					}
+
+					if key == field.name {
+						// No break because we want to still check remaining struct tags.
+						use_field_idx = field_idx
+					}
+				}
+
+				// Skips unused map entries. The value still has to be consumed, otherwise the
+				// reader is left in the middle of the entry and the next key decode goes wrong.
+				if use_field_idx < 0 {
+					skipped := err_conv(decode(r, _decode_header(r) or_return)) or_return
+					destroy(skipped)
+					continue
+				}
+			}
+
+			field := fields[use_field_idx]
+			ptr := rawptr(uintptr(v.data) + field.offset)
+			fany := any{ptr, field.type.id}
+			_unmarshal_value(r, fany, _decode_header(r) or_return) or_return
+		}
+		return
+
+	case reflect.Type_Info_Map:
+		if !reflect.is_string(t.key) {
+			return _unsupported(v, hdr)
+		}
+
+		raw_map := (^mem.Raw_Map)(v.data)
+		if raw_map.allocator.procedure == nil {
+			raw_map.allocator = context.allocator
+		}
+
+		defer if err != nil {
+			_ = runtime.map_free_dynamic(raw_map^, t.map_info)
+		}
+
+		length, unknown := err_conv(_decode_container_length(r, add)) or_return
+		if !unknown {
+			// Reserve space before setting so we can return allocation errors and be efficient on big maps.
+			new_len := uintptr(runtime.map_len(raw_map^)+length.?)
+			runtime.map_reserve_dynamic(raw_map, t.map_info, new_len) or_return
+		}
+
+		// Temporary memory to unmarshal values into before inserting them into the map.
+		elem_backing := mem.alloc_bytes_non_zeroed(t.value.size, t.value.align, context.temp_allocator) or_return
+		defer delete(elem_backing, context.temp_allocator)
+
+		map_backing_value := any{raw_data(elem_backing), t.value.id}
+
+		for idx := 0; unknown || idx < length.?; idx += 1 {
+			// Decode key, keys can only be strings.
+			key: string
+			if keyv, kerr := decode_key(r, v); unknown && kerr == .Break {
+				break
+			} else if kerr != nil {
+				err = kerr
+				return
+			} else {
+				key = keyv
+			}
+
+			if unknown {
+				// Reserve space for new element so we can return allocator errors.
+				new_len := uintptr(runtime.map_len(raw_map^)+1)
+				runtime.map_reserve_dynamic(raw_map, t.map_info, new_len) or_return
+			}
+
+			mem.zero_slice(elem_backing)
+			_unmarshal_value(r, map_backing_value, _decode_header(r) or_return) or_return
+
+			// The map is keyed by either `string` or `cstring`, pass a pointer to the right one.
+			key_ptr := rawptr(&key)
+			key_cstr: cstring
+			if reflect.is_cstring(t.key) {
+				assert_safe_for_cstring(key)
+				key_cstr = cstring(raw_data(key))
+				key_ptr = &key_cstr
+			}
+
+			set_ptr := runtime.__dynamic_map_set_without_hash(raw_map, t.map_info, key_ptr, map_backing_value.data)
+			// We already reserved space for it, so this shouldn't fail.
+			assert(set_ptr != nil)
+		}
+		return
+
+	case:
+		return _unsupported(v, hdr)
+	}
+}
+
+// Assigns `i` to `val` when `val` is an integer or a bit_set, converting to the destination type.
+// Returns false when `val` is of neither kind.
+_assign_int :: proc(val: any, i: $T) -> bool {
+	v := reflect.any_core(val)
+
+	// NOTE: should under/over flow be checked here? `encoding/json` doesn't, but maybe that is a
+	// less strict encoding?.
+
+	switch &dst in v {
+	case i8:      dst = i8     (i)
+	case i16:     dst = i16    (i)
+	case i16le:   dst = i16le  (i)
+	case i16be:   dst = i16be  (i)
+	case i32:     dst = i32    (i)
+	case i32le:   dst = i32le  (i)
+	case i32be:   dst = i32be  (i)
+	case i64:     dst = i64    (i)
+	case i64le:   dst = i64le  (i)
+	case i64be:   dst = i64be  (i)
+	case i128:    dst = i128   (i)
+	case i128le:  dst = i128le (i)
+	case i128be:  dst = i128be (i)
+	case u8:      dst = u8     (i)
+	case u16:     dst = u16    (i)
+	case u16le:   dst = u16le  (i)
+	case u16be:   dst = u16be  (i)
+	case u32:     dst = u32    (i)
+	case u32le:   dst = u32le  (i)
+	case u32be:   dst = u32be  (i)
+	case u64:     dst = u64    (i)
+	case u64le:   dst = u64le  (i)
+	case u64be:   dst = u64be  (i)
+	case u128:    dst = u128   (i)
+	case u128le:  dst = u128le (i)
+	case u128be:  dst = u128be (i)
+	case int:     dst = int    (i)
+	case uint:    dst = uint   (i)
+	case uintptr: dst = uintptr(i)
+	case:
+		ti := type_info_of(v.id)
+		do_byte_swap := is_bit_set_different_endian_to_platform(ti)
+		#partial switch info in ti.variant {
+		case runtime.Type_Info_Bit_Set:
+			// Written through an unsigned integer of the bit_set's size.
+			switch ti.size * 8 {
+			case 0:
+			case 8:
+				x := (^u8)(v.data)
+				x^ = u8(i)
+			case 16:
+				x := (^u16)(v.data)
+				x^ = do_byte_swap ? intrinsics.byte_swap(u16(i)) : u16(i)
+			case 32:
+				x := (^u32)(v.data)
+				x^ = do_byte_swap ? intrinsics.byte_swap(u32(i)) : u32(i)
+			case 64:
+				x := (^u64)(v.data)
+				x^ = do_byte_swap ? intrinsics.byte_swap(u64(i)) : u64(i)
+			case:
+				panic("unknown bit_size size")
+			}
+		case:
+			return false
+		}
+	}
+	return true
+}
+
+// Assigns `f` to `val` when `val` is a float, complex number or quaternion; for the latter two
+// only the real part is set. Returns false for any other type.
+_assign_float :: proc(val: any, f: $T) -> bool {
+	v := reflect.any_core(val)
+
+	// NOTE: should under/over flow be checked here? `encoding/json` doesn't, but maybe that is a
+	// less strict encoding?.
+
+	switch &dst in v {
+	case f16:   dst = f16  (f)
+	case f16le: dst = f16le(f)
+	case f16be: dst = f16be(f)
+	case f32:   dst = f32  (f)
+	case f32le: dst = f32le(f)
+	case f32be: dst = f32be(f)
+	case f64:   dst = f64  (f)
+	case f64le: dst = f64le(f)
+	case f64be: dst = f64be(f)
+
+	case complex32:  dst = complex(f16(f), 0)
+	case complex64:  dst = complex(f32(f), 0)
+	case complex128: dst = complex(f64(f), 0)
+
+	case quaternion64:  dst = quaternion(f16(f), 0, 0, 0)
+	case quaternion128: dst = quaternion(f32(f), 0, 0, 0)
+	case quaternion256: dst = quaternion(f64(f), 0, 0, 0)
+
+	case: return false
+	}
+	return true
+}
+
+// Assigns `b` to `val` when `val` is any of the boolean types. Returns false for any other type.
+_assign_bool :: proc(val: any, b: bool) -> bool {
+	v := reflect.any_core(val)
+	switch &dst in v {
+	case bool: dst = bool(b)
+	case b8:   dst = b8  (b)
+	case b16:  dst = b16 (b)
+	case b32:  dst = b32 (b)
+	case b64:  dst = b64 (b)
+	case: return false
+	}
+	return true
+}
+
+// Sanity check that the decoder added a nil byte to the end.
+@(private, disabled=ODIN_DISABLE_ASSERT)
+assert_safe_for_cstring :: proc(s: string, loc := #caller_location) {
+	assert(([^]byte)(raw_data(s))[len(s)] == 0, loc = loc)
+}
diff --git a/core/io/io.odin b/core/io/io.odin
index ea8e240b0..961dbe43e 100644
--- a/core/io/io.odin
+++ b/core/io/io.odin
@@ -29,7 +29,7 @@ Error :: enum i32 {
 	// Invalid_Write means that a write returned an impossible count
 	Invalid_Write,
 
-	// Short_Buffer means that a read required a longer buffer than was provided
+	// Short_Buffer means that a read/write required a longer buffer than was provided
 	Short_Buffer,
 
 	// No_Progress is returned by some implementations of `io.Reader` when many calls
@@ -359,6 +359,36 @@ read_at_least :: proc(r: Reader, buf: []byte, min: int) -> (n: int, err: Error)
 	return
 }
 
+// write_full writes until the entire contents of `buf` has been written or an error occurs.
+write_full :: proc(w: Writer, buf: []byte) -> (n: int, err: Error) {
+	return write_at_least(w, buf, len(buf))
+}
+
+// write_at_least writes at least `buf[:min]` to the writer and returns the amount written.
+// If an error occurs before writing everything it is returned.
+// `.Short_Buffer` is returned, without writing anything, when `buf` is shorter than `min`.
+// `.Short_Write` is returned when the writer stops accepting bytes without reporting an error.
+write_at_least :: proc(w: Writer, buf: []byte, min: int) -> (n: int, err: Error) {
+	if len(buf) < min {
+		return 0, .Short_Buffer
+	}
+	for n < min && err == nil {
+		nn: int
+		nn, err = write(w, buf[n:])
+		n += nn
+
+		// A write that makes no progress and reports no error would otherwise loop forever.
+		if nn <= 0 && err == nil {
+			break
+		}
+	}
+
+	if err == nil && n < min {
+		err = .Short_Write
+	}
+	return
+}
+
 // copy copies from src to dst till either EOF is reached on src or an error occurs
 // It returns the number of bytes copied and the first error that occurred whilst copying, if any.
copy :: proc(dst: Writer, src: Reader) -> (written: i64, err: Error) { diff --git a/core/net/common.odin b/core/net/common.odin index 2a6f44602..3cd1459a6 100644 --- a/core/net/common.odin +++ b/core/net/common.odin @@ -413,4 +413,5 @@ DNS_Record_Header :: struct #packed { DNS_Host_Entry :: struct { name: string, addr: Address, -} \ No newline at end of file +} + diff --git a/examples/all/all_main.odin b/examples/all/all_main.odin index fff344b22..22374f3b5 100644 --- a/examples/all/all_main.odin +++ b/examples/all/all_main.odin @@ -53,6 +53,7 @@ import json "core:encoding/json" import varint "core:encoding/varint" import xml "core:encoding/xml" import endian "core:encoding/endian" +import cbor "core:encoding/cbor" import fmt "core:fmt" import hash "core:hash" @@ -167,6 +168,7 @@ _ :: json _ :: varint _ :: xml _ :: endian +_ :: cbor _ :: fmt _ :: hash _ :: xxhash diff --git a/tests/core/Makefile b/tests/core/Makefile index 1207eeec5..1fca7bf97 100644 --- a/tests/core/Makefile +++ b/tests/core/Makefile @@ -55,6 +55,7 @@ encoding_test: $(ODIN) run encoding/json $(COMMON) -out:test_json $(ODIN) run encoding/varint $(COMMON) -out:test_varint $(ODIN) run encoding/xml $(COMMON) -out:test_xml + $(ODIN) run encoding/cbor $(COMMON) -out:test_cbor math_test: $(ODIN) run math $(COMMON) $(COLLECTION) -out:test_core_math diff --git a/tests/core/build.bat b/tests/core/build.bat index d5f528f0c..5bf8e1ead 100644 --- a/tests/core/build.bat +++ b/tests/core/build.bat @@ -40,6 +40,7 @@ rem %PATH_TO_ODIN% run encoding/hxa %COMMON% %COLLECTION% -out:test_hxa.exe | %PATH_TO_ODIN% run encoding/json %COMMON% -out:test_json.exe || exit /b %PATH_TO_ODIN% run encoding/varint %COMMON% -out:test_varint.exe || exit /b %PATH_TO_ODIN% run encoding/xml %COMMON% -out:test_xml.exe || exit /b +%PATH_TO_ODIN% test encoding/cbor %COMMON% -out:test_cbor.exe || exit /b echo --- echo Running core:math/noise tests diff --git a/tests/core/encoding/cbor/test_core_cbor.odin 
b/tests/core/encoding/cbor/test_core_cbor.odin
new file mode 100644
index 000000000..22359d830
--- /dev/null
+++ b/tests/core/encoding/cbor/test_core_cbor.odin
@@ -0,0 +1,719 @@
+package test_encoding_cbor
+
+import "core:bytes"
+import "core:encoding/cbor"
+import "core:fmt"
+import "core:intrinsics"
+import "core:math/big"
+import "core:mem"
+import "core:reflect"
+import "core:testing"
+import "core:time"
+
+// Foo is the fixture struct round-tripped by test_marshalling.
+// It mixes plain fields, a raw cbor.Value, fields carrying `cbor_tag` / `cbor` struct tags,
+// a nested anonymous struct (dynamic array, map, fixed array), an enum, a bit_set,
+// quaternion/complex numbers, 128-bit integers and big integers.
+Foo :: struct {
+	str: string,
+	cstr: cstring,
+	value: cbor.Value,
+	neg: cbor.Negative_U16,
+	pos: u16,
+	iamint: int,
+	base64: string `cbor_tag:"base64"`,
+	renamed: f32 `cbor:"renamed :)"`,
+	now: time.Time `cbor_tag:"1"`,
+	nowie: time.Time,
+	child: struct{
+		dyn: [dynamic]string,
+		mappy: map[string]int,
+		my_integers: [10]int,
+	},
+	my_bytes: []byte,
+	ennie: FooBar,
+	ennieb: FooBars,
+	quat: quaternion64,
+	comp: complex128,
+	important: rune,
+	no: cbor.Nil,
+	nos: cbor.Undefined,
+	yes: b32,
+	biggie: u64,
+	smallie: cbor.Negative_U64,
+	onetwenty: i128,
+	small_onetwenty: i128,
+	biggest: big.Int,
+	smallest: big.Int,
+}
+
+// FooBar is the enum used for Foo.ennie and as the element type of FooBars.
+FooBar :: enum {
+	EFoo,
+	EBar,
+}
+
+// FooBars is a bit_set over FooBar backed by a u16; used for Foo.ennieb.
+FooBars :: bit_set[FooBar; u16]
+
+// test_marshalling marshals a populated Foo, decodes and diagnoses the resulting bytes,
+// compares the diagnostic text against a fixed expectation, then unmarshals the bytes
+// into a fresh Foo and compares it with the original field by field.
+// The whole test runs under a tracking allocator; any allocation still live at the end,
+// or any bad free, is reported as a test error.
+@(test)
+test_marshalling :: proc(t: ^testing.T) {
+	tracker: mem.Tracking_Allocator
+	mem.tracking_allocator_init(&tracker, context.allocator)
+	context.allocator = mem.tracking_allocator(&tracker)
+	// The temp allocator is routed through the tracker as well, so temp allocations are tracked too.
+	context.temp_allocator = context.allocator
+	defer mem.tracking_allocator_destroy(&tracker)
+
+	ev :: testing.expect_value
+
+	// Scoped so every `defer` below runs before the leak report at the end of the proc.
+	{
+		nice := "16 is a nice number"
+		now := time.Time{_nsec = 1701117968 * 1e9}
+		f: Foo = {
+			str = "Hellope",
+			cstr = "Hellnope",
+			value = &cbor.Map{{u8(16), &nice}, {u8(32), u8(69)}},
+			neg = 68, // The expected diagnostic below shows this as -69.
+			pos = 1212,
+			iamint = -256,
+			base64 = nice,
+			renamed = 123123.125,
+
+			now = now,
+			nowie = now,
+
+			child = {
+				dyn = [dynamic]string{"one", "two", "three", "four"},
+				mappy = map[string]int{"one" = 1, "two" = 2, "three" = 3, "four" = 4},
+				my_integers = [10]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
+			},
+
+			my_bytes = []byte{},
+
+			ennie = .EFoo,
+			ennieb = {.EBar},
+
+			// The expected diagnostic below lists these components as 17, 18, 19, 16.
+			quat = quaternion(16, 17, 18, 19),
+			comp = complex(32, 33),
+
+			important = '!',
+
+			// Deliberately a non-zero Nil; the expected diagnostic and the round-trip check still see nil.
+			no = cbor.Nil(uintptr(3)),
+
+			yes = true,
+
+			biggie = max(u64),
+			smallie = cbor.Negative_U64(max(u64)),
+			onetwenty = i128(12345),
+			small_onetwenty = -i128(max(u64)),
+		}
+
+		big.atoi(&f.biggest, "1234567891011121314151617181920")
+		big.atoi(&f.smallest, "-1234567891011121314151617181920")
+
+		// Release everything the fixture allocated so the tracker only reports real leaks.
+		defer {
+			delete(f.child.dyn)
+			delete(f.child.mappy)
+			big.destroy(&f.biggest)
+			big.destroy(&f.smallest)
+		}
+
+		data, err := cbor.marshal(f, cbor.ENCODE_FULLY_DETERMINISTIC)
+		ev(t, err, nil)
+		defer delete(data)
+
+		decoded, derr := cbor.decode_string(string(data))
+		ev(t, derr, nil)
+		defer cbor.destroy(decoded)
+
+		diagnosis, eerr := cbor.diagnose(decoded)
+		ev(t, eerr, nil)
+		defer delete(diagnosis)
+
+		// NOTE(review): the expected text is compared verbatim, so the whitespace inside this raw
+		// string matters; it is assumed to be one tab per nesting level — confirm against the
+		// output of cbor.diagnose before reflowing it.
+		ev(t, diagnosis, `{
+	"base64": 34("MTYgaXMgYSBuaWNlIG51bWJlcg=="),
+	"biggest": 2(h'f951a9fd3c158afdff08ab8e0'),
+	"biggie": 18446744073709551615,
+	"child": {
+		"dyn": [
+			"one",
+			"two",
+			"three",
+			"four"
+		],
+		"mappy": {
+			"one": 1,
+			"two": 2,
+			"four": 4,
+			"three": 3
+		},
+		"my_integers": [
+			1,
+			2,
+			3,
+			4,
+			5,
+			6,
+			7,
+			8,
+			9,
+			10
+		]
+	},
+	"comp": [
+		32.0000,
+		33.0000
+	],
+	"cstr": "Hellnope",
+	"ennie": 0,
+	"ennieb": 2,
+	"iamint": -256,
+	"important": "!",
+	"my_bytes": h'',
+	"neg": -69,
+	"no": nil,
+	"nos": undefined,
+	"now": 1(1701117968),
+	"nowie": {
+		"_nsec": 1701117968000000000
+	},
+	"onetwenty": 12345,
+	"pos": 1212,
+	"quat": [
+		17.0000,
+		18.0000,
+		19.0000,
+		16.0000
+	],
+	"renamed :)": 123123.12500000,
+	"small_onetwenty": -18446744073709551615,
+	"smallest": 3(h'f951a9fd3c158afdff08ab8e0'),
+	"smallie": -18446744073709551616,
+	"str": "Hellope",
+	"value": {
+		16: "16 is a nice number",
+		32: 69
+	},
+	"yes": true
+}`)
+
+		backf: Foo
+		uerr := cbor.unmarshal(string(data), &backf)
+		ev(t, uerr, nil)
+		// Free everything unmarshal allocated into backf, including each string element and map key.
+		defer {
+			delete(backf.str)
+			delete(backf.cstr)
+			cbor.destroy(backf.value)
+			delete(backf.base64)
+
+			for e in backf.child.dyn { delete(e) }
+			delete(backf.child.dyn)
+
+			for k in backf.child.mappy { delete(k) }
+			delete(backf.child.mappy)
+
+			delete(backf.my_bytes)
+
+			big.destroy(&backf.biggest)
+			big.destroy(&backf.smallest)
+		}
+
+		ev(t, backf.str, f.str)
+		ev(t, backf.cstr, f.cstr)
+
+		// The raw cbor.Value is compared entry by entry; text values are pointers, so they are
+		// dereferenced and compared by content instead of by address.
+		#partial switch v in backf.value {
+		case ^cbor.Map:
+			for entry, i in v {
+				fm := f.value.(^cbor.Map)
+				ev(t, entry.key, fm[i].key)
+
+				if str, is_str := entry.value.(^cbor.Text); is_str {
+					ev(t, str^, fm[i].value.(^cbor.Text)^)
+				} else {
+					ev(t, entry.value, fm[i].value)
+				}
+			}
+
+		case: testing.error(t, v)
+		}
+
+		// NOTE(review): `pos` is set on the fixture but is not compared in the list below.
+		ev(t, backf.neg, f.neg)
+		ev(t, backf.iamint, f.iamint)
+		ev(t, backf.base64, f.base64)
+		ev(t, backf.renamed, f.renamed)
+		ev(t, backf.now, f.now)
+		ev(t, backf.nowie, f.nowie)
+		for e, i in f.child.dyn { ev(t, backf.child.dyn[i], e) }
+		for key, value in f.child.mappy { ev(t, backf.child.mappy[key], value) }
+		ev(t, backf.child.my_integers, f.child.my_integers)
+		ev(t, len(backf.my_bytes), 0)
+		ev(t, len(backf.my_bytes), len(f.my_bytes))
+		ev(t, backf.ennie, f.ennie)
+		ev(t, backf.ennieb, f.ennieb)
+		ev(t, backf.quat, f.quat)
+		ev(t, backf.comp, f.comp)
+		ev(t, backf.important, f.important)
+		ev(t, backf.no, nil)
+		ev(t, backf.nos, nil)
+		ev(t, backf.yes, f.yes)
+		ev(t, backf.biggie, f.biggie)
+		ev(t, backf.smallie, f.smallie)
+		ev(t, backf.onetwenty, f.onetwenty)
+		ev(t, backf.small_onetwenty, f.small_onetwenty)
+
+		// big.Int values are compared through big.equals rather than expect_value.
+		s_equals, s_err := big.equals(&backf.smallest, &f.smallest)
+		ev(t, s_err, nil)
+		if !s_equals {
+			testing.errorf(t, "smallest: %v does not equal %v", big.itoa(&backf.smallest), big.itoa(&f.smallest))
+		}
+
+		b_equals, b_err := big.equals(&backf.biggest, &f.biggest)
+		ev(t, b_err, nil)
+		if !b_equals {
+			testing.errorf(t, "biggest: %v does not equal %v", big.itoa(&backf.biggest), big.itoa(&f.biggest))
+		}
+	}
+
+	// Anything still present in the tracker at this point was not freed by the block above.
+	for _, leak in tracker.allocation_map {
+		testing.errorf(t, "%v leaked %m\n", leak.location, leak.size)
+	}
+
+	for bad_free in tracker.bad_free_array {
+		testing.errorf(t, "%v allocation %p was freed badly\n", bad_free.location, bad_free.memory)
+	}
+}
+
+// test_decode_unsigned decodes fixed byte sequences and checks both the diagnostic text and
+// that the decoded value is the expected unsigned integer variant.
+// NOTE(review): the vectors in this file look like the RFC 8949 Appendix A examples — confirm.
+@(test)
+test_decode_unsigned :: proc(t: ^testing.T) {
+	expect_decoding(t, "\x00", "0", u8)
+	expect_decoding(t, "\x01", "1", u8)
+	expect_decoding(t, "\x0a", "10", u8)
+	expect_decoding(t, "\x17", "23", u8)
+	expect_decoding(t, "\x18\x18", "24", u8)
+	expect_decoding(t, "\x18\x19", "25", u8)
+	expect_decoding(t, "\x18\x64", "100", u8)
+	expect_decoding(t, "\x19\x03\xe8", "1000", u16)
+	expect_decoding(t, "\x1a\x00\x0f\x42\x40", "1000000", u32) // Million.
+	expect_decoding(t, "\x1b\x00\x00\x00\xe8\xd4\xa5\x10\x00", "1000000000000", u64) // Trillion.
+	expect_decoding(t, "\x1b\xff\xff\xff\xff\xff\xff\xff\xff", "18446744073709551615", u64) // max(u64).
+}
+
+// test_encode_unsigned is the encoding counterpart of test_decode_unsigned: same values, same bytes.
+@(test)
+test_encode_unsigned :: proc(t: ^testing.T) {
+	expect_encoding(t, u8(0), "\x00")
+	expect_encoding(t, u8(1), "\x01")
+	expect_encoding(t, u8(10), "\x0a")
+	expect_encoding(t, u8(23), "\x17")
+	expect_encoding(t, u8(24), "\x18\x18")
+	expect_encoding(t, u8(25), "\x18\x19")
+	expect_encoding(t, u8(100), "\x18\x64")
+	expect_encoding(t, u16(1000), "\x19\x03\xe8")
+	expect_encoding(t, u32(1000000), "\x1a\x00\x0f\x42\x40") // Million.
+	expect_encoding(t, u64(1000000000000), "\x1b\x00\x00\x00\xe8\xd4\xa5\x10\x00") // Trillion.
+	expect_encoding(t, u64(18446744073709551615), "\x1b\xff\xff\xff\xff\xff\xff\xff\xff") // max(u64).
+}
+
+// test_decode_negative decodes negative integers and checks they arrive as cbor.Negative_* variants.
+@(test)
+test_decode_negative :: proc(t: ^testing.T) {
+	expect_decoding(t, "\x20", "-1", cbor.Negative_U8)
+	expect_decoding(t, "\x29", "-10", cbor.Negative_U8)
+	expect_decoding(t, "\x38\x63", "-100", cbor.Negative_U8)
+	expect_decoding(t, "\x39\x03\xe7", "-1000", cbor.Negative_U16)
+
+	// Negative max(u64).
+	expect_decoding(t, "\x3b\xff\xff\xff\xff\xff\xff\xff\xff", "-18446744073709551616", cbor.Negative_U64)
+}
+
+// test_encode_negative encodes cbor.Negative_* values; per the paired decode test,
+// a stored n corresponds to the integer -1 - n (e.g. Negative_U8(9) is -10).
+@(test)
+test_encode_negative :: proc(t: ^testing.T) {
+	expect_encoding(t, cbor.Negative_U8(0), "\x20")
+	expect_encoding(t, cbor.Negative_U8(9), "\x29")
+	expect_encoding(t, cbor.Negative_U8(99), "\x38\x63")
+	expect_encoding(t, cbor.Negative_U16(999), "\x39\x03\xe7")
+
+	// Negative max(u64).
+	expect_encoding(t, cbor.Negative_U64(18446744073709551615), "\x3b\xff\xff\xff\xff\xff\xff\xff\xff")
+}
+
+// test_decode_simples decodes booleans, nil, undefined and numbered simple values.
+@(test)
+test_decode_simples :: proc(t: ^testing.T) {
+	expect_decoding(t, "\xf4", "false", bool)
+	expect_decoding(t, "\xf5", "true", bool)
+	expect_decoding(t, "\xf6", "nil", cbor.Nil)
+	expect_decoding(t, "\xf7", "undefined", cbor.Undefined)
+
+	expect_decoding(t, "\xf0", "simple(16)", cbor.Simple)
+	expect_decoding(t, "\xf8\xff", "simple(255)", cbor.Atom)
+}
+
+// test_encode_simples is the encoding counterpart of test_decode_simples.
+@(test)
+test_encode_simples :: proc(t: ^testing.T) {
+	expect_encoding(t, bool(false), "\xf4")
+	expect_encoding(t, bool(true), "\xf5")
+	expect_encoding(t, cbor.Nil{}, "\xf6") // default value for a distinct rawptr, in this case Nil.
+	expect_encoding(t, cbor.Undefined{}, "\xf7") // default value for a distinct rawptr, in this case Undefined.
+
+	expect_encoding(t, cbor.Simple(16), "\xf0") // simple(16)
+	expect_encoding(t, cbor.Simple(255), "\xf8\xff") // simple(255)
+}
+
+// test_decode_floats decodes f16/f32/f64 payloads. Finite values are compared numerically via
+// expect_float; infinities and NaN are compared through their diagnostic text via expect_decoding.
+@(test)
+test_decode_floats :: proc(t: ^testing.T) {
+	expect_float(t, "\xf9\x00\x00", f16(0.0))
+	expect_float(t, "\xf9\x80\x00", f16(-0.0))
+	expect_float(t, "\xf9\x3c\x00", f16(1.0))
+	expect_float(t, "\xfb\x3f\xf1\x99\x99\x99\x99\x99\x9a", f64(1.1))
+	expect_float(t, "\xf9\x3e\x00", f16(1.5))
+	expect_float(t, "\xf9\x7b\xff", f16(65504.0))
+	expect_float(t, "\xfa\x47\xc3\x50\x00", f32(100000.0))
+	expect_float(t, "\xfa\x7f\x7f\xff\xff", f32(3.4028234663852886e+38))
+	expect_float(t, "\xfb\x7e\x37\xe4\x3c\x88\x00\x75\x9c", f64(1.0e+300))
+	expect_float(t, "\xf9\x00\x01", f16(5.960464477539063e-8))
+	expect_float(t, "\xf9\x04\x00", f16(0.00006103515625))
+	expect_float(t, "\xf9\xc4\x00", f16(-4.0))
+	expect_float(t, "\xfb\xc0\x10\x66\x66\x66\x66\x66\x66", f64(-4.1))
+	expect_decoding(t, "\xf9\x7c\x00", "+Inf", f16)
+	expect_decoding(t, "\xf9\x7e\x00", "NaN", f16)
+	expect_decoding(t, "\xf9\xfc\x00", "-Inf", f16)
+	expect_decoding(t, "\xfa\x7f\x80\x00\x00", "+Inf", f32)
+	expect_decoding(t, "\xfa\x7f\xc0\x00\x00", "NaN", f32)
+	expect_decoding(t, "\xfa\xff\x80\x00\x00", "-Inf", f32)
+	expect_decoding(t, "\xfb\x7f\xf0\x00\x00\x00\x00\x00\x00", "+Inf", f64)
+	expect_decoding(t, "\xfb\x7f\xf8\x00\x00\x00\x00\x00\x00", "NaN", f64)
+	expect_decoding(t, "\xfb\xff\xf0\x00\x00\x00\x00\x00\x00", "-Inf", f64)
+}
+
+// test_encode_floats encodes the finite values from test_decode_floats and checks the exact bytes.
+@(test)
+test_encode_floats :: proc(t: ^testing.T) {
+	expect_encoding(t, f16(0.0), "\xf9\x00\x00")
+	expect_encoding(t, f16(-0.0), "\xf9\x80\x00")
+	expect_encoding(t, f16(1.0), "\xf9\x3c\x00")
+	expect_encoding(t, f64(1.1), "\xfb\x3f\xf1\x99\x99\x99\x99\x99\x9a")
+	expect_encoding(t, f16(1.5), "\xf9\x3e\x00")
+	expect_encoding(t, f16(65504.0), "\xf9\x7b\xff")
+	expect_encoding(t, f32(100000.0), "\xfa\x47\xc3\x50\x00")
+	expect_encoding(t, f32(3.4028234663852886e+38), "\xfa\x7f\x7f\xff\xff")
+	expect_encoding(t, f64(1.0e+300), "\xfb\x7e\x37\xe4\x3c\x88\x00\x75\x9c")
+	expect_encoding(t, f16(5.960464477539063e-8), "\xf9\x00\x01")
+	expect_encoding(t, f16(0.00006103515625), "\xf9\x04\x00")
+	expect_encoding(t, f16(-4.0), "\xf9\xc4\x00")
+	expect_encoding(t, f64(-4.1), "\xfb\xc0\x10\x66\x66\x66\x66\x66\x66")
+}
+
+// test_decode_bytes decodes definite and indefinite-length byte strings.
+@(test)
+test_decode_bytes :: proc(t: ^testing.T) {
+	expect_decoding(t, "\x40", "h''", ^cbor.Bytes)
+	expect_decoding(t, "\x44\x01\x02\x03\x04", "h'1234'", ^cbor.Bytes)
+
+	// Indefinite lengths
+
+	expect_decoding(t, "\x5f\x42\x01\x02\x43\x03\x04\x05\xff", "h'12345'", ^cbor.Bytes)
+}
+
+// test_encode_bytes encodes definite byte strings, and two chunks as one indefinite-length byte string.
+@(test)
+test_encode_bytes :: proc(t: ^testing.T) {
+	expect_encoding(t, &cbor.Bytes{}, "\x40")
+	expect_encoding(t, &cbor.Bytes{1, 2, 3, 4}, "\x44\x01\x02\x03\x04")
+
+	// Indefinite lengths
+
+	expect_streamed_encoding(t, "\x5f\x42\x01\x02\x43\x03\x04\x05\xff", &cbor.Bytes{1, 2}, &cbor.Bytes{3, 4, 5})
+}
+
+// test_decode_strings decodes definite and indefinite-length text strings, including
+// multi-byte UTF-8 content and characters that are printed unescaped in the diagnostic text.
+@(test)
+test_decode_strings :: proc(t: ^testing.T) {
+	expect_decoding(t, "\x60", `""`, ^cbor.Text)
+	expect_decoding(t, "\x61\x61", `"a"`, ^cbor.Text)
+	expect_decoding(t, "\x64\x49\x45\x54\x46", `"IETF"`, ^cbor.Text)
+	expect_decoding(t, "\x62\x22\x5c", `""\"`, ^cbor.Text)
+	expect_decoding(t, "\x62\xc3\xbc", `"ü"`, ^cbor.Text)
+	expect_decoding(t, "\x63\xe6\xb0\xb4", `"水"`, ^cbor.Text)
+	expect_decoding(t, "\x64\xf0\x90\x85\x91", `"𐅑"`, ^cbor.Text)
+
+	// Indefinite lengths
+
+	expect_decoding(t, "\x7f\x65\x73\x74\x72\x65\x61\x64\x6d\x69\x6e\x67\xff", `"streaming"`, ^cbor.Text)
+}
+
+// test_encode_strings encodes the strings from test_decode_strings. Text values are passed by
+// pointer, so each literal is first bound to a local variable whose address can be taken.
+@(test)
+test_encode_strings :: proc(t: ^testing.T) {
+	expect_encoding(t, &cbor.Text{}, "\x60")
+
+	a := "a"
+	expect_encoding(t, &a, "\x61\x61")
+
+	b := "IETF"
+	expect_encoding(t, &b, "\x64\x49\x45\x54\x46")
+
+	c := "\"\\"
+	expect_encoding(t, &c, "\x62\x22\x5c")
+
+	d := "ü"
+	expect_encoding(t, &d, "\x62\xc3\xbc")
+
+	e := "水"
+	expect_encoding(t, &e, "\x63\xe6\xb0\xb4")
+
+	f := "𐅑"
+	expect_encoding(t, &f, "\x64\xf0\x90\x85\x91")
+
+	// Indefinite lengths
+
+	sa := "strea"
+	sb := "ming"
+	expect_streamed_encoding(t, "\x7f\x65\x73\x74\x72\x65\x61\x64\x6d\x69\x6e\x67\xff", &sa, &sb)
+}
+
+// test_decode_lists decodes arrays: empty, flat, nested, 25 elements, and mixed with a map,
+// in both definite and indefinite-length forms (including definite/indefinite nested in each other).
+@(test)
+test_decode_lists :: proc(t: ^testing.T) {
+	expect_decoding(t, "\x80", "[]", ^cbor.Array)
+	expect_decoding(t, "\x83\x01\x02\x03", "[1, 2, 3]", ^cbor.Array)
+	expect_decoding(t, "\x83\x01\x82\x02\x03\x82\x04\x05", "[1, [2, 3], [4, 5]]", ^cbor.Array)
+	expect_decoding(t, "\x98\x19\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x18\x18\x19", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]", ^cbor.Array)
+	expect_decoding(t, "\x82\x61\x61\xa1\x61\x62\x61\x63", `["a", {"b": "c"}]`, ^cbor.Array)
+
+	// Indefinite lengths
+
+	expect_decoding(t, "\x9f\xff", "[]", ^cbor.Array)
+	expect_decoding(t, "\x9f\x01\x82\x02\x03\x9f\x04\x05\xff\xff", "[1, [2, 3], [4, 5]]", ^cbor.Array)
+	expect_decoding(t, "\x9f\x01\x82\x02\x03\x82\x04\x05\xff", "[1, [2, 3], [4, 5]]", ^cbor.Array)
+	expect_decoding(t, "\x83\x01\x82\x02\x03\x9f\x04\x05\xff", "[1, [2, 3], [4, 5]]", ^cbor.Array)
+	expect_decoding(t, "\x83\x01\x9f\x02\x03\xff\x82\x04\x05", "[1, [2, 3], [4, 5]]", ^cbor.Array)
+	expect_decoding(t, "\x9f\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x18\x18\x19\xff", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]", ^cbor.Array)
+	expect_decoding(t, "\x82\x61\x61\xbf\x61\x62\x61\x63\xff", `["a", {"b": "c"}]`, ^cbor.Array)
+}
+
+// test_encode_lists encodes arrays in definite form through expect_encoding, and in
+// indefinite form either through expect_streamed_encoding or by driving the stream API by hand
+// where indefinite containers are nested inside other containers.
+@(test)
+test_encode_lists :: proc(t: ^testing.T) {
+	expect_encoding(t, &cbor.Array{}, "\x80")
+	expect_encoding(t, &cbor.Array{u8(1), u8(2), u8(3)}, "\x83\x01\x02\x03")
+	expect_encoding(t, &cbor.Array{u8(1), &cbor.Array{u8(2), u8(3)}, &cbor.Array{u8(4), u8(5)}}, "\x83\x01\x82\x02\x03\x82\x04\x05")
+	expect_encoding(t, &cbor.Array{u8(1), u8(2), u8(3), u8(4), u8(5), u8(6), u8(7), u8(8), u8(9), u8(10), u8(11), u8(12), u8(13), u8(14), u8(15), u8(16), u8(17), u8(18), u8(19), u8(20), u8(21), u8(22), u8(23), u8(24), u8(25)}, "\x98\x19\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x18\x18\x19")
+
+	{
+		a := "a"
+		b := "b"
+		c := "c"
+		expect_encoding(t, &cbor.Array{&a, &cbor.Map{{&b, &c}}}, "\x82\x61\x61\xa1\x61\x62\x61\x63")
+	}
+
+	// Indefinite lengths
+
+	expect_streamed_encoding(t, "\x9f\xff", &cbor.Array{})
+
+	// Built by hand: an indefinite array holding 1, a definite [2, 3] and an inner indefinite [4, 5].
+	{
+		bytes.buffer_reset(&buf)
+
+		err: cbor.Encode_Error
+		err = cbor.encode_stream_begin(stream, .Array)
+		testing.expect_value(t, err, nil)
+
+		{
+			err = cbor.encode_stream_array_item(encoder, u8(1))
+			testing.expect_value(t, err, nil)
+
+			err = cbor.encode_stream_array_item(encoder, &cbor.Array{u8(2), u8(3)})
+			testing.expect_value(t, err, nil)
+
+			err = cbor.encode_stream_begin(stream, .Array)
+			testing.expect_value(t, err, nil)
+
+			{
+				err = cbor.encode_stream_array_item(encoder, u8(4))
+				testing.expect_value(t, err, nil)
+
+				err = cbor.encode_stream_array_item(encoder, u8(5))
+				testing.expect_value(t, err, nil)
+			}
+
+			err = cbor.encode_stream_end(stream)
+			testing.expect_value(t, err, nil)
+		}
+
+		err = cbor.encode_stream_end(stream)
+		testing.expect_value(t, err, nil)
+
+		testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)string("\x9f\x01\x82\x02\x03\x9f\x04\x05\xff\xff")))
+	}
+
+	// Built by hand: a definite 2-element array header written directly, followed by "a"
+	// and an indefinite map {"b": "c"}.
+	{
+		bytes.buffer_reset(&buf)
+
+		err: cbor.Encode_Error
+		err = cbor._encode_u8(stream, 2, .Array)
+		testing.expect_value(t, err, nil)
+
+		a := "a"
+		err = cbor.encode(encoder, &a)
+		testing.expect_value(t, err, nil)
+
+		{
+			err = cbor.encode_stream_begin(stream, .Map)
+			testing.expect_value(t, err, nil)
+
+			b := "b"
+			c := "c"
+			err = cbor.encode_stream_map_entry(encoder, &b, &c)
+			testing.expect_value(t, err, nil)
+
+			err = cbor.encode_stream_end(stream)
+			testing.expect_value(t, err, nil)
+		}
+
+		testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)string("\x82\x61\x61\xbf\x61\x62\x61\x63\xff")))
+	}
+}
+
+// test_decode_maps decodes maps in definite and indefinite-length form and checks the
+// diagnostic text and that the decoded value is a ^cbor.Map.
+@(test)
+test_decode_maps :: proc(t: ^testing.T) {
+	expect_decoding(t, "\xa0", "{}", ^cbor.Map)
+	expect_decoding(t, "\xa2\x01\x02\x03\x04", "{1: 2, 3: 4}", ^cbor.Map)
+	expect_decoding(t, "\xa2\x61\x61\x01\x61\x62\x82\x02\x03", `{"a": 1, "b": [2, 3]}`, ^cbor.Map)
+	expect_decoding(t, "\xa5\x61\x61\x61\x41\x61\x62\x61\x42\x61\x63\x61\x43\x61\x64\x61\x44\x61\x65\x61\x45", `{"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}`, ^cbor.Map)
+
+	// Indefinite lengths
+
+	expect_decoding(t, "\xbf\x61\x61\x01\x61\x62\x9f\x02\x03\xff\xff", `{"a": 1, "b": [2, 3]}`, ^cbor.Map)
+	expect_decoding(t, "\xbf\x63\x46\x75\x6e\xf5\x63\x41\x6d\x74\x21\xff", `{"Fun": true, "Amt": -2}`, ^cbor.Map)
+}
+
+// test_encode_maps encodes maps in definite form, and one map as an indefinite-length stream.
+@(test)
+test_encode_maps :: proc(t: ^testing.T) {
+	expect_encoding(t, &cbor.Map{}, "\xa0")
+	expect_encoding(t, &cbor.Map{{u8(1), u8(2)}, {u8(3), u8(4)}}, "\xa2\x01\x02\x03\x04")
+
+	a := "a"
+	b := "b"
+	// NOTE: also tests the deterministic nature because it has to swap/sort the entries.
+	expect_encoding(t, &cbor.Map{{&b, &cbor.Array{u8(2), u8(3)}}, {&a, u8(1)}}, "\xa2\x61\x61\x01\x61\x62\x82\x02\x03")
+
+	fun := "Fun"
+	amt := "Amt"
+	expect_streamed_encoding(t, "\xbf\x63\x46\x75\x6e\xf5\x63\x41\x6d\x74\x21\xff", &cbor.Map{{&fun, true}, {&amt, cbor.Negative_U8(1)}})
+}
+
+// test_decode_tags decodes tagged values and checks both the tag number and the diagnostic text.
+@(test)
+test_decode_tags :: proc(t: ^testing.T) {
+	// Tag number 2 (unsigned bignumber), value bytes, max(u64) + 1.
+	expect_tag(t, "\xc2\x49\x01\x00\x00\x00\x00\x00\x00\x00\x00", cbor.TAG_UNSIGNED_BIG_NR, "2(h'100000000')")
+
+	// Tag number 3 (negative bignumber), value bytes, negative max(u64) - 1.
+	expect_tag(t, "\xc3\x49\x01\x00\x00\x00\x00\x00\x00\x00\x00", cbor.TAG_NEGATIVE_BIG_NR, "3(h'100000000')")
+
+	expect_tag(t, "\xc1\x1a\x51\x4b\x67\xb0", cbor.TAG_EPOCH_TIME_NR, "1(1363896240)")
+	expect_tag(t, "\xc1\xfb\x41\xd4\x52\xd9\xec\x20\x00\x00", cbor.TAG_EPOCH_TIME_NR, "1(1363896240.5000000000000000)")
+	expect_tag(t, "\xd8\x18\x45\x64\x49\x45\x54\x46", cbor.TAG_CBOR_NR, "24(h'6449455446')")
+}
+
+// test_encode_tags encodes explicit cbor.Tag values and checks the exact bytes.
+@(test)
+test_encode_tags :: proc(t: ^testing.T) {
+	expect_encoding(t, &cbor.Tag{cbor.TAG_UNSIGNED_BIG_NR, &cbor.Bytes{1, 0, 0, 0, 0, 0, 0, 0, 0}}, "\xc2\x49\x01\x00\x00\x00\x00\x00\x00\x00\x00")
+	expect_encoding(t, &cbor.Tag{cbor.TAG_EPOCH_TIME_NR, u32(1363896240)}, "\xc1\x1a\x51\x4b\x67\xb0")
+	expect_encoding(t, &cbor.Tag{cbor.TAG_EPOCH_TIME_NR, f64(1363896240.500)}, "\xc1\xfb\x41\xd4\x52\xd9\xec\x20\x00\x00")
+}
+
+// Helpers
+
+// Shared scratch state for every helper below: one buffer, a stream over it, and an encoder
+// writing to that stream with the fully deterministic flags. Each helper resets `buf` first.
+buf: bytes.Buffer
+stream := bytes.buffer_to_stream(&buf)
+encoder := cbor.Encoder{cbor.ENCODE_FULLY_DETERMINISTIC, stream}
+
+// expect_decoding decodes `encoded` and expects no error, a result whose union variant is
+// `type`, and a diagnostic rendering (with padding disabled) equal to `decoded`.
+// `loc` is forwarded to every expectation so failures point at the calling test line.
+expect_decoding :: proc(t: ^testing.T, encoded: string, decoded: string, type: typeid, loc := #caller_location) {
+	bytes.buffer_reset(&buf)
+	bytes.buffer_write_string(&buf, encoded)
+
+	res, err := cbor.decode(stream)
+	defer cbor.destroy(res)
+
+	testing.expect_value(t, reflect.union_variant_typeid(res), type, loc)
+	testing.expect_value(t, err, nil, loc)
+
+	str := cbor.diagnose(res, padding=-1)
+	defer delete(str)
+
+	testing.expect_value(t, str, decoded, loc)
+}
+
+// expect_tag decodes `encoded` and expects a ^cbor.Tag whose number is `nr` and whose
+// diagnostic rendering (with padding disabled) equals `value_decoded`.
+// Any other decoded variant is reported as an error at the caller's location.
+expect_tag :: proc(t: ^testing.T, encoded: string, nr: cbor.Tag_Number, value_decoded: string, loc := #caller_location) {
+	bytes.buffer_reset(&buf)
+	bytes.buffer_write_string(&buf, encoded)
+
+	res, err := cbor.decode(stream)
+	defer cbor.destroy(res)
+
+	testing.expect_value(t, err, nil, loc)
+
+	if tag, is_tag := res.(^cbor.Tag); is_tag {
+		testing.expect_value(t, tag.number, nr, loc)
+
+		str := cbor.diagnose(tag, padding=-1)
+		defer delete(str)
+
+		testing.expect_value(t, str, value_decoded, loc)
+	} else {
+		// `loc` must be passed by name: positionally it would be consumed as a format argument.
+		testing.errorf(t, "Value %#v is not a tag", res, loc=loc)
+	}
+}
+
+// expect_float decodes `encoded` and expects a float of exactly type T equal to `expected`.
+// The `when` branches keep the comparison well-typed for each width; reaching a branch whose
+// width differs from T is treated as unreachable.
+expect_float :: proc(t: ^testing.T, encoded: string, expected: $T, loc := #caller_location) where intrinsics.type_is_float(T) {
+	bytes.buffer_reset(&buf)
+	bytes.buffer_write_string(&buf, encoded)
+
+	res, err := cbor.decode(stream)
+	defer cbor.destroy(res)
+
+	testing.expect_value(t, reflect.union_variant_typeid(res), typeid_of(T), loc)
+	testing.expect_value(t, err, nil, loc)
+
+	#partial switch r in res {
+	case f16:
+		when T == f16 { testing.expect_value(t, res, expected, loc) } else { unreachable() }
+	case f32:
+		when T == f32 { testing.expect_value(t, res, expected, loc) } else { unreachable() }
+	case f64:
+		when T == f64 { testing.expect_value(t, res, expected, loc) } else { unreachable() }
+	case:
+		unreachable()
+	}
+}
+
+// expect_encoding encodes `val` with the shared deterministic encoder and expects the
+// produced bytes to equal `encoded`. Both sides are compared through their printed form.
+expect_encoding :: proc(t: ^testing.T, val: cbor.Value, encoded: string, loc := #caller_location) {
+	bytes.buffer_reset(&buf)
+
+	err := cbor.encode(encoder, val)
+	testing.expect_value(t, err, nil, loc)
+	testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)encoded), loc)
+}
+
+// expect_streamed_encoding writes `values` as a single indefinite-length item and expects the
+// produced bytes to equal `encoded`.
+// The container is opened once, on the first value; every value then contributes its content
+// (a chunk for Bytes/Text, its items for Array, its entries for Map), and one break closes it.
+// Values of any other variant are reported as an error at the caller's location.
+expect_streamed_encoding :: proc(t: ^testing.T, encoded: string, values: ..cbor.Value, loc := #caller_location) {
+	bytes.buffer_reset(&buf)
+
+	for value, i in values {
+		err: cbor.Encode_Error  // Result of opening the container (first value only).
+		err2: cbor.Encode_Error // Result of writing this value's content.
+		#partial switch v in value {
+		case ^cbor.Bytes:
+			if i == 0 { err = cbor.encode_stream_begin(stream, .Bytes) }
+			err2 = cbor._encode_bytes(encoder, v^)
+		case ^cbor.Text:
+			if i == 0 { err = cbor.encode_stream_begin(stream, .Text) }
+			err2 = cbor._encode_text(encoder, v^)
+		case ^cbor.Array:
+			if i == 0 { err = cbor.encode_stream_begin(stream, .Array) }
+			for item in v {
+				err2 = cbor.encode_stream_array_item(encoder, item)
+				if err2 != nil { break }
+			}
+		case ^cbor.Map:
+			// Open the map only once, like the other cases; a header per value would leave
+			// the stream unbalanced because only a single break is written below.
+			if i == 0 { err = cbor.encode_stream_begin(stream, .Map) }
+			for item in v {
+				err2 = cbor.encode_stream_map_entry(encoder, item.key, item.value)
+				if err2 != nil { break }
+			}
+		case:
+			testing.errorf(t, "%v does not support streamed encoding", reflect.union_variant_typeid(value), loc=loc)
+		}
+
+		testing.expect_value(t, err, nil, loc)
+		testing.expect_value(t, err2, nil, loc)
+	}
+
+	err := cbor.encode_stream_end(stream)
+	testing.expect_value(t, err, nil, loc)
+
+	testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)encoded), loc)
+}