diff --git a/core/encoding/json/marshal.odin b/core/encoding/json/marshal.odin
index 4cf9264c5..d25015ac7 100644
--- a/core/encoding/json/marshal.odin
+++ b/core/encoding/json/marshal.odin
@@ -153,7 +153,7 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
 		case complex128: r, i = f64(real(z)), f64(imag(z))
 		case: return .Unsupported_Type
 		}
-	
+
 		io.write_byte(w, '[') or_return
 		io.write_f64(w, r) or_return
 		io.write_string(w, ", ") or_return
@@ -165,8 +165,8 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
 
 	case runtime.Type_Info_String:
 		switch s in a {
-		case string: io.write_quoted_string(w, s) or_return
-		case cstring: io.write_quoted_string(w, string(s)) or_return
+		case string: io.write_quoted_string(w, s, '"', nil, true) or_return
+		case cstring: io.write_quoted_string(w, string(s), '"', nil, true) or_return
 		}
 
 	case runtime.Type_Info_Boolean:
diff --git a/core/encoding/json/parser.odin b/core/encoding/json/parser.odin
index ed36ae33b..d007e16d7 100644
--- a/core/encoding/json/parser.odin
+++ b/core/encoding/json/parser.odin
@@ -2,6 +2,7 @@ package json
 
 import "core:mem"
 import "core:unicode/utf8"
+import "core:unicode/utf16"
 import "core:strconv"
 
 Parser :: struct {
@@ -403,11 +404,19 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a
 				}
 				i += 6
 
+				// If this is a surrogate pair, decode as such by taking the next rune too.
+				if r >= utf8.SURROGATE_MIN && r <= utf8.SURROGATE_HIGH_MAX && len(s) > i + 2 && s[i:i+2] == "\\u" {
+					r2 := get_u4_rune(s[i:])
+					if r2 >= utf8.SURROGATE_LOW_MIN && r2 <= utf8.SURROGATE_MAX {
+						i += 6
+						r = utf16.decode_surrogate_pair(r, r2)
+					}
+				}
+
 				buf, buf_width := utf8.encode_rune(r)
 				copy(b[w:], buf[:buf_width])
 				w += buf_width
 
-
 			case '0':
 				if spec != .JSON {
 					b[w] = '\x00'
diff --git a/core/io/util.odin b/core/io/util.odin
index 46aa97919..cfd7d3608 100644
--- a/core/io/util.odin
+++ b/core/io/util.odin
@@ -2,6 +2,7 @@ package io
 
 import "core:strconv"
 import "core:unicode/utf8"
+import "core:unicode/utf16"
 
 read_ptr :: proc(r: Reader, p: rawptr, byte_size: int, n_read: ^int = nil) -> (n: int, err: Error) {
 	return read(r, ([^]byte)(p)[:byte_size], n_read)
@@ -146,7 +147,7 @@ write_encoded_rune :: proc(w: Writer, r: rune, write_quote := true, n_written: ^
 	return
 }
 
-write_escaped_rune :: proc(w: Writer, r: rune, quote: byte, html_safe := false, n_written: ^int = nil) -> (n: int, err: Error) {
+write_escaped_rune :: proc(w: Writer, r: rune, quote: byte, html_safe := false, n_written: ^int = nil, for_json := false) -> (n: int, err: Error) {
 	is_printable :: proc(r: rune) -> bool {
 		if r <= 0xff {
 			switch r {
@@ -163,7 +164,7 @@ write_escaped_rune :: proc(w: Writer, r: rune, quote: byte, html_safe := false,
 	defer if n_written != nil {
 		n_written^ += n
 	}
-	
+
 	if html_safe {
 		switch r {
 		case '<', '>', '&':
@@ -211,17 +212,31 @@ write_escaped_rune :: proc(w: Writer, r: rune, quote: byte, html_safe := false,
 				write_byte(w, DIGITS_LOWER[c>>uint(s) & 0xf], &n) or_return
 			}
 		case:
-			write_byte(w, '\\', &n) or_return
-			write_byte(w, 'U', &n) or_return
-			for s := 28; s >= 0; s -= 4 {
-				write_byte(w, DIGITS_LOWER[c>>uint(s) & 0xf], &n) or_return
+			if for_json {
+				// JSON only has \uXXXX escapes: emit the rune as two UTF-16 code units.
+				buf: [2]u16
+				utf16.encode(buf[:], []rune{c})
+				for bc in buf {
+					write_byte(w, '\\', &n) or_return
+					write_byte(w, 'u', &n) or_return
+					for s := 12; s >= 0; s -= 4 {
+						write_byte(w, DIGITS_LOWER[bc>>uint(s) & 0xf], &n) or_return
+					}
+				}
+			} else {
+				// `\U` is followed by eight hex digits, so start from the top nibble (shift 28).
+				write_byte(w, '\\', &n) or_return
+				write_byte(w, 'U', &n) or_return
+				for s := 28; s >= 0; s -= 4 {
+					write_byte(w, DIGITS_LOWER[c>>uint(s) & 0xf], &n) or_return
+				}
 			}
 		}
 	}
 	return
 }
 
-write_quoted_string :: proc(w: Writer, str: string, quote: byte = '"', n_written: ^int = nil) -> (n: int, err: Error) {
+write_quoted_string :: proc(w: Writer, str: string, quote: byte = '"', n_written: ^int = nil, for_json := false) -> (n: int, err: Error) {
 	defer if n_written != nil {
 		n_written^ += n
 	}
@@ -240,7 +255,7 @@ write_quoted_string :: proc(w: Writer, str: string, quote: byte = '"', n_written
 			continue
 		}
 
-		n_wrapper(write_escaped_rune(w, r, quote), &n) or_return
+		n_wrapper(write_escaped_rune(w, r, quote, false, nil, for_json), &n) or_return
 
 	}
 	write_byte(w, quote, &n) or_return
diff --git a/core/unicode/utf8/utf8.odin b/core/unicode/utf8/utf8.odin
index a0da5c5d1..15c6c3650 100644
--- a/core/unicode/utf8/utf8.odin
+++ b/core/unicode/utf8/utf8.odin
@@ -10,6 +10,11 @@ UTF_MAX :: 4
 SURROGATE_MIN :: 0xd800
 SURROGATE_MAX :: 0xdfff
 
+// A high/leading surrogate is in range SURROGATE_MIN..SURROGATE_HIGH_MAX,
+// A low/trailing surrogate is in range SURROGATE_LOW_MIN..SURROGATE_MAX.
+SURROGATE_HIGH_MAX :: 0xdbff
+SURROGATE_LOW_MIN :: 0xdc00
+
 T1 :: 0b0000_0000
 TX :: 0b1000_0000
 T2 :: 0b1100_0000
diff --git a/tests/core/encoding/json/test_core_json.odin b/tests/core/encoding/json/test_core_json.odin
index 0e6a6412f..937d1c738 100644
--- a/tests/core/encoding/json/test_core_json.odin
+++ b/tests/core/encoding/json/test_core_json.odin
@@ -32,6 +32,7 @@ main :: proc() {
 	parse_json(&t)
 	marshal_json(&t)
 	unmarshal_json(&t)
+	surrogate(&t)
 
 	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 	if TEST_fail > 0 {
@@ -344,4 +345,19 @@ unmarshal_json :: proc(t: ^testing.T) {
 	for p, i in g.products {
 		expect(t, p == original_data.products[i], "Producted unmarshaled improperly")
 	}
-}
\ No newline at end of file
+}
+
+// Marshals a string holding a rune beyond U+FFFF and unmarshals the result,
+// expecting the original string back.
+@test
+surrogate :: proc(t: ^testing.T) {
+	input := `+ + * 😃 - /`
+
+	out, err := json.marshal(input)
+	expect(t, err == nil, fmt.tprintf("Expected `json.marshal(%q)` to return a nil error, got %v", input, err))
+
+	back: string
+	uerr := json.unmarshal(out, &back)
+	expect(t, uerr == nil, fmt.tprintf("Expected `json.unmarshal(%q)` to return a nil error, got %v", string(out), uerr))
+	expect(t, back == input, fmt.tprintf("Expected `json.unmarshal(%q)` to return %q, got %q", string(out), input, back))
+}