package encoding_json

import "core:mem"
import "core:unicode/utf8"
import "core:unicode/utf16"
import "core:strconv"

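// Parser holds the tokenizer and token look-ahead state for a single parse.
// `allocator` is used for every value produced by the parse_* procedures;
// `parse_integers` is forwarded to the tokenizer and appears to control
// whether integer literals are tokenized as .Integer rather than .Float
// (an inference from how this file uses it, not a guarantee).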
Parser :: struct {
	tok: Tokenizer,
	prev_token: Token,
	curr_token: Token,
	spec: Specification,
	allocator: mem.Allocator,
	parse_integers: bool,
}

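// make_parser and make_parser_from_string build a Parser over the given input,
// record the allocator that parsed values will be allocated from, and prime
// `curr_token` by advancing the tokenizer once.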
make_parser :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser {
	return make_parser_from_string(string(data), spec, parse_integers, allocator)
}
make_parser_from_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser {
	p: Parser
	p.tok = make_tokenizer(data, spec, parse_integers)
	p.spec = spec
	p.allocator = allocator
	assert(p.allocator.procedure != nil)
	advance_token(&p)
	return p
}


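// parse and parse_string turn raw JSON/JSON5/SJSON text into a Value allocated
// from `allocator`. A minimal usage sketch (the input literal and variable
// names below are illustrative, not taken from this package):
//
//	value, err := parse_string(`{"name": "Odin"}`)
//	if err != nil {
//		// handle the parse error
//	}
//	defer destroy_value(value)
//	obj := value.(Object)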
parse :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator, loc := #caller_location) -> (Value, Error) {
	return parse_string(string(data), spec, parse_integers, allocator, loc)
}

parse_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator, loc := #caller_location) -> (Value, Error) {
	context.allocator = allocator
	p := make_parser_from_string(data, spec, parse_integers, allocator)

	switch p.spec {
	case .JSON:
		return parse_object(&p, loc)
	case .JSON5:
		return parse_value(&p, loc)
	case .SJSON:
		#partial switch p.curr_token.kind {
		case .Ident, .String:
			return parse_object_body(&p, .EOF, loc)
		}
		return parse_value(&p, loc)
	}
	return parse_object(&p, loc)
}

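// Small token helpers: token_end_pos computes the position just past a token,
// advance_token steps the tokenizer while keeping prev_token/curr_token in
// sync, and allow_token/expect_token consume a token of a given kind
// optionally or mandatorily.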
token_end_pos :: proc(tok: Token) -> Pos {
	end := tok.pos
	end.offset += len(tok.text)
	return end
}

advance_token :: proc(p: ^Parser) -> (Token, Error) {
	err: Error
	p.prev_token = p.curr_token
	p.curr_token, err = get_token(&p.tok)
	return p.prev_token, err
}


allow_token :: proc(p: ^Parser, kind: Token_Kind) -> bool {
	if p.curr_token.kind == kind {
		advance_token(p)
		return true
	}
	return false
}

expect_token :: proc(p: ^Parser, kind: Token_Kind) -> Error {
	prev := p.curr_token
	advance_token(p)
	if prev.kind == kind {
		return nil
	}
	return .Unexpected_Token
}


parse_colon :: proc(p: ^Parser) -> (err: Error) {
	colon_err := expect_token(p, .Colon)
	if colon_err == nil {
		return nil
	}
	return .Expected_Colon_After_Key
}

parse_comma :: proc(p: ^Parser) -> (do_break: bool) {
	switch p.spec {
	case .JSON5, .MJSON:
		if allow_token(p, .Comma) {
			return false
		}
		return false
	case .JSON:
		if !allow_token(p, .Comma) {
			return true
		}
	}
	return false
}

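// parse_value parses a single value: null, booleans, numbers, strings, objects,
// and arrays. Under the MJSON specification a bare identifier is also accepted
// and returned as a cloned string, and for any specification other than strict
// JSON the Infinity/NaN literals are accepted and materialized as the
// corresponding f64 bit patterns.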
parse_value :: proc(p: ^Parser, loc := #caller_location) -> (value: Value, err: Error) {
	err = .None
	token := p.curr_token
	#partial switch token.kind {
	case .Null:
		advance_token(p)
		value = Null{}
		return
	case .False:
		advance_token(p)
		value = Boolean(false)
		return
	case .True:
		advance_token(p)
		value = Boolean(true)
		return

	case .Integer:
		advance_token(p)
		i, _ := strconv.parse_i64(token.text)
		value = Integer(i)
		return
	case .Float:
		advance_token(p)
		f, _ := strconv.parse_f64(token.text)
		value = Float(f)
		return

	case .Ident:
		if p.spec == .MJSON {
			advance_token(p)
			return clone_string(token.text, p.allocator, loc)
		}

	case .String:
		advance_token(p)
		return unquote_string(token, p.spec, p.allocator, loc)

	case .Open_Brace:
		return parse_object(p, loc)

	case .Open_Bracket:
		return parse_array(p, loc)

	case:
		if p.spec != .JSON {
			switch {
			case allow_token(p, .Infinity):
				inf: u64 = 0x7ff0000000000000
				if token.text[0] == '-' {
					inf = 0xfff0000000000000
				}
				value = transmute(f64)inf
				return
			case allow_token(p, .NaN):
				nan: u64 = 0x7ff7ffffffffffff
				if token.text[0] == '-' {
					nan = 0xfff7ffffffffffff
				}
				value = transmute(f64)nan
				return
			}
		}
	}

	err = .Unexpected_Token
	advance_token(p)
	return
}

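// parse_array parses `[ ... ]`. The deferred block destroys any elements that
// were already appended if an error occurs, so a partially built array does
// not leak.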
parse_array :: proc(p: ^Parser, loc := #caller_location) -> (value: Value, err: Error) {
	err = .None
	expect_token(p, .Open_Bracket) or_return

	array: Array
	array.allocator = p.allocator
	defer if err != nil {
		for elem in array {
			destroy_value(elem, loc=loc)
		}
		delete(array, loc)
	}

	for p.curr_token.kind != .Close_Bracket {
		elem := parse_value(p, loc) or_return
		append(&array, elem, loc)

		if parse_comma(p) {
			break
		}
	}

	expect_token(p, .Close_Bracket) or_return
	value = array
	return
}

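// Allocation helpers: bytes_make maps allocator failures onto this package's
// Error values, and clone_string allocates len(s)+1 bytes so the copy is
// NUL-terminated while the returned string keeps length len(s).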
@(private)
bytes_make :: proc(size, alignment: int, allocator: mem.Allocator, loc := #caller_location) -> (bytes: []byte, err: Error) {
	b, berr := mem.alloc_bytes(size, alignment, allocator, loc)
	if berr != nil {
		if berr == .Out_Of_Memory {
			err = .Out_Of_Memory
		} else {
			err = .Invalid_Allocator
		}
	}
	bytes = b
	return
}

clone_string :: proc(s: string, allocator: mem.Allocator, loc := #caller_location) -> (str: string, err: Error) {
	n := len(s)
	b := bytes_make(n+1, 1, allocator, loc) or_return
	copy(b, s)
	if len(b) > n {
		b[n] = 0
		str = string(b[:n])
	}
	return
}

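// parse_object_key reads an object key. Specifications other than strict JSON
// may use a bare identifier as the key; strict JSON requires a quoted string.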
parse_object_key :: proc(p: ^Parser, key_allocator: mem.Allocator, loc := #caller_location) -> (key: string, err: Error) {
	tok := p.curr_token
	if p.spec != .JSON {
		if allow_token(p, .Ident) {
			return clone_string(tok.text, key_allocator, loc)
		}
	}
	if tok_err := expect_token(p, .String); tok_err != nil {
		err = .Expected_String_For_Object_Key
		return
	}
	return unquote_string(tok, p.spec, key_allocator, loc)
}

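// parse_object_body parses key/value pairs until `end_token` is reached.
// Duplicate keys fail with .Duplicate_Object_Key, and the deferred block frees
// every key and value parsed so far if an error occurs.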
parse_object_body :: proc(p: ^Parser, end_token: Token_Kind, loc := #caller_location) -> (obj: Object, err: Error) {
	obj = make(Object, allocator=p.allocator, loc=loc)

	defer if err != nil {
		for key, elem in obj {
			delete(key, p.allocator, loc)
			destroy_value(elem, loc=loc)
		}
		delete(obj, loc)
	}

	for p.curr_token.kind != end_token {
		key := parse_object_key(p, p.allocator, loc) or_return
		parse_colon(p) or_return
		elem := parse_value(p, loc) or_return

		if key in obj {
			err = .Duplicate_Object_Key
			delete(key, p.allocator, loc)
			return
		}

		// NOTE(gonz): There are code paths for which this traversal ends up
		// inserting empty key/values into the object and for those we do not
		// want to allocate anything
		if key != "" {
			reserve_error := reserve(&obj, len(obj) + 1, loc)
			if reserve_error == mem.Allocator_Error.Out_Of_Memory {
				return nil, .Out_Of_Memory
			}
			obj[key] = elem
		}

		if parse_comma(p) {
			break
		}
	}
	return obj, .None
}

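// parse_object parses a brace-delimited object by delegating to
// parse_object_body between the `{` and `}` tokens.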
parse_object :: proc(p: ^Parser, loc := #caller_location) -> (value: Value, err: Error) {
	expect_token(p, .Open_Brace) or_return
	obj := parse_object_body(p, .Close_Brace, loc) or_return
	expect_token(p, .Close_Brace) or_return
	return obj, .None
}


// IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator, loc := #caller_location) -> (value: string, err: Error) {
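	// get_u2_rune and get_u4_rune decode a `\xHH` or `\uHHHH` hex escape
	// (including the leading backslash) into a rune, returning -1 if the
	// escape is malformed or truncated.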
	get_u2_rune :: proc(s: string) -> rune {
		if len(s) < 4 || s[0] != '\\' || s[1] != 'x' {
			return -1
		}

		r: rune
		for c in s[2:4] {
			x: rune
			switch c {
			case '0'..='9': x = c - '0'
			case 'a'..='f': x = c - 'a' + 10
			case 'A'..='F': x = c - 'A' + 10
			case: return -1
			}
			r = r*16 + x
		}
		return r
	}
	get_u4_rune :: proc(s: string) -> rune {
		if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
			return -1
		}

		r: rune
		for c in s[2:6] {
			x: rune
			switch c {
			case '0'..='9': x = c - '0'
			case 'a'..='f': x = c - 'a' + 10
			case 'A'..='F': x = c - 'A' + 10
			case: return -1
			}
			r = r*16 + x
		}
		return r
	}

	if token.kind != .String {
		return "", nil
	}
	s := token.text
	if len(s) <= 2 {
		return "", nil
	}
	quote := s[0]
	if s[0] != s[len(s)-1] {
		// Invalid string
		return "", nil
	}
	s = s[1:len(s)-1]

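	// Fast path: scan for the first byte that needs any transformation
	// (an escape, the quote character, a control byte, or invalid UTF-8).
	// If none is found, the unquoted contents can simply be cloned.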
	i := 0
	for i < len(s) {
		c := s[i]
		if c == '\\' || c == quote || c < ' ' {
			break
		}
		if c < utf8.RUNE_SELF {
			i += 1
			continue
		}
		r, w := utf8.decode_rune_in_string(s[i:])
		if r == utf8.RUNE_ERROR && w == 1 {
			break
		}
		i += w
	}
	if i == len(s) {
		return clone_string(s, allocator, loc)
	}

	b := bytes_make(len(s) + 2*utf8.UTF_MAX, 1, allocator) or_return
	w := copy(b, s[0:i])

	if len(b) == 0 && allocator.data == nil {
		// `unmarshal_count_array` calls us with a nil allocator
		return string(b[:w]), nil
	}

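	// Slow path: walk the rest byte by byte, copying plain characters and
	// decoding escape sequences (including \uXXXX surrogate pairs and, outside
	// strict JSON, \0, \v, and \xHH) into the buffer.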
	loop: for i < len(s) {
		c := s[i]
		switch {
		case c == '\\':
			i += 1
			if i >= len(s) {
				break loop
			}
			switch s[i] {
			case: break loop
			case '"', '\'', '\\', '/':
				b[w] = s[i]
				i += 1
				w += 1

			case 'b':
				b[w] = '\b'
				i += 1
				w += 1
			case 'f':
				b[w] = '\f'
				i += 1
				w += 1
			case 'r':
				b[w] = '\r'
				i += 1
				w += 1
			case 't':
				b[w] = '\t'
				i += 1
				w += 1
			case 'n':
				b[w] = '\n'
				i += 1
				w += 1
			case 'u':
				i -= 1 // Include the \u in the check for sanity sake
				r := get_u4_rune(s[i:])
				if r < 0 {
					break loop
				}
				i += 6

				// If this is a surrogate pair, decode as such by taking the next rune too.
				if r >= utf8.SURROGATE_MIN && r <= utf8.SURROGATE_HIGH_MAX && len(s) > i + 2 && s[i:i+2] == "\\u" {
					r2 := get_u4_rune(s[i:])
					if r2 >= utf8.SURROGATE_LOW_MIN && r2 <= utf8.SURROGATE_MAX {
						i += 6
						r = utf16.decode_surrogate_pair(r, r2)
					}
				}

				buf, buf_width := utf8.encode_rune(r)
				copy(b[w:], buf[:buf_width])
				w += buf_width

			case '0':
				if spec != .JSON {
					b[w] = '\x00'
					i += 1
					w += 1
				} else {
					break loop
				}
			case 'v':
				if spec != .JSON {
					b[w] = '\v'
					i += 1
					w += 1
				} else {
					break loop
				}

			case 'x':
				if spec != .JSON {
					i -= 1 // Include the \x in the check for sanity sake
					r := get_u2_rune(s[i:])
					if r < 0 {
						break loop
					}
					i += 4

					buf, buf_width := utf8.encode_rune(r)
					copy(b[w:], buf[:buf_width])
					w += buf_width
				} else {
					break loop
				}
			}

		case c == quote, c < ' ':
			break loop

		case c < utf8.RUNE_SELF:
			b[w] = c
			i += 1
			w += 1

		case:
			r, width := utf8.decode_rune_in_string(s[i:])
			i += width

			buf, buf_width := utf8.encode_rune(r)
			assert(buf_width <= width)
			copy(b[w:], buf[:buf_width])
			w += buf_width
		}
	}

	return string(b[:w]), nil
}